Add inference script
Browse files
- hindi_embeddings.py +13 -1
hindi_embeddings.py
CHANGED
@@ -510,6 +510,13 @@ class HindiEmbedder:
|
|
510 |
Returns:
|
511 |
Similarity scores
|
512 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
513 |
embeddings1 = self.encode(texts1)
|
514 |
|
515 |
if texts2 is None:
|
@@ -522,10 +529,15 @@ class HindiEmbedder:
|
|
522 |
|
523 |
if len(texts1) == len(texts2):
|
524 |
# Compute pairwise similarity when the number of texts match
|
525 |
-
return np.array([
|
526 |
cosine_similarity([e1], [e2])[0][0]
|
527 |
for e1, e2 in zip(embeddings1, embeddings2)
|
528 |
])
|
|
|
|
|
|
|
|
|
|
|
529 |
else:
|
530 |
# Return full similarity matrix
|
531 |
return cosine_similarity(embeddings1, embeddings2)
|
|
|
510 |
Returns:
|
511 |
Similarity scores
|
512 |
"""
|
513 |
+
# Convert single strings to lists for consistent handling
|
514 |
+
if isinstance(texts1, str):
|
515 |
+
texts1 = [texts1]
|
516 |
+
|
517 |
+
if texts2 is not None and isinstance(texts2, str):
|
518 |
+
texts2 = [texts2]
|
519 |
+
|
520 |
embeddings1 = self.encode(texts1)
|
521 |
|
522 |
if texts2 is None:
|
|
|
529 |
|
530 |
if len(texts1) == len(texts2):
|
531 |
# Compute pairwise similarity when the number of texts match
|
532 |
+
similarities = np.array([
|
533 |
cosine_similarity([e1], [e2])[0][0]
|
534 |
for e1, e2 in zip(embeddings1, embeddings2)
|
535 |
])
|
536 |
+
|
537 |
+
# If there's just one pair, return a scalar
|
538 |
+
if len(similarities) == 1:
|
539 |
+
return similarities[0]
|
540 |
+
return similarities
|
541 |
else:
|
542 |
# Return full similarity matrix
|
543 |
return cosine_similarity(embeddings1, embeddings2)
|