import torch
from sentence_transformers import SentenceTransformer

model_name = "openbmb/MiniCPM-Embedding-Light"
model = SentenceTransformer(model_name, trust_remote_code=True, model_kwargs={"torch_dtype": torch.float16})
# You can use flash_attention_2 for faster inference:
# model = SentenceTransformer(model_name, trust_remote_code=True, model_kwargs={"attn_implementation": "flash_attention_2", "torch_dtype": torch.float16})
queries = ["中国的首都是哪里?"]  # "What is the capital of China?"
passages = ["beijing", "shanghai"]  # 北京, 上海
INSTRUCTION = "Query: "
# Only queries are encoded with the instruction prefix; passages are encoded as-is.
embeddings_query = model.encode(queries, prompt=INSTRUCTION)
embeddings_doc = model.encode(passages)

# Relevance scores as the dot product between query and passage embeddings.
scores = embeddings_query @ embeddings_doc.T
print(scores.tolist())  # [[0.40356746315956116, 0.36183440685272217]]
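
# Optional sanity check (a sketch, not from the model card): sentence-transformers
# v3+ provides model.similarity(), which defaults to cosine similarity. If the model
# outputs L2-normalized embeddings, these values should match the dot-product scores above.
similarities = model.similarity(embeddings_query, embeddings_doc)
print(similarities.tolist())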