KaLM-Embedding
/

KaLM-embedding-multilingual-mini-instruct-v2.5

@@ -32,7 +32,7 @@ tags:
 - Model Size: 0.5B
 - Embedding Dimension: 896
 - Max Input Tokens: 32k
-- MLR: 896 512 256 128 64
 - Attn: Bidirectional attention
 - Pooling: Mean pooling
@@ -93,20 +93,32 @@ Then you can use the model like this:
 ```python
 from sentence_transformers import SentenceTransformer
-sentences = ["This is an example sentence", "Each sentence is converted"]
-model = SentenceTransformer("{MODEL_NAME_OR_PATH}", trust_remote_code=True, model_kwargs={"torch_dtype": torch.bfloat16, "attn_implementation": "flash_attention_2"})
 model.max_seq_length = 512
 embeddings = model.encode(
-    sentences,
     normalize_embeddings=True,
-    batch_size=256,
-    show_progress_bar=True
-    )
 print(embeddings)
 ```
 We add task instructions for asymmetric tasks: retrieval, reranking, classification, and clustering.
@@ -115,22 +127,70 @@ If you want to add task instructions to the query, you can use the model like th
 ```python
 from sentence_transformers import SentenceTransformer
-sentences = ["This is an example sentence", "Each sentence is converted"]
-model = SentenceTransformer("{MODEL_NAME_OR_PATH}", trust_remote_code=True, model_kwargs={"torch_dtype": torch.bfloat16, "attn_implementation": "flash_attention_2"})
 model.max_seq_length = 512
-prompt = "Instruct: Classifying the category of french news. \n Query: "
 embeddings = model.encode(
-    sentences,
     prompt=prompt,
     normalize_embeddings=True,
-    batch_size=256,
-    show_progress_bar=True
-    )
 print(embeddings)
 ```
 ### vLLM support

 - Model Size: 0.5B
 - Embedding Dimension: 896
 - Max Input Tokens: 32k
+- MRL (Matryoshka Representation Learning) dimensions: 896, 512, 256, 128, and 64
 - Attn: Bidirectional attention
 - Pooling: Mean pooling
 ```python
 from sentence_transformers import SentenceTransformer
+import torch
+model = SentenceTransformer(
+    "KaLM-Embedding/KaLM-embedding-multilingual-mini-instruct-v2.5",
+    trust_remote_code=True,
+    model_kwargs={
+        "torch_dtype": torch.bfloat16,
+        "attn_implementation": "flash_attention_2",  # Optional
+    },
+)
 model.max_seq_length = 512
+sentences = ["This is an example sentence", "Each sentence is converted"]
 embeddings = model.encode(
+    sentences,
     normalize_embeddings=True,
+    batch_size=256,
+    show_progress_bar=True,
+)
 print(embeddings)
+'''
+[[-0.01043701 -0.02172852  0.0100708  ... -0.02807617  0.00157166
+  -0.03637695]
+ [-0.00424194  0.02966309  0.03686523 ... -0.02587891  0.01953125
+  -0.00125122]]
+'''
 ```
 We add task instructions for asymmetric tasks: retrieval, reranking, classification, and clustering.
 ```python
 from sentence_transformers import SentenceTransformer
+import torch
+model = SentenceTransformer(
+    "KaLM-Embedding/KaLM-embedding-multilingual-mini-instruct-v2.5",
+    trust_remote_code=True,
+    model_kwargs={
+        "torch_dtype": torch.bfloat16,
+        "attn_implementation": "flash_attention_2",  # Optional
+    },
+)
 model.max_seq_length = 512
+sentences = ["This is an example sentence", "Each sentence is converted"]
+prompt = "Instruct: Classifying the category of french news.\nQuery:"
 embeddings = model.encode(
+    sentences,
     prompt=prompt,
     normalize_embeddings=True,
+    batch_size=256,
+    show_progress_bar=True,
+)
 print(embeddings)
+'''
+[[-0.01867676  0.02319336  0.00280762 ... -0.02075195  0.00196838
+  -0.0703125 ]
+ [-0.0067749   0.03491211  0.01434326 ... -0.0043335   0.00509644
+  -0.04174805]]
+'''
+```
+Alternatively, use `encode_query` and `encode_document`, which automatically prepend the default prompt for queries (`"Instruct: Given a query, retrieve documents that answer the query \n Query: "`) and for documents (`""`, i.e., no prompt), respectively.
+```python
+from sentence_transformers import SentenceTransformer
+import torch
+model = SentenceTransformer(
+    "KaLM-Embedding/KaLM-embedding-multilingual-mini-instruct-v2.5",
+    trust_remote_code=True,
+    model_kwargs={
+        "torch_dtype": torch.bfloat16,
+        "attn_implementation": "flash_attention_2",  # Optional
+    },
+)
+model.max_seq_length = 512
+queries = [
+    "What is the capital of China?",
+    "Explain gravity",
+]
+documents = [
+    "The capital of China is Beijing.",
+    "Gravity is a force that attracts two bodies towards each other. It gives weight to physical objects and is responsible for the movement of planets around the sun.",
+]
+query_embeddings = model.encode_query(queries)
+document_embeddings = model.encode_document(documents)
+similarities = model.similarity(query_embeddings, document_embeddings)
+print(similarities)
+'''
+tensor([[0.9034, 0.2563],
+        [0.3153, 0.7396]])
+'''
 ```
 ### vLLM support

config_sentence_transformers.json CHANGED Viewed

@@ -5,7 +5,7 @@
     "pytorch": "2.1.2+cu121"
   },
   "prompts": {
-    "query": "",
     "document": ""
   },
   "default_prompt_name": null,

     "pytorch": "2.1.2+cu121"
   },
   "prompts": {
+    "query": "Instruct: Given a query, retrieve documents that answer the query \n Query: ",
     "document": ""
   },
   "default_prompt_name": null,