xin0920 committed
Commit d9b770f · 1 Parent(s): 665f6ff

First commit
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "word_embedding_dimension": 4096,
+   "pooling_mode_cls_token": false,
+   "pooling_mode_mean_tokens": true,
+   "pooling_mode_max_tokens": false,
+   "pooling_mode_mean_sqrt_len_tokens": false,
+   "pooling_mode_weightedmean_tokens": false,
+   "pooling_mode_lasttoken": false,
+   "include_prompt": false
+ }
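The pooling config above enables only `pooling_mode_mean_tokens`: the sentence embedding is the mean of the token embeddings, counting only positions where the attention mask is 1. A minimal sketch of that operation (illustrative only, not the repository's code; the real module operates on batched tensors):

```python
# Sketch of mean pooling as selected by 1_Pooling/config.json
# (pooling_mode_mean_tokens = true). Padded positions, where the
# attention mask is 0, are excluded from the average.

def mean_pool(token_embeddings, attention_mask):
    """token_embeddings: list of per-token vectors; attention_mask: list of 0/1."""
    dim = len(token_embeddings[0])
    total = [0.0] * dim
    count = 0
    for vec, mask in zip(token_embeddings, attention_mask):
        if mask:
            total = [t + v for t, v in zip(total, vec)]
            count += 1
    return [t / max(count, 1) for t in total]

# Two real tokens followed by one padded position that is ignored.
emb = mean_pool([[1.0, 2.0], [3.0, 4.0], [9.0, 9.0]], [1, 1, 0])
# emb == [2.0, 3.0]
```

In the real model each vector has `word_embedding_dimension` = 4096 entries; `include_prompt: false` additionally excludes instruction-prompt tokens from the average.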
3_CSRSparsity/config.json ADDED
@@ -0,0 +1,8 @@
+ {
+   "input_dim": 4096,
+   "hidden_dim": 16384,
+   "k": 32,
+   "k_aux": 512,
+   "normalize": false,
+   "dead_threshold": 30
+ }
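Reading the CSRSparsity config as a top-k sparse head (an assumption based on the key names: a 4096-d embedding is projected to `hidden_dim` = 16384 and only the `k` = 32 largest activations are kept), the core operation can be sketched as:

```python
# Hedged sketch of top-k sparsification as suggested by
# 3_CSRSparsity/config.json ("k": 32). Interpretation of the keys is an
# assumption; this is not the repository's implementation.

def top_k_sparsify(activations, k):
    """Keep the k largest activations unchanged and zero out the rest."""
    if k >= len(activations):
        return list(activations)
    largest = sorted(range(len(activations)),
                     key=lambda i: activations[i], reverse=True)[:k]
    keep = set(largest)
    return [a if i in keep else 0.0 for i, a in enumerate(activations)]

sparse = top_k_sparsify([0.1, 3.0, -1.0, 2.5, 0.0], k=2)
# Only the two largest activations (3.0 and 2.5) survive.
```

The remaining keys presumably tune training rather than inference, e.g. `k_aux` and `dead_threshold` for reviving rarely-firing hidden units.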
3_CSRSparsity/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5ef94bb69abc22145d0f36dccab1a7aac999a5b14227a82df03c37e019972784
+ size 268650816
README.md ADDED
@@ -0,0 +1,2188 @@
+ ---
+ tags:
+ - mteb
+ - sentence-transformers
+ model-index:
+ - name: NV-Embed-v2
+   results:
+   - dataset:
+       config: en
+       name: MTEB AmazonCounterfactualClassification (en)
+       revision: e8379541af4e31359cca9fbcf4b00f2671dba205
+       split: test
+       type: mteb/amazon_counterfactual
+     metrics:
+     - type: accuracy
+       value: 94.28358208955224
+     - type: accuracy_stderr
+       value: 0.40076780842082305
+     - type: ap
+       value: 76.49097318319616
+     - type: ap_stderr
+       value: 1.2418692675183929
+     - type: f1
+       value: 91.41982003001168
+     - type: f1_stderr
+       value: 0.5043921413093579
+     - type: main_score
+       value: 94.28358208955224
+     task:
+       type: Classification
+   - dataset:
+       config: default
+       name: MTEB AmazonPolarityClassification
+       revision: e2d317d38cd51312af73b3d32a06d1a08b442046
+       split: test
+       type: mteb/amazon_polarity
+     metrics:
+     - type: accuracy
+       value: 97.74185000000001
+     - type: accuracy_stderr
+       value: 0.07420471683120942
+     - type: ap
+       value: 96.4737144875525
+     - type: ap_stderr
+       value: 0.2977518241541558
+     - type: f1
+       value: 97.7417581594921
+     - type: f1_stderr
+       value: 0.07428763617010377
+     - type: main_score
+       value: 97.74185000000001
+     task:
+       type: Classification
+   - dataset:
+       config: en
+       name: MTEB AmazonReviewsClassification (en)
+       revision: 1399c76144fd37290681b995c656ef9b2e06e26d
+       split: test
+       type: mteb/amazon_reviews_multi
+     metrics:
+     - type: accuracy
+       value: 63.96000000000001
+     - type: accuracy_stderr
+       value: 1.815555011559825
+     - type: f1
+       value: 62.49361841640459
+     - type: f1_stderr
+       value: 2.829339314126457
+     - type: main_score
+       value: 63.96000000000001
+     task:
+       type: Classification
+   - dataset:
+       config: default
+       name: MTEB ArguAna
+       revision: c22ab2a51041ffd869aaddef7af8d8215647e41a
+       split: test
+       type: mteb/arguana
+     metrics:
+     - type: map_at_1
+       value: 46.515
+     - type: map_at_10
+       value: 62.392
+     - type: map_at_100
+       value: 62.732
+     - type: map_at_1000
+       value: 62.733000000000004
+     - type: map_at_3
+       value: 58.701
+     - type: map_at_5
+       value: 61.027
+     - type: mrr_at_1
+       value: 0.0
+     - type: mrr_at_10
+       value: 0.0
+     - type: mrr_at_100
+       value: 0.0
+     - type: mrr_at_1000
+       value: 0.0
+     - type: mrr_at_3
+       value: 0.0
+     - type: mrr_at_5
+       value: 0.0
+     - type: ndcg_at_1
+       value: 46.515
+     - type: ndcg_at_10
+       value: 70.074
+     - type: ndcg_at_100
+       value: 71.395
+     - type: ndcg_at_1000
+       value: 71.405
+     - type: ndcg_at_3
+       value: 62.643
+     - type: ndcg_at_5
+       value: 66.803
+     - type: precision_at_1
+       value: 46.515
+     - type: precision_at_10
+       value: 9.41
+     - type: precision_at_100
+       value: 0.996
+     - type: precision_at_1000
+       value: 0.1
+     - type: precision_at_3
+       value: 24.68
+     - type: precision_at_5
+       value: 16.814
+     - type: recall_at_1
+       value: 46.515
+     - type: recall_at_10
+       value: 94.097
+     - type: recall_at_100
+       value: 99.57300000000001
+     - type: recall_at_1000
+       value: 99.644
+     - type: recall_at_3
+       value: 74.03999999999999
+     - type: recall_at_5
+       value: 84.068
+     - type: main_score
+       value: 70.074
+     task:
+       type: Retrieval
+   - dataset:
+       config: default
+       name: MTEB ArxivClusteringP2P
+       revision: a122ad7f3f0291bf49cc6f4d32aa80929df69d5d
+       split: test
+       type: mteb/arxiv-clustering-p2p
+     metrics:
+     - type: main_score
+       value: 55.79933795955242
+     - type: v_measure
+       value: 55.79933795955242
+     - type: v_measure_std
+       value: 14.575108141916148
+     task:
+       type: Clustering
+   - dataset:
+       config: default
+       name: MTEB ArxivClusteringS2S
+       revision: f910caf1a6075f7329cdf8c1a6135696f37dbd53
+       split: test
+       type: mteb/arxiv-clustering-s2s
+     metrics:
+     - type: main_score
+       value: 51.262845995850334
+     - type: v_measure
+       value: 51.262845995850334
+     - type: v_measure_std
+       value: 14.727824473104173
+     task:
+       type: Clustering
+   - dataset:
+       config: default
+       name: MTEB AskUbuntuDupQuestions
+       revision: 2000358ca161889fa9c082cb41daa8dcfb161a54
+       split: test
+       type: mteb/askubuntudupquestions-reranking
+     metrics:
+     - type: map
+       value: 67.46477327480808
+     - type: mrr
+       value: 79.50160488941653
+     - type: main_score
+       value: 67.46477327480808
+     task:
+       type: Reranking
+   - dataset:
+       config: default
+       name: MTEB BIOSSES
+       revision: d3fb88f8f02e40887cd149695127462bbcf29b4a
+       split: test
+       type: mteb/biosses-sts
+     metrics:
+     - type: cosine_pearson
+       value: 89.74311007980987
+     - type: cosine_spearman
+       value: 87.41644967443246
+     - type: manhattan_pearson
+       value: 88.57457108347744
+     - type: manhattan_spearman
+       value: 87.59295972042997
+     - type: euclidean_pearson
+       value: 88.27108977118459
+     - type: euclidean_spearman
+       value: 87.41644967443246
+     - type: main_score
+       value: 87.41644967443246
+     task:
+       type: STS
+   - dataset:
+       config: default
+       name: MTEB Banking77Classification
+       revision: 0fd18e25b25c072e09e0d92ab615fda904d66300
+       split: test
+       type: mteb/banking77
+     metrics:
+     - type: accuracy
+       value: 92.41558441558443
+     - type: accuracy_stderr
+       value: 0.37701502251934443
+     - type: f1
+       value: 92.38130170447671
+     - type: f1_stderr
+       value: 0.39115151225617767
+     - type: main_score
+       value: 92.41558441558443
+     task:
+       type: Classification
+   - dataset:
+       config: default
+       name: MTEB BiorxivClusteringP2P
+       revision: 65b79d1d13f80053f67aca9498d9402c2d9f1f40
+       split: test
+       type: mteb/biorxiv-clustering-p2p
+     metrics:
+     - type: main_score
+       value: 54.08649516394218
+     - type: v_measure
+       value: 54.08649516394218
+     - type: v_measure_std
+       value: 0.5303233693045373
+     task:
+       type: Clustering
+   - dataset:
+       config: default
+       name: MTEB BiorxivClusteringS2S
+       revision: 258694dd0231531bc1fd9de6ceb52a0853c6d908
+       split: test
+       type: mteb/biorxiv-clustering-s2s
+     metrics:
+     - type: main_score
+       value: 49.60352214167779
+     - type: v_measure
+       value: 49.60352214167779
+     - type: v_measure_std
+       value: 0.7176198612516721
+     task:
+       type: Clustering
+   - dataset:
+       config: default
+       name: MTEB CQADupstackRetrieval
+       revision: 46989137a86843e03a6195de44b09deda022eec7
+       split: test
+       type: CQADupstackRetrieval_is_a_combined_dataset
+     metrics:
+     - type: map_at_1
+       value: 31.913249999999998
+     - type: map_at_10
+       value: 43.87733333333334
+     - type: map_at_100
+       value: 45.249916666666664
+     - type: map_at_1000
+       value: 45.350583333333326
+     - type: map_at_3
+       value: 40.316833333333335
+     - type: map_at_5
+       value: 42.317083333333336
+     - type: mrr_at_1
+       value: 0.0
+     - type: mrr_at_10
+       value: 0.0
+     - type: mrr_at_100
+       value: 0.0
+     - type: mrr_at_1000
+       value: 0.0
+     - type: mrr_at_3
+       value: 0.0
+     - type: mrr_at_5
+       value: 0.0
+     - type: ndcg_at_1
+       value: 38.30616666666667
+     - type: ndcg_at_10
+       value: 50.24175000000001
+     - type: ndcg_at_100
+       value: 55.345333333333336
+     - type: ndcg_at_1000
+       value: 56.91225000000001
+     - type: ndcg_at_3
+       value: 44.67558333333333
+     - type: ndcg_at_5
+       value: 47.32333333333334
+     - type: precision_at_1
+       value: 38.30616666666667
+     - type: precision_at_10
+       value: 9.007416666666666
+     - type: precision_at_100
+       value: 1.3633333333333333
+     - type: precision_at_1000
+       value: 0.16691666666666666
+     - type: precision_at_3
+       value: 20.895666666666667
+     - type: precision_at_5
+       value: 14.871666666666666
+     - type: recall_at_1
+       value: 31.913249999999998
+     - type: recall_at_10
+       value: 64.11891666666666
+     - type: recall_at_100
+       value: 85.91133333333333
+     - type: recall_at_1000
+       value: 96.28225
+     - type: recall_at_3
+       value: 48.54749999999999
+     - type: recall_at_5
+       value: 55.44283333333334
+     - type: main_score
+       value: 50.24175000000001
+     task:
+       type: Retrieval
+   - dataset:
+       config: default
+       name: MTEB ClimateFEVER
+       revision: 47f2ac6acb640fc46020b02a5b59fdda04d39380
+       split: test
+       type: mteb/climate-fever
+     metrics:
+     - type: map_at_1
+       value: 19.556
+     - type: map_at_10
+       value: 34.623
+     - type: map_at_100
+       value: 36.97
+     - type: map_at_1000
+       value: 37.123
+     - type: map_at_3
+       value: 28.904999999999998
+     - type: map_at_5
+       value: 31.955
+     - type: mrr_at_1
+       value: 0.0
+     - type: mrr_at_10
+       value: 0.0
+     - type: mrr_at_100
+       value: 0.0
+     - type: mrr_at_1000
+       value: 0.0
+     - type: mrr_at_3
+       value: 0.0
+     - type: mrr_at_5
+       value: 0.0
+     - type: ndcg_at_1
+       value: 44.104
+     - type: ndcg_at_10
+       value: 45.388
+     - type: ndcg_at_100
+       value: 52.793
+     - type: ndcg_at_1000
+       value: 55.108999999999995
+     - type: ndcg_at_3
+       value: 38.604
+     - type: ndcg_at_5
+       value: 40.806
+     - type: precision_at_1
+       value: 44.104
+     - type: precision_at_10
+       value: 14.143
+     - type: precision_at_100
+       value: 2.2190000000000003
+     - type: precision_at_1000
+       value: 0.266
+     - type: precision_at_3
+       value: 29.316
+     - type: precision_at_5
+       value: 21.98
+     - type: recall_at_1
+       value: 19.556
+     - type: recall_at_10
+       value: 52.120999999999995
+     - type: recall_at_100
+       value: 76.509
+     - type: recall_at_1000
+       value: 89.029
+     - type: recall_at_3
+       value: 34.919
+     - type: recall_at_5
+       value: 42.18
+     - type: main_score
+       value: 45.388
+     task:
+       type: Retrieval
+   - dataset:
+       config: default
+       name: MTEB DBPedia
+       revision: c0f706b76e590d620bd6618b3ca8efdd34e2d659
+       split: test
+       type: mteb/dbpedia
+     metrics:
+     - type: map_at_1
+       value: 10.714
+     - type: map_at_10
+       value: 25.814999999999998
+     - type: map_at_100
+       value: 37.845
+     - type: map_at_1000
+       value: 39.974
+     - type: map_at_3
+       value: 17.201
+     - type: map_at_5
+       value: 21.062
+     - type: mrr_at_1
+       value: 0.0
+     - type: mrr_at_10
+       value: 0.0
+     - type: mrr_at_100
+       value: 0.0
+     - type: mrr_at_1000
+       value: 0.0
+     - type: mrr_at_3
+       value: 0.0
+     - type: mrr_at_5
+       value: 0.0
+     - type: ndcg_at_1
+       value: 66.0
+     - type: ndcg_at_10
+       value: 53.496
+     - type: ndcg_at_100
+       value: 58.053
+     - type: ndcg_at_1000
+       value: 64.886
+     - type: ndcg_at_3
+       value: 57.656
+     - type: ndcg_at_5
+       value: 55.900000000000006
+     - type: precision_at_1
+       value: 77.25
+     - type: precision_at_10
+       value: 43.65
+     - type: precision_at_100
+       value: 13.76
+     - type: precision_at_1000
+       value: 2.5940000000000003
+     - type: precision_at_3
+       value: 61.0
+     - type: precision_at_5
+       value: 54.65
+     - type: recall_at_1
+       value: 10.714
+     - type: recall_at_10
+       value: 31.173000000000002
+     - type: recall_at_100
+       value: 63.404
+     - type: recall_at_1000
+       value: 85.874
+     - type: recall_at_3
+       value: 18.249000000000002
+     - type: recall_at_5
+       value: 23.69
+     - type: main_score
+       value: 53.496
+     task:
+       type: Retrieval
+   - dataset:
+       config: default
+       name: MTEB EmotionClassification
+       revision: 4f58c6b202a23cf9a4da393831edf4f9183cad37
+       split: test
+       type: mteb/emotion
+     metrics:
+     - type: accuracy
+       value: 93.38499999999999
+     - type: accuracy_stderr
+       value: 0.13793114224133846
+     - type: f1
+       value: 90.12141028353496
+     - type: f1_stderr
+       value: 0.174640257706043
+     - type: main_score
+       value: 93.38499999999999
+     task:
+       type: Classification
+   - dataset:
+       config: default
+       name: MTEB FEVER
+       revision: bea83ef9e8fb933d90a2f1d5515737465d613e12
+       split: test
+       type: mteb/fever
+     metrics:
+     - type: map_at_1
+       value: 84.66900000000001
+     - type: map_at_10
+       value: 91.52799999999999
+     - type: map_at_100
+       value: 91.721
+     - type: map_at_1000
+       value: 91.73
+     - type: map_at_3
+       value: 90.752
+     - type: map_at_5
+       value: 91.262
+     - type: mrr_at_1
+       value: 0.0
+     - type: mrr_at_10
+       value: 0.0
+     - type: mrr_at_100
+       value: 0.0
+     - type: mrr_at_1000
+       value: 0.0
+     - type: mrr_at_3
+       value: 0.0
+     - type: mrr_at_5
+       value: 0.0
+     - type: ndcg_at_1
+       value: 91.20899999999999
+     - type: ndcg_at_10
+       value: 93.74900000000001
+     - type: ndcg_at_100
+       value: 94.279
+     - type: ndcg_at_1000
+       value: 94.408
+     - type: ndcg_at_3
+       value: 92.923
+     - type: ndcg_at_5
+       value: 93.376
+     - type: precision_at_1
+       value: 91.20899999999999
+     - type: precision_at_10
+       value: 11.059
+     - type: precision_at_100
+       value: 1.1560000000000001
+     - type: precision_at_1000
+       value: 0.11800000000000001
+     - type: precision_at_3
+       value: 35.129
+     - type: precision_at_5
+       value: 21.617
+     - type: recall_at_1
+       value: 84.66900000000001
+     - type: recall_at_10
+       value: 97.03399999999999
+     - type: recall_at_100
+       value: 98.931
+     - type: recall_at_1000
+       value: 99.65899999999999
+     - type: recall_at_3
+       value: 94.76299999999999
+     - type: recall_at_5
+       value: 95.968
+     - type: main_score
+       value: 93.74900000000001
+     task:
+       type: Retrieval
+   - dataset:
+       config: default
+       name: MTEB FiQA2018
+       revision: 27a168819829fe9bcd655c2df245fb19452e8e06
+       split: test
+       type: mteb/fiqa
+     metrics:
+     - type: map_at_1
+       value: 34.866
+     - type: map_at_10
+       value: 58.06099999999999
+     - type: map_at_100
+       value: 60.028999999999996
+     - type: map_at_1000
+       value: 60.119
+     - type: map_at_3
+       value: 51.304
+     - type: map_at_5
+       value: 55.054
+     - type: mrr_at_1
+       value: 0.0
+     - type: mrr_at_10
+       value: 0.0
+     - type: mrr_at_100
+       value: 0.0
+     - type: mrr_at_1000
+       value: 0.0
+     - type: mrr_at_3
+       value: 0.0
+     - type: mrr_at_5
+       value: 0.0
+     - type: ndcg_at_1
+       value: 64.815
+     - type: ndcg_at_10
+       value: 65.729
+     - type: ndcg_at_100
+       value: 71.14
+     - type: ndcg_at_1000
+       value: 72.336
+     - type: ndcg_at_3
+       value: 61.973
+     - type: ndcg_at_5
+       value: 62.858000000000004
+     - type: precision_at_1
+       value: 64.815
+     - type: precision_at_10
+       value: 17.87
+     - type: precision_at_100
+       value: 2.373
+     - type: precision_at_1000
+       value: 0.258
+     - type: precision_at_3
+       value: 41.152
+     - type: precision_at_5
+       value: 29.568
+     - type: recall_at_1
+       value: 34.866
+     - type: recall_at_10
+       value: 72.239
+     - type: recall_at_100
+       value: 91.19
+     - type: recall_at_1000
+       value: 98.154
+     - type: recall_at_3
+       value: 56.472
+     - type: recall_at_5
+       value: 63.157
+     - type: main_score
+       value: 65.729
+     task:
+       type: Retrieval
+   - dataset:
+       config: default
+       name: MTEB HotpotQA
+       revision: ab518f4d6fcca38d87c25209f94beba119d02014
+       split: test
+       type: mteb/hotpotqa
+     metrics:
+     - type: map_at_1
+       value: 44.651999999999994
+     - type: map_at_10
+       value: 79.95100000000001
+     - type: map_at_100
+       value: 80.51700000000001
+     - type: map_at_1000
+       value: 80.542
+     - type: map_at_3
+       value: 77.008
+     - type: map_at_5
+       value: 78.935
+     - type: mrr_at_1
+       value: 0.0
+     - type: mrr_at_10
+       value: 0.0
+     - type: mrr_at_100
+       value: 0.0
+     - type: mrr_at_1000
+       value: 0.0
+     - type: mrr_at_3
+       value: 0.0
+     - type: mrr_at_5
+       value: 0.0
+     - type: ndcg_at_1
+       value: 89.305
+     - type: ndcg_at_10
+       value: 85.479
+     - type: ndcg_at_100
+       value: 87.235
+     - type: ndcg_at_1000
+       value: 87.669
+     - type: ndcg_at_3
+       value: 81.648
+     - type: ndcg_at_5
+       value: 83.88600000000001
+     - type: precision_at_1
+       value: 89.305
+     - type: precision_at_10
+       value: 17.807000000000002
+     - type: precision_at_100
+       value: 1.9140000000000001
+     - type: precision_at_1000
+       value: 0.197
+     - type: precision_at_3
+       value: 53.756
+     - type: precision_at_5
+       value: 34.018
+     - type: recall_at_1
+       value: 44.651999999999994
+     - type: recall_at_10
+       value: 89.034
+     - type: recall_at_100
+       value: 95.719
+     - type: recall_at_1000
+       value: 98.535
+     - type: recall_at_3
+       value: 80.635
+     - type: recall_at_5
+       value: 85.044
+     - type: main_score
+       value: 85.479
+     task:
+       type: Retrieval
+   - dataset:
+       config: default
+       name: MTEB ImdbClassification
+       revision: 3d86128a09e091d6018b6d26cad27f2739fc2db7
+       split: test
+       type: mteb/imdb
+     metrics:
+     - type: accuracy
+       value: 97.1376
+     - type: accuracy_stderr
+       value: 0.04571914259913447
+     - type: ap
+       value: 95.92783808558808
+     - type: ap_stderr
+       value: 0.05063782483358255
+     - type: f1
+       value: 97.13755519177172
+     - type: f1_stderr
+       value: 0.04575943074086138
+     - type: main_score
+       value: 97.1376
+     task:
+       type: Classification
+   - dataset:
+       config: default
+       name: MTEB MSMARCO
+       revision: c5a29a104738b98a9e76336939199e264163d4a0
+       split: dev
+       type: mteb/msmarco
+     metrics:
+     - type: map_at_1
+       value: 0.0
+     - type: map_at_10
+       value: 38.342
+     - type: map_at_100
+       value: 0.0
+     - type: map_at_1000
+       value: 0.0
+     - type: map_at_3
+       value: 0.0
+     - type: map_at_5
+       value: 0.0
+     - type: mrr_at_1
+       value: 0.0
+     - type: mrr_at_10
+       value: 0.0
+     - type: mrr_at_100
+       value: 0.0
+     - type: mrr_at_1000
+       value: 0.0
+     - type: mrr_at_3
+       value: 0.0
+     - type: mrr_at_5
+       value: 0.0
+     - type: ndcg_at_1
+       value: 0.0
+     - type: ndcg_at_10
+       value: 45.629999999999995
+     - type: ndcg_at_100
+       value: 0.0
+     - type: ndcg_at_1000
+       value: 0.0
+     - type: ndcg_at_3
+       value: 0.0
+     - type: ndcg_at_5
+       value: 0.0
+     - type: precision_at_1
+       value: 0.0
+     - type: precision_at_10
+       value: 7.119000000000001
+     - type: precision_at_100
+       value: 0.0
+     - type: precision_at_1000
+       value: 0.0
+     - type: precision_at_3
+       value: 0.0
+     - type: precision_at_5
+       value: 0.0
+     - type: recall_at_1
+       value: 0.0
+     - type: recall_at_10
+       value: 67.972
+     - type: recall_at_100
+       value: 0.0
+     - type: recall_at_1000
+       value: 0.0
+     - type: recall_at_3
+       value: 0.0
+     - type: recall_at_5
+       value: 0.0
+     - type: main_score
+       value: 45.629999999999995
+     task:
+       type: Retrieval
+   - dataset:
+       config: en
+       name: MTEB MTOPDomainClassification (en)
+       revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf
+       split: test
+       type: mteb/mtop_domain
+     metrics:
+     - type: accuracy
+       value: 99.24988600091199
+     - type: accuracy_stderr
+       value: 0.04496826931900734
+     - type: f1
+       value: 99.15933275095276
+     - type: f1_stderr
+       value: 0.05565039139747446
+     - type: main_score
+       value: 99.24988600091199
+     task:
+       type: Classification
+   - dataset:
+       config: en
+       name: MTEB MTOPIntentClassification (en)
+       revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba
+       split: test
+       type: mteb/mtop_intent
+     metrics:
+     - type: accuracy
+       value: 94.3684450524396
+     - type: accuracy_stderr
+       value: 0.8436548701322188
+     - type: f1
+       value: 77.33022623133307
+     - type: f1_stderr
+       value: 0.9228425861187275
+     - type: main_score
+       value: 94.3684450524396
+     task:
+       type: Classification
+   - dataset:
+       config: en
+       name: MTEB MassiveIntentClassification (en)
+       revision: 31efe3c427b0bae9c22cbb560b8f15491cc6bed7
+       split: test
+       type: mteb/amazon_massive_intent
+     metrics:
+     - type: accuracy
+       value: 86.09616677874916
+     - type: accuracy_stderr
+       value: 0.9943208055590853
+     - type: f1
+       value: 83.4902056490062
+     - type: f1_stderr
+       value: 0.7626189310074184
+     - type: main_score
+       value: 86.09616677874916
+     task:
+       type: Classification
+   - dataset:
+       config: en
+       name: MTEB MassiveScenarioClassification (en)
+       revision: 7d571f92784cd94a019292a1f45445077d0ef634
+       split: test
+       type: mteb/amazon_massive_scenario
+     metrics:
+     - type: accuracy
+       value: 92.17215870880968
+     - type: accuracy_stderr
+       value: 0.25949941333658166
+     - type: f1
+       value: 91.36757392422702
+     - type: f1_stderr
+       value: 0.29139507298154815
+     - type: main_score
+       value: 92.17215870880968
+     task:
+       type: Classification
+   - dataset:
+       config: default
+       name: MTEB MedrxivClusteringP2P
+       revision: e7a26af6f3ae46b30dde8737f02c07b1505bcc73
+       split: test
+       type: mteb/medrxiv-clustering-p2p
+     metrics:
+     - type: main_score
+       value: 46.09497344077905
+     - type: v_measure
+       value: 46.09497344077905
+     - type: v_measure_std
+       value: 1.44871520869784
+     task:
+       type: Clustering
+   - dataset:
+       config: default
+       name: MTEB MedrxivClusteringS2S
+       revision: 35191c8c0dca72d8ff3efcd72aa802307d469663
+       split: test
+       type: mteb/medrxiv-clustering-s2s
+     metrics:
+     - type: main_score
+       value: 44.861049989560684
+     - type: v_measure
+       value: 44.861049989560684
+     - type: v_measure_std
+       value: 1.432199293162203
+     task:
+       type: Clustering
+   - dataset:
+       config: default
+       name: MTEB MindSmallReranking
+       revision: 3bdac13927fdc888b903db93b2ffdbd90b295a69
+       split: test
+       type: mteb/mind_small
+     metrics:
+     - type: map
+       value: 31.75936162919999
+     - type: mrr
+       value: 32.966812736541236
+     - type: main_score
+       value: 31.75936162919999
+     task:
+       type: Reranking
+   - dataset:
+       config: default
+       name: MTEB NFCorpus
+       revision: ec0fa4fe99da2ff19ca1214b7966684033a58814
+       split: test
+       type: mteb/nfcorpus
+     metrics:
+     - type: map_at_1
+       value: 7.893999999999999
+     - type: map_at_10
+       value: 17.95
+     - type: map_at_100
+       value: 23.474
+     - type: map_at_1000
+       value: 25.412000000000003
+     - type: map_at_3
+       value: 12.884
+     - type: map_at_5
+       value: 15.171000000000001
+     - type: mrr_at_1
+       value: 0.0
+     - type: mrr_at_10
+       value: 0.0
+     - type: mrr_at_100
+       value: 0.0
+     - type: mrr_at_1000
+       value: 0.0
+     - type: mrr_at_3
+       value: 0.0
+     - type: mrr_at_5
+       value: 0.0
+     - type: ndcg_at_1
+       value: 55.728
+     - type: ndcg_at_10
+       value: 45.174
+     - type: ndcg_at_100
+       value: 42.18
+     - type: ndcg_at_1000
+       value: 50.793
+     - type: ndcg_at_3
+       value: 50.322
+     - type: ndcg_at_5
+       value: 48.244
+     - type: precision_at_1
+       value: 57.276
+     - type: precision_at_10
+       value: 33.437
+     - type: precision_at_100
+       value: 10.671999999999999
+     - type: precision_at_1000
+       value: 2.407
+     - type: precision_at_3
+       value: 46.646
+     - type: precision_at_5
+       value: 41.672
+     - type: recall_at_1
+       value: 7.893999999999999
+     - type: recall_at_10
+       value: 22.831000000000003
+     - type: recall_at_100
+       value: 43.818
+     - type: recall_at_1000
+       value: 75.009
+     - type: recall_at_3
+       value: 14.371
+     - type: recall_at_5
+       value: 17.752000000000002
+     - type: main_score
+       value: 45.174
+     task:
+       type: Retrieval
+   - dataset:
+       config: default
+       name: MTEB NQ
+       revision: b774495ed302d8c44a3a7ea25c90dbce03968f31
+       split: test
+       type: mteb/nq
+     metrics:
+     - type: map_at_1
+       value: 49.351
+     - type: map_at_10
+       value: 66.682
+     - type: map_at_100
+       value: 67.179
+     - type: map_at_1000
+       value: 67.18499999999999
+     - type: map_at_3
+       value: 62.958999999999996
+     - type: map_at_5
+       value: 65.364
+     - type: mrr_at_1
+       value: 0.0
+     - type: mrr_at_10
+       value: 0.0
+     - type: mrr_at_100
+       value: 0.0
+     - type: mrr_at_1000
+       value: 0.0
+     - type: mrr_at_3
+       value: 0.0
+     - type: mrr_at_5
+       value: 0.0
+     - type: ndcg_at_1
+       value: 55.417
+     - type: ndcg_at_10
+       value: 73.568
+     - type: ndcg_at_100
+       value: 75.35
+     - type: ndcg_at_1000
+       value: 75.478
+     - type: ndcg_at_3
+       value: 67.201
+     - type: ndcg_at_5
+       value: 70.896
+     - type: precision_at_1
+       value: 55.417
+     - type: precision_at_10
+       value: 11.036999999999999
+     - type: precision_at_100
+       value: 1.204
+     - type: precision_at_1000
+       value: 0.121
+     - type: precision_at_3
+       value: 29.654000000000003
+     - type: precision_at_5
+       value: 20.006
+     - type: recall_at_1
+       value: 49.351
+     - type: recall_at_10
+       value: 91.667
+     - type: recall_at_100
+       value: 98.89
+     - type: recall_at_1000
+       value: 99.812
+     - type: recall_at_3
+       value: 75.715
+     - type: recall_at_5
+       value: 84.072
+     - type: main_score
+       value: 73.568
+     task:
+       type: Retrieval
+   - dataset:
+       config: default
+       name: MTEB QuoraRetrieval
+       revision: e4e08e0b7dbe3c8700f0daef558ff32256715259
+       split: test
+       type: mteb/quora
+     metrics:
+     - type: map_at_1
+       value: 71.358
+     - type: map_at_10
+       value: 85.474
+     - type: map_at_100
+       value: 86.101
+     - type: map_at_1000
+       value: 86.114
+     - type: map_at_3
+       value: 82.562
+     - type: map_at_5
+       value: 84.396
+     - type: mrr_at_1
+       value: 0.0
+     - type: mrr_at_10
+       value: 0.0
+     - type: mrr_at_100
+       value: 0.0
+     - type: mrr_at_1000
+       value: 0.0
+     - type: mrr_at_3
+       value: 0.0
+     - type: mrr_at_5
+       value: 0.0
+     - type: ndcg_at_1
+       value: 82.12
+     - type: ndcg_at_10
+       value: 89.035
+     - type: ndcg_at_100
+       value: 90.17399999999999
+     - type: ndcg_at_1000
+       value: 90.243
+     - type: ndcg_at_3
+       value: 86.32300000000001
+     - type: ndcg_at_5
+       value: 87.85
+     - type: precision_at_1
+       value: 82.12
+     - type: precision_at_10
+       value: 13.55
+     - type: precision_at_100
+       value: 1.54
+     - type: precision_at_1000
+       value: 0.157
+     - type: precision_at_3
+       value: 37.89
+     - type: precision_at_5
+       value: 24.9
+     - type: recall_at_1
+       value: 71.358
+     - type: recall_at_10
+       value: 95.855
+     - type: recall_at_100
+       value: 99.711
+     - type: recall_at_1000
+       value: 99.994
+     - type: recall_at_3
+       value: 88.02
+     - type: recall_at_5
+       value: 92.378
+     - type: main_score
+       value: 89.035
+     task:
+       type: Retrieval
+   - dataset:
+       config: default
+       name: MTEB RedditClustering
+       revision: 24640382cdbf8abc73003fb0fa6d111a705499eb
+       split: test
+       type: mteb/reddit-clustering
+     metrics:
+     - type: main_score
+       value: 71.0984522742521
+     - type: v_measure
+       value: 71.0984522742521
+     - type: v_measure_std
+       value: 3.5668139917058044
+     task:
+       type: Clustering
+   - dataset:
+       config: default
+       name: MTEB RedditClusteringP2P
+       revision: 385e3cb46b4cfa89021f56c4380204149d0efe33
1153
+ split: test
1154
+ type: mteb/reddit-clustering-p2p
1155
+ metrics:
1156
+ - type: main_score
1157
+ value: 74.94499641904133
1158
+ - type: v_measure
1159
+ value: 74.94499641904133
1160
+ - type: v_measure_std
1161
+ value: 11.419672879389248
1162
+ task:
1163
+ type: Clustering
1164
+ - dataset:
1165
+ config: default
1166
+ name: MTEB SCIDOCS
1167
+ revision: f8c2fcf00f625baaa80f62ec5bd9e1fff3b8ae88
1168
+ split: test
1169
+ type: mteb/scidocs
1170
+ metrics:
1171
+ - type: map_at_1
1172
+ value: 5.343
1173
+ - type: map_at_10
1174
+ value: 13.044
1175
+ - type: map_at_100
1176
+ value: 15.290999999999999
1177
+ - type: map_at_1000
1178
+ value: 15.609
1179
+ - type: map_at_3
1180
+ value: 9.227
1181
+ - type: map_at_5
1182
+ value: 11.158
1183
+ - type: mrr_at_1
1184
+ value: 0.0
1185
+ - type: mrr_at_10
1186
+ value: 0.0
1187
+ - type: mrr_at_100
1188
+ value: 0.0
1189
+ - type: mrr_at_1000
1190
+ value: 0.0
1191
+ - type: mrr_at_3
1192
+ value: 0.0
1193
+ - type: mrr_at_5
1194
+ value: 0.0
1195
+ - type: ndcg_at_1
1196
+ value: 26.3
1197
+ - type: ndcg_at_10
1198
+ value: 21.901
1199
+ - type: ndcg_at_100
1200
+ value: 30.316
1201
+ - type: ndcg_at_1000
1202
+ value: 35.547000000000004
1203
+ - type: ndcg_at_3
1204
+ value: 20.560000000000002
1205
+ - type: ndcg_at_5
1206
+ value: 18.187
1207
+ - type: precision_at_1
1208
+ value: 26.3
1209
+ - type: precision_at_10
1210
+ value: 11.34
1211
+ - type: precision_at_100
1212
+ value: 2.344
1213
+ - type: precision_at_1000
1214
+ value: 0.359
1215
+ - type: precision_at_3
1216
+ value: 18.967
1217
+ - type: precision_at_5
1218
+ value: 15.920000000000002
1219
+ - type: recall_at_1
1220
+ value: 5.343
1221
+ - type: recall_at_10
1222
+ value: 22.997
1223
+ - type: recall_at_100
1224
+ value: 47.562
1225
+ - type: recall_at_1000
1226
+ value: 72.94500000000001
1227
+ - type: recall_at_3
1228
+ value: 11.533
1229
+ - type: recall_at_5
1230
+ value: 16.148
1231
+ - type: main_score
1232
+ value: 21.901
1233
+ task:
1234
+ type: Retrieval
1235
+ - dataset:
1236
+ config: default
1237
+ name: MTEB SICK-R
1238
+ revision: 20a6d6f312dd54037fe07a32d58e5e168867909d
1239
+ split: test
1240
+ type: mteb/sickr-sts
1241
+ metrics:
1242
+ - type: cosine_pearson
1243
+ value: 87.3054603493591
1244
+ - type: cosine_spearman
1245
+ value: 82.14763206055602
1246
+ - type: manhattan_pearson
1247
+ value: 84.78737790237557
1248
+ - type: manhattan_spearman
1249
+ value: 81.88455356002758
1250
+ - type: euclidean_pearson
1251
+ value: 85.00668629311117
1252
+ - type: euclidean_spearman
1253
+ value: 82.14763037860851
1254
+ - type: main_score
1255
+ value: 82.14763206055602
1256
+ task:
1257
+ type: STS
1258
+ - dataset:
1259
+ config: default
1260
+ name: MTEB STS12
1261
+ revision: a0d554a64d88156834ff5ae9920b964011b16384
1262
+ split: test
1263
+ type: mteb/sts12-sts
1264
+ metrics:
1265
+ - type: cosine_pearson
1266
+ value: 86.6911864687294
1267
+ - type: cosine_spearman
1268
+ value: 77.89286260403269
1269
+ - type: manhattan_pearson
1270
+ value: 82.87240347680857
1271
+ - type: manhattan_spearman
1272
+ value: 78.10055393740326
1273
+ - type: euclidean_pearson
1274
+ value: 82.72282535777123
1275
+ - type: euclidean_spearman
1276
+ value: 77.89256648406325
1277
+ - type: main_score
1278
+ value: 77.89286260403269
1279
+ task:
1280
+ type: STS
1281
+ - dataset:
1282
+ config: default
1283
+ name: MTEB STS13
1284
+ revision: 7e90230a92c190f1bf69ae9002b8cea547a64cca
1285
+ split: test
1286
+ type: mteb/sts13-sts
1287
+ metrics:
1288
+ - type: cosine_pearson
1289
+ value: 87.7220832598633
1290
+ - type: cosine_spearman
1291
+ value: 88.30238972017452
1292
+ - type: manhattan_pearson
1293
+ value: 87.88214789140248
1294
+ - type: manhattan_spearman
1295
+ value: 88.24770220032391
1296
+ - type: euclidean_pearson
1297
+ value: 87.98610386257103
1298
+ - type: euclidean_spearman
1299
+ value: 88.30238972017452
1300
+ - type: main_score
1301
+ value: 88.30238972017452
1302
+ task:
1303
+ type: STS
1304
+ - dataset:
1305
+ config: default
1306
+ name: MTEB STS14
1307
+ revision: 6031580fec1f6af667f0bd2da0a551cf4f0b2375
1308
+ split: test
1309
+ type: mteb/sts14-sts
1310
+ metrics:
1311
+ - type: cosine_pearson
1312
+ value: 85.70614623247714
1313
+ - type: cosine_spearman
1314
+ value: 84.29920990970672
1315
+ - type: manhattan_pearson
1316
+ value: 84.9836190531721
1317
+ - type: manhattan_spearman
1318
+ value: 84.40933470597638
1319
+ - type: euclidean_pearson
1320
+ value: 84.96652336693347
1321
+ - type: euclidean_spearman
1322
+ value: 84.29920989531965
1323
+ - type: main_score
1324
+ value: 84.29920990970672
1325
+ task:
1326
+ type: STS
1327
+ - dataset:
1328
+ config: default
1329
+ name: MTEB STS15
1330
+ revision: ae752c7c21bf194d8b67fd573edf7ae58183cbe3
1331
+ split: test
1332
+ type: mteb/sts15-sts
1333
+ metrics:
1334
+ - type: cosine_pearson
1335
+ value: 88.4169972425264
1336
+ - type: cosine_spearman
1337
+ value: 89.03555007807218
1338
+ - type: manhattan_pearson
1339
+ value: 88.83068699455478
1340
+ - type: manhattan_spearman
1341
+ value: 89.21877175674125
1342
+ - type: euclidean_pearson
1343
+ value: 88.7251052947544
1344
+ - type: euclidean_spearman
1345
+ value: 89.03557389893083
1346
+ - type: main_score
1347
+ value: 89.03555007807218
1348
+ task:
1349
+ type: STS
1350
+ - dataset:
1351
+ config: default
1352
+ name: MTEB STS16
1353
+ revision: 4d8694f8f0e0100860b497b999b3dbed754a0513
1354
+ split: test
1355
+ type: mteb/sts16-sts
1356
+ metrics:
1357
+ - type: cosine_pearson
1358
+ value: 85.63830579034632
1359
+ - type: cosine_spearman
1360
+ value: 86.77353371581373
1361
+ - type: manhattan_pearson
1362
+ value: 86.24830492396637
1363
+ - type: manhattan_spearman
1364
+ value: 86.96754348626189
1365
+ - type: euclidean_pearson
1366
+ value: 86.09837038778359
1367
+ - type: euclidean_spearman
1368
+ value: 86.77353371581373
1369
+ - type: main_score
1370
+ value: 86.77353371581373
1371
+ task:
1372
+ type: STS
1373
+ - dataset:
1374
+ config: en-en
1375
+ name: MTEB STS17 (en-en)
1376
+ revision: af5e6fb845001ecf41f4c1e033ce921939a2a68d
1377
+ split: test
1378
+ type: mteb/sts17-crosslingual-sts
1379
+ metrics:
1380
+ - type: cosine_pearson
1381
+ value: 91.2204675588959
1382
+ - type: cosine_spearman
1383
+ value: 90.66976712249057
1384
+ - type: manhattan_pearson
1385
+ value: 91.11007808242346
1386
+ - type: manhattan_spearman
1387
+ value: 90.51739232964488
1388
+ - type: euclidean_pearson
1389
+ value: 91.19588941007903
1390
+ - type: euclidean_spearman
1391
+ value: 90.66976712249057
1392
+ - type: main_score
1393
+ value: 90.66976712249057
1394
+ task:
1395
+ type: STS
1396
+ - dataset:
1397
+ config: en
1398
+ name: MTEB STS22 (en)
1399
+ revision: eea2b4fe26a775864c896887d910b76a8098ad3f
1400
+ split: test
1401
+ type: mteb/sts22-crosslingual-sts
1402
+ metrics:
1403
+ - type: cosine_pearson
1404
+ value: 69.34416749707114
1405
+ - type: cosine_spearman
1406
+ value: 68.11632448161046
1407
+ - type: manhattan_pearson
1408
+ value: 68.99243488935281
1409
+ - type: manhattan_spearman
1410
+ value: 67.8398546438258
1411
+ - type: euclidean_pearson
1412
+ value: 69.06376010216088
1413
+ - type: euclidean_spearman
1414
+ value: 68.11632448161046
1415
+ - type: main_score
1416
+ value: 68.11632448161046
1417
+ task:
1418
+ type: STS
1419
+ - dataset:
1420
+ config: default
1421
+ name: MTEB STSBenchmark
1422
+ revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831
1423
+ split: test
1424
+ type: mteb/stsbenchmark-sts
1425
+ metrics:
1426
+ - type: cosine_pearson
1427
+ value: 88.10309739429758
1428
+ - type: cosine_spearman
1429
+ value: 88.40520383147418
1430
+ - type: manhattan_pearson
1431
+ value: 88.50753383813232
1432
+ - type: manhattan_spearman
1433
+ value: 88.66382629460927
1434
+ - type: euclidean_pearson
1435
+ value: 88.35050664609376
1436
+ - type: euclidean_spearman
1437
+ value: 88.40520383147418
1438
+ - type: main_score
1439
+ value: 88.40520383147418
1440
+ task:
1441
+ type: STS
1442
+ - dataset:
1443
+ config: default
1444
+ name: MTEB SciDocsRR
1445
+ revision: d3c5e1fc0b855ab6097bf1cda04dd73947d7caab
1446
+ split: test
1447
+ type: mteb/scidocs-reranking
1448
+ metrics:
1449
+ - type: map
1450
+ value: 87.58627126942797
1451
+ - type: mrr
1452
+ value: 97.01098103058887
1453
+ - type: main_score
1454
+ value: 87.58627126942797
1455
+ task:
1456
+ type: Reranking
1457
+ - dataset:
1458
+ config: default
1459
+ name: MTEB SciFact
1460
+ revision: 0228b52cf27578f30900b9e5271d331663a030d7
1461
+ split: test
1462
+ type: mteb/scifact
1463
+ metrics:
1464
+ - type: map_at_1
1465
+ value: 62.883
1466
+ - type: map_at_10
1467
+ value: 75.371
1468
+ - type: map_at_100
1469
+ value: 75.66000000000001
1470
+ - type: map_at_1000
1471
+ value: 75.667
1472
+ - type: map_at_3
1473
+ value: 72.741
1474
+ - type: map_at_5
1475
+ value: 74.74
1476
+ - type: mrr_at_1
1477
+ value: 0.0
1478
+ - type: mrr_at_10
1479
+ value: 0.0
1480
+ - type: mrr_at_100
1481
+ value: 0.0
1482
+ - type: mrr_at_1000
1483
+ value: 0.0
1484
+ - type: mrr_at_3
1485
+ value: 0.0
1486
+ - type: mrr_at_5
1487
+ value: 0.0
1488
+ - type: ndcg_at_1
1489
+ value: 66.0
1490
+ - type: ndcg_at_10
1491
+ value: 80.12700000000001
1492
+ - type: ndcg_at_100
1493
+ value: 81.291
1494
+ - type: ndcg_at_1000
1495
+ value: 81.464
1496
+ - type: ndcg_at_3
1497
+ value: 76.19
1498
+ - type: ndcg_at_5
1499
+ value: 78.827
1500
+ - type: precision_at_1
1501
+ value: 66.0
1502
+ - type: precision_at_10
1503
+ value: 10.567
1504
+ - type: precision_at_100
1505
+ value: 1.117
1506
+ - type: precision_at_1000
1507
+ value: 0.11299999999999999
1508
+ - type: precision_at_3
1509
+ value: 30.333
1510
+ - type: precision_at_5
1511
+ value: 20.133000000000003
1512
+ - type: recall_at_1
1513
+ value: 62.883
1514
+ - type: recall_at_10
1515
+ value: 93.556
1516
+ - type: recall_at_100
1517
+ value: 98.667
1518
+ - type: recall_at_1000
1519
+ value: 100.0
1520
+ - type: recall_at_3
1521
+ value: 83.322
1522
+ - type: recall_at_5
1523
+ value: 89.756
1524
+ - type: main_score
1525
+ value: 80.12700000000001
1526
+ task:
1527
+ type: Retrieval
1528
+ - dataset:
1529
+ config: default
1530
+ name: MTEB SprintDuplicateQuestions
1531
+ revision: d66bd1f72af766a5cc4b0ca5e00c162f89e8cc46
1532
+ split: test
1533
+ type: mteb/sprintduplicatequestions-pairclassification
1534
+ metrics:
1535
+ - type: cos_sim_accuracy
1536
+ value: 99.87524752475248
1537
+ - type: cos_sim_accuracy_threshold
1538
+ value: 74.86587762832642
1539
+ - type: cos_sim_ap
1540
+ value: 97.02222446606328
1541
+ - type: cos_sim_f1
1542
+ value: 93.66197183098592
1543
+ - type: cos_sim_f1_threshold
1544
+ value: 74.74223375320435
1545
+ - type: cos_sim_precision
1546
+ value: 94.23076923076923
1547
+ - type: cos_sim_recall
1548
+ value: 93.10000000000001
1549
+ - type: dot_accuracy
1550
+ value: 99.87524752475248
1551
+ - type: dot_accuracy_threshold
1552
+ value: 74.86587762832642
1553
+ - type: dot_ap
1554
+ value: 97.02222688043362
1555
+ - type: dot_f1
1556
+ value: 93.66197183098592
1557
+ - type: dot_f1_threshold
1558
+ value: 74.74223375320435
1559
+ - type: dot_precision
1560
+ value: 94.23076923076923
1561
+ - type: dot_recall
1562
+ value: 93.10000000000001
1563
+ - type: euclidean_accuracy
1564
+ value: 99.87524752475248
1565
+ - type: euclidean_accuracy_threshold
1566
+ value: 70.9000825881958
1567
+ - type: euclidean_ap
1568
+ value: 97.02222446606329
1569
+ - type: euclidean_f1
1570
+ value: 93.66197183098592
1571
+ - type: euclidean_f1_threshold
1572
+ value: 71.07426524162292
1573
+ - type: euclidean_precision
1574
+ value: 94.23076923076923
1575
+ - type: euclidean_recall
1576
+ value: 93.10000000000001
1577
+ - type: manhattan_accuracy
1578
+ value: 99.87623762376238
1579
+ - type: manhattan_accuracy_threshold
1580
+ value: 3588.5040283203125
1581
+ - type: manhattan_ap
1582
+ value: 97.09194643777883
1583
+ - type: manhattan_f1
1584
+ value: 93.7375745526839
1585
+ - type: manhattan_f1_threshold
1586
+ value: 3664.3760681152344
1587
+ - type: manhattan_precision
1588
+ value: 93.18181818181817
1589
+ - type: manhattan_recall
1590
+ value: 94.3
1591
+ - type: max_accuracy
1592
+ value: 99.87623762376238
1593
+ - type: max_ap
1594
+ value: 97.09194643777883
1595
+ - type: max_f1
1596
+ value: 93.7375745526839
1597
+ task:
1598
+ type: PairClassification
1599
+ - dataset:
1600
+ config: default
1601
+ name: MTEB StackExchangeClustering
1602
+ revision: 6cbc1f7b2bc0622f2e39d2c77fa502909748c259
1603
+ split: test
1604
+ type: mteb/stackexchange-clustering
1605
+ metrics:
1606
+ - type: main_score
1607
+ value: 82.10134099988541
1608
+ - type: v_measure
1609
+ value: 82.10134099988541
1610
+ - type: v_measure_std
1611
+ value: 2.7926349897769533
1612
+ task:
1613
+ type: Clustering
1614
+ - dataset:
1615
+ config: default
1616
+ name: MTEB StackExchangeClusteringP2P
1617
+ revision: 815ca46b2622cec33ccafc3735d572c266efdb44
1618
+ split: test
1619
+ type: mteb/stackexchange-clustering-p2p
1620
+ metrics:
1621
+ - type: main_score
1622
+ value: 48.357450742397404
1623
+ - type: v_measure
1624
+ value: 48.357450742397404
1625
+ - type: v_measure_std
1626
+ value: 1.520118876440547
1627
+ task:
1628
+ type: Clustering
1629
+ - dataset:
1630
+ config: default
1631
+ name: MTEB StackOverflowDupQuestions
1632
+ revision: e185fbe320c72810689fc5848eb6114e1ef5ec69
1633
+ split: test
1634
+ type: mteb/stackoverflowdupquestions-reranking
1635
+ metrics:
1636
+ - type: map
1637
+ value: 55.79277200802986
1638
+ - type: mrr
1639
+ value: 56.742517082590616
1640
+ - type: main_score
1641
+ value: 55.79277200802986
1642
+ task:
1643
+ type: Reranking
1644
+ - dataset:
1645
+ config: default
1646
+ name: MTEB SummEval
1647
+ revision: cda12ad7615edc362dbf25a00fdd61d3b1eaf93c
1648
+ split: test
1649
+ type: mteb/summeval
1650
+ metrics:
1651
+ - type: cosine_spearman
1652
+ value: 30.701215774712693
1653
+ - type: cosine_pearson
1654
+ value: 31.26740037278488
1655
+ - type: dot_spearman
1656
+ value: 30.701215774712693
1657
+ - type: dot_pearson
1658
+ value: 31.267404144879997
1659
+ - type: main_score
1660
+ value: 30.701215774712693
1661
+ task:
1662
+ type: Summarization
1663
+ - dataset:
1664
+ config: default
1665
+ name: MTEB TRECCOVID
1666
+ revision: bb9466bac8153a0349341eb1b22e06409e78ef4e
1667
+ split: test
1668
+ type: mteb/trec-covid
1669
+ metrics:
1670
+ - type: map_at_1
1671
+ value: 0.23800000000000002
1672
+ - type: map_at_10
1673
+ value: 2.31
1674
+ - type: map_at_100
1675
+ value: 15.495000000000001
1676
+ - type: map_at_1000
1677
+ value: 38.829
1678
+ - type: map_at_3
1679
+ value: 0.72
1680
+ - type: map_at_5
1681
+ value: 1.185
1682
+ - type: mrr_at_1
1683
+ value: 0.0
1684
+ - type: mrr_at_10
1685
+ value: 0.0
1686
+ - type: mrr_at_100
1687
+ value: 0.0
1688
+ - type: mrr_at_1000
1689
+ value: 0.0
1690
+ - type: mrr_at_3
1691
+ value: 0.0
1692
+ - type: mrr_at_5
1693
+ value: 0.0
1694
+ - type: ndcg_at_1
1695
+ value: 91.0
1696
+ - type: ndcg_at_10
1697
+ value: 88.442
1698
+ - type: ndcg_at_100
1699
+ value: 71.39
1700
+ - type: ndcg_at_1000
1701
+ value: 64.153
1702
+ - type: ndcg_at_3
1703
+ value: 89.877
1704
+ - type: ndcg_at_5
1705
+ value: 89.562
1706
+ - type: precision_at_1
1707
+ value: 92.0
1708
+ - type: precision_at_10
1709
+ value: 92.60000000000001
1710
+ - type: precision_at_100
1711
+ value: 73.74000000000001
1712
+ - type: precision_at_1000
1713
+ value: 28.222
1714
+ - type: precision_at_3
1715
+ value: 94.0
1716
+ - type: precision_at_5
1717
+ value: 93.60000000000001
1718
+ - type: recall_at_1
1719
+ value: 0.23800000000000002
1720
+ - type: recall_at_10
1721
+ value: 2.428
1722
+ - type: recall_at_100
1723
+ value: 18.099999999999998
1724
+ - type: recall_at_1000
1725
+ value: 60.79599999999999
1726
+ - type: recall_at_3
1727
+ value: 0.749
1728
+ - type: recall_at_5
1729
+ value: 1.238
1730
+ - type: main_score
1731
+ value: 88.442
1732
+ task:
1733
+ type: Retrieval
1734
+ - dataset:
1735
+ config: default
1736
+ name: MTEB Touche2020
1737
+ revision: a34f9a33db75fa0cbb21bb5cfc3dae8dc8bec93f
1738
+ split: test
1739
+ type: mteb/touche2020
1740
+ metrics:
1741
+ - type: map_at_1
1742
+ value: 3.4939999999999998
1743
+ - type: map_at_10
1744
+ value: 12.531999999999998
1745
+ - type: map_at_100
1746
+ value: 19.147
1747
+ - type: map_at_1000
1748
+ value: 20.861
1749
+ - type: map_at_3
1750
+ value: 7.558
1751
+ - type: map_at_5
1752
+ value: 9.49
1753
+ - type: mrr_at_1
1754
+ value: 0.0
1755
+ - type: mrr_at_10
1756
+ value: 0.0
1757
+ - type: mrr_at_100
1758
+ value: 0.0
1759
+ - type: mrr_at_1000
1760
+ value: 0.0
1761
+ - type: mrr_at_3
1762
+ value: 0.0
1763
+ - type: mrr_at_5
1764
+ value: 0.0
1765
+ - type: ndcg_at_1
1766
+ value: 47.959
1767
+ - type: ndcg_at_10
1768
+ value: 31.781
1769
+ - type: ndcg_at_100
1770
+ value: 42.131
1771
+ - type: ndcg_at_1000
1772
+ value: 53.493
1773
+ - type: ndcg_at_3
1774
+ value: 39.204
1775
+ - type: ndcg_at_5
1776
+ value: 34.635
1777
+ - type: precision_at_1
1778
+ value: 48.980000000000004
1779
+ - type: precision_at_10
1780
+ value: 27.143
1781
+ - type: precision_at_100
1782
+ value: 8.224
1783
+ - type: precision_at_1000
1784
+ value: 1.584
1785
+ - type: precision_at_3
1786
+ value: 38.775999999999996
1787
+ - type: precision_at_5
1788
+ value: 33.061
1789
+ - type: recall_at_1
1790
+ value: 3.4939999999999998
1791
+ - type: recall_at_10
1792
+ value: 18.895
1793
+ - type: recall_at_100
1794
+ value: 50.192
1795
+ - type: recall_at_1000
1796
+ value: 85.167
1797
+ - type: recall_at_3
1798
+ value: 8.703
1799
+ - type: recall_at_5
1800
+ value: 11.824
1801
+ - type: main_score
1802
+ value: 31.781
1803
+ task:
1804
+ type: Retrieval
1805
+ - dataset:
1806
+ config: default
1807
+ name: MTEB ToxicConversationsClassification
1808
+ revision: edfaf9da55d3dd50d43143d90c1ac476895ae6de
1809
+ split: test
1810
+ type: mteb/toxic_conversations_50k
1811
+ metrics:
1812
+ - type: accuracy
1813
+ value: 92.7402
1814
+ - type: accuracy_stderr
1815
+ value: 1.020764595781027
1816
+ - type: ap
1817
+ value: 44.38594756333084
1818
+ - type: ap_stderr
1819
+ value: 1.817150701258273
1820
+ - type: f1
1821
+ value: 79.95699280019547
1822
+ - type: f1_stderr
1823
+ value: 1.334582498702029
1824
+ - type: main_score
1825
+ value: 92.7402
1826
+ task:
1827
+ type: Classification
1828
+ - dataset:
1829
+ config: default
1830
+ name: MTEB TweetSentimentExtractionClassification
1831
+ revision: d604517c81ca91fe16a244d1248fc021f9ecee7a
1832
+ split: test
1833
+ type: mteb/tweet_sentiment_extraction
1834
+ metrics:
1835
+ - type: accuracy
1836
+ value: 80.86870401810978
1837
+ - type: accuracy_stderr
1838
+ value: 0.22688467782004712
1839
+ - type: f1
1840
+ value: 81.1829040745744
1841
+ - type: f1_stderr
1842
+ value: 0.19774920574849694
1843
+ - type: main_score
1844
+ value: 80.86870401810978
1845
+ task:
1846
+ type: Classification
1847
+ - dataset:
1848
+ config: default
1849
+ name: MTEB TwentyNewsgroupsClustering
1850
+ revision: 6125ec4e24fa026cec8a478383ee943acfbd5449
1851
+ split: test
1852
+ type: mteb/twentynewsgroups-clustering
1853
+ metrics:
1854
+ - type: main_score
1855
+ value: 64.82048869927482
1856
+ - type: v_measure
1857
+ value: 64.82048869927482
1858
+ - type: v_measure_std
1859
+ value: 0.9170394252450564
1860
+ task:
1861
+ type: Clustering
1862
+ - dataset:
1863
+ config: default
1864
+ name: MTEB TwitterSemEval2015
1865
+ revision: 70970daeab8776df92f5ea462b6173c0b46fd2d1
1866
+ split: test
1867
+ type: mteb/twittersemeval2015-pairclassification
1868
+ metrics:
1869
+ - type: cos_sim_accuracy
1870
+ value: 88.44251057996067
1871
+ - type: cos_sim_accuracy_threshold
1872
+ value: 70.2150285243988
1873
+ - type: cos_sim_ap
1874
+ value: 81.11422351199913
1875
+ - type: cos_sim_f1
1876
+ value: 73.71062868615887
1877
+ - type: cos_sim_f1_threshold
1878
+ value: 66.507488489151
1879
+ - type: cos_sim_precision
1880
+ value: 70.2799712849964
1881
+ - type: cos_sim_recall
1882
+ value: 77.4934036939314
1883
+ - type: dot_accuracy
1884
+ value: 88.44251057996067
1885
+ - type: dot_accuracy_threshold
1886
+ value: 70.2150285243988
1887
+ - type: dot_ap
1888
+ value: 81.11420529068658
1889
+ - type: dot_f1
1890
+ value: 73.71062868615887
1891
+ - type: dot_f1_threshold
1892
+ value: 66.50749444961548
1893
+ - type: dot_precision
1894
+ value: 70.2799712849964
1895
+ - type: dot_recall
1896
+ value: 77.4934036939314
1897
+ - type: euclidean_accuracy
1898
+ value: 88.44251057996067
1899
+ - type: euclidean_accuracy_threshold
1900
+ value: 77.18156576156616
1901
+ - type: euclidean_ap
1902
+ value: 81.11422421732487
1903
+ - type: euclidean_f1
1904
+ value: 73.71062868615887
1905
+ - type: euclidean_f1_threshold
1906
+ value: 81.84436559677124
1907
+ - type: euclidean_precision
1908
+ value: 70.2799712849964
1909
+ - type: euclidean_recall
1910
+ value: 77.4934036939314
1911
+ - type: manhattan_accuracy
1912
+ value: 88.26369434344639
1913
+ - type: manhattan_accuracy_threshold
1914
+ value: 3837.067413330078
1915
+ - type: manhattan_ap
1916
+ value: 80.81442360477725
1917
+ - type: manhattan_f1
1918
+ value: 73.39883099117024
1919
+ - type: manhattan_f1_threshold
1920
+ value: 4098.833847045898
1921
+ - type: manhattan_precision
1922
+ value: 69.41896024464832
1923
+ - type: manhattan_recall
1924
+ value: 77.86279683377309
1925
+ - type: max_accuracy
1926
+ value: 88.44251057996067
1927
+ - type: max_ap
1928
+ value: 81.11422421732487
1929
+ - type: max_f1
1930
+ value: 73.71062868615887
1931
+ task:
1932
+ type: PairClassification
1933
+ - dataset:
1934
+ config: default
1935
+ name: MTEB TwitterURLCorpus
1936
+ revision: 8b6510b0b1fa4e4c4f879467980e9be563ec1cdf
1937
+ split: test
1938
+ type: mteb/twitterurlcorpus-pairclassification
1939
+ metrics:
1940
+ - type: cos_sim_accuracy
1941
+ value: 90.03182365040556
1942
+ - type: cos_sim_accuracy_threshold
1943
+ value: 64.46443796157837
1944
+ - type: cos_sim_ap
1945
+ value: 87.86649113691112
1946
+ - type: cos_sim_f1
1947
+ value: 80.45644844577821
1948
+ - type: cos_sim_f1_threshold
1949
+ value: 61.40774488449097
1950
+ - type: cos_sim_precision
1951
+ value: 77.54052702992216
1952
+ - type: cos_sim_recall
1953
+ value: 83.60024638127503
1954
+ - type: dot_accuracy
1955
+ value: 90.03182365040556
1956
+ - type: dot_accuracy_threshold
1957
+ value: 64.46444988250732
1958
+ - type: dot_ap
1959
+ value: 87.86649011954319
1960
+ - type: dot_f1
1961
+ value: 80.45644844577821
1962
+ - type: dot_f1_threshold
1963
+ value: 61.407750844955444
1964
+ - type: dot_precision
1965
+ value: 77.54052702992216
1966
+ - type: dot_recall
1967
+ value: 83.60024638127503
1968
+ - type: euclidean_accuracy
1969
+ value: 90.03182365040556
1970
+ - type: euclidean_accuracy_threshold
1971
+ value: 84.30368900299072
1972
+ - type: euclidean_ap
1973
+ value: 87.86649114275045
1974
+ - type: euclidean_f1
1975
+ value: 80.45644844577821
1976
+ - type: euclidean_f1_threshold
1977
+ value: 87.8547191619873
1978
+ - type: euclidean_precision
1979
+ value: 77.54052702992216
1980
+ - type: euclidean_recall
1981
+ value: 83.60024638127503
1982
+ - type: manhattan_accuracy
1983
+ value: 89.99883572010712
1984
+ - type: manhattan_accuracy_threshold
1985
+ value: 4206.838607788086
1986
+ - type: manhattan_ap
1987
+ value: 87.8600826607838
1988
+ - type: manhattan_f1
1989
+ value: 80.44054508120217
1990
+ - type: manhattan_f1_threshold
1991
+ value: 4372.755432128906
1992
+ - type: manhattan_precision
1993
+ value: 78.08219178082192
1994
+ - type: manhattan_recall
1995
+ value: 82.94579611949491
1996
+ - type: max_accuracy
1997
+ value: 90.03182365040556
1998
+ - type: max_ap
1999
+ value: 87.86649114275045
2000
+ - type: max_f1
2001
+ value: 80.45644844577821
2002
+ task:
2003
+ type: PairClassification
2004
+ language:
2005
+ - en
2006
+ license: cc-by-nc-4.0
2007
+ library_name: transformers
2008
+ ---
2009
+ ## Introduction
2010
+ We present NV-Embed-v2, a generalist embedding model that ranks No. 1 on the Massive Text Embedding Benchmark ([MTEB benchmark](https://huggingface.co/spaces/mteb/leaderboard)) (as of Aug 30, 2024) with a score of 72.31 across 56 text embedding tasks. It also holds the No. 1 spot in the retrieval sub-category (a score of 62.65 across 15 tasks) on the leaderboard, which is essential to the development of RAG technology.
2011
+
2012
+ NV-Embed-v2 introduces several new designs, including having the LLM attend to latent vectors for better pooled embedding output, and a two-stage instruction tuning method that enhances the accuracy of both retrieval and non-retrieval tasks. Additionally, NV-Embed-v2 incorporates a novel hard-negative mining method that takes the positive relevance score into account to better remove false negatives.
2013
+
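To make the mining idea above concrete, here is a minimal, hypothetical sketch (not the released training code) of positive-aware hard-negative filtering: candidate negatives whose similarity to the query exceeds a fraction of the positive passage's score are treated as likely false negatives and discarded. The function name, margin value, and scores are all illustrative assumptions.

```python
# Hypothetical sketch of positive-aware hard-negative filtering.
# Candidates scoring at or above margin * positive_score are likely
# unlabeled positives (false negatives) and are dropped.
def filter_hard_negatives(pos_score, candidates, margin=0.95):
    """Keep (doc, score) candidates scoring below margin * pos_score."""
    return [(doc, s) for doc, s in candidates if s < margin * pos_score]

# Made-up retrieval scores for three candidate negatives:
candidates = [("d1", 0.92), ("d2", 0.70), ("d3", 0.55)]
print(filter_hard_negatives(0.90, candidates))
# [('d2', 0.7), ('d3', 0.55)]  -- d1 (0.92 >= 0.855) is dropped as a likely false negative
```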
2014
+ For more technical details, refer to our paper: [NV-Embed: Improved Techniques for Training LLMs as Generalist Embedding Models](https://arxiv.org/pdf/2405.17428).
2015
+
2016
+ ## Model Details
2017
+ - Base Decoder-only LLM: [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1)
2018
+ - Pooling Type: Latent-Attention
2019
+ - Embedding Dimension: 4096
2020
+
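The latent-attention pooling listed above can be sketched roughly as follows: token hidden states cross-attend to a small set of trainable latent vectors, and the attended output is mean-pooled into a single embedding. This is an illustrative toy implementation under assumed dimensions, not the model's actual code; see the paper for the real architecture.

```python
# Toy sketch of latent-attention pooling (illustrative only; dimensions
# are tiny here, while NV-Embed-v2 outputs 4096-dim embeddings).
import torch
import torch.nn as nn

class LatentAttentionPooling(nn.Module):
    def __init__(self, hidden_dim: int, num_latents: int = 16, num_heads: int = 4):
        super().__init__()
        # Trainable latent array the token states attend to
        self.latents = nn.Parameter(torch.randn(num_latents, hidden_dim))
        self.cross_attn = nn.MultiheadAttention(hidden_dim, num_heads, batch_first=True)

    def forward(self, token_states: torch.Tensor) -> torch.Tensor:
        # token_states: (batch, seq_len, hidden_dim)
        batch = token_states.size(0)
        latents = self.latents.unsqueeze(0).expand(batch, -1, -1)
        # Tokens act as queries; latents act as keys and values
        attended, _ = self.cross_attn(token_states, latents, latents)
        # Mean-pool the attended sequence into one embedding per input
        return attended.mean(dim=1)

pooler = LatentAttentionPooling(hidden_dim=64)
out = pooler(torch.randn(2, 10, 64))
print(out.shape)  # torch.Size([2, 64])
```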
2021
+ ## How to use
2022
+
2023
+ Here are examples of how to encode queries and passages using Hugging Face Transformers and Sentence-Transformers. Please find the required package versions [here](https://huggingface.co/nvidia/NV-Embed-v2#2-required-packages).
2024
+
2025
+ ### Usage (HuggingFace Transformers)
2026
+
2027
+ ```python
2028
+ import torch
2029
+ import torch.nn.functional as F
2030
+ from transformers import AutoTokenizer, AutoModel
2031
+
2032
+ # Each query needs to be accompanied by a corresponding instruction describing the task.
2033
+ task_name_to_instruct = {"example": "Given a question, retrieve passages that answer the question",}
2034
+
2035
+ query_prefix = "Instruct: "+task_name_to_instruct["example"]+"\nQuery: "
2036
+ queries = [
2037
+ 'are judo throws allowed in wrestling?',
2038
+ 'how to become a radiology technician in michigan?'
2039
+ ]
2040
+
2041
+ # No instruction needed for retrieval passages
2042
+ passage_prefix = ""
2043
+ passages = [
2044
+ "Since you're reading this, you are probably someone from a judo background or someone who is just wondering how judo techniques can be applied under wrestling rules. So without further ado, let's get to the question. Are Judo throws allowed in wrestling? Yes, judo throws are allowed in freestyle and folkstyle wrestling. You only need to be careful to follow the slam rules when executing judo throws. In wrestling, a slam is lifting and returning an opponent to the mat with unnecessary force.",
2045
+ "Below are the basic steps to becoming a radiologic technologist in Michigan:Earn a high school diploma. As with most careers in health care, a high school education is the first step to finding entry-level employment. Taking classes in math and science, such as anatomy, biology, chemistry, physiology, and physics, can help prepare students for their college studies and future careers.Earn an associate degree. Entry-level radiologic positions typically require at least an Associate of Applied Science. Before enrolling in one of these degree programs, students should make sure it has been properly accredited by the Joint Review Committee on Education in Radiologic Technology (JRCERT).Get licensed or certified in the state of Michigan."
2046
+ ]
2047
+
2048
+ # load the model (trust_remote_code is required for the custom NV-Embed architecture)
2049
+ model = AutoModel.from_pretrained('nvidia/NV-Embed-v2', trust_remote_code=True)
2050
+
2051
+ # get the embeddings
2052
+ max_length = 32768
2053
+ query_embeddings = model.encode(queries, instruction=query_prefix, max_length=max_length)
2054
+ passage_embeddings = model.encode(passages, instruction=passage_prefix, max_length=max_length)
2055
+
2056
+ # normalize embeddings
2057
+ query_embeddings = F.normalize(query_embeddings, p=2, dim=1)
2058
+ passage_embeddings = F.normalize(passage_embeddings, p=2, dim=1)
2059
+
2060
+ # get the embeddings with DataLoader (splitting the dataset into multiple mini-batches)
2061
+ # batch_size=2
2062
+ # query_embeddings = model._do_encode(queries, batch_size=batch_size, instruction=query_prefix, max_length=max_length, num_workers=32, return_numpy=True)
2063
+ # passage_embeddings = model._do_encode(passages, batch_size=batch_size, instruction=passage_prefix, max_length=max_length, num_workers=32, return_numpy=True)
2064
+
2065
+ scores = (query_embeddings @ passage_embeddings.T) * 100
2066
+ print(scores.tolist())
2067
+ # [[87.42693328857422, 0.46283677220344543], [0.965264618396759, 86.03721618652344]]
2068
+ ```
2069
+
2070
+
2071
+ ### Usage (Sentence-Transformers)
2072
+
2073
+ ```python
2074
+ import torch
2075
+ from sentence_transformers import SentenceTransformer
2076
+
2077
+ # Each query needs to be accompanied by a corresponding instruction describing the task.
2078
+ task_name_to_instruct = {"example": "Given a question, retrieve passages that answer the question",}
2079
+
2080
+ query_prefix = "Instruct: "+task_name_to_instruct["example"]+"\nQuery: "
2081
+ queries = [
2082
+ 'are judo throws allowed in wrestling?',
2083
+ 'how to become a radiology technician in michigan?'
2084
+ ]
2085
+
2086
+ # No instruction needed for retrieval passages
2087
+ passages = [
2088
+ "Since you're reading this, you are probably someone from a judo background or someone who is just wondering how judo techniques can be applied under wrestling rules. So without further ado, let's get to the question. Are Judo throws allowed in wrestling? Yes, judo throws are allowed in freestyle and folkstyle wrestling. You only need to be careful to follow the slam rules when executing judo throws. In wrestling, a slam is lifting and returning an opponent to the mat with unnecessary force.",
2089
+ "Below are the basic steps to becoming a radiologic technologist in Michigan:Earn a high school diploma. As with most careers in health care, a high school education is the first step to finding entry-level employment. Taking classes in math and science, such as anatomy, biology, chemistry, physiology, and physics, can help prepare students for their college studies and future careers.Earn an associate degree. Entry-level radiologic positions typically require at least an Associate of Applied Science. Before enrolling in one of these degree programs, students should make sure it has been properly accredited by the Joint Review Committee on Education in Radiologic Technology (JRCERT).Get licensed or certified in the state of Michigan."
2090
+ ]
2091
+
2092
+ # load model with tokenizer
2093
+ model = SentenceTransformer('nvidia/NV-Embed-v2', trust_remote_code=True)
2094
+ model.max_seq_length = 32768
2095
+ model.tokenizer.padding_side="right"
2096
+
2097
+ def add_eos(input_examples):
2098
+ input_examples = [input_example + model.tokenizer.eos_token for input_example in input_examples]
2099
+ return input_examples
2100
+
2101
+ # get the embeddings
2102
+ batch_size = 2
2103
+ query_embeddings = model.encode(add_eos(queries), batch_size=batch_size, prompt=query_prefix, normalize_embeddings=True)
2104
+ passage_embeddings = model.encode(add_eos(passages), batch_size=batch_size, normalize_embeddings=True)
2105
+
2106
+ scores = (query_embeddings @ passage_embeddings.T) * 100
2107
+ print(scores.tolist())
2108
+ ```
2109
+
2110
+ ## License
2111
+ This model must not be used for any commercial purpose. Refer to the [license](https://spdx.org/licenses/CC-BY-NC-4.0) for the detailed terms.
2112
+
2113
+ For commercial use, we recommend the models from [NeMo Retriever Microservices (NIMs)](https://build.nvidia.com/explore/retrieval).
2114
+
2115
+
2116
+ ## Correspondence to
2117
+ Chankyu Lee (chankyul@nvidia.com), Rajarshi Roy (rajarshir@nvidia.com), Wei Ping (wping@nvidia.com)
2118
+
2119
+
2120
+ ## Citation
2121
+ If you find this code useful in your research, please consider citing:
2122
+
2123
+ ```bibtex
2124
+ @article{lee2024nv,
2125
+ title={NV-Embed: Improved Techniques for Training LLMs as Generalist Embedding Models},
2126
+ author={Lee, Chankyu and Roy, Rajarshi and Xu, Mengyao and Raiman, Jonathan and Shoeybi, Mohammad and Catanzaro, Bryan and Ping, Wei},
2127
+ journal={arXiv preprint arXiv:2405.17428},
2128
+ year={2024}
2129
+ }
2130
+ ```
2131
+ ```bibtex
2132
+ @article{moreira2024nv,
2133
+ title={NV-Retriever: Improving text embedding models with effective hard-negative mining},
2134
+ author={Moreira, Gabriel de Souza P and Osmulski, Radek and Xu, Mengyao and Ak, Ronay and Schifferer, Benedikt and Oldridge, Even},
2135
+ journal={arXiv preprint arXiv:2407.15831},
2136
+ year={2024}
2137
+ }
2138
+ ```
2139
+
2140
+
2141
+ ## Troubleshooting
2142
+
2143
+ #### 1. Instruction template for MTEB benchmarks
2144
+
2145
+ For the MTEB retrieval, STS, and summarization sub-tasks, please use the instruction prefix template in [instructions.json](https://huggingface.co/nvidia/NV-Embed-v2/blob/main/instructions.json). For classification, clustering, and reranking, please use the instructions provided in Table 7 of the [NV-Embed paper](https://arxiv.org/pdf/2405.17428).
2146
+
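As a minimal sketch of how the instruction template is applied: retrieval entries in instructions.json carry a `"query"` instruction (the `"corpus"` side is left empty), and the query prefix follows the same `"Instruct: ...\nQuery: "` pattern used in the usage examples above. The excerpt below inlines two entries for illustration; in practice you would load the downloaded instructions.json file instead.

```python
import json

# Excerpt mirroring two retrieval entries from instructions.json
# (inlined here so the sketch is self-contained).
instructions = json.loads("""
{
  "NQ": {"query": "Given a question, retrieve passages that answer the question", "corpus": ""},
  "SciFact": {"query": "Given a scientific claim, retrieve documents that support or refute the claim", "corpus": ""}
}
""")

def build_query_prefix(task: str) -> str:
    # Same template as the usage examples: "Instruct: <task instruction>\nQuery: "
    return "Instruct: " + instructions[task]["query"] + "\nQuery: "

print(build_query_prefix("NQ"))
```

The resulting string is what you would pass as the `prompt` argument of `model.encode(...)` for queries; passages are encoded without any prefix.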
2147
+ #### 2. Required Packages
2148
+
2149
+ If you run into issues, try installing the Python packages below:
2150
+ ```bash
2151
+ pip uninstall -y transformer-engine
2152
+ pip install torch==2.2.0
2153
+ pip install transformers==4.42.4
2154
+ pip install flash-attn==2.2.0
2155
+ pip install sentence-transformers==2.7.0
2156
+ ```
2157
+
2158
+ #### 3. How to enable multi-GPU (note: this applies to the HuggingFace Transformers usage)
2159
+ ```python
2160
+ from transformers import AutoModel
2161
+ from torch.nn import DataParallel
2162
+
2163
+ embedding_model = AutoModel.from_pretrained("nvidia/NV-Embed-v2")
2164
+ for module_key, module in embedding_model._modules.items():
2165
+ embedding_model._modules[module_key] = DataParallel(module)
2166
+ ```
2167
+
2168
+ #### 4. Fixing "nvidia/NV-Embed-v2 is not the path to a directory containing a file named config.json"
2169
+
2170
+ Switch to your local model path, then open config.json and replace the value of **"_name_or_path"** with your local model path.
2171
+
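The fix above can be sketched programmatically. This is a self-contained illustration: a temporary directory stands in for your actual local model directory, and a minimal config.json stands in for the real one.

```python
import json
import os
import tempfile

# A temporary directory stands in for your local NV-Embed-v2 download.
local_path = tempfile.mkdtemp()
cfg_file = os.path.join(local_path, "config.json")

# Minimal stand-in config pointing at the hub repo id.
with open(cfg_file, "w") as f:
    json.dump({"_name_or_path": "nvidia/NV-Embed-v2", "model_type": "nvembed"}, f)

# The fix: rewrite "_name_or_path" to the local model directory.
with open(cfg_file) as f:
    cfg = json.load(f)
cfg["_name_or_path"] = local_path
with open(cfg_file, "w") as f:
    json.dump(cfg, f, indent=2)
```

After this change, `AutoModel.from_pretrained(local_path, ...)` resolves the config from the local directory instead of trying to re-fetch the hub repo.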
2172
+
2173
+ #### 5. Access to model nvidia/NV-Embed-v2 is restricted. You must be authenticated to access it
2174
+
2175
+ Log in with your Hugging Face access [token](https://huggingface.co/settings/tokens) by running *"huggingface-cli login"*.
2176
+
2177
+ #### 6. How to resolve a slight mismatch in Sentence Transformers results
2178
+
2179
+ A slight mismatch in the Sentence Transformer implementation is caused by a discrepancy in the calculation of the instruction prefix length within the Sentence Transformer package.
2180
+
2181
+ To fix this issue, build the Sentence Transformers package from source, applying the modification described below to this [line](https://github.com/UKPLab/sentence-transformers/blob/v2.7-release/sentence_transformers/SentenceTransformer.py#L353).
2182
+ ```bash
2183
+ git clone https://github.com/UKPLab/sentence-transformers.git
2184
+ cd sentence-transformers
2185
+ git checkout v2.7-release
2186
+ # Modify L353 in SentenceTransformer.py to **'extra_features["prompt_length"] = tokenized_prompt["input_ids"].shape[-1]'**.
2187
+ pip install -e .
2188
+ ```
config.json ADDED
@@ -0,0 +1,101 @@
1
+ {
2
+ "_name_or_path": "nvidia/NV-Embed-v2",
3
+ "add_eos": true,
4
+ "add_pad_token": true,
5
+ "architectures": [
6
+ "NVEmbedModel"
7
+ ],
8
+ "auto_map": {
9
+ "AutoConfig": "configuration_nvembed.NVEmbedConfig",
10
+ "AutoModel": "modeling_nvembed.NVEmbedModel"
11
+ },
12
+ "hidden_size": 4096,
13
+ "is_mask_instruction": true,
14
+ "latent_attention_config": {
15
+ "model_type": "latent_attention"
16
+ },
17
+ "mask_type": "b",
18
+ "model_type": "nvembed",
19
+ "padding_side": "right",
20
+ "text_config": {
21
+ "_name_or_path": "nvidia/NV-Embed-v2",
22
+ "add_cross_attention": false,
23
+ "architectures": [
24
+ "MistralModel"
25
+ ],
26
+ "attention_dropout": 0.0,
27
+ "bad_words_ids": null,
28
+ "begin_suppress_tokens": null,
29
+ "bos_token_id": 1,
30
+ "chunk_size_feed_forward": 0,
31
+ "cross_attention_hidden_size": null,
32
+ "decoder_start_token_id": null,
33
+ "diversity_penalty": 0.0,
34
+ "do_sample": false,
35
+ "early_stopping": false,
36
+ "encoder_no_repeat_ngram_size": 0,
37
+ "eos_token_id": 2,
38
+ "exponential_decay_length_penalty": null,
39
+ "finetuning_task": null,
40
+ "forced_bos_token_id": null,
41
+ "forced_eos_token_id": null,
42
+ "hidden_act": "silu",
43
+ "hidden_size": 4096,
44
+ "id2label": {
45
+ "0": "LABEL_0",
46
+ "1": "LABEL_1"
47
+ },
48
+ "initializer_range": 0.02,
49
+ "intermediate_size": 14336,
50
+ "is_decoder": false,
51
+ "is_encoder_decoder": false,
52
+ "label2id": {
53
+ "LABEL_0": 0,
54
+ "LABEL_1": 1
55
+ },
56
+ "length_penalty": 1.0,
57
+ "max_length": 20,
58
+ "max_position_embeddings": 32768,
59
+ "min_length": 0,
60
+ "model_type": "bidir_mistral",
61
+ "no_repeat_ngram_size": 0,
62
+ "num_attention_heads": 32,
63
+ "num_beam_groups": 1,
64
+ "num_beams": 1,
65
+ "num_hidden_layers": 32,
66
+ "num_key_value_heads": 8,
67
+ "num_return_sequences": 1,
68
+ "output_attentions": false,
69
+ "output_hidden_states": false,
70
+ "output_scores": false,
71
+ "pad_token_id": null,
72
+ "prefix": null,
73
+ "problem_type": null,
74
+ "pruned_heads": {},
75
+ "remove_invalid_values": false,
76
+ "repetition_penalty": 1.0,
77
+ "return_dict": true,
78
+ "return_dict_in_generate": false,
79
+ "rms_norm_eps": 1e-05,
80
+ "rope_theta": 10000.0,
81
+ "sep_token_id": null,
82
+ "sliding_window": 4096,
83
+ "suppress_tokens": null,
84
+ "task_specific_params": null,
85
+ "temperature": 1.0,
86
+ "tf_legacy_loss": false,
87
+ "tie_encoder_decoder": false,
88
+ "tie_word_embeddings": false,
89
+ "tokenizer_class": null,
90
+ "top_k": 50,
91
+ "top_p": 1.0,
92
+ "torch_dtype": "float32",
93
+ "torchscript": false,
94
+ "typical_p": 1.0,
95
+ "use_bfloat16": false,
96
+ "use_cache": true,
97
+ "vocab_size": 32000
98
+ },
99
+ "torch_dtype": "float16",
100
+ "transformers_version": "4.42.4"
101
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "4.2.0",
4
+ "transformers": "4.47.0",
5
+ "pytorch": "2.5.1+cu12"
6
+ },
7
+ "prompts": {
8
+ "Banking77Classification": "Instruct: Given a question, please describe the intent of this question. \n Question: ",
9
+ "MTOPIntentClassification": "Instruct: Given a question, please describe the intent of this question. \n Question: ",
10
+ "TweetSentimentClassification": "Classify the sentiment of a given tweet as either positive, negative, or neutral.",
11
+ "BiorxivClusteringP2P.v2": "Identify the main category of Biorxiv papers based on the titles and abstracts",
12
+ "BiorxivClusteringS2S.v2": "Identify the main category of Biorxiv papers based on the titles",
13
+ "TwentyNewsgroupsClustering": "Identify the topic or theme of the given news articles",
14
+ "FiQA2018": {
15
+ "query": "Given a financial question, retrieve relevant passages that answer the query"
16
+ },
17
+ "SciFact": {
18
+ "query": "Given a scientific claim, retrieve documents that support or refute the claim"
19
+ },
20
+ "NFCorpus": {
21
+ "query": "Given a question, retrieve relevant documents that answer the question"
22
+ }
23
+ },
24
+ "default_prompt_name": null,
25
+ "model_type": "SparseEncoder",
26
+ "similarity_fn_name": "dot"
27
+ }
configuration_nvembed.py ADDED
@@ -0,0 +1,92 @@
1
+
2
+ from typing import Literal
3
+ from transformers import AutoConfig
4
+ from transformers.configuration_utils import PretrainedConfig
5
+ from transformers.models.auto import CONFIG_MAPPING
6
+ from transformers.models.mistral import MistralConfig
7
+
8
+ NVEMBED_TYPE = "nvembed"
9
+ LATENT_ATTENTION_TYPE = "latent_attention"
10
+ BIDIR_MISTRAL_TYPE = "bidir_mistral"
11
+
12
+ class NVEmbedConfig(PretrainedConfig):
13
+ model_type = "nvembed"
14
+ is_composition = False
15
+
16
+ def __init__(
17
+ self,
18
+ latent_attention_config=None,
19
+ text_config=None,
20
+ padding_side: Literal["right", "left"]="right",
21
+ add_pad_token: bool=True,
22
+ is_mask_instruction: bool = True,
23
+ add_eos: bool=True,
24
+ mask_type: str="b",
25
+ **kwargs,
26
+ ):
27
+ if isinstance(latent_attention_config, dict):
28
+ latent_attention_config["model_type"] = (
29
+ latent_attention_config["model_type"] if "model_type" in latent_attention_config else LATENT_ATTENTION_TYPE
30
+ )
31
+ latent_attention_config = CONFIG_MAPPING[latent_attention_config["model_type"]](**latent_attention_config)
32
+ elif latent_attention_config is None:
33
+ latent_attention_config = CONFIG_MAPPING[LATENT_ATTENTION_TYPE]()
34
+
35
+ self.latent_attention_config = latent_attention_config
36
+
37
+ if isinstance(text_config, dict):
38
+ text_config["model_type"] = text_config["model_type"] if "model_type" in text_config else "llama"
39
+ text_config = CONFIG_MAPPING[text_config["model_type"]](**text_config)
40
+ elif text_config is None:
41
+ text_config = None
42
+
43
+ self.text_config = text_config
44
+ self.padding_side = padding_side
45
+ self.is_mask_instruction = is_mask_instruction
46
+ self.add_pad_token = add_pad_token
47
+ self.add_eos = add_eos
48
+ self.mask_type = mask_type
49
+ if "hidden_size" in kwargs:
50
+ self.hidden_size = kwargs["hidden_size"]
51
+ else:
52
+ self.hidden_size = 4096
53
+
54
+ super().__init__(**kwargs)
55
+
56
+
57
+ class LatentAttentionConfig(PretrainedConfig):
58
+ model_type = LATENT_ATTENTION_TYPE
59
+ is_composition = False
60
+ _name_or_path = "latent_attention"
61
+
62
+ def __init__(
63
+ self,
64
+ num_latents_value: int=512,
65
+ num_cross_heads: int=8,
66
+ output_normalize: bool=True,
67
+ hidden_dim: int=4096,
68
+ latent_dim: int=4096,
69
+ cross_dim_head: int=4096,
70
+ **kwargs,
71
+ ):
72
+ self.num_latents_value = num_latents_value
73
+ self.num_cross_heads = num_cross_heads
74
+ self.output_normalize = output_normalize
75
+ self.hidden_dim = hidden_dim
76
+ self.latent_dim = latent_dim
77
+ self.cross_dim_head = cross_dim_head
78
+
79
+ super().__init__(**kwargs)
80
+
81
+
82
+ class BidirectionalMistralConfig(MistralConfig):
83
+ model_type = BIDIR_MISTRAL_TYPE
84
+ keys_to_ignore_at_inference = ["past_key_values"]
85
+
86
+ AutoConfig.register(NVEMBED_TYPE, NVEmbedConfig)
87
+ AutoConfig.register(LATENT_ATTENTION_TYPE, LatentAttentionConfig)
88
+ AutoConfig.register(BIDIR_MISTRAL_TYPE, BidirectionalMistralConfig)
89
+
90
+ NVEmbedConfig.register_for_auto_class()
91
+ LatentAttentionConfig.register_for_auto_class()
92
+ BidirectionalMistralConfig.register_for_auto_class()
instructions.json ADDED
@@ -0,0 +1,99 @@
1
+ {
2
+ "ClimateFEVER":
3
+ {
4
+ "query": "Given a claim about climate change, retrieve documents that support or refute the claim",
5
+ "corpus": ""
6
+ },
7
+ "HotpotQA":
8
+ {
9
+ "query": "Given a multi-hop question, retrieve documents that can help answer the question",
10
+ "corpus": ""
11
+ },
12
+ "FEVER":
13
+ {
14
+ "query": "Given a claim, retrieve documents that support or refute the claim",
15
+ "corpus": ""
16
+ },
17
+ "MSMARCO":
18
+ {
19
+ "query": "Given a web search query, retrieve relevant passages that answer the query",
20
+ "corpus": ""
21
+ },
22
+ "DBPedia":
23
+ {
24
+ "query": "Given a query, retrieve relevant entity descriptions from DBPedia",
25
+ "corpus": ""
26
+ },
27
+ "NQ":
28
+ {
29
+ "query": "Given a question, retrieve passages that answer the question",
30
+ "corpus": ""
31
+ },
32
+ "QuoraRetrieval":
33
+ {
34
+ "query": "Given a question, retrieve questions that are semantically equivalent to the given question",
35
+ "corpus": "Given a question, retrieve questions that are semantically equivalent to the given question"
36
+ },
37
+ "SCIDOCS":
38
+ {
39
+ "query": "Given a scientific paper title, retrieve paper abstracts that are cited by the given paper",
40
+ "corpus": ""
41
+ },
42
+ "TRECCOVID":
43
+ {
44
+ "query": "Given a query on COVID-19, retrieve documents that answer the query",
45
+ "corpus": ""
46
+ },
47
+ "Touche2020":
48
+ {
49
+ "query": "Given a question, retrieve passages that answer the question",
50
+ "corpus": ""
51
+ },
52
+ "SciFact":
53
+ {
54
+ "query": "Given a scientific claim, retrieve documents that support or refute the claim",
55
+ "corpus": ""
56
+ },
57
+ "NFCorpus":
58
+ {
59
+ "query": "Given a question, retrieve relevant documents that answer the question",
60
+ "corpus": ""
61
+ },
62
+ "ArguAna":
63
+ {
64
+ "query": "Given a claim, retrieve documents that support or refute the claim",
65
+ "corpus": ""
66
+ },
67
+ "FiQA2018":
68
+ {
69
+ "query": "Given a financial question, retrieve relevant passages that answer the query",
70
+ "corpus": ""
71
+ },
72
+ "STS":
73
+ {
74
+ "text": "Retrieve semantically similar text"
75
+ },
76
+ "SUMM":
77
+ {
78
+ "text": "Given a news summary, retrieve other semantically similar summaries"
79
+ }
80
+ ,
81
+ "Banking77Classification": {
82
+ "text": "Instruct: Given a question, please describe the intent of this question. \n Question: "
83
+ },
84
+ "MTOPIntentClassification": {
85
+ "text": "Instruct: Given a question, please describe the intent of this question. \n Question: "
86
+ },
87
+ "TweetSentimentClassification": {
88
+ "text": "Classify the sentiment of a given tweet as either positive, negative, or neutral."
89
+ },
90
+ "BiorxivClusteringP2P.v2": {
91
+ "text": "Identify the main category of Biorxiv papers based on the titles and abstracts"
92
+ },
93
+ "BiorxivClusteringS2S.v2": {
94
+ "text": "Identify the main category of Biorxiv papers based on the titles"
95
+ },
96
+ "TwentyNewsgroupsClustering.v2": {
97
+ "text": "Identify the topic or theme of the given news articles"
98
+ }
99
+ }
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ce5651268058d961eaeabd4f65a5cb5d003ac7e0e34b7095658b5d5a4802f6a
3
+ size 4997761248
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbd7e85b57afbc74fab67e50a572590ce57dde8b5fa76fe7527c42189074d57d
3
+ size 4915917048
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87c90f033107075c9531ed8163d4b087ce77e63596c8510821da15a4d892a85c
3
+ size 4999820296
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44ff251c6b33ed89101915eb82a92575fd7d7daf9db953205f3bb4b982c4c3f5
3
+ size 788571960
model.safetensors.index.json ADDED
@@ -0,0 +1,311 @@
1
+ {
2
+ "metadata": {
3
+ "total_size": 15702032384
4
+ },
5
+ "weight_map": {
6
+ "embedding_model.embed_tokens.weight": "model-00001-of-00004.safetensors",
7
+ "embedding_model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
8
+ "embedding_model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
9
+ "embedding_model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
10
+ "embedding_model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
11
+ "embedding_model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
12
+ "embedding_model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
13
+ "embedding_model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
14
+ "embedding_model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
15
+ "embedding_model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
16
+ "embedding_model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
17
+ "embedding_model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
18
+ "embedding_model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
19
+ "embedding_model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
20
+ "embedding_model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
21
+ "embedding_model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
22
+ "embedding_model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
23
+ "embedding_model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
24
+ "embedding_model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
25
+ "embedding_model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
26
+ "embedding_model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
27
+ "embedding_model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
28
+ "embedding_model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
29
+ "embedding_model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
30
+ "embedding_model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
31
+ "embedding_model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
32
+ "embedding_model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
33
+ "embedding_model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
34
+ "embedding_model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
35
+ "embedding_model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
36
+ "embedding_model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
37
+ "embedding_model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
38
+ "embedding_model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
39
+ "embedding_model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
40
+ "embedding_model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
41
+ "embedding_model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
42
+ "embedding_model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
43
+ "embedding_model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
44
+ "embedding_model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
45
+ "embedding_model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
46
+ "embedding_model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
47
+ "embedding_model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
48
+ "embedding_model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
49
+ "embedding_model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
50
+ "embedding_model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
51
+ "embedding_model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
52
+ "embedding_model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
53
+ "embedding_model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
54
+ "embedding_model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
55
+ "embedding_model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
56
+ "embedding_model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
57
+ "embedding_model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
58
+ "embedding_model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
59
+ "embedding_model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
60
+ "embedding_model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
61
+ "embedding_model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
62
+ "embedding_model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
63
+ "embedding_model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
64
+ "embedding_model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
65
+ "embedding_model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
66
+ "embedding_model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
67
+ "embedding_model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
68
+ "embedding_model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
69
+ "embedding_model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
70
+ "embedding_model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
71
+ "embedding_model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
72
+ "embedding_model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
73
+ "embedding_model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
74
+ "embedding_model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
75
+ "embedding_model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
76
+ "embedding_model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
77
+ "embedding_model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
78
+ "embedding_model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
79
+ "embedding_model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
80
+ "embedding_model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
81
+ "embedding_model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
82
+ "embedding_model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
83
+ "embedding_model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
84
+ "embedding_model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
85
+ "embedding_model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
86
+ "embedding_model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
87
+ "embedding_model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
88
+ "embedding_model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
89
+ "embedding_model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
90
+ "embedding_model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
91
+ "embedding_model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
92
+ "embedding_model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
93
+ "embedding_model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
94
+ "embedding_model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
95
+ "embedding_model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
96
+ "embedding_model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
97
+ "embedding_model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
98
+ "embedding_model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
99
+ "embedding_model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
100
+ "embedding_model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
101
+ "embedding_model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
102
+ "embedding_model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
103
+ "embedding_model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
104
+ "embedding_model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
105
+ "embedding_model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
106
+ "embedding_model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
107
+ "embedding_model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
108
+ "embedding_model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
109
+ "embedding_model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
110
+ "embedding_model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
111
+ "embedding_model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
112
+ "embedding_model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
113
+ "embedding_model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
114
+ "embedding_model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
115
+ "embedding_model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
116
+ "embedding_model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
117
+ "embedding_model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
118
+ "embedding_model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
119
+ "embedding_model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
120
+ "embedding_model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
121
+ "embedding_model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
122
+ "embedding_model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
123
+ "embedding_model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
124
+ "embedding_model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
125
+ "embedding_model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
126
+ "embedding_model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
127
+ "embedding_model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
128
+ "embedding_model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
129
+ "embedding_model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
130
+ "embedding_model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
131
+ "embedding_model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
132
+ "embedding_model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
133
+ "embedding_model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
134
+ "embedding_model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
135
+ "embedding_model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
136
+ "embedding_model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
137
+ "embedding_model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
138
+ "embedding_model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
139
+ "embedding_model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
140
+ "embedding_model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
141
+ "embedding_model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
142
+ "embedding_model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
143
+ "embedding_model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
144
+ "embedding_model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
145
+ "embedding_model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
146
+ "embedding_model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
147
+ "embedding_model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
148
+ "embedding_model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
149
+ "embedding_model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
150
+ "embedding_model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
151
+ "embedding_model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
152
+ "embedding_model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
153
+ "embedding_model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
154
+ "embedding_model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
155
+ "embedding_model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
156
+ "embedding_model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
157
+ "embedding_model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
158
+ "embedding_model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
159
+ "embedding_model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
160
+ "embedding_model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
161
+ "embedding_model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
162
+ "embedding_model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
163
+ "embedding_model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
164
+ "embedding_model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
165
+ "embedding_model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
166
+ "embedding_model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
167
+ "embedding_model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
168
+ "embedding_model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
169
+ "embedding_model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
170
+ "embedding_model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
171
+ "embedding_model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
172
+ "embedding_model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
173
+ "embedding_model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
174
+ "embedding_model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
175
+ "embedding_model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
176
+ "embedding_model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
177
+ "embedding_model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
178
+ "embedding_model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
179
+ "embedding_model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
180
+ "embedding_model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
181
+ "embedding_model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
182
+ "embedding_model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
183
+ "embedding_model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
184
+ "embedding_model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
185
+ "embedding_model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
186
+ "embedding_model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
187
+ "embedding_model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
188
+ "embedding_model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
189
+ "embedding_model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
190
+ "embedding_model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
191
+ "embedding_model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
192
+ "embedding_model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
193
+ "embedding_model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
194
+ "embedding_model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
195
+ "embedding_model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
196
+ "embedding_model.layers.28.input_layernorm.weight": "model-00003-of-00004.safetensors",
197
+ "embedding_model.layers.28.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
198
+ "embedding_model.layers.28.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
199
+ "embedding_model.layers.28.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
200
+ "embedding_model.layers.28.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
201
+ "embedding_model.layers.28.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
202
+ "embedding_model.layers.28.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
203
+ "embedding_model.layers.28.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
204
+ "embedding_model.layers.28.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
205
+ "embedding_model.layers.29.input_layernorm.weight": "model-00003-of-00004.safetensors",
206
+ "embedding_model.layers.29.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
207
+ "embedding_model.layers.29.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
208
+ "embedding_model.layers.29.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
209
+ "embedding_model.layers.29.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
210
+ "embedding_model.layers.29.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
211
+ "embedding_model.layers.29.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
212
+ "embedding_model.layers.29.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
213
+ "embedding_model.layers.29.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
214
+ "embedding_model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
215
+ "embedding_model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
216
+ "embedding_model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
217
+ "embedding_model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
218
+ "embedding_model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
219
+ "embedding_model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
220
+ "embedding_model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
221
+ "embedding_model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
222
+ "embedding_model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
223
+ "embedding_model.layers.30.input_layernorm.weight": "model-00004-of-00004.safetensors",
224
+ "embedding_model.layers.30.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
225
+ "embedding_model.layers.30.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
226
+ "embedding_model.layers.30.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
227
+ "embedding_model.layers.30.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
228
+ "embedding_model.layers.30.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
229
+ "embedding_model.layers.30.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
230
+ "embedding_model.layers.30.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
231
+ "embedding_model.layers.30.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
232
+ "embedding_model.layers.31.input_layernorm.weight": "model-00004-of-00004.safetensors",
233
+ "embedding_model.layers.31.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
234
+ "embedding_model.layers.31.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
235
+ "embedding_model.layers.31.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
236
+ "embedding_model.layers.31.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
237
+ "embedding_model.layers.31.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
238
+ "embedding_model.layers.31.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
239
+ "embedding_model.layers.31.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
240
+ "embedding_model.layers.31.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
241
+ "embedding_model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
242
+ "embedding_model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
243
+ "embedding_model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
244
+ "embedding_model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
245
+ "embedding_model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
246
+ "embedding_model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
247
+ "embedding_model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
248
+ "embedding_model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
249
+ "embedding_model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
250
+ "embedding_model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
251
+ "embedding_model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
252
+ "embedding_model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
253
+ "embedding_model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
254
+ "embedding_model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
255
+ "embedding_model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
256
+ "embedding_model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
257
+ "embedding_model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
258
+ "embedding_model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
259
+ "embedding_model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
260
+ "embedding_model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
261
+ "embedding_model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
262
+ "embedding_model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
263
+ "embedding_model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
264
+ "embedding_model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
265
+ "embedding_model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
266
+ "embedding_model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
267
+ "embedding_model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
268
+ "embedding_model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors",
269
+ "embedding_model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
270
+ "embedding_model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
271
+ "embedding_model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
272
+ "embedding_model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
273
+ "embedding_model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
274
+ "embedding_model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
275
+ "embedding_model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
276
+ "embedding_model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
277
+ "embedding_model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
278
+ "embedding_model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
279
+ "embedding_model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
280
+ "embedding_model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
281
+ "embedding_model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
282
+ "embedding_model.layers.8.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
283
+ "embedding_model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
284
+ "embedding_model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
285
+ "embedding_model.layers.8.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
286
+ "embedding_model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
287
+ "embedding_model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
288
+ "embedding_model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
289
+ "embedding_model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
290
+ "embedding_model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
291
+ "embedding_model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
292
+ "embedding_model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
293
+ "embedding_model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
294
+ "embedding_model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
295
+ "embedding_model.norm.weight": "model-00004-of-00004.safetensors",
296
+ "latent_attention_model.cross_attend_blocks.0.fn.to_kv.weight": "model-00001-of-00004.safetensors",
297
+ "latent_attention_model.cross_attend_blocks.0.fn.to_out.weight": "model-00001-of-00004.safetensors",
298
+ "latent_attention_model.cross_attend_blocks.0.fn.to_q.weight": "model-00001-of-00004.safetensors",
299
+ "latent_attention_model.cross_attend_blocks.0.norm.bias": "model-00001-of-00004.safetensors",
300
+ "latent_attention_model.cross_attend_blocks.0.norm.weight": "model-00001-of-00004.safetensors",
301
+ "latent_attention_model.cross_attend_blocks.0.norm_context.bias": "model-00001-of-00004.safetensors",
302
+ "latent_attention_model.cross_attend_blocks.0.norm_context.weight": "model-00001-of-00004.safetensors",
303
+ "latent_attention_model.cross_attend_blocks.1.fn.net.0.bias": "model-00001-of-00004.safetensors",
304
+ "latent_attention_model.cross_attend_blocks.1.fn.net.0.weight": "model-00001-of-00004.safetensors",
305
+ "latent_attention_model.cross_attend_blocks.1.fn.net.2.bias": "model-00001-of-00004.safetensors",
306
+ "latent_attention_model.cross_attend_blocks.1.fn.net.2.weight": "model-00001-of-00004.safetensors",
307
+ "latent_attention_model.cross_attend_blocks.1.norm.bias": "model-00001-of-00004.safetensors",
308
+ "latent_attention_model.cross_attend_blocks.1.norm.weight": "model-00001-of-00004.safetensors",
309
+ "latent_attention_model.latents": "model-00001-of-00004.safetensors"
310
+ }
311
+ }
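The `weight_map` above is how sharded checkpoints are resolved: each tensor name maps to the shard file that stores it. A minimal sketch of how such an index is consumed (a hypothetical miniature index for illustration; the real `model.safetensors.index.json` is read the same way):

```python
import json

# Miniature stand-in for the "weight_map" of model.safetensors.index.json
# (two real entries from the index above).
index = json.loads("""
{
  "weight_map": {
    "embedding_model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
    "embedding_model.norm.weight": "model-00004-of-00004.safetensors"
  }
}
""")

def shard_for(tensor_name: str) -> str:
    """Return the shard file that stores the given tensor."""
    return index["weight_map"][tensor_name]

print(shard_for("embedding_model.norm.weight"))  # → model-00004-of-00004.safetensors
```

A loader groups tensor names by shard so each shard file is opened only once.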
modeling_nvembed.py ADDED
@@ -0,0 +1,441 @@
+ from typing import List, Union, Dict, Mapping, Optional, Tuple, TypedDict
+ import torch
+ import os
+ import json
+ import numpy as np
+ from functools import partial
+ from contextlib import nullcontext
+ from transformers import AutoModel, PreTrainedTokenizerFast, BatchEncoding, DataCollatorWithPadding
+ from transformers.modeling_utils import PreTrainedModel
+ from transformers.models.auto import AutoTokenizer
+ from transformers.models.mistral.modeling_mistral import MISTRAL_INPUTS_DOCSTRING
+ from transformers.modeling_outputs import BaseModelOutputWithPast, BaseModelOutputWithNoAttention
+ from transformers.modeling_attn_mask_utils import _prepare_4d_attention_mask, _prepare_4d_attention_mask_for_sdpa
+ from transformers import MistralModel, MistralConfig
+ from transformers.cache_utils import Cache, DynamicCache
+ from transformers.utils import (
+     add_start_docstrings_to_model_forward,
+     logging,
+ )
+ from einops import rearrange, repeat
+ from tqdm.auto import tqdm
+ from datasets import Dataset
+ from torch.utils.data import DataLoader
+ from .configuration_nvembed import NVEmbedConfig, LatentAttentionConfig, BidirectionalMistralConfig
+
+ logger = logging.get_logger(__name__)
+
+ class NVEmbedFeatures(TypedDict):
+     input_ids: torch.Tensor
+     attention_mask: torch.Tensor
+     pool_mask: torch.Tensor
+
+ class BidirectionalMistralModel(MistralModel):
+     config_class = BidirectionalMistralConfig
+
+     def __init__(self, config: MistralConfig):
+         super().__init__(config)
+         for layer in self.layers:
+             layer.self_attn.is_causal = False
+         self._attn_implementation = "eager"
+
+     @add_start_docstrings_to_model_forward(MISTRAL_INPUTS_DOCSTRING)
+     def forward(
+         self,
+         input_ids: torch.LongTensor = None,
+         attention_mask: Optional[torch.Tensor] = None,
+         position_ids: Optional[torch.LongTensor] = None,
+         past_key_values: Optional[List[torch.FloatTensor]] = None,
+         inputs_embeds: Optional[torch.FloatTensor] = None,
+         use_cache: Optional[bool] = None,
+         output_attentions: Optional[bool] = None,
+         output_hidden_states: Optional[bool] = None,
+         return_dict: Optional[bool] = None,
+     ) -> Union[Tuple, BaseModelOutputWithPast]:
+         output_attentions = output_attentions if output_attentions is not None else self.config.output_attentions
+         output_hidden_states = (
+             output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
+         )
+         use_cache = use_cache if use_cache is not None else self.config.use_cache
+
+         return_dict = return_dict if return_dict is not None else self.config.use_return_dict
+
+         # retrieve input_ids and inputs_embeds
+         if input_ids is not None and inputs_embeds is not None:
+             raise ValueError("You cannot specify both decoder_input_ids and decoder_inputs_embeds at the same time")
+         elif input_ids is not None:
+             batch_size, seq_length = input_ids.shape
+         elif inputs_embeds is not None:
+             batch_size, seq_length, _ = inputs_embeds.shape
+         else:
+             raise ValueError("You have to specify either decoder_input_ids or decoder_inputs_embeds")
+
+         if self.gradient_checkpointing and self.training:
+             if use_cache:
+                 logger.warning_once(
+                     "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
+                 )
+                 use_cache = False
+
+         past_key_values_length = 0
+
+         if use_cache:
+             use_legacy_cache = not isinstance(past_key_values, Cache)
+             if use_legacy_cache:
+                 past_key_values = DynamicCache.from_legacy_cache(past_key_values)
+             past_key_values_length = past_key_values.get_usable_length(seq_length)
+
+         if position_ids is None:
+             device = input_ids.device if input_ids is not None else inputs_embeds.device
+             position_ids = torch.arange(
+                 past_key_values_length, seq_length + past_key_values_length, dtype=torch.long, device=device
+             )
+             position_ids = position_ids.unsqueeze(0).view(-1, seq_length)
+         else:
+             position_ids = position_ids.view(-1, seq_length).long()
+
+         if inputs_embeds is None:
+             inputs_embeds = self.embed_tokens(input_ids)
+
+         if attention_mask is not None and self._attn_implementation == "flash_attention_2" and use_cache:
+             is_padding_right = attention_mask[:, -1].sum().item() != batch_size
+             if is_padding_right:
+                 raise ValueError(
+                     "You are attempting to perform batched generation with padding_side='right'"
+                     " this may lead to unexpected behaviour for Flash Attention version of Mistral. Make sure to "
+                     " call `tokenizer.padding_side = 'left'` before tokenizing the input. "
+                 )
+
+         if self._attn_implementation == "flash_attention_2":
+             # 2d mask is passed through the layers
+             attention_mask = attention_mask if (attention_mask is not None and 0 in attention_mask) else None
+         elif self._attn_implementation == "sdpa" and not output_attentions:
+             # output_attentions=True can not be supported when using SDPA, and we fall back on
+             # the manual implementation that requires a 4D causal mask in all cases.
+             attention_mask = _prepare_4d_attention_mask_for_sdpa(
+                 attention_mask, inputs_embeds.dtype
+             )
+         else:
+             # 4d mask is passed through the layers
+             attention_mask = _prepare_4d_attention_mask(
+                 attention_mask, inputs_embeds.dtype,
+             )
+
+         hidden_states = inputs_embeds
+
+         # decoder layers
+         all_hidden_states = () if output_hidden_states else None
+         all_self_attns = () if output_attentions else None
+         next_decoder_cache = None
+
+         for decoder_layer in self.layers:
+             if output_hidden_states:
+                 all_hidden_states += (hidden_states,)
+
+             if self.gradient_checkpointing and self.training:
+                 layer_outputs = self._gradient_checkpointing_func(
+                     decoder_layer.__call__,
+                     hidden_states,
+                     attention_mask,
+                     position_ids,
+                     past_key_values,
+                     output_attentions,
+                     use_cache,
+                 )
+             else:
+                 layer_outputs = decoder_layer(
+                     hidden_states,
+                     attention_mask=attention_mask,
+                     position_ids=position_ids,
+                     past_key_value=past_key_values,
+                     output_attentions=output_attentions,
+                     use_cache=use_cache,
+                 )
+
+             hidden_states = layer_outputs[0]
+
+             if use_cache:
+                 next_decoder_cache = layer_outputs[2 if output_attentions else 1]
+
+             if output_attentions:
+                 all_self_attns += (layer_outputs[1],)
+
+         hidden_states = self.norm(hidden_states)
+
+         # add hidden states from the last decoder layer
+         if output_hidden_states:
+             all_hidden_states += (hidden_states,)
+
+         next_cache = None
+         if use_cache:
+             next_cache = next_decoder_cache.to_legacy_cache() if use_legacy_cache else next_decoder_cache
+
+         if not return_dict:
+             return tuple(v for v in [hidden_states, next_cache, all_hidden_states, all_self_attns] if v is not None)
+         return BaseModelOutputWithPast(
+             last_hidden_state=hidden_states,
+             past_key_values=next_cache,
+             hidden_states=all_hidden_states,
+             attentions=all_self_attns,
+         )
+
+ def _move_to_device(maybe_tensor, device: torch.device):
+     if torch.is_tensor(maybe_tensor):
+         return maybe_tensor.to(device, non_blocking=device.type == "cuda")
+     elif isinstance(maybe_tensor, dict):
+         return {key: _move_to_device(value, device) for key, value in maybe_tensor.items()}
+     elif isinstance(maybe_tensor, list):
+         return [_move_to_device(x, device) for x in maybe_tensor]
+     elif isinstance(maybe_tensor, tuple):
+         return tuple([_move_to_device(x, device) for x in maybe_tensor])
+     elif isinstance(maybe_tensor, Mapping):
+         return type(maybe_tensor)({k: _move_to_device(v, device) for k, v in maybe_tensor.items()})
+     else:
+         return maybe_tensor
+
+ def move_to_device(sample, device: torch.device):
+     if device.type == "cpu":
+         return sample
+
+     if len(sample) == 0:
+         return {}
+     return _move_to_device(sample, device)
+
+
+ def input_transform_func(
+     tokenizer: PreTrainedTokenizerFast,
+     examples: Dict[str, List],
+     always_add_eos: bool,
+     max_length: int,
+     instruction: str,
+ ) -> BatchEncoding:
+     if always_add_eos:
+         examples['input_texts'] = [instruction + input_example + tokenizer.eos_token for input_example in examples['input_texts']]
+     batch_dict = tokenizer(
+         examples['input_texts'],
+         max_length=max_length,
+         padding=True,
+         return_token_type_ids=False,
+         return_tensors="pt",
+         truncation=True)
+     return batch_dict
+
+
+ class PreNorm(torch.nn.Module):
+     def __init__(self, dim, fn, context_dim = None):
+         super().__init__()
+         self.fn = fn
+         self.norm = torch.nn.LayerNorm(dim)
+         self.norm_context = torch.nn.LayerNorm(context_dim) if exists(context_dim) else None
+
+     def forward(self, x, **kwargs):
+         x = self.norm(x)
+         if exists(self.norm_context):
+             context = kwargs['context']
+             normed_context = self.norm_context(context)
+             kwargs.update(context = normed_context)
+         return self.fn(x, **kwargs)
+
+ class GEGLU(torch.nn.Module):
+     def forward(self, x):
+         x, gates = x.chunk(2, dim = -1)
+         return x * torch.nn.functional.gelu(gates)
+
+ class FeedForward(torch.nn.Module):
+     def __init__(self, dim, mult = 4):
+         super().__init__()
+         self.net = torch.nn.Sequential(torch.nn.Linear(dim, dim * mult * 2),
+                                        GEGLU(),
+                                        torch.nn.Linear(dim * mult, dim))
+
+     def forward(self, x):
+         return self.net(x)
+
+ def exists(val):
+     return val is not None
+
+ def default(val, d):
+     return val if exists(val) else d
+
+
+ class Attention(torch.nn.Module):
+     def __init__(self, query_dim, context_dim = None, heads = 8, dim_head = 64):
+         super().__init__()
+         inner_dim = dim_head * heads
+         context_dim = default(context_dim, query_dim)
+         self.scale = dim_head ** -0.5
+         self.heads = heads
+
+         self.to_q = torch.nn.Linear(query_dim, inner_dim, bias = False)
+         self.to_kv = torch.nn.Linear(context_dim, inner_dim * 2, bias = False)
+         self.to_out = torch.nn.Linear(inner_dim, query_dim, bias = False)
+
+     def forward(self, x, context = None, mask = None):
+         h = self.heads
+         q = self.to_q(x)
+         context = default(context, x)
+         k, v = self.to_kv(context).chunk(2, dim = -1)
+         q, k, v = map(lambda t: rearrange(t, 'b n (h d) -> (b h) n d', h = h), (q, k, v))
+         with torch.backends.cuda.sdp_kernel(enable_flash=True, enable_mem_efficient=True):
+             out = torch.nn.functional.scaled_dot_product_attention(q, k, v)
+         out = rearrange(out, '(b h) n d -> b n (h d)', h = h)
+         return self.to_out(out)
+
+
+ class LatentAttentionModel(PreTrainedModel):
+     config_class = LatentAttentionConfig
+
+     def __init__(self, config: LatentAttentionConfig):
+         super().__init__(config)
+         ## cross-attention block
+         num_latents, latent_dim, cross_heads, cross_dim_head = config.num_latents_value, config.latent_dim, config.num_cross_heads, config.cross_dim_head
+         dim = config.hidden_dim
+         # init latent_attention and latents
+         self.cross_attend_blocks = torch.nn.ModuleList([
+             PreNorm(latent_dim, Attention(latent_dim, dim, heads = cross_heads, dim_head = cross_dim_head),
+                     context_dim = dim),
+             PreNorm(latent_dim, FeedForward(latent_dim)),
+         ])
+         self.output_normalize = config.output_normalize
+         self.register_parameter("latents", torch.nn.Parameter(torch.randn(num_latents, latent_dim)))
+
+     def forward(self, hiddens, attention_mask: torch.Tensor=None):
+         ## cross-attention block
+         cross_attn, cross_ff = self.cross_attend_blocks
+         b, *_, device = *hiddens.shape, hiddens.device
+         x = repeat(self.latents, 'n d -> b n d', b = b)
+         hiddens = cross_attn(hiddens, context = x, mask = None) + hiddens
+         hiddens = cross_ff(hiddens) + hiddens
+         if attention_mask is not None:
+             s = torch.sum(hiddens * attention_mask.unsqueeze(-1).float(), dim=1)
+             d = attention_mask.sum(dim=1, keepdim=True).float()
+             hiddens = s / d
+             if self.output_normalize:
+                 hiddens = torch.nn.functional.normalize(hiddens, p=2, dim=-1)
+         return hiddens
+
317
+ class NVEmbedModel(PreTrainedModel):
318
+ config_class = NVEmbedConfig
319
+ _no_split_modules = ["MistralDecoderLayer", "LatentAttentionModel"]
320
+
321
+ def __init__(self, config: NVEmbedConfig):
322
+ super().__init__(config)
323
+ self.latent_attention_model = AutoModel.from_config(config.latent_attention_config)
324
+ self.embedding_model = AutoModel.from_config(
325
+ config.text_config,
326
+ ) if config.text_config is not None else None
327
+ self.tokenizer = AutoTokenizer.from_pretrained(config.text_config._name_or_path) if config.text_config is not None else None
328
+ self.padding_side = config.padding_side
329
+ self.is_mask_instruction = config.is_mask_instruction
330
+ self.add_eos = config.add_eos
331
+ self.mask_type = config.mask_type
332
+ if config.add_pad_token and self.tokenizer is not None:
333
+ self.add_pad_token()
334
+
335
+ def add_pad_token(self):
336
+ self.tokenizer.pad_token = self.tokenizer.eos_token
337
+ self.tokenizer.padding_side = self.padding_side
338
+
339
+ def prepare_kwargs_from_batch(self, batch_dict: dict, instruction_lens: int, device: torch.device):
340
+ batch_dict = move_to_device(batch_dict, device)
341
+ attention_mask = batch_dict['attention_mask'].clone() if 'attention_mask' in batch_dict else None
342
+ if (attention_mask is not None and
343
+ self.padding_side == "right" and
344
+ self.is_mask_instruction == True and
345
+ instruction_lens > 0):
346
+ # Mask out the instruction tokens for mean-pooling
347
+ attention_mask[:, :instruction_lens] = 0
348
+ features: NVEmbedFeatures = {
349
+ 'input_ids': torch.tensor(batch_dict.get('input_ids').to(batch_dict.get('input_ids')).long()),
350
+ 'attention_mask': batch_dict['attention_mask'],
351
+ 'pool_mask': attention_mask,
352
+ }
353
+ return features
354
+
355
+ @torch.no_grad()
356
+ def _do_encode(self,
357
+ prompts: List[str],
358
+ batch_size: int=1,
359
+ instruction: str="",
360
+ max_length: int=4096,
361
+ num_workers: int=32,
362
+ **kwargs
363
+ ) -> Union[np.ndarray, torch.FloatTensor]:
364
+ dataset: Dataset = Dataset.from_dict({'input_texts': prompts})
365
+ dataset.set_transform(partial(input_transform_func,
366
+ self.tokenizer,
367
+ always_add_eos=True,
368
+ max_length=max_length,
369
+ instruction=instruction))
370
+
371
+ data_collator = DataCollatorWithPadding(self.tokenizer)
372
+ data_loader = DataLoader(
373
+ dataset,
374
+ batch_size=batch_size,
375
+ shuffle=False,
376
+            drop_last=False,
+            num_workers=num_workers,
+            collate_fn=data_collator,
+            pin_memory=True)
+
+        if self.padding_side == "right" and self.is_mask_instruction == True and len(instruction) > 0:
+            instruction_lens = len(self.tokenizer.tokenize(instruction))
+        else:
+            instruction_lens = 0
+
+        encoded_embeds = []
+        device = next(self.embedding_model.parameters()).device
+        for batch_dict in tqdm(data_loader, desc='encoding', mininterval=10):
+            features = self.prepare_kwargs_from_batch(batch_dict, instruction_lens, device=device)
+            embeds = self(**features)["sentence_embeddings"].squeeze(1)
+            encoded_embeds.append(embeds)
+        encoded_embeds = torch.cat(encoded_embeds, axis=0)
+        if "return_numpy" in kwargs and kwargs.get("return_numpy"):
+            encoded_embeds = encoded_embeds.cpu().detach().numpy()
+        return encoded_embeds
+
+    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, pool_mask: Optional[torch.Tensor]=None, return_dict: bool=True):
+        autocast_ctx = torch.autocast if torch.cuda.is_available() else nullcontext
+        with autocast_ctx("cuda"):
+            ## decoder only layer
+            outputs = self.embedding_model(
+                input_ids=input_ids,
+                attention_mask=attention_mask,
+            )
+            ## latent attention layer
+            embeds = self.latent_attention_model(
+                outputs.last_hidden_state,
+                pool_mask,
+            )
+        if not return_dict:
+            return (embeds,)
+        return BaseModelOutputWithNoAttention(last_hidden_state=embeds)
+
+
+    @torch.no_grad()
+    def encode(self, prompts: List[str], instruction: str="", max_length: int=4096, **kwargs):
+        if self.padding_side == "right" and self.is_mask_instruction == True and len(instruction) > 0:
+            instruction_lens = len(self.tokenizer.tokenize(instruction))
+        else:
+            instruction_lens = 0
+
+        device = next(self.embedding_model.parameters()).device
+        batch_dict = input_transform_func(self.tokenizer,
+                                          {"input_texts": [prompt for prompt in prompts]},
+                                          always_add_eos=True,
+                                          max_length=max_length,
+                                          instruction=instruction)
+
+        features: NVEmbedFeatures = self.prepare_kwargs_from_batch(batch_dict, instruction_lens, device=device)
+        return self(**features)["sentence_embeddings"].squeeze(1)
+
+
+## AutoModel Register
+AutoModel.register(NVEmbedConfig, NVEmbedModel)
+AutoModel.register(LatentAttentionConfig, LatentAttentionModel)
+AutoModel.register(BidirectionalMistralConfig, BidirectionalMistralModel)
+
+## Register for auto class
+NVEmbedModel.register_for_auto_class("AutoModel")
+LatentAttentionModel.register_for_auto_class("AutoModel")
+BidirectionalMistralModel.register_for_auto_class("AutoModel")
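The `instruction_lens` plumbing above feeds `prepare_kwargs_from_batch`, whose body is not shown in this diff. A plausible NumPy sketch of what instruction masking plus mean pooling could look like — `build_pool_mask` and `masked_mean_pool` are hypothetical names, and the actual model pools through its latent attention layer rather than a plain mean:

```python
import numpy as np

def build_pool_mask(attention_mask, instruction_lens):
    # Hypothetical: with right padding, the instruction occupies the first
    # instruction_lens positions; zero them so pooling skips the instruction.
    pool_mask = attention_mask.copy()
    pool_mask[:, :instruction_lens] = 0
    return pool_mask

def masked_mean_pool(hidden_states, pool_mask):
    # Average hidden states over positions where pool_mask == 1.
    mask = pool_mask[..., None].astype(hidden_states.dtype)
    return (hidden_states * mask).sum(axis=1) / np.clip(mask.sum(axis=1), 1e-9, None)
```

This only illustrates the masking convention; the real `pool_mask` construction lives in the repository's helper code.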
modules.json ADDED
@@ -0,0 +1,26 @@
+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
+  },
+  {
+    "idx": 3,
+    "name": "3",
+    "path": "3_CSRSparsity",
+    "type": "sentence_transformers.sparse_encoder.models.CSRSparsity"
+  }
+]
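The final `CSRSparsity` stage in this pipeline keeps only the top-k activations of each (expanded) embedding; per the `3_CSRSparsity/config.json` in this commit, k=32 over a 16384-dimensional hidden space. A minimal NumPy sketch of the inference-time top-k sparsification, assuming everything outside the k largest values is zeroed (`topk_sparsify` is an illustrative name, not the library API, and the auxiliary k_aux/dead-neuron machinery is for training only):

```python
import numpy as np

def topk_sparsify(x, k=32):
    # Keep the k largest activations per row, zero out the rest.
    idx = np.argpartition(x, -k, axis=-1)[..., -k:]
    out = np.zeros_like(x)
    np.put_along_axis(out, idx, np.take_along_axis(x, idx, axis=-1), axis=-1)
    return out
```

The resulting vectors are mostly zeros, which is what makes them cheap to store and match as sparse embeddings.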
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+{
+  "max_seq_length": 32768,
+  "do_lower_case": false
+}
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "legacy": true,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "</s>",
+  "sp_model_kwargs": {},
+  "spaces_between_special_tokens": false,
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": "<unk>",
+  "use_default_system_prompt": false
+}
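With `add_bos_token: true`, `add_eos_token: false`, and `pad_token` reusing `"</s>"` (ids 1 and 2 in `added_tokens_decoder` above), batch preparation amounts to prepending the BOS id and right-padding with the EOS id. A pure-Python sketch of that convention — `pad_batch` is an illustrative helper, not part of the tokenizer API:

```python
def pad_batch(token_ids, pad_id=2, bos_id=1, add_bos=True):
    # Mimic this tokenizer_config: prepend <s> (id 1), append no EOS,
    # and right-pad with </s> (id 2), which doubles as the pad token.
    seqs = [([bos_id] + list(ids)) if add_bos else list(ids) for ids in token_ids]
    max_len = max(len(s) for s in seqs)
    input_ids = [s + [pad_id] * (max_len - len(s)) for s in seqs]
    attention_mask = [[1] * len(s) + [0] * (max_len - len(s)) for s in seqs]
    return input_ids, attention_mask
```

Because pad and EOS share an id, the attention mask (not the token value) is what distinguishes padding from a genuine end-of-sequence token.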