Add files using upload-large-folder tool
Browse files- README.md +676 -3
- checkpoint-3800/1_Pooling/config.json +10 -0
- checkpoint-3800/README.md +670 -0
- checkpoint-3800/config.json +31 -0
- checkpoint-3800/config_sentence_transformers.json +10 -0
- checkpoint-3800/modules.json +20 -0
- checkpoint-3800/rng_state.pth +3 -0
- checkpoint-3800/scaler.pt +3 -0
- checkpoint-3800/scheduler.pt +3 -0
- checkpoint-3800/sentence_bert_config.json +4 -0
- checkpoint-3800/special_tokens_map.json +37 -0
- checkpoint-3800/tokenizer.json +0 -0
- checkpoint-3800/tokenizer_config.json +58 -0
- checkpoint-3800/trainer_state.json +1162 -0
- checkpoint-3800/training_args.bin +3 -0
- checkpoint-3800/vocab.txt +0 -0
- checkpoint-4000/1_Pooling/config.json +10 -0
- checkpoint-4000/README.md +672 -0
- checkpoint-4000/config.json +31 -0
- checkpoint-4000/modules.json +20 -0
- checkpoint-4000/rng_state.pth +3 -0
- checkpoint-4000/scaler.pt +3 -0
- checkpoint-4000/scheduler.pt +3 -0
- checkpoint-4000/tokenizer.json +0 -0
- checkpoint-4000/tokenizer_config.json +58 -0
- checkpoint-4000/trainer_state.json +1221 -0
- checkpoint-4000/training_args.bin +3 -0
- checkpoint-4000/vocab.txt +0 -0
- checkpoint-4200/README.md +674 -0
- checkpoint-4200/rng_state.pth +3 -0
- checkpoint-4200/scaler.pt +3 -0
- checkpoint-4200/trainer_state.json +1280 -0
- checkpoint-4200/training_args.bin +3 -0
- checkpoint-4200/vocab.txt +0 -0
- checkpoint-4400/config.json +31 -0
- checkpoint-4400/config_sentence_transformers.json +10 -0
- checkpoint-4400/modules.json +20 -0
- checkpoint-4400/sentence_bert_config.json +4 -0
- checkpoint-4480/1_Pooling/config.json +10 -0
- checkpoint-4480/README.md +676 -0
- checkpoint-4480/config.json +31 -0
- checkpoint-4480/config_sentence_transformers.json +10 -0
- checkpoint-4480/modules.json +20 -0
- checkpoint-4480/sentence_bert_config.json +4 -0
- checkpoint-4480/special_tokens_map.json +37 -0
- checkpoint-4480/tokenizer.json +0 -0
- checkpoint-4480/tokenizer_config.json +58 -0
- checkpoint-4480/trainer_state.json +1339 -0
- checkpoint-4480/vocab.txt +0 -0
- eval/Information-Retrieval_evaluation_full_en_results.csv +23 -0
README.md
CHANGED
@@ -1,3 +1,676 @@
|
|
1 |
-
---
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- sentence-transformers
|
4 |
+
- sentence-similarity
|
5 |
+
- feature-extraction
|
6 |
+
- generated_from_trainer
|
7 |
+
- dataset_size:114699
|
8 |
+
- loss:CachedGISTEmbedLoss
|
9 |
+
base_model: BAAI/bge-large-en-v1.5
|
10 |
+
widget:
|
11 |
+
- source_sentence: For roles such as 'physiotherapist', 'neuromusculoskeletal physiotherapist',
|
12 |
+
'osteopath', and 'chiropractor', the skills needed include a deep understanding
|
13 |
+
of human anatomy and physiology, strong diagnostic skills, and the ability to
|
14 |
+
apply manual therapy techniques to treat musculoskeletal issues. Additionally,
|
15 |
+
effective communication skills are crucial for explaining treatments and exercises
|
16 |
+
to patients, while adaptability and problem-solving skills are essential for tailoring
|
17 |
+
treatments to individual patient needs.
|
18 |
+
sentences:
|
19 |
+
- Job roles such as insulation installers, HVAC technicians, and construction engineers
|
20 |
+
require knowledge of various types and characteristics of insulation materials
|
21 |
+
to effectively reduce heat transfer and improve energy efficiency in buildings
|
22 |
+
and systems. Understanding the typology of insulation materials, including their
|
23 |
+
thermal properties, durability, and environmental impact, is crucial for these
|
24 |
+
professionals to select the most appropriate materials for specific applications.
|
25 |
+
- Job roles such as Contract Managers, Legal Analysts, and Compliance Officers require
|
26 |
+
the skill of reviewing or auditing completed contracts to ensure legal accuracy,
|
27 |
+
compliance with regulations, and alignment with organizational goals.
|
28 |
+
- Job roles that require skills in dealing with emergency care situations include
|
29 |
+
emergency medical technicians (EMTs), paramedics, and emergency room nurses or
|
30 |
+
doctors, all of whom must quickly and effectively manage critical health situations
|
31 |
+
to save lives.
|
32 |
+
- source_sentence: Bus drivers, including those operating in various sectors like
|
33 |
+
public transit, intercity, private, or school services, need strong driving skills,
|
34 |
+
knowledge of traffic laws, and the ability to operate safely in diverse conditions.
|
35 |
+
Additionally, effective communication skills and the ability to handle passenger
|
36 |
+
inquiries and emergencies are crucial.
|
37 |
+
sentences:
|
38 |
+
- Job roles that require the skill to calibrate electronic instruments include calibration
|
39 |
+
technicians, quality control engineers, and instrumentation specialists. These
|
40 |
+
professionals ensure the accuracy and reliability of various electronic devices
|
41 |
+
and systems across different industries such as manufacturing, aerospace, and
|
42 |
+
automotive.
|
43 |
+
- Job roles such as Building Engineer, Architect, and Construction Specialist require
|
44 |
+
skills in designing, engineering, or developing air-tight building structures
|
45 |
+
to ensure energy efficiency and environmental control within the building.
|
46 |
+
- Job roles such as customer service representatives, flight attendants, and hotel
|
47 |
+
concierges require a strong focus on passengers or customers, ensuring their needs
|
48 |
+
and comfort are prioritized to provide excellent service and support.
|
49 |
+
- source_sentence: A mine surveyor, also known as a mining surveyor or mine planning
|
50 |
+
surveyor, requires expertise in geomatics and mining engineering to accurately
|
51 |
+
map and plan mine operations, ensuring safety and efficiency. They must also possess
|
52 |
+
strong analytical skills and the ability to use specialized software for creating
|
53 |
+
detailed mine plans and maintaining accurate records.
|
54 |
+
sentences:
|
55 |
+
- Job roles such as data analysts, business analysts, and financial analysts require
|
56 |
+
the skill to present reports or prepare statistical reports, as they often need
|
57 |
+
to communicate complex data insights clearly and effectively to stakeholders.
|
58 |
+
- Job roles that require monitoring flour unloading equipment include Quality Control
|
59 |
+
Technicians, Process Operators, and Mill Supervisors, who ensure the efficient
|
60 |
+
and safe operation of flour processing systems and the proper unloading of flour
|
61 |
+
from transport vehicles.
|
62 |
+
- Job roles that require skills in the manufacturing of made-up textile articles
|
63 |
+
include textile production managers, machinery operators, and quality control
|
64 |
+
inspectors, all of whom utilize specific technology and machinery to produce finished
|
65 |
+
textile products such as clothing, home textiles, and industrial fabrics.
|
66 |
+
- source_sentence: An insulation supervisor, regardless of the specific type of insulation
|
67 |
+
material or installation area, requires strong project management skills, knowledge
|
68 |
+
of building codes and safety regulations, and expertise in insulation techniques
|
69 |
+
to oversee the installation process effectively and ensure quality standards are
|
70 |
+
met.
|
71 |
+
sentences:
|
72 |
+
- Job roles that require skills in energy efficiency, such as promoting energy efficiency
|
73 |
+
or efficient energy use, include Energy Managers, Sustainability Specialists,
|
74 |
+
and Building Engineers, who focus on reducing energy consumption and improving
|
75 |
+
energy use in various settings. Additionally, roles like Battery Technicians or
|
76 |
+
Engineers involve battery benchmarking to enhance energy storage and efficiency
|
77 |
+
in technological devices and systems.
|
78 |
+
- The skill of applying or installing waterproofing and damp-proofing membranes
|
79 |
+
is primarily required by construction workers such as waterproofing specialists,
|
80 |
+
roofers, and building envelope technicians, who use these membranes to prevent
|
81 |
+
water damage in buildings and structures.
|
82 |
+
- Job roles such as laboratory technicians, chemists, and materials scientists require
|
83 |
+
skills in laboratory techniques, including electronic and thermic methods, gas
|
84 |
+
chromatography, and gravimetric analysis, to conduct precise experiments and analyze
|
85 |
+
materials. These professionals must apply natural science techniques and use various
|
86 |
+
lab techniques to ensure accurate and reliable results in their research or quality
|
87 |
+
control processes.
|
88 |
+
- source_sentence: For roles such as import/export manager, graduate export manager,
|
89 |
+
senior export manager, and other related positions in meat and meat products,
|
90 |
+
the key skills include a strong understanding of international trade regulations,
|
91 |
+
meat product knowledge, customs compliance, and excellent negotiation and communication
|
92 |
+
skills to manage global supply chains effectively. Additionally, proficiency in
|
93 |
+
relevant trade software and languages can be highly beneficial.
|
94 |
+
sentences:
|
95 |
+
- Job roles that require skills such as managing staff, coordinating employees,
|
96 |
+
and performing HR activities include Human Resources Managers, Team Leaders, Supervisors,
|
97 |
+
and Department Heads, all of whom are responsible for overseeing personnel, implementing
|
98 |
+
HR policies, and ensuring efficient team operations.
|
99 |
+
- Job roles such as Control Systems Engineer, Automation Engineer, and Systems Designer
|
100 |
+
require skills in designing, planning, and developing control systems to manage
|
101 |
+
and optimize the performance of various technological processes and machinery.
|
102 |
+
These professionals are tasked with creating efficient and reliable systems that
|
103 |
+
can operate autonomously or with minimal human intervention.
|
104 |
+
- Job roles such as Performance Analyst, Quality Assurance Engineer, and Test Manager
|
105 |
+
require skills in conducting performance measurement and organizing or managing
|
106 |
+
conversion testing to ensure software and systems meet performance standards and
|
107 |
+
function correctly in real-world scenarios.
|
108 |
+
pipeline_tag: sentence-similarity
|
109 |
+
library_name: sentence-transformers
|
110 |
+
metrics:
|
111 |
+
- cosine_accuracy@1
|
112 |
+
- cosine_accuracy@20
|
113 |
+
- cosine_accuracy@50
|
114 |
+
- cosine_accuracy@100
|
115 |
+
- cosine_accuracy@150
|
116 |
+
- cosine_accuracy@200
|
117 |
+
- cosine_precision@1
|
118 |
+
- cosine_precision@20
|
119 |
+
- cosine_precision@50
|
120 |
+
- cosine_precision@100
|
121 |
+
- cosine_precision@150
|
122 |
+
- cosine_precision@200
|
123 |
+
- cosine_recall@1
|
124 |
+
- cosine_recall@20
|
125 |
+
- cosine_recall@50
|
126 |
+
- cosine_recall@100
|
127 |
+
- cosine_recall@150
|
128 |
+
- cosine_recall@200
|
129 |
+
- cosine_ndcg@1
|
130 |
+
- cosine_ndcg@20
|
131 |
+
- cosine_ndcg@50
|
132 |
+
- cosine_ndcg@100
|
133 |
+
- cosine_ndcg@150
|
134 |
+
- cosine_ndcg@200
|
135 |
+
- cosine_mrr@1
|
136 |
+
- cosine_mrr@20
|
137 |
+
- cosine_mrr@50
|
138 |
+
- cosine_mrr@100
|
139 |
+
- cosine_mrr@150
|
140 |
+
- cosine_mrr@200
|
141 |
+
- cosine_map@1
|
142 |
+
- cosine_map@20
|
143 |
+
- cosine_map@50
|
144 |
+
- cosine_map@100
|
145 |
+
- cosine_map@150
|
146 |
+
- cosine_map@200
|
147 |
+
- cosine_map@500
|
148 |
+
model-index:
|
149 |
+
- name: SentenceTransformer based on BAAI/bge-large-en-v1.5
|
150 |
+
results:
|
151 |
+
- task:
|
152 |
+
type: information-retrieval
|
153 |
+
name: Information Retrieval
|
154 |
+
dataset:
|
155 |
+
name: full en
|
156 |
+
type: full_en
|
157 |
+
metrics:
|
158 |
+
- type: cosine_accuracy@1
|
159 |
+
value: 0.7302631578947368
|
160 |
+
name: Cosine Accuracy@1
|
161 |
+
- type: cosine_accuracy@20
|
162 |
+
value: 0.993421052631579
|
163 |
+
name: Cosine Accuracy@20
|
164 |
+
- type: cosine_accuracy@50
|
165 |
+
value: 0.9967105263157895
|
166 |
+
name: Cosine Accuracy@50
|
167 |
+
- type: cosine_accuracy@100
|
168 |
+
value: 1.0
|
169 |
+
name: Cosine Accuracy@100
|
170 |
+
- type: cosine_accuracy@150
|
171 |
+
value: 1.0
|
172 |
+
name: Cosine Accuracy@150
|
173 |
+
- type: cosine_accuracy@200
|
174 |
+
value: 1.0
|
175 |
+
name: Cosine Accuracy@200
|
176 |
+
- type: cosine_precision@1
|
177 |
+
value: 0.7302631578947368
|
178 |
+
name: Cosine Precision@1
|
179 |
+
- type: cosine_precision@20
|
180 |
+
value: 0.4998355263157894
|
181 |
+
name: Cosine Precision@20
|
182 |
+
- type: cosine_precision@50
|
183 |
+
value: 0.39184210526315794
|
184 |
+
name: Cosine Precision@50
|
185 |
+
- type: cosine_precision@100
|
186 |
+
value: 0.3111842105263158
|
187 |
+
name: Cosine Precision@100
|
188 |
+
- type: cosine_precision@150
|
189 |
+
value: 0.2652412280701754
|
190 |
+
name: Cosine Precision@150
|
191 |
+
- type: cosine_precision@200
|
192 |
+
value: 0.232171052631579
|
193 |
+
name: Cosine Precision@200
|
194 |
+
- type: cosine_recall@1
|
195 |
+
value: 0.010227350724729817
|
196 |
+
name: Cosine Recall@1
|
197 |
+
- type: cosine_recall@20
|
198 |
+
value: 0.13368254620254577
|
199 |
+
name: Cosine Recall@20
|
200 |
+
- type: cosine_recall@50
|
201 |
+
value: 0.2541249933594102
|
202 |
+
name: Cosine Recall@50
|
203 |
+
- type: cosine_recall@100
|
204 |
+
value: 0.3948435268881245
|
205 |
+
name: Cosine Recall@100
|
206 |
+
- type: cosine_recall@150
|
207 |
+
value: 0.49626849018850344
|
208 |
+
name: Cosine Recall@150
|
209 |
+
- type: cosine_recall@200
|
210 |
+
value: 0.5720837677245543
|
211 |
+
name: Cosine Recall@200
|
212 |
+
- type: cosine_ndcg@1
|
213 |
+
value: 0.7302631578947368
|
214 |
+
name: Cosine Ndcg@1
|
215 |
+
- type: cosine_ndcg@20
|
216 |
+
value: 0.5384654647855256
|
217 |
+
name: Cosine Ndcg@20
|
218 |
+
- type: cosine_ndcg@50
|
219 |
+
value: 0.44986527953229877
|
220 |
+
name: Cosine Ndcg@50
|
221 |
+
- type: cosine_ndcg@100
|
222 |
+
value: 0.44277699637488865
|
223 |
+
name: Cosine Ndcg@100
|
224 |
+
- type: cosine_ndcg@150
|
225 |
+
value: 0.4895063673734854
|
226 |
+
name: Cosine Ndcg@150
|
227 |
+
- type: cosine_ndcg@200
|
228 |
+
value: 0.5346148440105628
|
229 |
+
name: Cosine Ndcg@200
|
230 |
+
- type: cosine_mrr@1
|
231 |
+
value: 0.7302631578947368
|
232 |
+
name: Cosine Mrr@1
|
233 |
+
- type: cosine_mrr@20
|
234 |
+
value: 0.8341772399749373
|
235 |
+
name: Cosine Mrr@20
|
236 |
+
- type: cosine_mrr@50
|
237 |
+
value: 0.8343338815789473
|
238 |
+
name: Cosine Mrr@50
|
239 |
+
- type: cosine_mrr@100
|
240 |
+
value: 0.8343905966424682
|
241 |
+
name: Cosine Mrr@100
|
242 |
+
- type: cosine_mrr@150
|
243 |
+
value: 0.8343905966424682
|
244 |
+
name: Cosine Mrr@150
|
245 |
+
- type: cosine_mrr@200
|
246 |
+
value: 0.8343905966424682
|
247 |
+
name: Cosine Mrr@200
|
248 |
+
- type: cosine_map@1
|
249 |
+
value: 0.7302631578947368
|
250 |
+
name: Cosine Map@1
|
251 |
+
- type: cosine_map@20
|
252 |
+
value: 0.3434603918412553
|
253 |
+
name: Cosine Map@20
|
254 |
+
- type: cosine_map@50
|
255 |
+
value: 0.23779270403918282
|
256 |
+
name: Cosine Map@50
|
257 |
+
- type: cosine_map@100
|
258 |
+
value: 0.21161540263537876
|
259 |
+
name: Cosine Map@100
|
260 |
+
- type: cosine_map@150
|
261 |
+
value: 0.22899252179487295
|
262 |
+
name: Cosine Map@150
|
263 |
+
- type: cosine_map@200
|
264 |
+
value: 0.24784282323083537
|
265 |
+
name: Cosine Map@200
|
266 |
+
- type: cosine_map@500
|
267 |
+
value: 0.298154972004029
|
268 |
+
name: Cosine Map@500
|
269 |
+
---
|
270 |
+
|
271 |
+
# Job-Skill matching fine-tuned BAAI/bge-large-en-v1.5
|
272 |
+
|
273 |
+
Top-performing model on [TalentCLEF 2025](https://talentclef.github.io/talentclef/) Task B. Use it for job title <-> skill set matching.
|
274 |
+
|
275 |
+
## Model Details
|
276 |
+
|
277 |
+
### Model Description
|
278 |
+
- **Model Type:** Sentence Transformer
|
279 |
+
- **Base model:** [BAAI/bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5) <!-- at revision d4aa6901d3a41ba39fb536a557fa166f842b0e09 -->
|
280 |
+
- **Maximum Sequence Length:** 256 tokens
|
281 |
+
- **Output Dimensionality:** 1024 dimensions
|
282 |
+
- **Similarity Function:** Cosine Similarity
|
283 |
+
<!-- - **Training Dataset:** Unknown -->
|
284 |
+
<!-- - **Language:** Unknown -->
|
285 |
+
<!-- - **License:** Unknown -->
|
286 |
+
|
287 |
+
### Model Sources
|
288 |
+
|
289 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
290 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
291 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
292 |
+
|
293 |
+
### Full Model Architecture
|
294 |
+
|
295 |
+
```
|
296 |
+
SentenceTransformer(
|
297 |
+
(0): Transformer({'max_seq_length': 256, 'do_lower_case': True}) with Transformer model: BertModel
|
298 |
+
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
299 |
+
(2): Normalize()
|
300 |
+
)
|
301 |
+
```
|
302 |
+
|
303 |
+
## Usage
|
304 |
+
|
305 |
+
### Direct Usage (Sentence Transformers)
|
306 |
+
|
307 |
+
First install the Sentence Transformers library:
|
308 |
+
|
309 |
+
```bash
|
310 |
+
pip install -U sentence-transformers
|
311 |
+
```
|
312 |
+
|
313 |
+
Then you can load this model and run inference.
|
314 |
+
```python
|
315 |
+
from sentence_transformers import SentenceTransformer
|
316 |
+
|
317 |
+
# Download from the 🤗 Hub
|
318 |
+
model = SentenceTransformer("pj-mathematician/JobSkillBGE-large-en-v1.5")
|
319 |
+
# Run inference
|
320 |
+
sentences = [
|
321 |
+
'For roles such as import/export manager, graduate export manager, senior export manager, and other related positions in meat and meat products, the key skills include a strong understanding of international trade regulations, meat product knowledge, customs compliance, and excellent negotiation and communication skills to manage global supply chains effectively. Additionally, proficiency in relevant trade software and languages can be highly beneficial.',
|
322 |
+
'Job roles such as Performance Analyst, Quality Assurance Engineer, and Test Manager require skills in conducting performance measurement and organizing or managing conversion testing to ensure software and systems meet performance standards and function correctly in real-world scenarios.',
|
323 |
+
'Job roles that require skills such as managing staff, coordinating employees, and performing HR activities include Human Resources Managers, Team Leaders, Supervisors, and Department Heads, all of whom are responsible for overseeing personnel, implementing HR policies, and ensuring efficient team operations.',
|
324 |
+
]
|
325 |
+
embeddings = model.encode(sentences)
|
326 |
+
print(embeddings.shape)
|
327 |
+
# [3, 1024]
|
328 |
+
|
329 |
+
# Get the similarity scores for the embeddings
|
330 |
+
similarities = model.similarity(embeddings, embeddings)
|
331 |
+
print(similarities.shape)
|
332 |
+
# [3, 3]
|
333 |
+
```
|
334 |
+
|
335 |
+
<!--
|
336 |
+
### Direct Usage (Transformers)
|
337 |
+
|
338 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
339 |
+
|
340 |
+
</details>
|
341 |
+
-->
|
342 |
+
|
343 |
+
<!--
|
344 |
+
### Downstream Usage (Sentence Transformers)
|
345 |
+
|
346 |
+
You can finetune this model on your own dataset.
|
347 |
+
|
348 |
+
<details><summary>Click to expand</summary>
|
349 |
+
|
350 |
+
</details>
|
351 |
+
-->
|
352 |
+
|
353 |
+
<!--
|
354 |
+
### Out-of-Scope Use
|
355 |
+
|
356 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
357 |
+
-->
|
358 |
+
|
359 |
+
## Evaluation
|
360 |
+
|
361 |
+
### Metrics
|
362 |
+
|
363 |
+
#### Information Retrieval
|
364 |
+
|
365 |
+
* Dataset: `full_en`
|
366 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
367 |
+
|
368 |
+
| Metric | Value |
|
369 |
+
|:---------------------|:-----------|
|
370 |
+
| cosine_accuracy@1 | 0.7303 |
|
371 |
+
| cosine_accuracy@20 | 0.9934 |
|
372 |
+
| cosine_accuracy@50 | 0.9967 |
|
373 |
+
| cosine_accuracy@100 | 1.0 |
|
374 |
+
| cosine_accuracy@150 | 1.0 |
|
375 |
+
| cosine_accuracy@200 | 1.0 |
|
376 |
+
| cosine_precision@1 | 0.7303 |
|
377 |
+
| cosine_precision@20 | 0.4998 |
|
378 |
+
| cosine_precision@50 | 0.3918 |
|
379 |
+
| cosine_precision@100 | 0.3112 |
|
380 |
+
| cosine_precision@150 | 0.2652 |
|
381 |
+
| cosine_precision@200 | 0.2322 |
|
382 |
+
| cosine_recall@1 | 0.0102 |
|
383 |
+
| cosine_recall@20 | 0.1337 |
|
384 |
+
| cosine_recall@50 | 0.2541 |
|
385 |
+
| cosine_recall@100 | 0.3948 |
|
386 |
+
| cosine_recall@150 | 0.4963 |
|
387 |
+
| cosine_recall@200 | 0.5721 |
|
388 |
+
| cosine_ndcg@1 | 0.7303 |
|
389 |
+
| cosine_ndcg@20 | 0.5385 |
|
390 |
+
| cosine_ndcg@50 | 0.4499 |
|
391 |
+
| cosine_ndcg@100 | 0.4428 |
|
392 |
+
| cosine_ndcg@150 | 0.4895 |
|
393 |
+
| **cosine_ndcg@200** | **0.5346** |
|
394 |
+
| cosine_mrr@1 | 0.7303 |
|
395 |
+
| cosine_mrr@20 | 0.8342 |
|
396 |
+
| cosine_mrr@50 | 0.8343 |
|
397 |
+
| cosine_mrr@100 | 0.8344 |
|
398 |
+
| cosine_mrr@150 | 0.8344 |
|
399 |
+
| cosine_mrr@200 | 0.8344 |
|
400 |
+
| cosine_map@1 | 0.7303 |
|
401 |
+
| cosine_map@20 | 0.3435 |
|
402 |
+
| cosine_map@50 | 0.2378 |
|
403 |
+
| cosine_map@100 | 0.2116 |
|
404 |
+
| cosine_map@150 | 0.229 |
|
405 |
+
| cosine_map@200 | 0.2478 |
|
406 |
+
| cosine_map@500 | 0.2982 |
|
407 |
+
|
408 |
+
<!--
|
409 |
+
## Bias, Risks and Limitations
|
410 |
+
|
411 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
412 |
+
-->
|
413 |
+
|
414 |
+
<!--
|
415 |
+
### Recommendations
|
416 |
+
|
417 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
418 |
+
-->
|
419 |
+
|
420 |
+
## Training Details
|
421 |
+
|
422 |
+
### Training Dataset
|
423 |
+
|
424 |
+
#### Unnamed Dataset
|
425 |
+
|
426 |
+
* Size: 114,699 training samples
|
427 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
428 |
+
* Approximate statistics based on the first 1000 samples:
|
429 |
+
| | anchor | positive |
|
430 |
+
|:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
431 |
+
| type | string | string |
|
432 |
+
| details | <ul><li>min: 43 tokens</li><li>mean: 65.45 tokens</li><li>max: 116 tokens</li></ul> | <ul><li>min: 34 tokens</li><li>mean: 55.34 tokens</li><li>max: 162 tokens</li></ul> |
|
433 |
+
* Samples:
|
434 |
+
| anchor | positive |
|
435 |
+
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
436 |
+
| <code>A technical director or any of its synonyms requires a strong blend of technical expertise and leadership skills, including the ability to oversee technical operations, manage teams, and ensure the successful execution of technical projects while maintaining operational efficiency and innovation.</code> | <code>Job roles that require promoting health and safety include occupational health and safety specialists, safety managers, and public health educators, all of whom work to ensure safe and healthy environments in workplaces and communities.</code> |
|
437 |
+
| <code>A technical director or any of its synonyms requires a strong blend of technical expertise and leadership skills, including the ability to oversee technical operations, manage teams, and ensure the successful execution of technical projects while maintaining operational efficiency and innovation.</code> | <code>Job roles that require organizing rehearsals include directors, choreographers, and conductors in theater, dance, and music ensembles, who must efficiently plan and schedule practice sessions to prepare performers for a successful final performance.</code> |
|
438 |
+
| <code>A technical director or any of its synonyms requires a strong blend of technical expertise and leadership skills, including the ability to oversee technical operations, manage teams, and ensure the successful execution of technical projects while maintaining operational efficiency and innovation.</code> | <code>Job roles such as Health and Safety Managers, Environmental Health Officers, and Risk Management Specialists often require the skill of negotiating health and safety issues with third parties to ensure compliance and protection standards are met across different organizations and sites.</code> |
|
439 |
+
* Loss: [<code>CachedGISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedgistembedloss) with these parameters:
|
440 |
+
```
|
441 |
+
{'guide': SentenceTransformer(
|
442 |
+
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
|
443 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
444 |
+
(2): Normalize()
|
445 |
+
), 'temperature': 0.01, 'mini_batch_size': 32, 'margin_strategy': 'absolute', 'margin': 0.0}
|
446 |
+
```
|
447 |
+
|
448 |
+
### Training Hyperparameters
|
449 |
+
#### Non-Default Hyperparameters
|
450 |
+
|
451 |
+
- `eval_strategy`: steps
|
452 |
+
- `per_device_train_batch_size`: 64
|
453 |
+
- `per_device_eval_batch_size`: 128
|
454 |
+
- `gradient_accumulation_steps`: 2
|
455 |
+
- `num_train_epochs`: 5
|
456 |
+
- `warmup_ratio`: 0.05
|
457 |
+
- `log_on_each_node`: False
|
458 |
+
- `fp16`: True
|
459 |
+
- `dataloader_num_workers`: 4
|
460 |
+
- `ddp_find_unused_parameters`: True
|
461 |
+
- `batch_sampler`: no_duplicates
|
462 |
+
|
463 |
+
#### All Hyperparameters
|
464 |
+
<details><summary>Click to expand</summary>
|
465 |
+
|
466 |
+
- `overwrite_output_dir`: False
|
467 |
+
- `do_predict`: False
|
468 |
+
- `eval_strategy`: steps
|
469 |
+
- `prediction_loss_only`: True
|
470 |
+
- `per_device_train_batch_size`: 64
|
471 |
+
- `per_device_eval_batch_size`: 128
|
472 |
+
- `per_gpu_train_batch_size`: None
|
473 |
+
- `per_gpu_eval_batch_size`: None
|
474 |
+
- `gradient_accumulation_steps`: 2
|
475 |
+
- `eval_accumulation_steps`: None
|
476 |
+
- `torch_empty_cache_steps`: None
|
477 |
+
- `learning_rate`: 5e-05
|
478 |
+
- `weight_decay`: 0.0
|
479 |
+
- `adam_beta1`: 0.9
|
480 |
+
- `adam_beta2`: 0.999
|
481 |
+
- `adam_epsilon`: 1e-08
|
482 |
+
- `max_grad_norm`: 1.0
|
483 |
+
- `num_train_epochs`: 5
|
484 |
+
- `max_steps`: -1
|
485 |
+
- `lr_scheduler_type`: linear
|
486 |
+
- `lr_scheduler_kwargs`: {}
|
487 |
+
- `warmup_ratio`: 0.05
|
488 |
+
- `warmup_steps`: 0
|
489 |
+
- `log_level`: passive
|
490 |
+
- `log_level_replica`: warning
|
491 |
+
- `log_on_each_node`: False
|
492 |
+
- `logging_nan_inf_filter`: True
|
493 |
+
- `save_safetensors`: True
|
494 |
+
- `save_on_each_node`: False
|
495 |
+
- `save_only_model`: False
|
496 |
+
- `restore_callback_states_from_checkpoint`: False
|
497 |
+
- `no_cuda`: False
|
498 |
+
- `use_cpu`: False
|
499 |
+
- `use_mps_device`: False
|
500 |
+
- `seed`: 42
|
501 |
+
- `data_seed`: None
|
502 |
+
- `jit_mode_eval`: False
|
503 |
+
- `use_ipex`: False
|
504 |
+
- `bf16`: False
|
505 |
+
- `fp16`: True
|
506 |
+
- `fp16_opt_level`: O1
|
507 |
+
- `half_precision_backend`: auto
|
508 |
+
- `bf16_full_eval`: False
|
509 |
+
- `fp16_full_eval`: False
|
510 |
+
- `tf32`: None
|
511 |
+
- `local_rank`: 0
|
512 |
+
- `ddp_backend`: None
|
513 |
+
- `tpu_num_cores`: None
|
514 |
+
- `tpu_metrics_debug`: False
|
515 |
+
- `debug`: []
|
516 |
+
- `dataloader_drop_last`: True
|
517 |
+
- `dataloader_num_workers`: 4
|
518 |
+
- `dataloader_prefetch_factor`: None
|
519 |
+
- `past_index`: -1
|
520 |
+
- `disable_tqdm`: False
|
521 |
+
- `remove_unused_columns`: True
|
522 |
+
- `label_names`: None
|
523 |
+
- `load_best_model_at_end`: False
|
524 |
+
- `ignore_data_skip`: False
|
525 |
+
- `fsdp`: []
|
526 |
+
- `fsdp_min_num_params`: 0
|
527 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
528 |
+
- `tp_size`: 0
|
529 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
530 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
531 |
+
- `deepspeed`: None
|
532 |
+
- `label_smoothing_factor`: 0.0
|
533 |
+
- `optim`: adamw_torch
|
534 |
+
- `optim_args`: None
|
535 |
+
- `adafactor`: False
|
536 |
+
- `group_by_length`: False
|
537 |
+
- `length_column_name`: length
|
538 |
+
- `ddp_find_unused_parameters`: True
|
539 |
+
- `ddp_bucket_cap_mb`: None
|
540 |
+
- `ddp_broadcast_buffers`: False
|
541 |
+
- `dataloader_pin_memory`: True
|
542 |
+
- `dataloader_persistent_workers`: False
|
543 |
+
- `skip_memory_metrics`: True
|
544 |
+
- `use_legacy_prediction_loop`: False
|
545 |
+
- `push_to_hub`: False
|
546 |
+
- `resume_from_checkpoint`: None
|
547 |
+
- `hub_model_id`: None
|
548 |
+
- `hub_strategy`: every_save
|
549 |
+
- `hub_private_repo`: None
|
550 |
+
- `hub_always_push`: False
|
551 |
+
- `gradient_checkpointing`: False
|
552 |
+
- `gradient_checkpointing_kwargs`: None
|
553 |
+
- `include_inputs_for_metrics`: False
|
554 |
+
- `include_for_metrics`: []
|
555 |
+
- `eval_do_concat_batches`: True
|
556 |
+
- `fp16_backend`: auto
|
557 |
+
- `push_to_hub_model_id`: None
|
558 |
+
- `push_to_hub_organization`: None
|
559 |
+
- `mp_parameters`:
|
560 |
+
- `auto_find_batch_size`: False
|
561 |
+
- `full_determinism`: False
|
562 |
+
- `torchdynamo`: None
|
563 |
+
- `ray_scope`: last
|
564 |
+
- `ddp_timeout`: 1800
|
565 |
+
- `torch_compile`: False
|
566 |
+
- `torch_compile_backend`: None
|
567 |
+
- `torch_compile_mode`: None
|
568 |
+
- `include_tokens_per_second`: False
|
569 |
+
- `include_num_input_tokens_seen`: False
|
570 |
+
- `neftune_noise_alpha`: None
|
571 |
+
- `optim_target_modules`: None
|
572 |
+
- `batch_eval_metrics`: False
|
573 |
+
- `eval_on_start`: False
|
574 |
+
- `use_liger_kernel`: False
|
575 |
+
- `eval_use_gather_object`: False
|
576 |
+
- `average_tokens_across_devices`: False
|
577 |
+
- `prompts`: None
|
578 |
+
- `batch_sampler`: no_duplicates
|
579 |
+
- `multi_dataset_batch_sampler`: proportional
|
580 |
+
|
581 |
+
</details>
|
582 |
+
|
583 |
+
### Training Logs
|
584 |
+
| Epoch | Step | Training Loss | full_en_cosine_ndcg@200 |
|
585 |
+
|:------:|:----:|:-------------:|:-----------------------:|
|
586 |
+
| -1 | -1 | - | 0.4784 |
|
587 |
+
| 0.0011 | 1 | 9.119 | - |
|
588 |
+
| 0.1116 | 100 | 4.1469 | - |
|
589 |
+
| 0.2232 | 200 | 2.5294 | 0.5362 |
|
590 |
+
| 0.3348 | 300 | 2.3611 | - |
|
591 |
+
| 0.4464 | 400 | 2.192 | 0.5318 |
|
592 |
+
| 0.5580 | 500 | 2.0338 | - |
|
593 |
+
| 0.6696 | 600 | 1.9009 | 0.5383 |
|
594 |
+
| 0.7812 | 700 | 1.8404 | - |
|
595 |
+
| 0.8929 | 800 | 1.7692 | 0.5352 |
|
596 |
+
| 1.0045 | 900 | 1.6921 | - |
|
597 |
+
| 1.1161 | 1000 | 1.3861 | 0.5368 |
|
598 |
+
| 1.2277 | 1100 | 1.3863 | - |
|
599 |
+
| 1.3393 | 1200 | 1.3546 | 0.5259 |
|
600 |
+
| 1.4509 | 1300 | 1.373 | - |
|
601 |
+
| 1.5625 | 1400 | 1.3364 | 0.5303 |
|
602 |
+
| 1.6741 | 1500 | 1.2876 | - |
|
603 |
+
| 1.7857 | 1600 | 1.3094 | 0.5323 |
|
604 |
+
| 1.8973 | 1700 | 1.2784 | - |
|
605 |
+
| 2.0089 | 1800 | 1.2204 | 0.5330 |
|
606 |
+
| 2.1205 | 1900 | 0.9617 | - |
|
607 |
+
| 2.2321 | 2000 | 1.0004 | 0.5277 |
|
608 |
+
| 2.3438 | 2100 | 0.9694 | - |
|
609 |
+
| 2.4554 | 2200 | 0.9843 | 0.5356 |
|
610 |
+
| 2.5670 | 2300 | 0.9743 | - |
|
611 |
+
| 2.6786 | 2400 | 0.9252 | 0.5320 |
|
612 |
+
| 2.7902 | 2500 | 0.9272 | - |
|
613 |
+
| 2.9018 | 2600 | 0.9279 | 0.5333 |
|
614 |
+
| 3.0134 | 2700 | 0.857 | - |
|
615 |
+
| 3.125 | 2800 | 0.7313 | 0.5300 |
|
616 |
+
| 3.2366 | 2900 | 0.7103 | - |
|
617 |
+
| 3.3482 | 3000 | 0.7187 | 0.5319 |
|
618 |
+
| 3.4598 | 3100 | 0.7067 | - |
|
619 |
+
| 3.5714 | 3200 | 0.7157 | 0.5369 |
|
620 |
+
| 3.6830 | 3300 | 0.7113 | - |
|
621 |
+
| 3.7946 | 3400 | 0.7013 | 0.5341 |
|
622 |
+
| 3.9062 | 3500 | 0.6903 | - |
|
623 |
+
| 4.0179 | 3600 | 0.6462 | 0.5335 |
|
624 |
+
| 4.1295 | 3700 | 0.5162 | - |
|
625 |
+
| 4.2411 | 3800 | 0.524 | 0.5352 |
|
626 |
+
| 4.3527 | 3900 | 0.5303 | - |
|
627 |
+
| 4.4643 | 4000 | 0.5269 | 0.5341 |
|
628 |
+
| 4.5759 | 4100 | 0.4824 | - |
|
629 |
+
| 4.6875 | 4200 | 0.5222 | 0.5342 |
|
630 |
+
| 4.7991 | 4300 | 0.5104 | - |
|
631 |
+
| 4.9107 | 4400 | 0.5002 | 0.5346 |
|
632 |
+
|
633 |
+
|
634 |
+
### Framework Versions
|
635 |
+
- Python: 3.11.11
|
636 |
+
- Sentence Transformers: 4.1.0
|
637 |
+
- Transformers: 4.51.2
|
638 |
+
- PyTorch: 2.6.0+cu124
|
639 |
+
- Accelerate: 1.6.0
|
640 |
+
- Datasets: 3.5.0
|
641 |
+
- Tokenizers: 0.21.1
|
642 |
+
|
643 |
+
## Citation
|
644 |
+
|
645 |
+
### BibTeX
|
646 |
+
|
647 |
+
#### Sentence Transformers
|
648 |
+
```bibtex
|
649 |
+
@inproceedings{reimers-2019-sentence-bert,
|
650 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
651 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
652 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
653 |
+
month = "11",
|
654 |
+
year = "2019",
|
655 |
+
publisher = "Association for Computational Linguistics",
|
656 |
+
url = "https://arxiv.org/abs/1908.10084",
|
657 |
+
}
|
658 |
+
```
|
659 |
+
|
660 |
+
<!--
|
661 |
+
## Glossary
|
662 |
+
|
663 |
+
*Clearly define terms in order to be accessible across audiences.*
|
664 |
+
-->
|
665 |
+
|
666 |
+
<!--
|
667 |
+
## Model Card Authors
|
668 |
+
|
669 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
670 |
+
-->
|
671 |
+
|
672 |
+
<!--
|
673 |
+
## Model Card Contact
|
674 |
+
|
675 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
676 |
+
-->
|
checkpoint-3800/1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 1024,
|
3 |
+
"pooling_mode_cls_token": true,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
checkpoint-3800/README.md
ADDED
@@ -0,0 +1,670 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- sentence-transformers
|
4 |
+
- sentence-similarity
|
5 |
+
- feature-extraction
|
6 |
+
- generated_from_trainer
|
7 |
+
- dataset_size:114699
|
8 |
+
- loss:CachedGISTEmbedLoss
|
9 |
+
base_model: BAAI/bge-large-en-v1.5
|
10 |
+
widget:
|
11 |
+
- source_sentence: For roles such as 'physiotherapist', 'neuromusculoskeletal physiotherapist',
|
12 |
+
'osteopath', and 'chiropractor', the skills needed include a deep understanding
|
13 |
+
of human anatomy and physiology, strong diagnostic skills, and the ability to
|
14 |
+
apply manual therapy techniques to treat musculoskeletal issues. Additionally,
|
15 |
+
effective communication skills are crucial for explaining treatments and exercises
|
16 |
+
to patients, while adaptability and problem-solving skills are essential for tailoring
|
17 |
+
treatments to individual patient needs.
|
18 |
+
sentences:
|
19 |
+
- Job roles such as insulation installers, HVAC technicians, and construction engineers
|
20 |
+
require knowledge of various types and characteristics of insulation materials
|
21 |
+
to effectively reduce heat transfer and improve energy efficiency in buildings
|
22 |
+
and systems. Understanding the typology of insulation materials, including their
|
23 |
+
thermal properties, durability, and environmental impact, is crucial for these
|
24 |
+
professionals to select the most appropriate materials for specific applications.
|
25 |
+
- Job roles such as Contract Managers, Legal Analysts, and Compliance Officers require
|
26 |
+
the skill of reviewing or auditing completed contracts to ensure legal accuracy,
|
27 |
+
compliance with regulations, and alignment with organizational goals.
|
28 |
+
- Job roles that require skills in dealing with emergency care situations include
|
29 |
+
emergency medical technicians (EMTs), paramedics, and emergency room nurses or
|
30 |
+
doctors, all of whom must quickly and effectively manage critical health situations
|
31 |
+
to save lives.
|
32 |
+
- source_sentence: Bus drivers, including those operating in various sectors like
|
33 |
+
public transit, intercity, private, or school services, need strong driving skills,
|
34 |
+
knowledge of traffic laws, and the ability to operate safely in diverse conditions.
|
35 |
+
Additionally, effective communication skills and the ability to handle passenger
|
36 |
+
inquiries and emergencies are crucial.
|
37 |
+
sentences:
|
38 |
+
- Job roles that require the skill to calibrate electronic instruments include calibration
|
39 |
+
technicians, quality control engineers, and instrumentation specialists. These
|
40 |
+
professionals ensure the accuracy and reliability of various electronic devices
|
41 |
+
and systems across different industries such as manufacturing, aerospace, and
|
42 |
+
automotive.
|
43 |
+
- Job roles such as Building Engineer, Architect, and Construction Specialist require
|
44 |
+
skills in designing, engineering, or developing air-tight building structures
|
45 |
+
to ensure energy efficiency and environmental control within the building.
|
46 |
+
- Job roles such as customer service representatives, flight attendants, and hotel
|
47 |
+
concierges require a strong focus on passengers or customers, ensuring their needs
|
48 |
+
and comfort are prioritized to provide excellent service and support.
|
49 |
+
- source_sentence: A mine surveyor, also known as a mining surveyor or mine planning
|
50 |
+
surveyor, requires expertise in geomatics and mining engineering to accurately
|
51 |
+
map and plan mine operations, ensuring safety and efficiency. They must also possess
|
52 |
+
strong analytical skills and the ability to use specialized software for creating
|
53 |
+
detailed mine plans and maintaining accurate records.
|
54 |
+
sentences:
|
55 |
+
- Job roles such as data analysts, business analysts, and financial analysts require
|
56 |
+
the skill to present reports or prepare statistical reports, as they often need
|
57 |
+
to communicate complex data insights clearly and effectively to stakeholders.
|
58 |
+
- Job roles that require monitoring flour unloading equipment include Quality Control
|
59 |
+
Technicians, Process Operators, and Mill Supervisors, who ensure the efficient
|
60 |
+
and safe operation of flour processing systems and the proper unloading of flour
|
61 |
+
from transport vehicles.
|
62 |
+
- Job roles that require skills in the manufacturing of made-up textile articles
|
63 |
+
include textile production managers, machinery operators, and quality control
|
64 |
+
inspectors, all of whom utilize specific technology and machinery to produce finished
|
65 |
+
textile products such as clothing, home textiles, and industrial fabrics.
|
66 |
+
- source_sentence: An insulation supervisor, regardless of the specific type of insulation
|
67 |
+
material or installation area, requires strong project management skills, knowledge
|
68 |
+
of building codes and safety regulations, and expertise in insulation techniques
|
69 |
+
to oversee the installation process effectively and ensure quality standards are
|
70 |
+
met.
|
71 |
+
sentences:
|
72 |
+
- Job roles that require skills in energy efficiency, such as promoting energy efficiency
|
73 |
+
or efficient energy use, include Energy Managers, Sustainability Specialists,
|
74 |
+
and Building Engineers, who focus on reducing energy consumption and improving
|
75 |
+
energy use in various settings. Additionally, roles like Battery Technicians or
|
76 |
+
Engineers involve battery benchmarking to enhance energy storage and efficiency
|
77 |
+
in technological devices and systems.
|
78 |
+
- The skill of applying or installing waterproofing and damp-proofing membranes
|
79 |
+
is primarily required by construction workers such as waterproofing specialists,
|
80 |
+
roofers, and building envelope technicians, who use these membranes to prevent
|
81 |
+
water damage in buildings and structures.
|
82 |
+
- Job roles such as laboratory technicians, chemists, and materials scientists require
|
83 |
+
skills in laboratory techniques, including electronic and thermic methods, gas
|
84 |
+
chromatography, and gravimetric analysis, to conduct precise experiments and analyze
|
85 |
+
materials. These professionals must apply natural science techniques and use various
|
86 |
+
lab techniques to ensure accurate and reliable results in their research or quality
|
87 |
+
control processes.
|
88 |
+
- source_sentence: For roles such as import/export manager, graduate export manager,
|
89 |
+
senior export manager, and other related positions in meat and meat products,
|
90 |
+
the key skills include a strong understanding of international trade regulations,
|
91 |
+
meat product knowledge, customs compliance, and excellent negotiation and communication
|
92 |
+
skills to manage global supply chains effectively. Additionally, proficiency in
|
93 |
+
relevant trade software and languages can be highly beneficial.
|
94 |
+
sentences:
|
95 |
+
- Job roles that require skills such as managing staff, coordinating employees,
|
96 |
+
and performing HR activities include Human Resources Managers, Team Leaders, Supervisors,
|
97 |
+
and Department Heads, all of whom are responsible for overseeing personnel, implementing
|
98 |
+
HR policies, and ensuring efficient team operations.
|
99 |
+
- Job roles such as Control Systems Engineer, Automation Engineer, and Systems Designer
|
100 |
+
require skills in designing, planning, and developing control systems to manage
|
101 |
+
and optimize the performance of various technological processes and machinery.
|
102 |
+
These professionals are tasked with creating efficient and reliable systems that
|
103 |
+
can operate autonomously or with minimal human intervention.
|
104 |
+
- Job roles such as Performance Analyst, Quality Assurance Engineer, and Test Manager
|
105 |
+
require skills in conducting performance measurement and organizing or managing
|
106 |
+
conversion testing to ensure software and systems meet performance standards and
|
107 |
+
function correctly in real-world scenarios.
|
108 |
+
pipeline_tag: sentence-similarity
|
109 |
+
library_name: sentence-transformers
|
110 |
+
metrics:
|
111 |
+
- cosine_accuracy@1
|
112 |
+
- cosine_accuracy@20
|
113 |
+
- cosine_accuracy@50
|
114 |
+
- cosine_accuracy@100
|
115 |
+
- cosine_accuracy@150
|
116 |
+
- cosine_accuracy@200
|
117 |
+
- cosine_precision@1
|
118 |
+
- cosine_precision@20
|
119 |
+
- cosine_precision@50
|
120 |
+
- cosine_precision@100
|
121 |
+
- cosine_precision@150
|
122 |
+
- cosine_precision@200
|
123 |
+
- cosine_recall@1
|
124 |
+
- cosine_recall@20
|
125 |
+
- cosine_recall@50
|
126 |
+
- cosine_recall@100
|
127 |
+
- cosine_recall@150
|
128 |
+
- cosine_recall@200
|
129 |
+
- cosine_ndcg@1
|
130 |
+
- cosine_ndcg@20
|
131 |
+
- cosine_ndcg@50
|
132 |
+
- cosine_ndcg@100
|
133 |
+
- cosine_ndcg@150
|
134 |
+
- cosine_ndcg@200
|
135 |
+
- cosine_mrr@1
|
136 |
+
- cosine_mrr@20
|
137 |
+
- cosine_mrr@50
|
138 |
+
- cosine_mrr@100
|
139 |
+
- cosine_mrr@150
|
140 |
+
- cosine_mrr@200
|
141 |
+
- cosine_map@1
|
142 |
+
- cosine_map@20
|
143 |
+
- cosine_map@50
|
144 |
+
- cosine_map@100
|
145 |
+
- cosine_map@150
|
146 |
+
- cosine_map@200
|
147 |
+
- cosine_map@500
|
148 |
+
model-index:
|
149 |
+
- name: SentenceTransformer based on BAAI/bge-large-en-v1.5
|
150 |
+
results:
|
151 |
+
- task:
|
152 |
+
type: information-retrieval
|
153 |
+
name: Information Retrieval
|
154 |
+
dataset:
|
155 |
+
name: full en
|
156 |
+
type: full_en
|
157 |
+
metrics:
|
158 |
+
- type: cosine_accuracy@1
|
159 |
+
value: 0.7302631578947368
|
160 |
+
name: Cosine Accuracy@1
|
161 |
+
- type: cosine_accuracy@20
|
162 |
+
value: 0.993421052631579
|
163 |
+
name: Cosine Accuracy@20
|
164 |
+
- type: cosine_accuracy@50
|
165 |
+
value: 0.9967105263157895
|
166 |
+
name: Cosine Accuracy@50
|
167 |
+
- type: cosine_accuracy@100
|
168 |
+
value: 1.0
|
169 |
+
name: Cosine Accuracy@100
|
170 |
+
- type: cosine_accuracy@150
|
171 |
+
value: 1.0
|
172 |
+
name: Cosine Accuracy@150
|
173 |
+
- type: cosine_accuracy@200
|
174 |
+
value: 1.0
|
175 |
+
name: Cosine Accuracy@200
|
176 |
+
- type: cosine_precision@1
|
177 |
+
value: 0.7302631578947368
|
178 |
+
name: Cosine Precision@1
|
179 |
+
- type: cosine_precision@20
|
180 |
+
value: 0.49588815789473684
|
181 |
+
name: Cosine Precision@20
|
182 |
+
- type: cosine_precision@50
|
183 |
+
value: 0.3930921052631579
|
184 |
+
name: Cosine Precision@50
|
185 |
+
- type: cosine_precision@100
|
186 |
+
value: 0.311546052631579
|
187 |
+
name: Cosine Precision@100
|
188 |
+
- type: cosine_precision@150
|
189 |
+
value: 0.265219298245614
|
190 |
+
name: Cosine Precision@150
|
191 |
+
- type: cosine_precision@200
|
192 |
+
value: 0.23268092105263163
|
193 |
+
name: Cosine Precision@200
|
194 |
+
- type: cosine_recall@1
|
195 |
+
value: 0.010244630514181254
|
196 |
+
name: Cosine Recall@1
|
197 |
+
- type: cosine_recall@20
|
198 |
+
value: 0.1324589336710221
|
199 |
+
name: Cosine Recall@20
|
200 |
+
- type: cosine_recall@50
|
201 |
+
value: 0.2548099607629461
|
202 |
+
name: Cosine Recall@50
|
203 |
+
- type: cosine_recall@100
|
204 |
+
value: 0.39498767852245736
|
205 |
+
name: Cosine Recall@100
|
206 |
+
- type: cosine_recall@150
|
207 |
+
value: 0.49574169519464223
|
208 |
+
name: Cosine Recall@150
|
209 |
+
- type: cosine_recall@200
|
210 |
+
value: 0.574019804020236
|
211 |
+
name: Cosine Recall@200
|
212 |
+
- type: cosine_ndcg@1
|
213 |
+
value: 0.7302631578947368
|
214 |
+
name: Cosine Ndcg@1
|
215 |
+
- type: cosine_ndcg@20
|
216 |
+
value: 0.5351701323930714
|
217 |
+
name: Cosine Ndcg@20
|
218 |
+
- type: cosine_ndcg@50
|
219 |
+
value: 0.4502625298651447
|
220 |
+
name: Cosine Ndcg@50
|
221 |
+
- type: cosine_ndcg@100
|
222 |
+
value: 0.44247378999755477
|
223 |
+
name: Cosine Ndcg@100
|
224 |
+
- type: cosine_ndcg@150
|
225 |
+
value: 0.48886293038433404
|
226 |
+
name: Cosine Ndcg@150
|
227 |
+
- type: cosine_ndcg@200
|
228 |
+
value: 0.5352268343210608
|
229 |
+
name: Cosine Ndcg@200
|
230 |
+
- type: cosine_mrr@1
|
231 |
+
value: 0.7302631578947368
|
232 |
+
name: Cosine Mrr@1
|
233 |
+
- type: cosine_mrr@20
|
234 |
+
value: 0.8321467731829576
|
235 |
+
name: Cosine Mrr@20
|
236 |
+
- type: cosine_mrr@50
|
237 |
+
value: 0.832296294714058
|
238 |
+
name: Cosine Mrr@50
|
239 |
+
- type: cosine_mrr@100
|
240 |
+
value: 0.8323485085820613
|
241 |
+
name: Cosine Mrr@100
|
242 |
+
- type: cosine_mrr@150
|
243 |
+
value: 0.8323485085820613
|
244 |
+
name: Cosine Mrr@150
|
245 |
+
- type: cosine_mrr@200
|
246 |
+
value: 0.8323485085820613
|
247 |
+
name: Cosine Mrr@200
|
248 |
+
- type: cosine_map@1
|
249 |
+
value: 0.7302631578947368
|
250 |
+
name: Cosine Map@1
|
251 |
+
- type: cosine_map@20
|
252 |
+
value: 0.3411525812655742
|
253 |
+
name: Cosine Map@20
|
254 |
+
- type: cosine_map@50
|
255 |
+
value: 0.23814436251631807
|
256 |
+
name: Cosine Map@50
|
257 |
+
- type: cosine_map@100
|
258 |
+
value: 0.21150798737582682
|
259 |
+
name: Cosine Map@100
|
260 |
+
- type: cosine_map@150
|
261 |
+
value: 0.22868847990327232
|
262 |
+
name: Cosine Map@150
|
263 |
+
- type: cosine_map@200
|
264 |
+
value: 0.2480155691306444
|
265 |
+
name: Cosine Map@200
|
266 |
+
- type: cosine_map@500
|
267 |
+
value: 0.29792672341621373
|
268 |
+
name: Cosine Map@500
|
269 |
+
---
|
270 |
+
|
271 |
+
# SentenceTransformer based on BAAI/bge-large-en-v1.5
|
272 |
+
|
273 |
+
This is a [sentence-transformers](https://www.SBERT.net) model fine-tuned from [BAAI/bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5). It maps sentences and paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
274 |
+
|
275 |
+
## Model Details
|
276 |
+
|
277 |
+
### Model Description
|
278 |
+
- **Model Type:** Sentence Transformer
|
279 |
+
- **Base model:** [BAAI/bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5) <!-- at revision d4aa6901d3a41ba39fb536a557fa166f842b0e09 -->
|
280 |
+
- **Maximum Sequence Length:** 256 tokens
|
281 |
+
- **Output Dimensionality:** 1024 dimensions
|
282 |
+
- **Similarity Function:** Cosine Similarity
|
283 |
+
<!-- - **Training Dataset:** Unknown -->
|
284 |
+
<!-- - **Language:** Unknown -->
|
285 |
+
<!-- - **License:** Unknown -->
|
286 |
+
|
287 |
+
### Model Sources
|
288 |
+
|
289 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
290 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
291 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
292 |
+
|
293 |
+
### Full Model Architecture
|
294 |
+
|
295 |
+
```
|
296 |
+
SentenceTransformer(
|
297 |
+
(0): Transformer({'max_seq_length': 256, 'do_lower_case': True}) with Transformer model: BertModel
|
298 |
+
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
299 |
+
(2): Normalize()
|
300 |
+
)
|
301 |
+
```
|
302 |
+
|
303 |
+
## Usage
|
304 |
+
|
305 |
+
### Direct Usage (Sentence Transformers)
|
306 |
+
|
307 |
+
First install the Sentence Transformers library:
|
308 |
+
|
309 |
+
```bash
|
310 |
+
pip install -U sentence-transformers
|
311 |
+
```
|
312 |
+
|
313 |
+
Then you can load this model and run inference.
|
314 |
+
```python
|
315 |
+
from sentence_transformers import SentenceTransformer
|
316 |
+
|
317 |
+
# Download from the 🤗 Hub
|
318 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
319 |
+
# Run inference
|
320 |
+
sentences = [
|
321 |
+
'For roles such as import/export manager, graduate export manager, senior export manager, and other related positions in meat and meat products, the key skills include a strong understanding of international trade regulations, meat product knowledge, customs compliance, and excellent negotiation and communication skills to manage global supply chains effectively. Additionally, proficiency in relevant trade software and languages can be highly beneficial.',
|
322 |
+
'Job roles such as Performance Analyst, Quality Assurance Engineer, and Test Manager require skills in conducting performance measurement and organizing or managing conversion testing to ensure software and systems meet performance standards and function correctly in real-world scenarios.',
|
323 |
+
'Job roles that require skills such as managing staff, coordinating employees, and performing HR activities include Human Resources Managers, Team Leaders, Supervisors, and Department Heads, all of whom are responsible for overseeing personnel, implementing HR policies, and ensuring efficient team operations.',
|
324 |
+
]
|
325 |
+
embeddings = model.encode(sentences)
|
326 |
+
print(embeddings.shape)
|
327 |
+
# [3, 1024]
|
328 |
+
|
329 |
+
# Get the similarity scores for the embeddings
|
330 |
+
similarities = model.similarity(embeddings, embeddings)
|
331 |
+
print(similarities.shape)
|
332 |
+
# [3, 3]
|
333 |
+
```
|
334 |
+
|
335 |
+
<!--
|
336 |
+
### Direct Usage (Transformers)
|
337 |
+
|
338 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
339 |
+
|
340 |
+
</details>
|
341 |
+
-->
|
342 |
+
|
343 |
+
<!--
|
344 |
+
### Downstream Usage (Sentence Transformers)
|
345 |
+
|
346 |
+
You can finetune this model on your own dataset.
|
347 |
+
|
348 |
+
<details><summary>Click to expand</summary>
|
349 |
+
|
350 |
+
</details>
|
351 |
+
-->
|
352 |
+
|
353 |
+
<!--
|
354 |
+
### Out-of-Scope Use
|
355 |
+
|
356 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
357 |
+
-->
|
358 |
+
|
359 |
+
## Evaluation
|
360 |
+
|
361 |
+
### Metrics
|
362 |
+
|
363 |
+
#### Information Retrieval
|
364 |
+
|
365 |
+
* Dataset: `full_en`
|
366 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
367 |
+
|
368 |
+
| Metric | Value |
|
369 |
+
|:---------------------|:-----------|
|
370 |
+
| cosine_accuracy@1 | 0.7303 |
|
371 |
+
| cosine_accuracy@20 | 0.9934 |
|
372 |
+
| cosine_accuracy@50 | 0.9967 |
|
373 |
+
| cosine_accuracy@100 | 1.0 |
|
374 |
+
| cosine_accuracy@150 | 1.0 |
|
375 |
+
| cosine_accuracy@200 | 1.0 |
|
376 |
+
| cosine_precision@1 | 0.7303 |
|
377 |
+
| cosine_precision@20 | 0.4959 |
|
378 |
+
| cosine_precision@50 | 0.3931 |
|
379 |
+
| cosine_precision@100 | 0.3115 |
|
380 |
+
| cosine_precision@150 | 0.2652 |
|
381 |
+
| cosine_precision@200 | 0.2327 |
|
382 |
+
| cosine_recall@1 | 0.0102 |
|
383 |
+
| cosine_recall@20 | 0.1325 |
|
384 |
+
| cosine_recall@50 | 0.2548 |
|
385 |
+
| cosine_recall@100 | 0.395 |
|
386 |
+
| cosine_recall@150 | 0.4957 |
|
387 |
+
| cosine_recall@200 | 0.574 |
|
388 |
+
| cosine_ndcg@1 | 0.7303 |
|
389 |
+
| cosine_ndcg@20 | 0.5352 |
|
390 |
+
| cosine_ndcg@50 | 0.4503 |
|
391 |
+
| cosine_ndcg@100 | 0.4425 |
|
392 |
+
| cosine_ndcg@150 | 0.4889 |
|
393 |
+
| **cosine_ndcg@200** | **0.5352** |
|
394 |
+
| cosine_mrr@1 | 0.7303 |
|
395 |
+
| cosine_mrr@20 | 0.8321 |
|
396 |
+
| cosine_mrr@50 | 0.8323 |
|
397 |
+
| cosine_mrr@100 | 0.8323 |
|
398 |
+
| cosine_mrr@150 | 0.8323 |
|
399 |
+
| cosine_mrr@200 | 0.8323 |
|
400 |
+
| cosine_map@1 | 0.7303 |
|
401 |
+
| cosine_map@20 | 0.3412 |
|
402 |
+
| cosine_map@50 | 0.2381 |
|
403 |
+
| cosine_map@100 | 0.2115 |
|
404 |
+
| cosine_map@150 | 0.2287 |
|
405 |
+
| cosine_map@200 | 0.248 |
|
406 |
+
| cosine_map@500 | 0.2979 |
|
407 |
+
|
408 |
+
<!--
|
409 |
+
## Bias, Risks and Limitations
|
410 |
+
|
411 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
412 |
+
-->
|
413 |
+
|
414 |
+
<!--
|
415 |
+
### Recommendations
|
416 |
+
|
417 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
418 |
+
-->
|
419 |
+
|
420 |
+
## Training Details
|
421 |
+
|
422 |
+
### Training Dataset
|
423 |
+
|
424 |
+
#### Unnamed Dataset
|
425 |
+
|
426 |
+
* Size: 114,699 training samples
|
427 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
428 |
+
* Approximate statistics based on the first 1000 samples:
|
429 |
+
| | anchor | positive |
|
430 |
+
|:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
431 |
+
| type | string | string |
|
432 |
+
| details | <ul><li>min: 43 tokens</li><li>mean: 65.45 tokens</li><li>max: 116 tokens</li></ul> | <ul><li>min: 34 tokens</li><li>mean: 55.34 tokens</li><li>max: 162 tokens</li></ul> |
|
433 |
+
* Samples:
|
434 |
+
| anchor | positive |
|
435 |
+
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
436 |
+
| <code>A technical director or any of its synonyms requires a strong blend of technical expertise and leadership skills, including the ability to oversee technical operations, manage teams, and ensure the successful execution of technical projects while maintaining operational efficiency and innovation.</code> | <code>Job roles that require promoting health and safety include occupational health and safety specialists, safety managers, and public health educators, all of whom work to ensure safe and healthy environments in workplaces and communities.</code> |
|
437 |
+
| <code>A technical director or any of its synonyms requires a strong blend of technical expertise and leadership skills, including the ability to oversee technical operations, manage teams, and ensure the successful execution of technical projects while maintaining operational efficiency and innovation.</code> | <code>Job roles that require organizing rehearsals include directors, choreographers, and conductors in theater, dance, and music ensembles, who must efficiently plan and schedule practice sessions to prepare performers for a successful final performance.</code> |
|
438 |
+
| <code>A technical director or any of its synonyms requires a strong blend of technical expertise and leadership skills, including the ability to oversee technical operations, manage teams, and ensure the successful execution of technical projects while maintaining operational efficiency and innovation.</code> | <code>Job roles such as Health and Safety Managers, Environmental Health Officers, and Risk Management Specialists often require the skill of negotiating health and safety issues with third parties to ensure compliance and protection standards are met across different organizations and sites.</code> |
|
439 |
+
* Loss: [<code>CachedGISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedgistembedloss) with these parameters:
|
440 |
+
```
|
441 |
+
{'guide': SentenceTransformer(
|
442 |
+
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
|
443 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
444 |
+
(2): Normalize()
|
445 |
+
), 'temperature': 0.01, 'mini_batch_size': 32, 'margin_strategy': 'absolute', 'margin': 0.0}
|
446 |
+
```
|
447 |
+
|
448 |
+
### Training Hyperparameters
|
449 |
+
#### Non-Default Hyperparameters
|
450 |
+
|
451 |
+
- `eval_strategy`: steps
|
452 |
+
- `per_device_train_batch_size`: 64
|
453 |
+
- `per_device_eval_batch_size`: 128
|
454 |
+
- `gradient_accumulation_steps`: 2
|
455 |
+
- `num_train_epochs`: 5
|
456 |
+
- `warmup_ratio`: 0.05
|
457 |
+
- `log_on_each_node`: False
|
458 |
+
- `fp16`: True
|
459 |
+
- `dataloader_num_workers`: 4
|
460 |
+
- `ddp_find_unused_parameters`: True
|
461 |
+
- `batch_sampler`: no_duplicates
|
462 |
+
|
463 |
+
#### All Hyperparameters
|
464 |
+
<details><summary>Click to expand</summary>
|
465 |
+
|
466 |
+
- `overwrite_output_dir`: False
|
467 |
+
- `do_predict`: False
|
468 |
+
- `eval_strategy`: steps
|
469 |
+
- `prediction_loss_only`: True
|
470 |
+
- `per_device_train_batch_size`: 64
|
471 |
+
- `per_device_eval_batch_size`: 128
|
472 |
+
- `per_gpu_train_batch_size`: None
|
473 |
+
- `per_gpu_eval_batch_size`: None
|
474 |
+
- `gradient_accumulation_steps`: 2
|
475 |
+
- `eval_accumulation_steps`: None
|
476 |
+
- `torch_empty_cache_steps`: None
|
477 |
+
- `learning_rate`: 5e-05
|
478 |
+
- `weight_decay`: 0.0
|
479 |
+
- `adam_beta1`: 0.9
|
480 |
+
- `adam_beta2`: 0.999
|
481 |
+
- `adam_epsilon`: 1e-08
|
482 |
+
- `max_grad_norm`: 1.0
|
483 |
+
- `num_train_epochs`: 5
|
484 |
+
- `max_steps`: -1
|
485 |
+
- `lr_scheduler_type`: linear
|
486 |
+
- `lr_scheduler_kwargs`: {}
|
487 |
+
- `warmup_ratio`: 0.05
|
488 |
+
- `warmup_steps`: 0
|
489 |
+
- `log_level`: passive
|
490 |
+
- `log_level_replica`: warning
|
491 |
+
- `log_on_each_node`: False
|
492 |
+
- `logging_nan_inf_filter`: True
|
493 |
+
- `save_safetensors`: True
|
494 |
+
- `save_on_each_node`: False
|
495 |
+
- `save_only_model`: False
|
496 |
+
- `restore_callback_states_from_checkpoint`: False
|
497 |
+
- `no_cuda`: False
|
498 |
+
- `use_cpu`: False
|
499 |
+
- `use_mps_device`: False
|
500 |
+
- `seed`: 42
|
501 |
+
- `data_seed`: None
|
502 |
+
- `jit_mode_eval`: False
|
503 |
+
- `use_ipex`: False
|
504 |
+
- `bf16`: False
|
505 |
+
- `fp16`: True
|
506 |
+
- `fp16_opt_level`: O1
|
507 |
+
- `half_precision_backend`: auto
|
508 |
+
- `bf16_full_eval`: False
|
509 |
+
- `fp16_full_eval`: False
|
510 |
+
- `tf32`: None
|
511 |
+
- `local_rank`: 0
|
512 |
+
- `ddp_backend`: None
|
513 |
+
- `tpu_num_cores`: None
|
514 |
+
- `tpu_metrics_debug`: False
|
515 |
+
- `debug`: []
|
516 |
+
- `dataloader_drop_last`: True
|
517 |
+
- `dataloader_num_workers`: 4
|
518 |
+
- `dataloader_prefetch_factor`: None
|
519 |
+
- `past_index`: -1
|
520 |
+
- `disable_tqdm`: False
|
521 |
+
- `remove_unused_columns`: True
|
522 |
+
- `label_names`: None
|
523 |
+
- `load_best_model_at_end`: False
|
524 |
+
- `ignore_data_skip`: False
|
525 |
+
- `fsdp`: []
|
526 |
+
- `fsdp_min_num_params`: 0
|
527 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
528 |
+
- `tp_size`: 0
|
529 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
530 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
531 |
+
- `deepspeed`: None
|
532 |
+
- `label_smoothing_factor`: 0.0
|
533 |
+
- `optim`: adamw_torch
|
534 |
+
- `optim_args`: None
|
535 |
+
- `adafactor`: False
|
536 |
+
- `group_by_length`: False
|
537 |
+
- `length_column_name`: length
|
538 |
+
- `ddp_find_unused_parameters`: True
|
539 |
+
- `ddp_bucket_cap_mb`: None
|
540 |
+
- `ddp_broadcast_buffers`: False
|
541 |
+
- `dataloader_pin_memory`: True
|
542 |
+
- `dataloader_persistent_workers`: False
|
543 |
+
- `skip_memory_metrics`: True
|
544 |
+
- `use_legacy_prediction_loop`: False
|
545 |
+
- `push_to_hub`: False
|
546 |
+
- `resume_from_checkpoint`: None
|
547 |
+
- `hub_model_id`: None
|
548 |
+
- `hub_strategy`: every_save
|
549 |
+
- `hub_private_repo`: None
|
550 |
+
- `hub_always_push`: False
|
551 |
+
- `gradient_checkpointing`: False
|
552 |
+
- `gradient_checkpointing_kwargs`: None
|
553 |
+
- `include_inputs_for_metrics`: False
|
554 |
+
- `include_for_metrics`: []
|
555 |
+
- `eval_do_concat_batches`: True
|
556 |
+
- `fp16_backend`: auto
|
557 |
+
- `push_to_hub_model_id`: None
|
558 |
+
- `push_to_hub_organization`: None
|
559 |
+
- `mp_parameters`:
|
560 |
+
- `auto_find_batch_size`: False
|
561 |
+
- `full_determinism`: False
|
562 |
+
- `torchdynamo`: None
|
563 |
+
- `ray_scope`: last
|
564 |
+
- `ddp_timeout`: 1800
|
565 |
+
- `torch_compile`: False
|
566 |
+
- `torch_compile_backend`: None
|
567 |
+
- `torch_compile_mode`: None
|
568 |
+
- `include_tokens_per_second`: False
|
569 |
+
- `include_num_input_tokens_seen`: False
|
570 |
+
- `neftune_noise_alpha`: None
|
571 |
+
- `optim_target_modules`: None
|
572 |
+
- `batch_eval_metrics`: False
|
573 |
+
- `eval_on_start`: False
|
574 |
+
- `use_liger_kernel`: False
|
575 |
+
- `eval_use_gather_object`: False
|
576 |
+
- `average_tokens_across_devices`: False
|
577 |
+
- `prompts`: None
|
578 |
+
- `batch_sampler`: no_duplicates
|
579 |
+
- `multi_dataset_batch_sampler`: proportional
|
580 |
+
|
581 |
+
</details>
|
582 |
+
|
583 |
+
### Training Logs
|
584 |
+
| Epoch | Step | Training Loss | full_en_cosine_ndcg@200 |
|
585 |
+
|:------:|:----:|:-------------:|:-----------------------:|
|
586 |
+
| -1 | -1 | - | 0.4784 |
|
587 |
+
| 0.0011 | 1 | 9.119 | - |
|
588 |
+
| 0.1116 | 100 | 4.1469 | - |
|
589 |
+
| 0.2232 | 200 | 2.5294 | 0.5362 |
|
590 |
+
| 0.3348 | 300 | 2.3611 | - |
|
591 |
+
| 0.4464 | 400 | 2.192 | 0.5318 |
|
592 |
+
| 0.5580 | 500 | 2.0338 | - |
|
593 |
+
| 0.6696 | 600 | 1.9009 | 0.5383 |
|
594 |
+
| 0.7812 | 700 | 1.8404 | - |
|
595 |
+
| 0.8929 | 800 | 1.7692 | 0.5352 |
|
596 |
+
| 1.0045 | 900 | 1.6921 | - |
|
597 |
+
| 1.1161 | 1000 | 1.3861 | 0.5368 |
|
598 |
+
| 1.2277 | 1100 | 1.3863 | - |
|
599 |
+
| 1.3393 | 1200 | 1.3546 | 0.5259 |
|
600 |
+
| 1.4509 | 1300 | 1.373 | - |
|
601 |
+
| 1.5625 | 1400 | 1.3364 | 0.5303 |
|
602 |
+
| 1.6741 | 1500 | 1.2876 | - |
|
603 |
+
| 1.7857 | 1600 | 1.3094 | 0.5323 |
|
604 |
+
| 1.8973 | 1700 | 1.2784 | - |
|
605 |
+
| 2.0089 | 1800 | 1.2204 | 0.5330 |
|
606 |
+
| 2.1205 | 1900 | 0.9617 | - |
|
607 |
+
| 2.2321 | 2000 | 1.0004 | 0.5277 |
|
608 |
+
| 2.3438 | 2100 | 0.9694 | - |
|
609 |
+
| 2.4554 | 2200 | 0.9843 | 0.5356 |
|
610 |
+
| 2.5670 | 2300 | 0.9743 | - |
|
611 |
+
| 2.6786 | 2400 | 0.9252 | 0.5320 |
|
612 |
+
| 2.7902 | 2500 | 0.9272 | - |
|
613 |
+
| 2.9018 | 2600 | 0.9279 | 0.5333 |
|
614 |
+
| 3.0134 | 2700 | 0.857 | - |
|
615 |
+
| 3.125 | 2800 | 0.7313 | 0.5300 |
|
616 |
+
| 3.2366 | 2900 | 0.7103 | - |
|
617 |
+
| 3.3482 | 3000 | 0.7187 | 0.5319 |
|
618 |
+
| 3.4598 | 3100 | 0.7067 | - |
|
619 |
+
| 3.5714 | 3200 | 0.7157 | 0.5369 |
|
620 |
+
| 3.6830 | 3300 | 0.7113 | - |
|
621 |
+
| 3.7946 | 3400 | 0.7013 | 0.5341 |
|
622 |
+
| 3.9062 | 3500 | 0.6903 | - |
|
623 |
+
| 4.0179 | 3600 | 0.6462 | 0.5335 |
|
624 |
+
| 4.1295 | 3700 | 0.5162 | - |
|
625 |
+
| 4.2411 | 3800 | 0.524 | 0.5352 |
|
626 |
+
|
627 |
+
|
628 |
+
### Framework Versions
|
629 |
+
- Python: 3.11.11
|
630 |
+
- Sentence Transformers: 4.1.0
|
631 |
+
- Transformers: 4.51.2
|
632 |
+
- PyTorch: 2.6.0+cu124
|
633 |
+
- Accelerate: 1.6.0
|
634 |
+
- Datasets: 3.5.0
|
635 |
+
- Tokenizers: 0.21.1
|
636 |
+
|
637 |
+
## Citation
|
638 |
+
|
639 |
+
### BibTeX
|
640 |
+
|
641 |
+
#### Sentence Transformers
|
642 |
+
```bibtex
|
643 |
+
@inproceedings{reimers-2019-sentence-bert,
|
644 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
645 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
646 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
647 |
+
month = "11",
|
648 |
+
year = "2019",
|
649 |
+
publisher = "Association for Computational Linguistics",
|
650 |
+
url = "https://arxiv.org/abs/1908.10084",
|
651 |
+
}
|
652 |
+
```
|
653 |
+
|
654 |
+
<!--
|
655 |
+
## Glossary
|
656 |
+
|
657 |
+
*Clearly define terms in order to be accessible across audiences.*
|
658 |
+
-->
|
659 |
+
|
660 |
+
<!--
|
661 |
+
## Model Card Authors
|
662 |
+
|
663 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
664 |
+
-->
|
665 |
+
|
666 |
+
<!--
|
667 |
+
## Model Card Contact
|
668 |
+
|
669 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
670 |
+
-->
|
checkpoint-3800/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"BertModel"
|
4 |
+
],
|
5 |
+
"attention_probs_dropout_prob": 0.1,
|
6 |
+
"classifier_dropout": null,
|
7 |
+
"gradient_checkpointing": false,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 1024,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0"
|
13 |
+
},
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 4096,
|
16 |
+
"label2id": {
|
17 |
+
"LABEL_0": 0
|
18 |
+
},
|
19 |
+
"layer_norm_eps": 1e-12,
|
20 |
+
"max_position_embeddings": 512,
|
21 |
+
"model_type": "bert",
|
22 |
+
"num_attention_heads": 16,
|
23 |
+
"num_hidden_layers": 24,
|
24 |
+
"pad_token_id": 0,
|
25 |
+
"position_embedding_type": "absolute",
|
26 |
+
"torch_dtype": "float32",
|
27 |
+
"transformers_version": "4.51.2",
|
28 |
+
"type_vocab_size": 2,
|
29 |
+
"use_cache": true,
|
30 |
+
"vocab_size": 30522
|
31 |
+
}
|
checkpoint-3800/config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "4.1.0",
|
4 |
+
"transformers": "4.51.2",
|
5 |
+
"pytorch": "2.6.0+cu124"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": "cosine"
|
10 |
+
}
|
checkpoint-3800/modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
checkpoint-3800/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f937324527cebf777b8a7c7c1cd4f3c8a8a5910a9fb89ba80088a100ca5ca116
|
3 |
+
size 15894
|
checkpoint-3800/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:88a258ff813fa9a6dcf72f0b33c9d959565b77ec7f8de44485779ae4211121fe
|
3 |
+
size 988
|
checkpoint-3800/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ca359da081832058c653aaf3bc709bd71d0e81bbe104ff0c207b33b11eb9c3c
|
3 |
+
size 1064
|
checkpoint-3800/sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 256,
|
3 |
+
"do_lower_case": true
|
4 |
+
}
|
checkpoint-3800/special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
checkpoint-3800/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-3800/tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"extra_special_tokens": {},
|
49 |
+
"mask_token": "[MASK]",
|
50 |
+
"model_max_length": 512,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_token": "[PAD]",
|
53 |
+
"sep_token": "[SEP]",
|
54 |
+
"strip_accents": null,
|
55 |
+
"tokenize_chinese_chars": true,
|
56 |
+
"tokenizer_class": "BertTokenizer",
|
57 |
+
"unk_token": "[UNK]"
|
58 |
+
}
|
checkpoint-3800/trainer_state.json
ADDED
@@ -0,0 +1,1162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_global_step": null,
|
3 |
+
"best_metric": null,
|
4 |
+
"best_model_checkpoint": null,
|
5 |
+
"epoch": 4.241071428571429,
|
6 |
+
"eval_steps": 200,
|
7 |
+
"global_step": 3800,
|
8 |
+
"is_hyper_param_search": false,
|
9 |
+
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": true,
|
11 |
+
"log_history": [
|
12 |
+
{
|
13 |
+
"epoch": 0.0011160714285714285,
|
14 |
+
"grad_norm": NaN,
|
15 |
+
"learning_rate": 0.0,
|
16 |
+
"loss": 9.119,
|
17 |
+
"step": 1
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"epoch": 0.11160714285714286,
|
21 |
+
"grad_norm": 17.632238388061523,
|
22 |
+
"learning_rate": 2.1428571428571428e-05,
|
23 |
+
"loss": 4.1469,
|
24 |
+
"step": 100
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"epoch": 0.22321428571428573,
|
28 |
+
"grad_norm": 14.409270286560059,
|
29 |
+
"learning_rate": 4.375e-05,
|
30 |
+
"loss": 2.5294,
|
31 |
+
"step": 200
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"epoch": 0.22321428571428573,
|
35 |
+
"eval_full_en_cosine_accuracy@1": 0.7467105263157895,
|
36 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
37 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
38 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
39 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
40 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
41 |
+
"eval_full_en_cosine_map@1": 0.7467105263157895,
|
42 |
+
"eval_full_en_cosine_map@100": 0.2121058701298033,
|
43 |
+
"eval_full_en_cosine_map@150": 0.2294109301872967,
|
44 |
+
"eval_full_en_cosine_map@20": 0.34167650006204187,
|
45 |
+
"eval_full_en_cosine_map@200": 0.2492171685943861,
|
46 |
+
"eval_full_en_cosine_map@50": 0.237336657426832,
|
47 |
+
"eval_full_en_cosine_map@500": 0.3000288940307502,
|
48 |
+
"eval_full_en_cosine_mrr@1": 0.7467105263157895,
|
49 |
+
"eval_full_en_cosine_mrr@100": 0.8460592769803298,
|
50 |
+
"eval_full_en_cosine_mrr@150": 0.8460592769803298,
|
51 |
+
"eval_full_en_cosine_mrr@20": 0.8458948032961192,
|
52 |
+
"eval_full_en_cosine_mrr@200": 0.8460592769803298,
|
53 |
+
"eval_full_en_cosine_mrr@50": 0.8460122844991269,
|
54 |
+
"eval_full_en_cosine_ndcg@1": 0.7467105263157895,
|
55 |
+
"eval_full_en_cosine_ndcg@100": 0.4430509248084704,
|
56 |
+
"eval_full_en_cosine_ndcg@150": 0.4894828917681416,
|
57 |
+
"eval_full_en_cosine_ndcg@20": 0.5367541274871807,
|
58 |
+
"eval_full_en_cosine_ndcg@200": 0.5361903606133726,
|
59 |
+
"eval_full_en_cosine_ndcg@50": 0.448683811733402,
|
60 |
+
"eval_full_en_cosine_precision@1": 0.7467105263157895,
|
61 |
+
"eval_full_en_cosine_precision@100": 0.31240131578947367,
|
62 |
+
"eval_full_en_cosine_precision@150": 0.26592105263157895,
|
63 |
+
"eval_full_en_cosine_precision@20": 0.4965460526315789,
|
64 |
+
"eval_full_en_cosine_precision@200": 0.23370065789473685,
|
65 |
+
"eval_full_en_cosine_precision@50": 0.3904605263157895,
|
66 |
+
"eval_full_en_cosine_recall@1": 0.010753343030902496,
|
67 |
+
"eval_full_en_cosine_recall@100": 0.39446255566624855,
|
68 |
+
"eval_full_en_cosine_recall@150": 0.49544823712709557,
|
69 |
+
"eval_full_en_cosine_recall@20": 0.13279013317825217,
|
70 |
+
"eval_full_en_cosine_recall@200": 0.5739614992682516,
|
71 |
+
"eval_full_en_cosine_recall@50": 0.25254843470147753,
|
72 |
+
"eval_runtime": 1.5828,
|
73 |
+
"eval_samples_per_second": 0.0,
|
74 |
+
"eval_sequential_score": 0.5361903606133726,
|
75 |
+
"eval_steps_per_second": 0.0,
|
76 |
+
"step": 200
|
77 |
+
},
|
78 |
+
{
|
79 |
+
"epoch": 0.33482142857142855,
|
80 |
+
"grad_norm": 16.260934829711914,
|
81 |
+
"learning_rate": 4.915413533834587e-05,
|
82 |
+
"loss": 2.3611,
|
83 |
+
"step": 300
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"epoch": 0.44642857142857145,
|
87 |
+
"grad_norm": 13.242988586425781,
|
88 |
+
"learning_rate": 4.797932330827068e-05,
|
89 |
+
"loss": 2.192,
|
90 |
+
"step": 400
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"epoch": 0.44642857142857145,
|
94 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
95 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
96 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
97 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
98 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
99 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
100 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
101 |
+
"eval_full_en_cosine_map@100": 0.2088144416212806,
|
102 |
+
"eval_full_en_cosine_map@150": 0.22677217670719133,
|
103 |
+
"eval_full_en_cosine_map@20": 0.3349832137166454,
|
104 |
+
"eval_full_en_cosine_map@200": 0.245946497368659,
|
105 |
+
"eval_full_en_cosine_map@50": 0.23473921202287384,
|
106 |
+
"eval_full_en_cosine_map@500": 0.2973985707303743,
|
107 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
108 |
+
"eval_full_en_cosine_mrr@100": 0.8394156306336016,
|
109 |
+
"eval_full_en_cosine_mrr@150": 0.8394156306336016,
|
110 |
+
"eval_full_en_cosine_mrr@20": 0.8392713554720135,
|
111 |
+
"eval_full_en_cosine_mrr@200": 0.8394156306336016,
|
112 |
+
"eval_full_en_cosine_mrr@50": 0.8393810045948205,
|
113 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
114 |
+
"eval_full_en_cosine_ndcg@100": 0.43855475512592684,
|
115 |
+
"eval_full_en_cosine_ndcg@150": 0.48609390907359196,
|
116 |
+
"eval_full_en_cosine_ndcg@20": 0.5288083416910968,
|
117 |
+
"eval_full_en_cosine_ndcg@200": 0.5318117937684201,
|
118 |
+
"eval_full_en_cosine_ndcg@50": 0.4453338982563473,
|
119 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
120 |
+
"eval_full_en_cosine_precision@100": 0.3088157894736842,
|
121 |
+
"eval_full_en_cosine_precision@150": 0.2644517543859649,
|
122 |
+
"eval_full_en_cosine_precision@20": 0.4875,
|
123 |
+
"eval_full_en_cosine_precision@200": 0.23172697368421055,
|
124 |
+
"eval_full_en_cosine_precision@50": 0.38782894736842105,
|
125 |
+
"eval_full_en_cosine_recall@1": 0.010619007443519193,
|
126 |
+
"eval_full_en_cosine_recall@100": 0.3902042311088277,
|
127 |
+
"eval_full_en_cosine_recall@150": 0.4925745165667779,
|
128 |
+
"eval_full_en_cosine_recall@20": 0.1301764615450556,
|
129 |
+
"eval_full_en_cosine_recall@200": 0.5696006364444781,
|
130 |
+
"eval_full_en_cosine_recall@50": 0.2518199886564403,
|
131 |
+
"eval_runtime": 1.5596,
|
132 |
+
"eval_samples_per_second": 0.0,
|
133 |
+
"eval_sequential_score": 0.5318117937684201,
|
134 |
+
"eval_steps_per_second": 0.0,
|
135 |
+
"step": 400
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.5580357142857143,
|
139 |
+
"grad_norm": 13.307888984680176,
|
140 |
+
"learning_rate": 4.680451127819549e-05,
|
141 |
+
"loss": 2.0338,
|
142 |
+
"step": 500
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.6696428571428571,
|
146 |
+
"grad_norm": 12.763930320739746,
|
147 |
+
"learning_rate": 4.56296992481203e-05,
|
148 |
+
"loss": 1.9009,
|
149 |
+
"step": 600
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.6696428571428571,
|
153 |
+
"eval_full_en_cosine_accuracy@1": 0.7302631578947368,
|
154 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
155 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
156 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
157 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
158 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
159 |
+
"eval_full_en_cosine_map@1": 0.7302631578947368,
|
160 |
+
"eval_full_en_cosine_map@100": 0.2146410944227793,
|
161 |
+
"eval_full_en_cosine_map@150": 0.23271596511985665,
|
162 |
+
"eval_full_en_cosine_map@20": 0.3429678297332613,
|
163 |
+
"eval_full_en_cosine_map@200": 0.2520997707361607,
|
164 |
+
"eval_full_en_cosine_map@50": 0.2404899713826549,
|
165 |
+
"eval_full_en_cosine_map@500": 0.302904619520322,
|
166 |
+
"eval_full_en_cosine_mrr@1": 0.7302631578947368,
|
167 |
+
"eval_full_en_cosine_mrr@100": 0.8306572094298247,
|
168 |
+
"eval_full_en_cosine_mrr@150": 0.8306572094298247,
|
169 |
+
"eval_full_en_cosine_mrr@20": 0.8304491697994989,
|
170 |
+
"eval_full_en_cosine_mrr@200": 0.8306572094298247,
|
171 |
+
"eval_full_en_cosine_mrr@50": 0.8306058114035089,
|
172 |
+
"eval_full_en_cosine_ndcg@1": 0.7302631578947368,
|
173 |
+
"eval_full_en_cosine_ndcg@100": 0.4445617284976941,
|
174 |
+
"eval_full_en_cosine_ndcg@150": 0.4922393935902775,
|
175 |
+
"eval_full_en_cosine_ndcg@20": 0.5357880041966661,
|
176 |
+
"eval_full_en_cosine_ndcg@200": 0.5383209000398446,
|
177 |
+
"eval_full_en_cosine_ndcg@50": 0.4504820590447715,
|
178 |
+
"eval_full_en_cosine_precision@1": 0.7302631578947368,
|
179 |
+
"eval_full_en_cosine_precision@100": 0.31358552631578945,
|
180 |
+
"eval_full_en_cosine_precision@150": 0.2677412280701754,
|
181 |
+
"eval_full_en_cosine_precision@20": 0.49720394736842105,
|
182 |
+
"eval_full_en_cosine_precision@200": 0.23452302631578953,
|
183 |
+
"eval_full_en_cosine_precision@50": 0.3932894736842105,
|
184 |
+
"eval_full_en_cosine_recall@1": 0.010303516134180577,
|
185 |
+
"eval_full_en_cosine_recall@100": 0.3970033142271577,
|
186 |
+
"eval_full_en_cosine_recall@150": 0.5001101850184368,
|
187 |
+
"eval_full_en_cosine_recall@20": 0.13302896177814508,
|
188 |
+
"eval_full_en_cosine_recall@200": 0.5777429812058247,
|
189 |
+
"eval_full_en_cosine_recall@50": 0.254528957048419,
|
190 |
+
"eval_runtime": 1.5616,
|
191 |
+
"eval_samples_per_second": 0.0,
|
192 |
+
"eval_sequential_score": 0.5383209000398446,
|
193 |
+
"eval_steps_per_second": 0.0,
|
194 |
+
"step": 600
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"epoch": 0.78125,
|
198 |
+
"grad_norm": 13.439990997314453,
|
199 |
+
"learning_rate": 4.4454887218045117e-05,
|
200 |
+
"loss": 1.8404,
|
201 |
+
"step": 700
|
202 |
+
},
|
203 |
+
{
|
204 |
+
"epoch": 0.8928571428571429,
|
205 |
+
"grad_norm": 12.594465255737305,
|
206 |
+
"learning_rate": 4.3280075187969924e-05,
|
207 |
+
"loss": 1.7692,
|
208 |
+
"step": 800
|
209 |
+
},
|
210 |
+
{
|
211 |
+
"epoch": 0.8928571428571429,
|
212 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
213 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
214 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
215 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
216 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
217 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
218 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
219 |
+
"eval_full_en_cosine_map@100": 0.21030614519224017,
|
220 |
+
"eval_full_en_cosine_map@150": 0.22737063252522982,
|
221 |
+
"eval_full_en_cosine_map@20": 0.3442880676713117,
|
222 |
+
"eval_full_en_cosine_map@200": 0.24764067563282596,
|
223 |
+
"eval_full_en_cosine_map@50": 0.23827484272575025,
|
224 |
+
"eval_full_en_cosine_map@500": 0.2987091429260604,
|
225 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
226 |
+
"eval_full_en_cosine_mrr@100": 0.8404268619187053,
|
227 |
+
"eval_full_en_cosine_mrr@150": 0.8404268619187053,
|
228 |
+
"eval_full_en_cosine_mrr@20": 0.8402307852965749,
|
229 |
+
"eval_full_en_cosine_mrr@200": 0.8404268619187053,
|
230 |
+
"eval_full_en_cosine_mrr@50": 0.8403738058915406,
|
231 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
232 |
+
"eval_full_en_cosine_ndcg@100": 0.440670430732987,
|
233 |
+
"eval_full_en_cosine_ndcg@150": 0.486778222456143,
|
234 |
+
"eval_full_en_cosine_ndcg@20": 0.5383903905850532,
|
235 |
+
"eval_full_en_cosine_ndcg@200": 0.5352292016764449,
|
236 |
+
"eval_full_en_cosine_ndcg@50": 0.45046850998342597,
|
237 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
238 |
+
"eval_full_en_cosine_precision@100": 0.3099342105263158,
|
239 |
+
"eval_full_en_cosine_precision@150": 0.26390350877192986,
|
240 |
+
"eval_full_en_cosine_precision@20": 0.5,
|
241 |
+
"eval_full_en_cosine_precision@200": 0.23320723684210526,
|
242 |
+
"eval_full_en_cosine_precision@50": 0.39335526315789476,
|
243 |
+
"eval_full_en_cosine_recall@1": 0.01051277780149725,
|
244 |
+
"eval_full_en_cosine_recall@100": 0.39158535797000443,
|
245 |
+
"eval_full_en_cosine_recall@150": 0.4917399858788313,
|
246 |
+
"eval_full_en_cosine_recall@20": 0.13328036442285973,
|
247 |
+
"eval_full_en_cosine_recall@200": 0.5734492892933252,
|
248 |
+
"eval_full_en_cosine_recall@50": 0.254129727850083,
|
249 |
+
"eval_runtime": 1.5752,
|
250 |
+
"eval_samples_per_second": 0.0,
|
251 |
+
"eval_sequential_score": 0.5352292016764449,
|
252 |
+
"eval_steps_per_second": 0.0,
|
253 |
+
"step": 800
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"epoch": 1.0044642857142858,
|
257 |
+
"grad_norm": 13.140974998474121,
|
258 |
+
"learning_rate": 4.212875939849624e-05,
|
259 |
+
"loss": 1.6921,
|
260 |
+
"step": 900
|
261 |
+
},
|
262 |
+
{
|
263 |
+
"epoch": 1.1160714285714286,
|
264 |
+
"grad_norm": 12.160736083984375,
|
265 |
+
"learning_rate": 4.096569548872181e-05,
|
266 |
+
"loss": 1.3861,
|
267 |
+
"step": 1000
|
268 |
+
},
|
269 |
+
{
|
270 |
+
"epoch": 1.1160714285714286,
|
271 |
+
"eval_full_en_cosine_accuracy@1": 0.7401315789473685,
|
272 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
273 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
274 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
275 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
276 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
277 |
+
"eval_full_en_cosine_map@1": 0.7401315789473685,
|
278 |
+
"eval_full_en_cosine_map@100": 0.21155466872463927,
|
279 |
+
"eval_full_en_cosine_map@150": 0.2291636549745022,
|
280 |
+
"eval_full_en_cosine_map@20": 0.3373673798048492,
|
281 |
+
"eval_full_en_cosine_map@200": 0.24905074192004603,
|
282 |
+
"eval_full_en_cosine_map@50": 0.2376950112180141,
|
283 |
+
"eval_full_en_cosine_map@500": 0.3006802538137734,
|
284 |
+
"eval_full_en_cosine_mrr@1": 0.7401315789473685,
|
285 |
+
"eval_full_en_cosine_mrr@100": 0.8405236576289212,
|
286 |
+
"eval_full_en_cosine_mrr@150": 0.8405236576289212,
|
287 |
+
"eval_full_en_cosine_mrr@20": 0.8403143274853806,
|
288 |
+
"eval_full_en_cosine_mrr@200": 0.8405236576289212,
|
289 |
+
"eval_full_en_cosine_mrr@50": 0.840463849016481,
|
290 |
+
"eval_full_en_cosine_ndcg@1": 0.7401315789473685,
|
291 |
+
"eval_full_en_cosine_ndcg@100": 0.44212858816477746,
|
292 |
+
"eval_full_en_cosine_ndcg@150": 0.48946706445562127,
|
293 |
+
"eval_full_en_cosine_ndcg@20": 0.5332180756481385,
|
294 |
+
"eval_full_en_cosine_ndcg@200": 0.5367929588661781,
|
295 |
+
"eval_full_en_cosine_ndcg@50": 0.44979391873656477,
|
296 |
+
"eval_full_en_cosine_precision@1": 0.7401315789473685,
|
297 |
+
"eval_full_en_cosine_precision@100": 0.3114473684210526,
|
298 |
+
"eval_full_en_cosine_precision@150": 0.266469298245614,
|
299 |
+
"eval_full_en_cosine_precision@20": 0.49243421052631575,
|
300 |
+
"eval_full_en_cosine_precision@200": 0.2345888157894737,
|
301 |
+
"eval_full_en_cosine_precision@50": 0.3921052631578947,
|
302 |
+
"eval_full_en_cosine_recall@1": 0.010392607884295562,
|
303 |
+
"eval_full_en_cosine_recall@100": 0.3933254279416559,
|
304 |
+
"eval_full_en_cosine_recall@150": 0.4957503189606009,
|
305 |
+
"eval_full_en_cosine_recall@20": 0.13107623492706288,
|
306 |
+
"eval_full_en_cosine_recall@200": 0.5753954619760326,
|
307 |
+
"eval_full_en_cosine_recall@50": 0.2539746341397596,
|
308 |
+
"eval_runtime": 1.6397,
|
309 |
+
"eval_samples_per_second": 0.0,
|
310 |
+
"eval_sequential_score": 0.5367929588661781,
|
311 |
+
"eval_steps_per_second": 0.0,
|
312 |
+
"step": 1000
|
313 |
+
},
|
314 |
+
{
|
315 |
+
"epoch": 1.2276785714285714,
|
316 |
+
"grad_norm": 13.078369140625,
|
317 |
+
"learning_rate": 3.9790883458646615e-05,
|
318 |
+
"loss": 1.3863,
|
319 |
+
"step": 1100
|
320 |
+
},
|
321 |
+
{
|
322 |
+
"epoch": 1.3392857142857144,
|
323 |
+
"grad_norm": 11.990692138671875,
|
324 |
+
"learning_rate": 3.861607142857143e-05,
|
325 |
+
"loss": 1.3546,
|
326 |
+
"step": 1200
|
327 |
+
},
|
328 |
+
{
|
329 |
+
"epoch": 1.3392857142857144,
|
330 |
+
"eval_full_en_cosine_accuracy@1": 0.7203947368421053,
|
331 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
332 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
333 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
334 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
335 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
336 |
+
"eval_full_en_cosine_map@1": 0.7203947368421053,
|
337 |
+
"eval_full_en_cosine_map@100": 0.20552277525856266,
|
338 |
+
"eval_full_en_cosine_map@150": 0.22274311961933413,
|
339 |
+
"eval_full_en_cosine_map@20": 0.3363904557549852,
|
340 |
+
"eval_full_en_cosine_map@200": 0.24106738760441354,
|
341 |
+
"eval_full_en_cosine_map@50": 0.23370113464760453,
|
342 |
+
"eval_full_en_cosine_map@500": 0.28981293048421486,
|
343 |
+
"eval_full_en_cosine_mrr@1": 0.7203947368421053,
|
344 |
+
"eval_full_en_cosine_mrr@100": 0.8322617799738206,
|
345 |
+
"eval_full_en_cosine_mrr@150": 0.8322617799738206,
|
346 |
+
"eval_full_en_cosine_mrr@20": 0.8320620443153339,
|
347 |
+
"eval_full_en_cosine_mrr@200": 0.8322617799738206,
|
348 |
+
"eval_full_en_cosine_mrr@50": 0.8322050649102997,
|
349 |
+
"eval_full_en_cosine_ndcg@1": 0.7203947368421053,
|
350 |
+
"eval_full_en_cosine_ndcg@100": 0.43445871937106545,
|
351 |
+
"eval_full_en_cosine_ndcg@150": 0.48130417146010107,
|
352 |
+
"eval_full_en_cosine_ndcg@20": 0.531477407982968,
|
353 |
+
"eval_full_en_cosine_ndcg@200": 0.5259375639543232,
|
354 |
+
"eval_full_en_cosine_ndcg@50": 0.4444057356887903,
|
355 |
+
"eval_full_en_cosine_precision@1": 0.7203947368421053,
|
356 |
+
"eval_full_en_cosine_precision@100": 0.3039802631578947,
|
357 |
+
"eval_full_en_cosine_precision@150": 0.25999999999999995,
|
358 |
+
"eval_full_en_cosine_precision@20": 0.4925986842105263,
|
359 |
+
"eval_full_en_cosine_precision@200": 0.22763157894736838,
|
360 |
+
"eval_full_en_cosine_precision@50": 0.3867105263157895,
|
361 |
+
"eval_full_en_cosine_recall@1": 0.010318104890368607,
|
362 |
+
"eval_full_en_cosine_recall@100": 0.385615965839615,
|
363 |
+
"eval_full_en_cosine_recall@150": 0.48656381032984825,
|
364 |
+
"eval_full_en_cosine_recall@20": 0.13139326985918445,
|
365 |
+
"eval_full_en_cosine_recall@200": 0.5617757383007209,
|
366 |
+
"eval_full_en_cosine_recall@50": 0.2506285703289517,
|
367 |
+
"eval_runtime": 1.5585,
|
368 |
+
"eval_samples_per_second": 0.0,
|
369 |
+
"eval_sequential_score": 0.5259375639543232,
|
370 |
+
"eval_steps_per_second": 0.0,
|
371 |
+
"step": 1200
|
372 |
+
},
|
373 |
+
{
|
374 |
+
"epoch": 1.4508928571428572,
|
375 |
+
"grad_norm": 15.019533157348633,
|
376 |
+
"learning_rate": 3.744125939849624e-05,
|
377 |
+
"loss": 1.373,
|
378 |
+
"step": 1300
|
379 |
+
},
|
380 |
+
{
|
381 |
+
"epoch": 1.5625,
|
382 |
+
"grad_norm": 10.545878410339355,
|
383 |
+
"learning_rate": 3.626644736842105e-05,
|
384 |
+
"loss": 1.3364,
|
385 |
+
"step": 1400
|
386 |
+
},
|
387 |
+
{
|
388 |
+
"epoch": 1.5625,
|
389 |
+
"eval_full_en_cosine_accuracy@1": 0.7171052631578947,
|
390 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
391 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
392 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
393 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
394 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
395 |
+
"eval_full_en_cosine_map@1": 0.7171052631578947,
|
396 |
+
"eval_full_en_cosine_map@100": 0.20833018055660496,
|
397 |
+
"eval_full_en_cosine_map@150": 0.22583322401021033,
|
398 |
+
"eval_full_en_cosine_map@20": 0.34006318172507877,
|
399 |
+
"eval_full_en_cosine_map@200": 0.24462161151730188,
|
400 |
+
"eval_full_en_cosine_map@50": 0.23483789231739935,
|
401 |
+
"eval_full_en_cosine_map@500": 0.2946124561805931,
|
402 |
+
"eval_full_en_cosine_mrr@1": 0.7171052631578947,
|
403 |
+
"eval_full_en_cosine_mrr@100": 0.8267713172687238,
|
404 |
+
"eval_full_en_cosine_mrr@150": 0.8267713172687238,
|
405 |
+
"eval_full_en_cosine_mrr@20": 0.8265913362952838,
|
406 |
+
"eval_full_en_cosine_mrr@200": 0.8267713172687238,
|
407 |
+
"eval_full_en_cosine_mrr@50": 0.8267343568902494,
|
408 |
+
"eval_full_en_cosine_ndcg@1": 0.7171052631578947,
|
409 |
+
"eval_full_en_cosine_ndcg@100": 0.4377486787968229,
|
410 |
+
"eval_full_en_cosine_ndcg@150": 0.4850669425848544,
|
411 |
+
"eval_full_en_cosine_ndcg@20": 0.5331724259953773,
|
412 |
+
"eval_full_en_cosine_ndcg@200": 0.5302927064126869,
|
413 |
+
"eval_full_en_cosine_ndcg@50": 0.4451308688476405,
|
414 |
+
"eval_full_en_cosine_precision@1": 0.7171052631578947,
|
415 |
+
"eval_full_en_cosine_precision@100": 0.3074671052631579,
|
416 |
+
"eval_full_en_cosine_precision@150": 0.2625657894736842,
|
417 |
+
"eval_full_en_cosine_precision@20": 0.4947368421052632,
|
418 |
+
"eval_full_en_cosine_precision@200": 0.23016447368421053,
|
419 |
+
"eval_full_en_cosine_precision@50": 0.38769736842105257,
|
420 |
+
"eval_full_en_cosine_recall@1": 0.010208074045806198,
|
421 |
+
"eval_full_en_cosine_recall@100": 0.3902466549235702,
|
422 |
+
"eval_full_en_cosine_recall@150": 0.49226776551348056,
|
423 |
+
"eval_full_en_cosine_recall@20": 0.13255572846134298,
|
424 |
+
"eval_full_en_cosine_recall@200": 0.5680994353864672,
|
425 |
+
"eval_full_en_cosine_recall@50": 0.25126941591084845,
|
426 |
+
"eval_runtime": 1.5595,
|
427 |
+
"eval_samples_per_second": 0.0,
|
428 |
+
"eval_sequential_score": 0.5302927064126869,
|
429 |
+
"eval_steps_per_second": 0.0,
|
430 |
+
"step": 1400
|
431 |
+
},
|
432 |
+
{
|
433 |
+
"epoch": 1.6741071428571428,
|
434 |
+
"grad_norm": 18.495975494384766,
|
435 |
+
"learning_rate": 3.509163533834587e-05,
|
436 |
+
"loss": 1.2876,
|
437 |
+
"step": 1500
|
438 |
+
},
|
439 |
+
{
|
440 |
+
"epoch": 1.7857142857142856,
|
441 |
+
"grad_norm": 12.646751403808594,
|
442 |
+
"learning_rate": 3.391682330827068e-05,
|
443 |
+
"loss": 1.3094,
|
444 |
+
"step": 1600
|
445 |
+
},
|
446 |
+
{
|
447 |
+
"epoch": 1.7857142857142856,
|
448 |
+
"eval_full_en_cosine_accuracy@1": 0.7072368421052632,
|
449 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
450 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
451 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
452 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
453 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
454 |
+
"eval_full_en_cosine_map@1": 0.7072368421052632,
|
455 |
+
"eval_full_en_cosine_map@100": 0.20923239071614674,
|
456 |
+
"eval_full_en_cosine_map@150": 0.225604138471006,
|
457 |
+
"eval_full_en_cosine_map@20": 0.34034356587585846,
|
458 |
+
"eval_full_en_cosine_map@200": 0.24539737099429304,
|
459 |
+
"eval_full_en_cosine_map@50": 0.23464702413938254,
|
460 |
+
"eval_full_en_cosine_map@500": 0.29597166286299953,
|
461 |
+
"eval_full_en_cosine_mrr@1": 0.7072368421052632,
|
462 |
+
"eval_full_en_cosine_mrr@100": 0.8214137967940215,
|
463 |
+
"eval_full_en_cosine_mrr@150": 0.8214137967940215,
|
464 |
+
"eval_full_en_cosine_mrr@20": 0.8213699371448987,
|
465 |
+
"eval_full_en_cosine_mrr@200": 0.8214137967940215,
|
466 |
+
"eval_full_en_cosine_mrr@50": 0.8213699371448987,
|
467 |
+
"eval_full_en_cosine_ndcg@1": 0.7072368421052632,
|
468 |
+
"eval_full_en_cosine_ndcg@100": 0.4396726832556684,
|
469 |
+
"eval_full_en_cosine_ndcg@150": 0.4847816359827512,
|
470 |
+
"eval_full_en_cosine_ndcg@20": 0.532792025753163,
|
471 |
+
"eval_full_en_cosine_ndcg@200": 0.5323403273572274,
|
472 |
+
"eval_full_en_cosine_ndcg@50": 0.4452189433184465,
|
473 |
+
"eval_full_en_cosine_precision@1": 0.7072368421052632,
|
474 |
+
"eval_full_en_cosine_precision@100": 0.3098026315789474,
|
475 |
+
"eval_full_en_cosine_precision@150": 0.26274122807017547,
|
476 |
+
"eval_full_en_cosine_precision@20": 0.4935855263157895,
|
477 |
+
"eval_full_en_cosine_precision@200": 0.23192434210526314,
|
478 |
+
"eval_full_en_cosine_precision@50": 0.38763157894736844,
|
479 |
+
"eval_full_en_cosine_recall@1": 0.010122149362902188,
|
480 |
+
"eval_full_en_cosine_recall@100": 0.39236988612007834,
|
481 |
+
"eval_full_en_cosine_recall@150": 0.4910778378543689,
|
482 |
+
"eval_full_en_cosine_recall@20": 0.13108496301513997,
|
483 |
+
"eval_full_en_cosine_recall@200": 0.5709689534914331,
|
484 |
+
"eval_full_en_cosine_recall@50": 0.25093448303772187,
|
485 |
+
"eval_runtime": 1.5873,
|
486 |
+
"eval_samples_per_second": 0.0,
|
487 |
+
"eval_sequential_score": 0.5323403273572274,
|
488 |
+
"eval_steps_per_second": 0.0,
|
489 |
+
"step": 1600
|
490 |
+
},
|
491 |
+
{
|
492 |
+
"epoch": 1.8973214285714286,
|
493 |
+
"grad_norm": 11.858412742614746,
|
494 |
+
"learning_rate": 3.274201127819549e-05,
|
495 |
+
"loss": 1.2784,
|
496 |
+
"step": 1700
|
497 |
+
},
|
498 |
+
{
|
499 |
+
"epoch": 2.0089285714285716,
|
500 |
+
"grad_norm": 11.152688026428223,
|
501 |
+
"learning_rate": 3.1567199248120306e-05,
|
502 |
+
"loss": 1.2204,
|
503 |
+
"step": 1800
|
504 |
+
},
|
505 |
+
{
|
506 |
+
"epoch": 2.0089285714285716,
|
507 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
508 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
509 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
510 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
511 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
512 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
513 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
514 |
+
"eval_full_en_cosine_map@100": 0.2098412194483687,
|
515 |
+
"eval_full_en_cosine_map@150": 0.22663911455304064,
|
516 |
+
"eval_full_en_cosine_map@20": 0.3433147887298301,
|
517 |
+
"eval_full_en_cosine_map@200": 0.24620266722190678,
|
518 |
+
"eval_full_en_cosine_map@50": 0.23714915519951082,
|
519 |
+
"eval_full_en_cosine_map@500": 0.29690932859887553,
|
520 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
521 |
+
"eval_full_en_cosine_mrr@100": 0.8394024772357531,
|
522 |
+
"eval_full_en_cosine_mrr@150": 0.8394024772357531,
|
523 |
+
"eval_full_en_cosine_mrr@20": 0.8393426686233129,
|
524 |
+
"eval_full_en_cosine_mrr@200": 0.8394024772357531,
|
525 |
+
"eval_full_en_cosine_mrr@50": 0.8393426686233129,
|
526 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
527 |
+
"eval_full_en_cosine_ndcg@100": 0.4396519841053572,
|
528 |
+
"eval_full_en_cosine_ndcg@150": 0.4856325134708184,
|
529 |
+
"eval_full_en_cosine_ndcg@20": 0.5375317893335387,
|
530 |
+
"eval_full_en_cosine_ndcg@200": 0.533015167774829,
|
531 |
+
"eval_full_en_cosine_ndcg@50": 0.44810398395306655,
|
532 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
533 |
+
"eval_full_en_cosine_precision@100": 0.3084539473684211,
|
534 |
+
"eval_full_en_cosine_precision@150": 0.2627631578947368,
|
535 |
+
"eval_full_en_cosine_precision@20": 0.49769736842105267,
|
536 |
+
"eval_full_en_cosine_precision@200": 0.2314309210526316,
|
537 |
+
"eval_full_en_cosine_precision@50": 0.3891447368421053,
|
538 |
+
"eval_full_en_cosine_recall@1": 0.010440810366523372,
|
539 |
+
"eval_full_en_cosine_recall@100": 0.39036009395952986,
|
540 |
+
"eval_full_en_cosine_recall@150": 0.49041982254882954,
|
541 |
+
"eval_full_en_cosine_recall@20": 0.13228070304056636,
|
542 |
+
"eval_full_en_cosine_recall@200": 0.5704962189819233,
|
543 |
+
"eval_full_en_cosine_recall@50": 0.25248213212752935,
|
544 |
+
"eval_runtime": 1.6049,
|
545 |
+
"eval_samples_per_second": 0.0,
|
546 |
+
"eval_sequential_score": 0.533015167774829,
|
547 |
+
"eval_steps_per_second": 0.0,
|
548 |
+
"step": 1800
|
549 |
+
},
|
550 |
+
{
|
551 |
+
"epoch": 2.1205357142857144,
|
552 |
+
"grad_norm": 12.91015625,
|
553 |
+
"learning_rate": 3.0392387218045114e-05,
|
554 |
+
"loss": 0.9617,
|
555 |
+
"step": 1900
|
556 |
+
},
|
557 |
+
{
|
558 |
+
"epoch": 2.232142857142857,
|
559 |
+
"grad_norm": 11.646313667297363,
|
560 |
+
"learning_rate": 2.9217575187969924e-05,
|
561 |
+
"loss": 1.0004,
|
562 |
+
"step": 2000
|
563 |
+
},
|
564 |
+
{
|
565 |
+
"epoch": 2.232142857142857,
|
566 |
+
"eval_full_en_cosine_accuracy@1": 0.7236842105263158,
|
567 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
568 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
569 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
570 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
571 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
572 |
+
"eval_full_en_cosine_map@1": 0.7236842105263158,
|
573 |
+
"eval_full_en_cosine_map@100": 0.20775225168018954,
|
574 |
+
"eval_full_en_cosine_map@150": 0.22393096419950168,
|
575 |
+
"eval_full_en_cosine_map@20": 0.3380596885262807,
|
576 |
+
"eval_full_en_cosine_map@200": 0.24259765295506924,
|
577 |
+
"eval_full_en_cosine_map@50": 0.23452814948810471,
|
578 |
+
"eval_full_en_cosine_map@500": 0.2920026964508484,
|
579 |
+
"eval_full_en_cosine_mrr@1": 0.7236842105263158,
|
580 |
+
"eval_full_en_cosine_mrr@100": 0.8325452625382137,
|
581 |
+
"eval_full_en_cosine_mrr@150": 0.8325452625382137,
|
582 |
+
"eval_full_en_cosine_mrr@20": 0.8324781304222094,
|
583 |
+
"eval_full_en_cosine_mrr@200": 0.8325452625382137,
|
584 |
+
"eval_full_en_cosine_mrr@50": 0.8325452625382137,
|
585 |
+
"eval_full_en_cosine_ndcg@1": 0.7236842105263158,
|
586 |
+
"eval_full_en_cosine_ndcg@100": 0.4376001104057169,
|
587 |
+
"eval_full_en_cosine_ndcg@150": 0.48181431955382,
|
588 |
+
"eval_full_en_cosine_ndcg@20": 0.5323035546433559,
|
589 |
+
"eval_full_en_cosine_ndcg@200": 0.5276663014224582,
|
590 |
+
"eval_full_en_cosine_ndcg@50": 0.44660441452063837,
|
591 |
+
"eval_full_en_cosine_precision@1": 0.7236842105263158,
|
592 |
+
"eval_full_en_cosine_precision@100": 0.30644736842105263,
|
593 |
+
"eval_full_en_cosine_precision@150": 0.259890350877193,
|
594 |
+
"eval_full_en_cosine_precision@20": 0.4916118421052632,
|
595 |
+
"eval_full_en_cosine_precision@200": 0.2280921052631579,
|
596 |
+
"eval_full_en_cosine_precision@50": 0.3886842105263158,
|
597 |
+
"eval_full_en_cosine_recall@1": 0.010329446437905086,
|
598 |
+
"eval_full_en_cosine_recall@100": 0.38885062846601265,
|
599 |
+
"eval_full_en_cosine_recall@150": 0.4854595951837256,
|
600 |
+
"eval_full_en_cosine_recall@20": 0.131078016933875,
|
601 |
+
"eval_full_en_cosine_recall@200": 0.5630724982932908,
|
602 |
+
"eval_full_en_cosine_recall@50": 0.252357645205228,
|
603 |
+
"eval_runtime": 1.5613,
|
604 |
+
"eval_samples_per_second": 0.0,
|
605 |
+
"eval_sequential_score": 0.5276663014224582,
|
606 |
+
"eval_steps_per_second": 0.0,
|
607 |
+
"step": 2000
|
608 |
+
},
|
609 |
+
{
|
610 |
+
"epoch": 2.34375,
|
611 |
+
"grad_norm": 12.087961196899414,
|
612 |
+
"learning_rate": 2.8042763157894735e-05,
|
613 |
+
"loss": 0.9694,
|
614 |
+
"step": 2100
|
615 |
+
},
|
616 |
+
{
|
617 |
+
"epoch": 2.455357142857143,
|
618 |
+
"grad_norm": 8.181659698486328,
|
619 |
+
"learning_rate": 2.6867951127819552e-05,
|
620 |
+
"loss": 0.9843,
|
621 |
+
"step": 2200
|
622 |
+
},
|
623 |
+
{
|
624 |
+
"epoch": 2.455357142857143,
|
625 |
+
"eval_full_en_cosine_accuracy@1": 0.7236842105263158,
|
626 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
627 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
628 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
629 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
630 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
631 |
+
"eval_full_en_cosine_map@1": 0.7236842105263158,
|
632 |
+
"eval_full_en_cosine_map@100": 0.209953160245849,
|
633 |
+
"eval_full_en_cosine_map@150": 0.22760030144833215,
|
634 |
+
"eval_full_en_cosine_map@20": 0.34078157961918865,
|
635 |
+
"eval_full_en_cosine_map@200": 0.24749824184265867,
|
636 |
+
"eval_full_en_cosine_map@50": 0.2365248444512811,
|
637 |
+
"eval_full_en_cosine_map@500": 0.29789431690676116,
|
638 |
+
"eval_full_en_cosine_mrr@1": 0.7236842105263158,
|
639 |
+
"eval_full_en_cosine_mrr@100": 0.8318935359231412,
|
640 |
+
"eval_full_en_cosine_mrr@150": 0.8318935359231412,
|
641 |
+
"eval_full_en_cosine_mrr@20": 0.8316833751044278,
|
642 |
+
"eval_full_en_cosine_mrr@200": 0.8318935359231412,
|
643 |
+
"eval_full_en_cosine_mrr@50": 0.8318935359231412,
|
644 |
+
"eval_full_en_cosine_ndcg@1": 0.7236842105263158,
|
645 |
+
"eval_full_en_cosine_ndcg@100": 0.44076958126493176,
|
646 |
+
"eval_full_en_cosine_ndcg@150": 0.48838061313116793,
|
647 |
+
"eval_full_en_cosine_ndcg@20": 0.5350320556020238,
|
648 |
+
"eval_full_en_cosine_ndcg@200": 0.5355574509263721,
|
649 |
+
"eval_full_en_cosine_ndcg@50": 0.44803994906340594,
|
650 |
+
"eval_full_en_cosine_precision@1": 0.7236842105263158,
|
651 |
+
"eval_full_en_cosine_precision@100": 0.3099671052631579,
|
652 |
+
"eval_full_en_cosine_precision@150": 0.2648464912280702,
|
653 |
+
"eval_full_en_cosine_precision@20": 0.49588815789473684,
|
654 |
+
"eval_full_en_cosine_precision@200": 0.23342105263157892,
|
655 |
+
"eval_full_en_cosine_precision@50": 0.39052631578947367,
|
656 |
+
"eval_full_en_cosine_recall@1": 0.010284539147879572,
|
657 |
+
"eval_full_en_cosine_recall@100": 0.39296182819932773,
|
658 |
+
"eval_full_en_cosine_recall@150": 0.4959148528891931,
|
659 |
+
"eval_full_en_cosine_recall@20": 0.13200577828629578,
|
660 |
+
"eval_full_en_cosine_recall@200": 0.5749370249014907,
|
661 |
+
"eval_full_en_cosine_recall@50": 0.25310992970173135,
|
662 |
+
"eval_runtime": 1.8632,
|
663 |
+
"eval_samples_per_second": 0.0,
|
664 |
+
"eval_sequential_score": 0.5355574509263721,
|
665 |
+
"eval_steps_per_second": 0.0,
|
666 |
+
"step": 2200
|
667 |
+
},
|
668 |
+
{
|
669 |
+
"epoch": 2.5669642857142856,
|
670 |
+
"grad_norm": 13.6882905960083,
|
671 |
+
"learning_rate": 2.5693139097744363e-05,
|
672 |
+
"loss": 0.9743,
|
673 |
+
"step": 2300
|
674 |
+
},
|
675 |
+
{
|
676 |
+
"epoch": 2.678571428571429,
|
677 |
+
"grad_norm": 11.966975212097168,
|
678 |
+
"learning_rate": 2.4518327067669177e-05,
|
679 |
+
"loss": 0.9252,
|
680 |
+
"step": 2400
|
681 |
+
},
|
682 |
+
{
|
683 |
+
"epoch": 2.678571428571429,
|
684 |
+
"eval_full_en_cosine_accuracy@1": 0.7335526315789473,
|
685 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
686 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
687 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
688 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
689 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
690 |
+
"eval_full_en_cosine_map@1": 0.7335526315789473,
|
691 |
+
"eval_full_en_cosine_map@100": 0.20983286336268822,
|
692 |
+
"eval_full_en_cosine_map@150": 0.22675852672419078,
|
693 |
+
"eval_full_en_cosine_map@20": 0.34004090105732804,
|
694 |
+
"eval_full_en_cosine_map@200": 0.24584993568226646,
|
695 |
+
"eval_full_en_cosine_map@50": 0.23672594782424658,
|
696 |
+
"eval_full_en_cosine_map@500": 0.29632183596698103,
|
697 |
+
"eval_full_en_cosine_mrr@1": 0.7335526315789473,
|
698 |
+
"eval_full_en_cosine_mrr@100": 0.83135268727374,
|
699 |
+
"eval_full_en_cosine_mrr@150": 0.83135268727374,
|
700 |
+
"eval_full_en_cosine_mrr@20": 0.8311351294903929,
|
701 |
+
"eval_full_en_cosine_mrr@200": 0.83135268727374,
|
702 |
+
"eval_full_en_cosine_mrr@50": 0.8312917710944029,
|
703 |
+
"eval_full_en_cosine_ndcg@1": 0.7335526315789473,
|
704 |
+
"eval_full_en_cosine_ndcg@100": 0.4400577813719261,
|
705 |
+
"eval_full_en_cosine_ndcg@150": 0.4859220111165228,
|
706 |
+
"eval_full_en_cosine_ndcg@20": 0.5344170691501652,
|
707 |
+
"eval_full_en_cosine_ndcg@200": 0.5320416498978522,
|
708 |
+
"eval_full_en_cosine_ndcg@50": 0.4485020943766835,
|
709 |
+
"eval_full_en_cosine_precision@1": 0.7335526315789473,
|
710 |
+
"eval_full_en_cosine_precision@100": 0.30907894736842106,
|
711 |
+
"eval_full_en_cosine_precision@150": 0.26278508771929826,
|
712 |
+
"eval_full_en_cosine_precision@20": 0.4960526315789474,
|
713 |
+
"eval_full_en_cosine_precision@200": 0.23090460526315787,
|
714 |
+
"eval_full_en_cosine_precision@50": 0.39151315789473684,
|
715 |
+
"eval_full_en_cosine_recall@1": 0.010402156873475942,
|
716 |
+
"eval_full_en_cosine_recall@100": 0.39206565501916524,
|
717 |
+
"eval_full_en_cosine_recall@150": 0.49176955829136443,
|
718 |
+
"eval_full_en_cosine_recall@20": 0.1321996647113643,
|
719 |
+
"eval_full_en_cosine_recall@200": 0.569344104113959,
|
720 |
+
"eval_full_en_cosine_recall@50": 0.2535254041631645,
|
721 |
+
"eval_runtime": 1.5826,
|
722 |
+
"eval_samples_per_second": 0.0,
|
723 |
+
"eval_sequential_score": 0.5320416498978522,
|
724 |
+
"eval_steps_per_second": 0.0,
|
725 |
+
"step": 2400
|
726 |
+
},
|
727 |
+
{
|
728 |
+
"epoch": 2.790178571428571,
|
729 |
+
"grad_norm": 11.857823371887207,
|
730 |
+
"learning_rate": 2.3343515037593984e-05,
|
731 |
+
"loss": 0.9272,
|
732 |
+
"step": 2500
|
733 |
+
},
|
734 |
+
{
|
735 |
+
"epoch": 2.9017857142857144,
|
736 |
+
"grad_norm": 12.297764778137207,
|
737 |
+
"learning_rate": 2.2168703007518798e-05,
|
738 |
+
"loss": 0.9279,
|
739 |
+
"step": 2600
|
740 |
+
},
|
741 |
+
{
|
742 |
+
"epoch": 2.9017857142857144,
|
743 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
744 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
745 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
746 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
747 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
748 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
749 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
750 |
+
"eval_full_en_cosine_map@100": 0.20939105710550232,
|
751 |
+
"eval_full_en_cosine_map@150": 0.22725165687553775,
|
752 |
+
"eval_full_en_cosine_map@20": 0.3403680329074837,
|
753 |
+
"eval_full_en_cosine_map@200": 0.24658865195474836,
|
754 |
+
"eval_full_en_cosine_map@50": 0.23612691752121232,
|
755 |
+
"eval_full_en_cosine_map@500": 0.29718900909315255,
|
756 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
757 |
+
"eval_full_en_cosine_mrr@100": 0.8391709003546018,
|
758 |
+
"eval_full_en_cosine_mrr@150": 0.8391709003546018,
|
759 |
+
"eval_full_en_cosine_mrr@20": 0.8391064008705977,
|
760 |
+
"eval_full_en_cosine_mrr@200": 0.8391709003546018,
|
761 |
+
"eval_full_en_cosine_mrr@50": 0.8391064008705977,
|
762 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
763 |
+
"eval_full_en_cosine_ndcg@100": 0.4389185422351881,
|
764 |
+
"eval_full_en_cosine_ndcg@150": 0.4868646893605612,
|
765 |
+
"eval_full_en_cosine_ndcg@20": 0.5359014833764041,
|
766 |
+
"eval_full_en_cosine_ndcg@200": 0.5332804255738979,
|
767 |
+
"eval_full_en_cosine_ndcg@50": 0.44749591453362436,
|
768 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
769 |
+
"eval_full_en_cosine_precision@100": 0.30779605263157894,
|
770 |
+
"eval_full_en_cosine_precision@150": 0.26355263157894737,
|
771 |
+
"eval_full_en_cosine_precision@20": 0.49588815789473684,
|
772 |
+
"eval_full_en_cosine_precision@200": 0.2316282894736842,
|
773 |
+
"eval_full_en_cosine_precision@50": 0.38901315789473684,
|
774 |
+
"eval_full_en_cosine_recall@1": 0.010425572953236805,
|
775 |
+
"eval_full_en_cosine_recall@100": 0.3892001066901767,
|
776 |
+
"eval_full_en_cosine_recall@150": 0.492569756570653,
|
777 |
+
"eval_full_en_cosine_recall@20": 0.13284603422933672,
|
778 |
+
"eval_full_en_cosine_recall@200": 0.5706210722984945,
|
779 |
+
"eval_full_en_cosine_recall@50": 0.2518705529759721,
|
780 |
+
"eval_runtime": 1.577,
|
781 |
+
"eval_samples_per_second": 0.0,
|
782 |
+
"eval_sequential_score": 0.5332804255738979,
|
783 |
+
"eval_steps_per_second": 0.0,
|
784 |
+
"step": 2600
|
785 |
+
},
|
786 |
+
{
|
787 |
+
"epoch": 3.013392857142857,
|
788 |
+
"grad_norm": 12.120986938476562,
|
789 |
+
"learning_rate": 2.099389097744361e-05,
|
790 |
+
"loss": 0.857,
|
791 |
+
"step": 2700
|
792 |
+
},
|
793 |
+
{
|
794 |
+
"epoch": 3.125,
|
795 |
+
"grad_norm": 14.276410102844238,
|
796 |
+
"learning_rate": 1.9819078947368423e-05,
|
797 |
+
"loss": 0.7313,
|
798 |
+
"step": 2800
|
799 |
+
},
|
800 |
+
{
|
801 |
+
"epoch": 3.125,
|
802 |
+
"eval_full_en_cosine_accuracy@1": 0.7269736842105263,
|
803 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
804 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
805 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
806 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
807 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
808 |
+
"eval_full_en_cosine_map@1": 0.7269736842105263,
|
809 |
+
"eval_full_en_cosine_map@100": 0.20830025965749158,
|
810 |
+
"eval_full_en_cosine_map@150": 0.22525408557521698,
|
811 |
+
"eval_full_en_cosine_map@20": 0.34094306993307805,
|
812 |
+
"eval_full_en_cosine_map@200": 0.24400549054611867,
|
813 |
+
"eval_full_en_cosine_map@50": 0.23400685602624646,
|
814 |
+
"eval_full_en_cosine_map@500": 0.29401532392219154,
|
815 |
+
"eval_full_en_cosine_mrr@1": 0.7269736842105263,
|
816 |
+
"eval_full_en_cosine_mrr@100": 0.8315051952798665,
|
817 |
+
"eval_full_en_cosine_mrr@150": 0.8315051952798665,
|
818 |
+
"eval_full_en_cosine_mrr@20": 0.8314268744778616,
|
819 |
+
"eval_full_en_cosine_mrr@200": 0.8315051952798665,
|
820 |
+
"eval_full_en_cosine_mrr@50": 0.8315051952798665,
|
821 |
+
"eval_full_en_cosine_ndcg@1": 0.7269736842105263,
|
822 |
+
"eval_full_en_cosine_ndcg@100": 0.43885977048304636,
|
823 |
+
"eval_full_en_cosine_ndcg@150": 0.48486671483618976,
|
824 |
+
"eval_full_en_cosine_ndcg@20": 0.5365677326031855,
|
825 |
+
"eval_full_en_cosine_ndcg@200": 0.5299990147795507,
|
826 |
+
"eval_full_en_cosine_ndcg@50": 0.44591298214905706,
|
827 |
+
"eval_full_en_cosine_precision@1": 0.7269736842105263,
|
828 |
+
"eval_full_en_cosine_precision@100": 0.308125,
|
829 |
+
"eval_full_en_cosine_precision@150": 0.2621052631578948,
|
830 |
+
"eval_full_en_cosine_precision@20": 0.49786184210526313,
|
831 |
+
"eval_full_en_cosine_precision@200": 0.22980263157894737,
|
832 |
+
"eval_full_en_cosine_precision@50": 0.3870394736842105,
|
833 |
+
"eval_full_en_cosine_recall@1": 0.010317820884117123,
|
834 |
+
"eval_full_en_cosine_recall@100": 0.38998825691236244,
|
835 |
+
"eval_full_en_cosine_recall@150": 0.4900687458798103,
|
836 |
+
"eval_full_en_cosine_recall@20": 0.13271573138828288,
|
837 |
+
"eval_full_en_cosine_recall@200": 0.5659226272090475,
|
838 |
+
"eval_full_en_cosine_recall@50": 0.25218483369820577,
|
839 |
+
"eval_runtime": 1.607,
|
840 |
+
"eval_samples_per_second": 0.0,
|
841 |
+
"eval_sequential_score": 0.5299990147795507,
|
842 |
+
"eval_steps_per_second": 0.0,
|
843 |
+
"step": 2800
|
844 |
+
},
|
845 |
+
{
|
846 |
+
"epoch": 3.236607142857143,
|
847 |
+
"grad_norm": 8.85190486907959,
|
848 |
+
"learning_rate": 1.8644266917293237e-05,
|
849 |
+
"loss": 0.7103,
|
850 |
+
"step": 2900
|
851 |
+
},
|
852 |
+
{
|
853 |
+
"epoch": 3.3482142857142856,
|
854 |
+
"grad_norm": 8.932626724243164,
|
855 |
+
"learning_rate": 1.7469454887218044e-05,
|
856 |
+
"loss": 0.7187,
|
857 |
+
"step": 3000
|
858 |
+
},
|
859 |
+
{
|
860 |
+
"epoch": 3.3482142857142856,
|
861 |
+
"eval_full_en_cosine_accuracy@1": 0.7269736842105263,
|
862 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
863 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
864 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
865 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
866 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
867 |
+
"eval_full_en_cosine_map@1": 0.7269736842105263,
|
868 |
+
"eval_full_en_cosine_map@100": 0.20842370079433947,
|
869 |
+
"eval_full_en_cosine_map@150": 0.22608431932756923,
|
870 |
+
"eval_full_en_cosine_map@20": 0.34026464907579207,
|
871 |
+
"eval_full_en_cosine_map@200": 0.2451065024940476,
|
872 |
+
"eval_full_en_cosine_map@50": 0.23418777403622906,
|
873 |
+
"eval_full_en_cosine_map@500": 0.2945476002258968,
|
874 |
+
"eval_full_en_cosine_mrr@1": 0.7269736842105263,
|
875 |
+
"eval_full_en_cosine_mrr@100": 0.8303256958684593,
|
876 |
+
"eval_full_en_cosine_mrr@150": 0.8303256958684593,
|
877 |
+
"eval_full_en_cosine_mrr@20": 0.830265887256019,
|
878 |
+
"eval_full_en_cosine_mrr@200": 0.8303256958684593,
|
879 |
+
"eval_full_en_cosine_mrr@50": 0.830265887256019,
|
880 |
+
"eval_full_en_cosine_ndcg@1": 0.7269736842105263,
|
881 |
+
"eval_full_en_cosine_ndcg@100": 0.4379203478644915,
|
882 |
+
"eval_full_en_cosine_ndcg@150": 0.4860723616469748,
|
883 |
+
"eval_full_en_cosine_ndcg@20": 0.534483012777908,
|
884 |
+
"eval_full_en_cosine_ndcg@200": 0.5318565059446251,
|
885 |
+
"eval_full_en_cosine_ndcg@50": 0.4443024102705765,
|
886 |
+
"eval_full_en_cosine_precision@1": 0.7269736842105263,
|
887 |
+
"eval_full_en_cosine_precision@100": 0.30750000000000005,
|
888 |
+
"eval_full_en_cosine_precision@150": 0.26370614035087714,
|
889 |
+
"eval_full_en_cosine_precision@20": 0.49588815789473684,
|
890 |
+
"eval_full_en_cosine_precision@200": 0.23116776315789475,
|
891 |
+
"eval_full_en_cosine_precision@50": 0.38539473684210523,
|
892 |
+
"eval_full_en_cosine_recall@1": 0.010298189290703101,
|
893 |
+
"eval_full_en_cosine_recall@100": 0.38891472258186655,
|
894 |
+
"eval_full_en_cosine_recall@150": 0.4925623824521817,
|
895 |
+
"eval_full_en_cosine_recall@20": 0.13215936080151625,
|
896 |
+
"eval_full_en_cosine_recall@200": 0.5698259119139981,
|
897 |
+
"eval_full_en_cosine_recall@50": 0.2502092759755724,
|
898 |
+
"eval_runtime": 1.6179,
|
899 |
+
"eval_samples_per_second": 0.0,
|
900 |
+
"eval_sequential_score": 0.5318565059446251,
|
901 |
+
"eval_steps_per_second": 0.0,
|
902 |
+
"step": 3000
|
903 |
+
},
|
904 |
+
{
|
905 |
+
"epoch": 3.4598214285714284,
|
906 |
+
"grad_norm": 12.761665344238281,
|
907 |
+
"learning_rate": 1.6294642857142858e-05,
|
908 |
+
"loss": 0.7067,
|
909 |
+
"step": 3100
|
910 |
+
},
|
911 |
+
{
|
912 |
+
"epoch": 3.571428571428571,
|
913 |
+
"grad_norm": 12.318887710571289,
|
914 |
+
"learning_rate": 1.5119830827067668e-05,
|
915 |
+
"loss": 0.7157,
|
916 |
+
"step": 3200
|
917 |
+
},
|
918 |
+
{
|
919 |
+
"epoch": 3.571428571428571,
|
920 |
+
"eval_full_en_cosine_accuracy@1": 0.7072368421052632,
|
921 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
922 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
923 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
924 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
925 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
926 |
+
"eval_full_en_cosine_map@1": 0.7072368421052632,
|
927 |
+
"eval_full_en_cosine_map@100": 0.21126096647489126,
|
928 |
+
"eval_full_en_cosine_map@150": 0.22897332387217115,
|
929 |
+
"eval_full_en_cosine_map@20": 0.34020926250086975,
|
930 |
+
"eval_full_en_cosine_map@200": 0.24883265008518762,
|
931 |
+
"eval_full_en_cosine_map@50": 0.2366562995235259,
|
932 |
+
"eval_full_en_cosine_map@500": 0.30009134506130936,
|
933 |
+
"eval_full_en_cosine_mrr@1": 0.7072368421052632,
|
934 |
+
"eval_full_en_cosine_mrr@100": 0.8208446325794724,
|
935 |
+
"eval_full_en_cosine_mrr@150": 0.8208446325794724,
|
936 |
+
"eval_full_en_cosine_mrr@20": 0.8206285125693021,
|
937 |
+
"eval_full_en_cosine_mrr@200": 0.8208446325794724,
|
938 |
+
"eval_full_en_cosine_mrr@50": 0.8208446325794724,
|
939 |
+
"eval_full_en_cosine_ndcg@1": 0.7072368421052632,
|
940 |
+
"eval_full_en_cosine_ndcg@100": 0.4420871692985379,
|
941 |
+
"eval_full_en_cosine_ndcg@150": 0.48983718804719595,
|
942 |
+
"eval_full_en_cosine_ndcg@20": 0.5349182539944062,
|
943 |
+
"eval_full_en_cosine_ndcg@200": 0.5368995914478877,
|
944 |
+
"eval_full_en_cosine_ndcg@50": 0.4481578438397021,
|
945 |
+
"eval_full_en_cosine_precision@1": 0.7072368421052632,
|
946 |
+
"eval_full_en_cosine_precision@100": 0.3118421052631579,
|
947 |
+
"eval_full_en_cosine_precision@150": 0.26625,
|
948 |
+
"eval_full_en_cosine_precision@20": 0.49786184210526313,
|
949 |
+
"eval_full_en_cosine_precision@200": 0.2341282894736842,
|
950 |
+
"eval_full_en_cosine_precision@50": 0.39125,
|
951 |
+
"eval_full_en_cosine_recall@1": 0.010071368365416018,
|
952 |
+
"eval_full_en_cosine_recall@100": 0.39435465355460575,
|
953 |
+
"eval_full_en_cosine_recall@150": 0.49776297598034985,
|
954 |
+
"eval_full_en_cosine_recall@20": 0.1332224887798492,
|
955 |
+
"eval_full_en_cosine_recall@200": 0.5769437157052201,
|
956 |
+
"eval_full_en_cosine_recall@50": 0.25406609475829245,
|
957 |
+
"eval_runtime": 1.5833,
|
958 |
+
"eval_samples_per_second": 0.0,
|
959 |
+
"eval_sequential_score": 0.5368995914478877,
|
960 |
+
"eval_steps_per_second": 0.0,
|
961 |
+
"step": 3200
|
962 |
+
},
|
963 |
+
{
|
964 |
+
"epoch": 3.6830357142857144,
|
965 |
+
"grad_norm": 10.974320411682129,
|
966 |
+
"learning_rate": 1.3945018796992482e-05,
|
967 |
+
"loss": 0.7113,
|
968 |
+
"step": 3300
|
969 |
+
},
|
970 |
+
{
|
971 |
+
"epoch": 3.794642857142857,
|
972 |
+
"grad_norm": 11.004631042480469,
|
973 |
+
"learning_rate": 1.2770206766917295e-05,
|
974 |
+
"loss": 0.7013,
|
975 |
+
"step": 3400
|
976 |
+
},
|
977 |
+
{
|
978 |
+
"epoch": 3.794642857142857,
|
979 |
+
"eval_full_en_cosine_accuracy@1": 0.7269736842105263,
|
980 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
981 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
982 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
983 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
984 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
985 |
+
"eval_full_en_cosine_map@1": 0.7269736842105263,
|
986 |
+
"eval_full_en_cosine_map@100": 0.20998333195374114,
|
987 |
+
"eval_full_en_cosine_map@150": 0.22683318021248486,
|
988 |
+
"eval_full_en_cosine_map@20": 0.34034679376659244,
|
989 |
+
"eval_full_en_cosine_map@200": 0.24654495691213385,
|
990 |
+
"eval_full_en_cosine_map@50": 0.23617479010012724,
|
991 |
+
"eval_full_en_cosine_map@500": 0.29617185416029185,
|
992 |
+
"eval_full_en_cosine_mrr@1": 0.7269736842105263,
|
993 |
+
"eval_full_en_cosine_mrr@100": 0.8291805255603549,
|
994 |
+
"eval_full_en_cosine_mrr@150": 0.8291805255603549,
|
995 |
+
"eval_full_en_cosine_mrr@20": 0.8291105367585632,
|
996 |
+
"eval_full_en_cosine_mrr@200": 0.8291805255603549,
|
997 |
+
"eval_full_en_cosine_mrr@50": 0.8291805255603549,
|
998 |
+
"eval_full_en_cosine_ndcg@1": 0.7269736842105263,
|
999 |
+
"eval_full_en_cosine_ndcg@100": 0.4407299508694298,
|
1000 |
+
"eval_full_en_cosine_ndcg@150": 0.48655314671133576,
|
1001 |
+
"eval_full_en_cosine_ndcg@20": 0.5349966588302529,
|
1002 |
+
"eval_full_en_cosine_ndcg@200": 0.5341334488223752,
|
1003 |
+
"eval_full_en_cosine_ndcg@50": 0.448065635044085,
|
1004 |
+
"eval_full_en_cosine_precision@1": 0.7269736842105263,
|
1005 |
+
"eval_full_en_cosine_precision@100": 0.30973684210526314,
|
1006 |
+
"eval_full_en_cosine_precision@150": 0.26320175438596494,
|
1007 |
+
"eval_full_en_cosine_precision@20": 0.4965460526315789,
|
1008 |
+
"eval_full_en_cosine_precision@200": 0.23210526315789473,
|
1009 |
+
"eval_full_en_cosine_precision@50": 0.3907894736842106,
|
1010 |
+
"eval_full_en_cosine_recall@1": 0.010311461817674684,
|
1011 |
+
"eval_full_en_cosine_recall@100": 0.3931693265429022,
|
1012 |
+
"eval_full_en_cosine_recall@150": 0.49300140763214356,
|
1013 |
+
"eval_full_en_cosine_recall@20": 0.1329270784727238,
|
1014 |
+
"eval_full_en_cosine_recall@200": 0.573228327517634,
|
1015 |
+
"eval_full_en_cosine_recall@50": 0.25330386821616296,
|
1016 |
+
"eval_runtime": 1.577,
|
1017 |
+
"eval_samples_per_second": 0.0,
|
1018 |
+
"eval_sequential_score": 0.5341334488223752,
|
1019 |
+
"eval_steps_per_second": 0.0,
|
1020 |
+
"step": 3400
|
1021 |
+
},
|
1022 |
+
{
|
1023 |
+
"epoch": 3.90625,
|
1024 |
+
"grad_norm": 12.102640151977539,
|
1025 |
+
"learning_rate": 1.1595394736842107e-05,
|
1026 |
+
"loss": 0.6903,
|
1027 |
+
"step": 3500
|
1028 |
+
},
|
1029 |
+
{
|
1030 |
+
"epoch": 4.017857142857143,
|
1031 |
+
"grad_norm": 7.348757743835449,
|
1032 |
+
"learning_rate": 1.0420582706766918e-05,
|
1033 |
+
"loss": 0.6462,
|
1034 |
+
"step": 3600
|
1035 |
+
},
|
1036 |
+
{
|
1037 |
+
"epoch": 4.017857142857143,
|
1038 |
+
"eval_full_en_cosine_accuracy@1": 0.7203947368421053,
|
1039 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
1040 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
1041 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
1042 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
1043 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
1044 |
+
"eval_full_en_cosine_map@1": 0.7203947368421053,
|
1045 |
+
"eval_full_en_cosine_map@100": 0.2102732775077637,
|
1046 |
+
"eval_full_en_cosine_map@150": 0.22767943965852241,
|
1047 |
+
"eval_full_en_cosine_map@20": 0.338502447126724,
|
1048 |
+
"eval_full_en_cosine_map@200": 0.24667619158922902,
|
1049 |
+
"eval_full_en_cosine_map@50": 0.23576300870587916,
|
1050 |
+
"eval_full_en_cosine_map@500": 0.2971463650911015,
|
1051 |
+
"eval_full_en_cosine_mrr@1": 0.7203947368421053,
|
1052 |
+
"eval_full_en_cosine_mrr@100": 0.8263833835420962,
|
1053 |
+
"eval_full_en_cosine_mrr@150": 0.8263833835420962,
|
1054 |
+
"eval_full_en_cosine_mrr@20": 0.8263213180008847,
|
1055 |
+
"eval_full_en_cosine_mrr@200": 0.8263833835420962,
|
1056 |
+
"eval_full_en_cosine_mrr@50": 0.8263213180008847,
|
1057 |
+
"eval_full_en_cosine_ndcg@1": 0.7203947368421053,
|
1058 |
+
"eval_full_en_cosine_ndcg@100": 0.44114478517461736,
|
1059 |
+
"eval_full_en_cosine_ndcg@150": 0.4883455168714466,
|
1060 |
+
"eval_full_en_cosine_ndcg@20": 0.53288860900767,
|
1061 |
+
"eval_full_en_cosine_ndcg@200": 0.5334866046140189,
|
1062 |
+
"eval_full_en_cosine_ndcg@50": 0.4473951526251337,
|
1063 |
+
"eval_full_en_cosine_precision@1": 0.7203947368421053,
|
1064 |
+
"eval_full_en_cosine_precision@100": 0.31078947368421056,
|
1065 |
+
"eval_full_en_cosine_precision@150": 0.265219298245614,
|
1066 |
+
"eval_full_en_cosine_precision@20": 0.4934210526315789,
|
1067 |
+
"eval_full_en_cosine_precision@200": 0.23212171052631578,
|
1068 |
+
"eval_full_en_cosine_precision@50": 0.3899342105263158,
|
1069 |
+
"eval_full_en_cosine_recall@1": 0.01018155854728512,
|
1070 |
+
"eval_full_en_cosine_recall@100": 0.3935816727444405,
|
1071 |
+
"eval_full_en_cosine_recall@150": 0.4958028561341766,
|
1072 |
+
"eval_full_en_cosine_recall@20": 0.13181077303144853,
|
1073 |
+
"eval_full_en_cosine_recall@200": 0.5716317929962068,
|
1074 |
+
"eval_full_en_cosine_recall@50": 0.25274553753777246,
|
1075 |
+
"eval_runtime": 1.6024,
|
1076 |
+
"eval_samples_per_second": 0.0,
|
1077 |
+
"eval_sequential_score": 0.5334866046140189,
|
1078 |
+
"eval_steps_per_second": 0.0,
|
1079 |
+
"step": 3600
|
1080 |
+
},
|
1081 |
+
{
|
1082 |
+
"epoch": 4.129464285714286,
|
1083 |
+
"grad_norm": 8.786450386047363,
|
1084 |
+
"learning_rate": 9.24577067669173e-06,
|
1085 |
+
"loss": 0.5162,
|
1086 |
+
"step": 3700
|
1087 |
+
},
|
1088 |
+
{
|
1089 |
+
"epoch": 4.241071428571429,
|
1090 |
+
"grad_norm": 10.602435111999512,
|
1091 |
+
"learning_rate": 8.070958646616542e-06,
|
1092 |
+
"loss": 0.524,
|
1093 |
+
"step": 3800
|
1094 |
+
},
|
1095 |
+
{
|
1096 |
+
"epoch": 4.241071428571429,
|
1097 |
+
"eval_full_en_cosine_accuracy@1": 0.7302631578947368,
|
1098 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
1099 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
1100 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
1101 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
1102 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
1103 |
+
"eval_full_en_cosine_map@1": 0.7302631578947368,
|
1104 |
+
"eval_full_en_cosine_map@100": 0.21150798737582682,
|
1105 |
+
"eval_full_en_cosine_map@150": 0.22868847990327232,
|
1106 |
+
"eval_full_en_cosine_map@20": 0.3411525812655742,
|
1107 |
+
"eval_full_en_cosine_map@200": 0.2480155691306444,
|
1108 |
+
"eval_full_en_cosine_map@50": 0.23814436251631807,
|
1109 |
+
"eval_full_en_cosine_map@500": 0.29792672341621373,
|
1110 |
+
"eval_full_en_cosine_mrr@1": 0.7302631578947368,
|
1111 |
+
"eval_full_en_cosine_mrr@100": 0.8323485085820613,
|
1112 |
+
"eval_full_en_cosine_mrr@150": 0.8323485085820613,
|
1113 |
+
"eval_full_en_cosine_mrr@20": 0.8321467731829576,
|
1114 |
+
"eval_full_en_cosine_mrr@200": 0.8323485085820613,
|
1115 |
+
"eval_full_en_cosine_mrr@50": 0.832296294714058,
|
1116 |
+
"eval_full_en_cosine_ndcg@1": 0.7302631578947368,
|
1117 |
+
"eval_full_en_cosine_ndcg@100": 0.44247378999755477,
|
1118 |
+
"eval_full_en_cosine_ndcg@150": 0.48886293038433404,
|
1119 |
+
"eval_full_en_cosine_ndcg@20": 0.5351701323930714,
|
1120 |
+
"eval_full_en_cosine_ndcg@200": 0.5352268343210608,
|
1121 |
+
"eval_full_en_cosine_ndcg@50": 0.4502625298651447,
|
1122 |
+
"eval_full_en_cosine_precision@1": 0.7302631578947368,
|
1123 |
+
"eval_full_en_cosine_precision@100": 0.311546052631579,
|
1124 |
+
"eval_full_en_cosine_precision@150": 0.265219298245614,
|
1125 |
+
"eval_full_en_cosine_precision@20": 0.49588815789473684,
|
1126 |
+
"eval_full_en_cosine_precision@200": 0.23268092105263163,
|
1127 |
+
"eval_full_en_cosine_precision@50": 0.3930921052631579,
|
1128 |
+
"eval_full_en_cosine_recall@1": 0.010244630514181254,
|
1129 |
+
"eval_full_en_cosine_recall@100": 0.39498767852245736,
|
1130 |
+
"eval_full_en_cosine_recall@150": 0.49574169519464223,
|
1131 |
+
"eval_full_en_cosine_recall@20": 0.1324589336710221,
|
1132 |
+
"eval_full_en_cosine_recall@200": 0.574019804020236,
|
1133 |
+
"eval_full_en_cosine_recall@50": 0.2548099607629461,
|
1134 |
+
"eval_runtime": 1.5919,
|
1135 |
+
"eval_samples_per_second": 0.0,
|
1136 |
+
"eval_sequential_score": 0.5352268343210608,
|
1137 |
+
"eval_steps_per_second": 0.0,
|
1138 |
+
"step": 3800
|
1139 |
+
}
|
1140 |
+
],
|
1141 |
+
"logging_steps": 100,
|
1142 |
+
"max_steps": 4480,
|
1143 |
+
"num_input_tokens_seen": 0,
|
1144 |
+
"num_train_epochs": 5,
|
1145 |
+
"save_steps": 200,
|
1146 |
+
"stateful_callbacks": {
|
1147 |
+
"TrainerControl": {
|
1148 |
+
"args": {
|
1149 |
+
"should_epoch_stop": false,
|
1150 |
+
"should_evaluate": false,
|
1151 |
+
"should_log": false,
|
1152 |
+
"should_save": true,
|
1153 |
+
"should_training_stop": false
|
1154 |
+
},
|
1155 |
+
"attributes": {}
|
1156 |
+
}
|
1157 |
+
},
|
1158 |
+
"total_flos": 0.0,
|
1159 |
+
"train_batch_size": 64,
|
1160 |
+
"trial_name": null,
|
1161 |
+
"trial_params": null
|
1162 |
+
}
|
checkpoint-3800/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab218b8b16eec7c543d0bc5420b88d78608af29dcbf60891a693f97306d3aba4
|
3 |
+
size 5560
|
checkpoint-3800/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-4000/1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 1024,
|
3 |
+
"pooling_mode_cls_token": true,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
checkpoint-4000/README.md
ADDED
@@ -0,0 +1,672 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- sentence-transformers
|
4 |
+
- sentence-similarity
|
5 |
+
- feature-extraction
|
6 |
+
- generated_from_trainer
|
7 |
+
- dataset_size:114699
|
8 |
+
- loss:CachedGISTEmbedLoss
|
9 |
+
base_model: BAAI/bge-large-en-v1.5
|
10 |
+
widget:
|
11 |
+
- source_sentence: For roles such as 'physiotherapist', 'neuromusculoskeletal physiotherapist',
|
12 |
+
'osteopath', and 'chiropractor', the skills needed include a deep understanding
|
13 |
+
of human anatomy and physiology, strong diagnostic skills, and the ability to
|
14 |
+
apply manual therapy techniques to treat musculoskeletal issues. Additionally,
|
15 |
+
effective communication skills are crucial for explaining treatments and exercises
|
16 |
+
to patients, while adaptability and problem-solving skills are essential for tailoring
|
17 |
+
treatments to individual patient needs.
|
18 |
+
sentences:
|
19 |
+
- Job roles such as insulation installers, HVAC technicians, and construction engineers
|
20 |
+
require knowledge of various types and characteristics of insulation materials
|
21 |
+
to effectively reduce heat transfer and improve energy efficiency in buildings
|
22 |
+
and systems. Understanding the typology of insulation materials, including their
|
23 |
+
thermal properties, durability, and environmental impact, is crucial for these
|
24 |
+
professionals to select the most appropriate materials for specific applications.
|
25 |
+
- Job roles such as Contract Managers, Legal Analysts, and Compliance Officers require
|
26 |
+
the skill of reviewing or auditing completed contracts to ensure legal accuracy,
|
27 |
+
compliance with regulations, and alignment with organizational goals.
|
28 |
+
- Job roles that require skills in dealing with emergency care situations include
|
29 |
+
emergency medical technicians (EMTs), paramedics, and emergency room nurses or
|
30 |
+
doctors, all of whom must quickly and effectively manage critical health situations
|
31 |
+
to save lives.
|
32 |
+
- source_sentence: Bus drivers, including those operating in various sectors like
|
33 |
+
public transit, intercity, private, or school services, need strong driving skills,
|
34 |
+
knowledge of traffic laws, and the ability to operate safely in diverse conditions.
|
35 |
+
Additionally, effective communication skills and the ability to handle passenger
|
36 |
+
inquiries and emergencies are crucial.
|
37 |
+
sentences:
|
38 |
+
- Job roles that require the skill to calibrate electronic instruments include calibration
|
39 |
+
technicians, quality control engineers, and instrumentation specialists. These
|
40 |
+
professionals ensure the accuracy and reliability of various electronic devices
|
41 |
+
and systems across different industries such as manufacturing, aerospace, and
|
42 |
+
automotive.
|
43 |
+
- Job roles such as Building Engineer, Architect, and Construction Specialist require
|
44 |
+
skills in designing, engineering, or developing air-tight building structures
|
45 |
+
to ensure energy efficiency and environmental control within the building.
|
46 |
+
- Job roles such as customer service representatives, flight attendants, and hotel
|
47 |
+
concierges require a strong focus on passengers or customers, ensuring their needs
|
48 |
+
and comfort are prioritized to provide excellent service and support.
|
49 |
+
- source_sentence: A mine surveyor, also known as a mining surveyor or mine planning
|
50 |
+
surveyor, requires expertise in geomatics and mining engineering to accurately
|
51 |
+
map and plan mine operations, ensuring safety and efficiency. They must also possess
|
52 |
+
strong analytical skills and the ability to use specialized software for creating
|
53 |
+
detailed mine plans and maintaining accurate records.
|
54 |
+
sentences:
|
55 |
+
- Job roles such as data analysts, business analysts, and financial analysts require
|
56 |
+
the skill to present reports or prepare statistical reports, as they often need
|
57 |
+
to communicate complex data insights clearly and effectively to stakeholders.
|
58 |
+
- Job roles that require monitoring flour unloading equipment include Quality Control
|
59 |
+
Technicians, Process Operators, and Mill Supervisors, who ensure the efficient
|
60 |
+
and safe operation of flour processing systems and the proper unloading of flour
|
61 |
+
from transport vehicles.
|
62 |
+
- Job roles that require skills in the manufacturing of made-up textile articles
|
63 |
+
include textile production managers, machinery operators, and quality control
|
64 |
+
inspectors, all of whom utilize specific technology and machinery to produce finished
|
65 |
+
textile products such as clothing, home textiles, and industrial fabrics.
|
66 |
+
- source_sentence: An insulation supervisor, regardless of the specific type of insulation
|
67 |
+
material or installation area, requires strong project management skills, knowledge
|
68 |
+
of building codes and safety regulations, and expertise in insulation techniques
|
69 |
+
to oversee the installation process effectively and ensure quality standards are
|
70 |
+
met.
|
71 |
+
sentences:
|
72 |
+
- Job roles that require skills in energy efficiency, such as promoting energy efficiency
|
73 |
+
or efficient energy use, include Energy Managers, Sustainability Specialists,
|
74 |
+
and Building Engineers, who focus on reducing energy consumption and improving
|
75 |
+
energy use in various settings. Additionally, roles like Battery Technicians or
|
76 |
+
Engineers involve battery benchmarking to enhance energy storage and efficiency
|
77 |
+
in technological devices and systems.
|
78 |
+
- The skill of applying or installing waterproofing and damp-proofing membranes
|
79 |
+
is primarily required by construction workers such as waterproofing specialists,
|
80 |
+
roofers, and building envelope technicians, who use these membranes to prevent
|
81 |
+
water damage in buildings and structures.
|
82 |
+
- Job roles such as laboratory technicians, chemists, and materials scientists require
|
83 |
+
skills in laboratory techniques, including electronic and thermic methods, gas
|
84 |
+
chromatography, and gravimetric analysis, to conduct precise experiments and analyze
|
85 |
+
materials. These professionals must apply natural science techniques and use various
|
86 |
+
lab techniques to ensure accurate and reliable results in their research or quality
|
87 |
+
control processes.
|
88 |
+
- source_sentence: For roles such as import/export manager, graduate export manager,
|
89 |
+
senior export manager, and other related positions in meat and meat products,
|
90 |
+
the key skills include a strong understanding of international trade regulations,
|
91 |
+
meat product knowledge, customs compliance, and excellent negotiation and communication
|
92 |
+
skills to manage global supply chains effectively. Additionally, proficiency in
|
93 |
+
relevant trade software and languages can be highly beneficial.
|
94 |
+
sentences:
|
95 |
+
- Job roles that require skills such as managing staff, coordinating employees,
|
96 |
+
and performing HR activities include Human Resources Managers, Team Leaders, Supervisors,
|
97 |
+
and Department Heads, all of whom are responsible for overseeing personnel, implementing
|
98 |
+
HR policies, and ensuring efficient team operations.
|
99 |
+
- Job roles such as Control Systems Engineer, Automation Engineer, and Systems Designer
|
100 |
+
require skills in designing, planning, and developing control systems to manage
|
101 |
+
and optimize the performance of various technological processes and machinery.
|
102 |
+
These professionals are tasked with creating efficient and reliable systems that
|
103 |
+
can operate autonomously or with minimal human intervention.
|
104 |
+
- Job roles such as Performance Analyst, Quality Assurance Engineer, and Test Manager
|
105 |
+
require skills in conducting performance measurement and organizing or managing
|
106 |
+
conversion testing to ensure software and systems meet performance standards and
|
107 |
+
function correctly in real-world scenarios.
|
108 |
+
pipeline_tag: sentence-similarity
|
109 |
+
library_name: sentence-transformers
|
110 |
+
metrics:
|
111 |
+
- cosine_accuracy@1
|
112 |
+
- cosine_accuracy@20
|
113 |
+
- cosine_accuracy@50
|
114 |
+
- cosine_accuracy@100
|
115 |
+
- cosine_accuracy@150
|
116 |
+
- cosine_accuracy@200
|
117 |
+
- cosine_precision@1
|
118 |
+
- cosine_precision@20
|
119 |
+
- cosine_precision@50
|
120 |
+
- cosine_precision@100
|
121 |
+
- cosine_precision@150
|
122 |
+
- cosine_precision@200
|
123 |
+
- cosine_recall@1
|
124 |
+
- cosine_recall@20
|
125 |
+
- cosine_recall@50
|
126 |
+
- cosine_recall@100
|
127 |
+
- cosine_recall@150
|
128 |
+
- cosine_recall@200
|
129 |
+
- cosine_ndcg@1
|
130 |
+
- cosine_ndcg@20
|
131 |
+
- cosine_ndcg@50
|
132 |
+
- cosine_ndcg@100
|
133 |
+
- cosine_ndcg@150
|
134 |
+
- cosine_ndcg@200
|
135 |
+
- cosine_mrr@1
|
136 |
+
- cosine_mrr@20
|
137 |
+
- cosine_mrr@50
|
138 |
+
- cosine_mrr@100
|
139 |
+
- cosine_mrr@150
|
140 |
+
- cosine_mrr@200
|
141 |
+
- cosine_map@1
|
142 |
+
- cosine_map@20
|
143 |
+
- cosine_map@50
|
144 |
+
- cosine_map@100
|
145 |
+
- cosine_map@150
|
146 |
+
- cosine_map@200
|
147 |
+
- cosine_map@500
|
148 |
+
model-index:
|
149 |
+
- name: SentenceTransformer based on BAAI/bge-large-en-v1.5
|
150 |
+
results:
|
151 |
+
- task:
|
152 |
+
type: information-retrieval
|
153 |
+
name: Information Retrieval
|
154 |
+
dataset:
|
155 |
+
name: full en
|
156 |
+
type: full_en
|
157 |
+
metrics:
|
158 |
+
- type: cosine_accuracy@1
|
159 |
+
value: 0.7368421052631579
|
160 |
+
name: Cosine Accuracy@1
|
161 |
+
- type: cosine_accuracy@20
|
162 |
+
value: 0.993421052631579
|
163 |
+
name: Cosine Accuracy@20
|
164 |
+
- type: cosine_accuracy@50
|
165 |
+
value: 0.9967105263157895
|
166 |
+
name: Cosine Accuracy@50
|
167 |
+
- type: cosine_accuracy@100
|
168 |
+
value: 1.0
|
169 |
+
name: Cosine Accuracy@100
|
170 |
+
- type: cosine_accuracy@150
|
171 |
+
value: 1.0
|
172 |
+
name: Cosine Accuracy@150
|
173 |
+
- type: cosine_accuracy@200
|
174 |
+
value: 1.0
|
175 |
+
name: Cosine Accuracy@200
|
176 |
+
- type: cosine_precision@1
|
177 |
+
value: 0.7368421052631579
|
178 |
+
name: Cosine Precision@1
|
179 |
+
- type: cosine_precision@20
|
180 |
+
value: 0.4947368421052632
|
181 |
+
name: Cosine Precision@20
|
182 |
+
- type: cosine_precision@50
|
183 |
+
value: 0.3913815789473684
|
184 |
+
name: Cosine Precision@50
|
185 |
+
- type: cosine_precision@100
|
186 |
+
value: 0.30953947368421053
|
187 |
+
name: Cosine Precision@100
|
188 |
+
- type: cosine_precision@150
|
189 |
+
value: 0.26460526315789473
|
190 |
+
name: Cosine Precision@150
|
191 |
+
- type: cosine_precision@200
|
192 |
+
value: 0.23187500000000003
|
193 |
+
name: Cosine Precision@200
|
194 |
+
- type: cosine_recall@1
|
195 |
+
value: 0.010305566449078924
|
196 |
+
name: Cosine Recall@1
|
197 |
+
- type: cosine_recall@20
|
198 |
+
value: 0.13233275450376297
|
199 |
+
name: Cosine Recall@20
|
200 |
+
- type: cosine_recall@50
|
201 |
+
value: 0.2535655251683108
|
202 |
+
name: Cosine Recall@50
|
203 |
+
- type: cosine_recall@100
|
204 |
+
value: 0.3922740640225546
|
205 |
+
name: Cosine Recall@100
|
206 |
+
- type: cosine_recall@150
|
207 |
+
value: 0.4949163913773604
|
208 |
+
name: Cosine Recall@150
|
209 |
+
- type: cosine_recall@200
|
210 |
+
value: 0.572041877895568
|
211 |
+
name: Cosine Recall@200
|
212 |
+
- type: cosine_ndcg@1
|
213 |
+
value: 0.7368421052631579
|
214 |
+
name: Cosine Ndcg@1
|
215 |
+
- type: cosine_ndcg@20
|
216 |
+
value: 0.5353264293739176
|
217 |
+
name: Cosine Ndcg@20
|
218 |
+
- type: cosine_ndcg@50
|
219 |
+
value: 0.44939083758113085
|
220 |
+
name: Cosine Ndcg@50
|
221 |
+
- type: cosine_ndcg@100
|
222 |
+
value: 0.4408521323246635
|
223 |
+
name: Cosine Ndcg@100
|
224 |
+
- type: cosine_ndcg@150
|
225 |
+
value: 0.48834055710549873
|
226 |
+
name: Cosine Ndcg@150
|
227 |
+
- type: cosine_ndcg@200
|
228 |
+
value: 0.5341206282180626
|
229 |
+
name: Cosine Ndcg@200
|
230 |
+
- type: cosine_mrr@1
|
231 |
+
value: 0.7368421052631579
|
232 |
+
name: Cosine Mrr@1
|
233 |
+
- type: cosine_mrr@20
|
234 |
+
value: 0.837172357978279
|
235 |
+
name: Cosine Mrr@20
|
236 |
+
- type: cosine_mrr@50
|
237 |
+
value: 0.837328999582289
|
238 |
+
name: Cosine Mrr@50
|
239 |
+
- type: cosine_mrr@100
|
240 |
+
value: 0.8373899157616261
|
241 |
+
name: Cosine Mrr@100
|
242 |
+
- type: cosine_mrr@150
|
243 |
+
value: 0.8373899157616261
|
244 |
+
name: Cosine Mrr@150
|
245 |
+
- type: cosine_mrr@200
|
246 |
+
value: 0.8373899157616261
|
247 |
+
name: Cosine Mrr@200
|
248 |
+
- type: cosine_map@1
|
249 |
+
value: 0.7368421052631579
|
250 |
+
name: Cosine Map@1
|
251 |
+
- type: cosine_map@20
|
252 |
+
value: 0.34076177455520346
|
253 |
+
name: Cosine Map@20
|
254 |
+
- type: cosine_map@50
|
255 |
+
value: 0.23677969810249233
|
256 |
+
name: Cosine Map@50
|
257 |
+
- type: cosine_map@100
|
258 |
+
value: 0.2101198919267321
|
259 |
+
name: Cosine Map@100
|
260 |
+
- type: cosine_map@150
|
261 |
+
value: 0.2276536266469315
|
262 |
+
name: Cosine Map@150
|
263 |
+
- type: cosine_map@200
|
264 |
+
value: 0.24678319516569472
|
265 |
+
name: Cosine Map@200
|
266 |
+
- type: cosine_map@500
|
267 |
+
value: 0.297249372287514
|
268 |
+
name: Cosine Map@500
|
269 |
+
---
|
270 |
+
|
271 |
+
# SentenceTransformer based on BAAI/bge-large-en-v1.5
|
272 |
+
|
273 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [BAAI/bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
274 |
+
|
275 |
+
## Model Details
|
276 |
+
|
277 |
+
### Model Description
|
278 |
+
- **Model Type:** Sentence Transformer
|
279 |
+
- **Base model:** [BAAI/bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5) <!-- at revision d4aa6901d3a41ba39fb536a557fa166f842b0e09 -->
|
280 |
+
- **Maximum Sequence Length:** 256 tokens
|
281 |
+
- **Output Dimensionality:** 1024 dimensions
|
282 |
+
- **Similarity Function:** Cosine Similarity
|
283 |
+
<!-- - **Training Dataset:** Unknown -->
|
284 |
+
<!-- - **Language:** Unknown -->
|
285 |
+
<!-- - **License:** Unknown -->
|
286 |
+
|
287 |
+
### Model Sources
|
288 |
+
|
289 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
290 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
291 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
292 |
+
|
293 |
+
### Full Model Architecture
|
294 |
+
|
295 |
+
```
|
296 |
+
SentenceTransformer(
|
297 |
+
(0): Transformer({'max_seq_length': 256, 'do_lower_case': True}) with Transformer model: BertModel
|
298 |
+
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
299 |
+
(2): Normalize()
|
300 |
+
)
|
301 |
+
```
|
302 |
+
|
303 |
+
## Usage
|
304 |
+
|
305 |
+
### Direct Usage (Sentence Transformers)
|
306 |
+
|
307 |
+
First install the Sentence Transformers library:
|
308 |
+
|
309 |
+
```bash
|
310 |
+
pip install -U sentence-transformers
|
311 |
+
```
|
312 |
+
|
313 |
+
Then you can load this model and run inference.
|
314 |
+
```python
|
315 |
+
from sentence_transformers import SentenceTransformer
|
316 |
+
|
317 |
+
# Download from the 🤗 Hub
|
318 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
319 |
+
# Run inference
|
320 |
+
sentences = [
|
321 |
+
'For roles such as import/export manager, graduate export manager, senior export manager, and other related positions in meat and meat products, the key skills include a strong understanding of international trade regulations, meat product knowledge, customs compliance, and excellent negotiation and communication skills to manage global supply chains effectively. Additionally, proficiency in relevant trade software and languages can be highly beneficial.',
|
322 |
+
'Job roles such as Performance Analyst, Quality Assurance Engineer, and Test Manager require skills in conducting performance measurement and organizing or managing conversion testing to ensure software and systems meet performance standards and function correctly in real-world scenarios.',
|
323 |
+
'Job roles that require skills such as managing staff, coordinating employees, and performing HR activities include Human Resources Managers, Team Leaders, Supervisors, and Department Heads, all of whom are responsible for overseeing personnel, implementing HR policies, and ensuring efficient team operations.',
|
324 |
+
]
|
325 |
+
embeddings = model.encode(sentences)
|
326 |
+
print(embeddings.shape)
|
327 |
+
# (3, 1024)
|
328 |
+
|
329 |
+
# Get the similarity scores for the embeddings
|
330 |
+
similarities = model.similarity(embeddings, embeddings)
|
331 |
+
print(similarities.shape)
|
332 |
+
# torch.Size([3, 3])
|
333 |
+
```
|
334 |
+
|
335 |
+
<!--
|
336 |
+
### Direct Usage (Transformers)
|
337 |
+
|
338 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
339 |
+
|
340 |
+
</details>
|
341 |
+
-->
|
342 |
+
|
343 |
+
<!--
|
344 |
+
### Downstream Usage (Sentence Transformers)
|
345 |
+
|
346 |
+
You can finetune this model on your own dataset.
|
347 |
+
|
348 |
+
<details><summary>Click to expand</summary>
|
349 |
+
|
350 |
+
</details>
|
351 |
+
-->
|
352 |
+
|
353 |
+
<!--
|
354 |
+
### Out-of-Scope Use
|
355 |
+
|
356 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
357 |
+
-->
|
358 |
+
|
359 |
+
## Evaluation
|
360 |
+
|
361 |
+
### Metrics
|
362 |
+
|
363 |
+
#### Information Retrieval
|
364 |
+
|
365 |
+
* Dataset: `full_en`
|
366 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
367 |
+
|
368 |
+
| Metric | Value |
|
369 |
+
|:---------------------|:-----------|
|
370 |
+
| cosine_accuracy@1 | 0.7368 |
|
371 |
+
| cosine_accuracy@20 | 0.9934 |
|
372 |
+
| cosine_accuracy@50 | 0.9967 |
|
373 |
+
| cosine_accuracy@100 | 1.0 |
|
374 |
+
| cosine_accuracy@150 | 1.0 |
|
375 |
+
| cosine_accuracy@200 | 1.0 |
|
376 |
+
| cosine_precision@1 | 0.7368 |
|
377 |
+
| cosine_precision@20 | 0.4947 |
|
378 |
+
| cosine_precision@50 | 0.3914 |
|
379 |
+
| cosine_precision@100 | 0.3095 |
|
380 |
+
| cosine_precision@150 | 0.2646 |
|
381 |
+
| cosine_precision@200 | 0.2319 |
|
382 |
+
| cosine_recall@1 | 0.0103 |
|
383 |
+
| cosine_recall@20 | 0.1323 |
|
384 |
+
| cosine_recall@50 | 0.2536 |
|
385 |
+
| cosine_recall@100 | 0.3923 |
|
386 |
+
| cosine_recall@150 | 0.4949 |
|
387 |
+
| cosine_recall@200 | 0.572 |
|
388 |
+
| cosine_ndcg@1 | 0.7368 |
|
389 |
+
| cosine_ndcg@20 | 0.5353 |
|
390 |
+
| cosine_ndcg@50 | 0.4494 |
|
391 |
+
| cosine_ndcg@100 | 0.4409 |
|
392 |
+
| cosine_ndcg@150 | 0.4883 |
|
393 |
+
| **cosine_ndcg@200** | **0.5341** |
|
394 |
+
| cosine_mrr@1 | 0.7368 |
|
395 |
+
| cosine_mrr@20 | 0.8372 |
|
396 |
+
| cosine_mrr@50 | 0.8373 |
|
397 |
+
| cosine_mrr@100 | 0.8374 |
|
398 |
+
| cosine_mrr@150 | 0.8374 |
|
399 |
+
| cosine_mrr@200 | 0.8374 |
|
400 |
+
| cosine_map@1 | 0.7368 |
|
401 |
+
| cosine_map@20 | 0.3408 |
|
402 |
+
| cosine_map@50 | 0.2368 |
|
403 |
+
| cosine_map@100 | 0.2101 |
|
404 |
+
| cosine_map@150 | 0.2277 |
|
405 |
+
| cosine_map@200 | 0.2468 |
|
406 |
+
| cosine_map@500 | 0.2972 |
|
407 |
+
|
408 |
+
<!--
|
409 |
+
## Bias, Risks and Limitations
|
410 |
+
|
411 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
412 |
+
-->
|
413 |
+
|
414 |
+
<!--
|
415 |
+
### Recommendations
|
416 |
+
|
417 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
418 |
+
-->
|
419 |
+
|
420 |
+
## Training Details
|
421 |
+
|
422 |
+
### Training Dataset
|
423 |
+
|
424 |
+
#### Unnamed Dataset
|
425 |
+
|
426 |
+
* Size: 114,699 training samples
|
427 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
428 |
+
* Approximate statistics based on the first 1000 samples:
|
429 |
+
| | anchor | positive |
|
430 |
+
|:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
431 |
+
| type | string | string |
|
432 |
+
| details | <ul><li>min: 43 tokens</li><li>mean: 65.45 tokens</li><li>max: 116 tokens</li></ul> | <ul><li>min: 34 tokens</li><li>mean: 55.34 tokens</li><li>max: 162 tokens</li></ul> |
|
433 |
+
* Samples:
|
434 |
+
| anchor | positive |
|
435 |
+
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
436 |
+
| <code>A technical director or any of its synonyms requires a strong blend of technical expertise and leadership skills, including the ability to oversee technical operations, manage teams, and ensure the successful execution of technical projects while maintaining operational efficiency and innovation.</code> | <code>Job roles that require promoting health and safety include occupational health and safety specialists, safety managers, and public health educators, all of whom work to ensure safe and healthy environments in workplaces and communities.</code> |
|
437 |
+
| <code>A technical director or any of its synonyms requires a strong blend of technical expertise and leadership skills, including the ability to oversee technical operations, manage teams, and ensure the successful execution of technical projects while maintaining operational efficiency and innovation.</code> | <code>Job roles that require organizing rehearsals include directors, choreographers, and conductors in theater, dance, and music ensembles, who must efficiently plan and schedule practice sessions to prepare performers for a successful final performance.</code> |
|
438 |
+
| <code>A technical director or any of its synonyms requires a strong blend of technical expertise and leadership skills, including the ability to oversee technical operations, manage teams, and ensure the successful execution of technical projects while maintaining operational efficiency and innovation.</code> | <code>Job roles such as Health and Safety Managers, Environmental Health Officers, and Risk Management Specialists often require the skill of negotiating health and safety issues with third parties to ensure compliance and protection standards are met across different organizations and sites.</code> |
|
439 |
+
* Loss: [<code>CachedGISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedgistembedloss) with these parameters:
|
440 |
+
```
|
441 |
+
{'guide': SentenceTransformer(
|
442 |
+
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
|
443 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
444 |
+
(2): Normalize()
|
445 |
+
), 'temperature': 0.01, 'mini_batch_size': 32, 'margin_strategy': 'absolute', 'margin': 0.0}
|
446 |
+
```
|
447 |
+
|
448 |
+
### Training Hyperparameters
|
449 |
+
#### Non-Default Hyperparameters
|
450 |
+
|
451 |
+
- `eval_strategy`: steps
|
452 |
+
- `per_device_train_batch_size`: 64
|
453 |
+
- `per_device_eval_batch_size`: 128
|
454 |
+
- `gradient_accumulation_steps`: 2
|
455 |
+
- `num_train_epochs`: 5
|
456 |
+
- `warmup_ratio`: 0.05
|
457 |
+
- `log_on_each_node`: False
|
458 |
+
- `fp16`: True
|
459 |
+
- `dataloader_num_workers`: 4
|
460 |
+
- `ddp_find_unused_parameters`: True
|
461 |
+
- `batch_sampler`: no_duplicates
|
462 |
+
|
463 |
+
#### All Hyperparameters
|
464 |
+
<details><summary>Click to expand</summary>
|
465 |
+
|
466 |
+
- `overwrite_output_dir`: False
|
467 |
+
- `do_predict`: False
|
468 |
+
- `eval_strategy`: steps
|
469 |
+
- `prediction_loss_only`: True
|
470 |
+
- `per_device_train_batch_size`: 64
|
471 |
+
- `per_device_eval_batch_size`: 128
|
472 |
+
- `per_gpu_train_batch_size`: None
|
473 |
+
- `per_gpu_eval_batch_size`: None
|
474 |
+
- `gradient_accumulation_steps`: 2
|
475 |
+
- `eval_accumulation_steps`: None
|
476 |
+
- `torch_empty_cache_steps`: None
|
477 |
+
- `learning_rate`: 5e-05
|
478 |
+
- `weight_decay`: 0.0
|
479 |
+
- `adam_beta1`: 0.9
|
480 |
+
- `adam_beta2`: 0.999
|
481 |
+
- `adam_epsilon`: 1e-08
|
482 |
+
- `max_grad_norm`: 1.0
|
483 |
+
- `num_train_epochs`: 5
|
484 |
+
- `max_steps`: -1
|
485 |
+
- `lr_scheduler_type`: linear
|
486 |
+
- `lr_scheduler_kwargs`: {}
|
487 |
+
- `warmup_ratio`: 0.05
|
488 |
+
- `warmup_steps`: 0
|
489 |
+
- `log_level`: passive
|
490 |
+
- `log_level_replica`: warning
|
491 |
+
- `log_on_each_node`: False
|
492 |
+
- `logging_nan_inf_filter`: True
|
493 |
+
- `save_safetensors`: True
|
494 |
+
- `save_on_each_node`: False
|
495 |
+
- `save_only_model`: False
|
496 |
+
- `restore_callback_states_from_checkpoint`: False
|
497 |
+
- `no_cuda`: False
|
498 |
+
- `use_cpu`: False
|
499 |
+
- `use_mps_device`: False
|
500 |
+
- `seed`: 42
|
501 |
+
- `data_seed`: None
|
502 |
+
- `jit_mode_eval`: False
|
503 |
+
- `use_ipex`: False
|
504 |
+
- `bf16`: False
|
505 |
+
- `fp16`: True
|
506 |
+
- `fp16_opt_level`: O1
|
507 |
+
- `half_precision_backend`: auto
|
508 |
+
- `bf16_full_eval`: False
|
509 |
+
- `fp16_full_eval`: False
|
510 |
+
- `tf32`: None
|
511 |
+
- `local_rank`: 0
|
512 |
+
- `ddp_backend`: None
|
513 |
+
- `tpu_num_cores`: None
|
514 |
+
- `tpu_metrics_debug`: False
|
515 |
+
- `debug`: []
|
516 |
+
- `dataloader_drop_last`: True
|
517 |
+
- `dataloader_num_workers`: 4
|
518 |
+
- `dataloader_prefetch_factor`: None
|
519 |
+
- `past_index`: -1
|
520 |
+
- `disable_tqdm`: False
|
521 |
+
- `remove_unused_columns`: True
|
522 |
+
- `label_names`: None
|
523 |
+
- `load_best_model_at_end`: False
|
524 |
+
- `ignore_data_skip`: False
|
525 |
+
- `fsdp`: []
|
526 |
+
- `fsdp_min_num_params`: 0
|
527 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
528 |
+
- `tp_size`: 0
|
529 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
530 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
531 |
+
- `deepspeed`: None
|
532 |
+
- `label_smoothing_factor`: 0.0
|
533 |
+
- `optim`: adamw_torch
|
534 |
+
- `optim_args`: None
|
535 |
+
- `adafactor`: False
|
536 |
+
- `group_by_length`: False
|
537 |
+
- `length_column_name`: length
|
538 |
+
- `ddp_find_unused_parameters`: True
|
539 |
+
- `ddp_bucket_cap_mb`: None
|
540 |
+
- `ddp_broadcast_buffers`: False
|
541 |
+
- `dataloader_pin_memory`: True
|
542 |
+
- `dataloader_persistent_workers`: False
|
543 |
+
- `skip_memory_metrics`: True
|
544 |
+
- `use_legacy_prediction_loop`: False
|
545 |
+
- `push_to_hub`: False
|
546 |
+
- `resume_from_checkpoint`: None
|
547 |
+
- `hub_model_id`: None
|
548 |
+
- `hub_strategy`: every_save
|
549 |
+
- `hub_private_repo`: None
|
550 |
+
- `hub_always_push`: False
|
551 |
+
- `gradient_checkpointing`: False
|
552 |
+
- `gradient_checkpointing_kwargs`: None
|
553 |
+
- `include_inputs_for_metrics`: False
|
554 |
+
- `include_for_metrics`: []
|
555 |
+
- `eval_do_concat_batches`: True
|
556 |
+
- `fp16_backend`: auto
|
557 |
+
- `push_to_hub_model_id`: None
|
558 |
+
- `push_to_hub_organization`: None
|
559 |
+
- `mp_parameters`:
|
560 |
+
- `auto_find_batch_size`: False
|
561 |
+
- `full_determinism`: False
|
562 |
+
- `torchdynamo`: None
|
563 |
+
- `ray_scope`: last
|
564 |
+
- `ddp_timeout`: 1800
|
565 |
+
- `torch_compile`: False
|
566 |
+
- `torch_compile_backend`: None
|
567 |
+
- `torch_compile_mode`: None
|
568 |
+
- `include_tokens_per_second`: False
|
569 |
+
- `include_num_input_tokens_seen`: False
|
570 |
+
- `neftune_noise_alpha`: None
|
571 |
+
- `optim_target_modules`: None
|
572 |
+
- `batch_eval_metrics`: False
|
573 |
+
- `eval_on_start`: False
|
574 |
+
- `use_liger_kernel`: False
|
575 |
+
- `eval_use_gather_object`: False
|
576 |
+
- `average_tokens_across_devices`: False
|
577 |
+
- `prompts`: None
|
578 |
+
- `batch_sampler`: no_duplicates
|
579 |
+
- `multi_dataset_batch_sampler`: proportional
|
580 |
+
|
581 |
+
</details>
|
582 |
+
|
583 |
+
### Training Logs
|
584 |
+
| Epoch | Step | Training Loss | full_en_cosine_ndcg@200 |
|
585 |
+
|:------:|:----:|:-------------:|:-----------------------:|
|
586 |
+
| -1 | -1 | - | 0.4784 |
|
587 |
+
| 0.0011 | 1 | 9.119 | - |
|
588 |
+
| 0.1116 | 100 | 4.1469 | - |
|
589 |
+
| 0.2232 | 200 | 2.5294 | 0.5362 |
|
590 |
+
| 0.3348 | 300 | 2.3611 | - |
|
591 |
+
| 0.4464 | 400 | 2.192 | 0.5318 |
|
592 |
+
| 0.5580 | 500 | 2.0338 | - |
|
593 |
+
| 0.6696 | 600 | 1.9009 | 0.5383 |
|
594 |
+
| 0.7812 | 700 | 1.8404 | - |
|
595 |
+
| 0.8929 | 800 | 1.7692 | 0.5352 |
|
596 |
+
| 1.0045 | 900 | 1.6921 | - |
|
597 |
+
| 1.1161 | 1000 | 1.3861 | 0.5368 |
|
598 |
+
| 1.2277 | 1100 | 1.3863 | - |
|
599 |
+
| 1.3393 | 1200 | 1.3546 | 0.5259 |
|
600 |
+
| 1.4509 | 1300 | 1.373 | - |
|
601 |
+
| 1.5625 | 1400 | 1.3364 | 0.5303 |
|
602 |
+
| 1.6741 | 1500 | 1.2876 | - |
|
603 |
+
| 1.7857 | 1600 | 1.3094 | 0.5323 |
|
604 |
+
| 1.8973 | 1700 | 1.2784 | - |
|
605 |
+
| 2.0089 | 1800 | 1.2204 | 0.5330 |
|
606 |
+
| 2.1205 | 1900 | 0.9617 | - |
|
607 |
+
| 2.2321 | 2000 | 1.0004 | 0.5277 |
|
608 |
+
| 2.3438 | 2100 | 0.9694 | - |
|
609 |
+
| 2.4554 | 2200 | 0.9843 | 0.5356 |
|
610 |
+
| 2.5670 | 2300 | 0.9743 | - |
|
611 |
+
| 2.6786 | 2400 | 0.9252 | 0.5320 |
|
612 |
+
| 2.7902 | 2500 | 0.9272 | - |
|
613 |
+
| 2.9018 | 2600 | 0.9279 | 0.5333 |
|
614 |
+
| 3.0134 | 2700 | 0.857 | - |
|
615 |
+
| 3.125 | 2800 | 0.7313 | 0.5300 |
|
616 |
+
| 3.2366 | 2900 | 0.7103 | - |
|
617 |
+
| 3.3482 | 3000 | 0.7187 | 0.5319 |
|
618 |
+
| 3.4598 | 3100 | 0.7067 | - |
|
619 |
+
| 3.5714 | 3200 | 0.7157 | 0.5369 |
|
620 |
+
| 3.6830 | 3300 | 0.7113 | - |
|
621 |
+
| 3.7946 | 3400 | 0.7013 | 0.5341 |
|
622 |
+
| 3.9062 | 3500 | 0.6903 | - |
|
623 |
+
| 4.0179 | 3600 | 0.6462 | 0.5335 |
|
624 |
+
| 4.1295 | 3700 | 0.5162 | - |
|
625 |
+
| 4.2411 | 3800 | 0.524 | 0.5352 |
|
626 |
+
| 4.3527 | 3900 | 0.5303 | - |
|
627 |
+
| 4.4643 | 4000 | 0.5269 | 0.5341 |
|
628 |
+
|
629 |
+
|
630 |
+
### Framework Versions
|
631 |
+
- Python: 3.11.11
|
632 |
+
- Sentence Transformers: 4.1.0
|
633 |
+
- Transformers: 4.51.2
|
634 |
+
- PyTorch: 2.6.0+cu124
|
635 |
+
- Accelerate: 1.6.0
|
636 |
+
- Datasets: 3.5.0
|
637 |
+
- Tokenizers: 0.21.1
|
638 |
+
|
639 |
+
## Citation
|
640 |
+
|
641 |
+
### BibTeX
|
642 |
+
|
643 |
+
#### Sentence Transformers
|
644 |
+
```bibtex
|
645 |
+
@inproceedings{reimers-2019-sentence-bert,
|
646 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
647 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
648 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
649 |
+
month = "11",
|
650 |
+
year = "2019",
|
651 |
+
publisher = "Association for Computational Linguistics",
|
652 |
+
url = "https://arxiv.org/abs/1908.10084",
|
653 |
+
}
|
654 |
+
```
|
655 |
+
|
656 |
+
<!--
|
657 |
+
## Glossary
|
658 |
+
|
659 |
+
*Clearly define terms in order to be accessible across audiences.*
|
660 |
+
-->
|
661 |
+
|
662 |
+
<!--
|
663 |
+
## Model Card Authors
|
664 |
+
|
665 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
666 |
+
-->
|
667 |
+
|
668 |
+
<!--
|
669 |
+
## Model Card Contact
|
670 |
+
|
671 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
672 |
+
-->
|
checkpoint-4000/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"BertModel"
|
4 |
+
],
|
5 |
+
"attention_probs_dropout_prob": 0.1,
|
6 |
+
"classifier_dropout": null,
|
7 |
+
"gradient_checkpointing": false,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 1024,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0"
|
13 |
+
},
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 4096,
|
16 |
+
"label2id": {
|
17 |
+
"LABEL_0": 0
|
18 |
+
},
|
19 |
+
"layer_norm_eps": 1e-12,
|
20 |
+
"max_position_embeddings": 512,
|
21 |
+
"model_type": "bert",
|
22 |
+
"num_attention_heads": 16,
|
23 |
+
"num_hidden_layers": 24,
|
24 |
+
"pad_token_id": 0,
|
25 |
+
"position_embedding_type": "absolute",
|
26 |
+
"torch_dtype": "float32",
|
27 |
+
"transformers_version": "4.51.2",
|
28 |
+
"type_vocab_size": 2,
|
29 |
+
"use_cache": true,
|
30 |
+
"vocab_size": 30522
|
31 |
+
}
|
checkpoint-4000/modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
checkpoint-4000/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:124c507a5c0f750f109aa5f6ce6ff1f06d837c95292222256a33411a7baedb2a
|
3 |
+
size 15894
|
checkpoint-4000/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5af3d83b87aaf7159318fcbdc059a07a456e6cb33dcd0bb2861b468a27531136
|
3 |
+
size 988
|
checkpoint-4000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dce29165368521d1668b15eb8f526de9e14c51abee383465aaf2f9bb2e679347
|
3 |
+
size 1064
|
checkpoint-4000/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-4000/tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"extra_special_tokens": {},
|
49 |
+
"mask_token": "[MASK]",
|
50 |
+
"model_max_length": 512,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_token": "[PAD]",
|
53 |
+
"sep_token": "[SEP]",
|
54 |
+
"strip_accents": null,
|
55 |
+
"tokenize_chinese_chars": true,
|
56 |
+
"tokenizer_class": "BertTokenizer",
|
57 |
+
"unk_token": "[UNK]"
|
58 |
+
}
|
checkpoint-4000/trainer_state.json
ADDED
@@ -0,0 +1,1221 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_global_step": null,
|
3 |
+
"best_metric": null,
|
4 |
+
"best_model_checkpoint": null,
|
5 |
+
"epoch": 4.464285714285714,
|
6 |
+
"eval_steps": 200,
|
7 |
+
"global_step": 4000,
|
8 |
+
"is_hyper_param_search": false,
|
9 |
+
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": true,
|
11 |
+
"log_history": [
|
12 |
+
{
|
13 |
+
"epoch": 0.0011160714285714285,
|
14 |
+
"grad_norm": NaN,
|
15 |
+
"learning_rate": 0.0,
|
16 |
+
"loss": 9.119,
|
17 |
+
"step": 1
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"epoch": 0.11160714285714286,
|
21 |
+
"grad_norm": 17.632238388061523,
|
22 |
+
"learning_rate": 2.1428571428571428e-05,
|
23 |
+
"loss": 4.1469,
|
24 |
+
"step": 100
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"epoch": 0.22321428571428573,
|
28 |
+
"grad_norm": 14.409270286560059,
|
29 |
+
"learning_rate": 4.375e-05,
|
30 |
+
"loss": 2.5294,
|
31 |
+
"step": 200
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"epoch": 0.22321428571428573,
|
35 |
+
"eval_full_en_cosine_accuracy@1": 0.7467105263157895,
|
36 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
37 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
38 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
39 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
40 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
41 |
+
"eval_full_en_cosine_map@1": 0.7467105263157895,
|
42 |
+
"eval_full_en_cosine_map@100": 0.2121058701298033,
|
43 |
+
"eval_full_en_cosine_map@150": 0.2294109301872967,
|
44 |
+
"eval_full_en_cosine_map@20": 0.34167650006204187,
|
45 |
+
"eval_full_en_cosine_map@200": 0.2492171685943861,
|
46 |
+
"eval_full_en_cosine_map@50": 0.237336657426832,
|
47 |
+
"eval_full_en_cosine_map@500": 0.3000288940307502,
|
48 |
+
"eval_full_en_cosine_mrr@1": 0.7467105263157895,
|
49 |
+
"eval_full_en_cosine_mrr@100": 0.8460592769803298,
|
50 |
+
"eval_full_en_cosine_mrr@150": 0.8460592769803298,
|
51 |
+
"eval_full_en_cosine_mrr@20": 0.8458948032961192,
|
52 |
+
"eval_full_en_cosine_mrr@200": 0.8460592769803298,
|
53 |
+
"eval_full_en_cosine_mrr@50": 0.8460122844991269,
|
54 |
+
"eval_full_en_cosine_ndcg@1": 0.7467105263157895,
|
55 |
+
"eval_full_en_cosine_ndcg@100": 0.4430509248084704,
|
56 |
+
"eval_full_en_cosine_ndcg@150": 0.4894828917681416,
|
57 |
+
"eval_full_en_cosine_ndcg@20": 0.5367541274871807,
|
58 |
+
"eval_full_en_cosine_ndcg@200": 0.5361903606133726,
|
59 |
+
"eval_full_en_cosine_ndcg@50": 0.448683811733402,
|
60 |
+
"eval_full_en_cosine_precision@1": 0.7467105263157895,
|
61 |
+
"eval_full_en_cosine_precision@100": 0.31240131578947367,
|
62 |
+
"eval_full_en_cosine_precision@150": 0.26592105263157895,
|
63 |
+
"eval_full_en_cosine_precision@20": 0.4965460526315789,
|
64 |
+
"eval_full_en_cosine_precision@200": 0.23370065789473685,
|
65 |
+
"eval_full_en_cosine_precision@50": 0.3904605263157895,
|
66 |
+
"eval_full_en_cosine_recall@1": 0.010753343030902496,
|
67 |
+
"eval_full_en_cosine_recall@100": 0.39446255566624855,
|
68 |
+
"eval_full_en_cosine_recall@150": 0.49544823712709557,
|
69 |
+
"eval_full_en_cosine_recall@20": 0.13279013317825217,
|
70 |
+
"eval_full_en_cosine_recall@200": 0.5739614992682516,
|
71 |
+
"eval_full_en_cosine_recall@50": 0.25254843470147753,
|
72 |
+
"eval_runtime": 1.5828,
|
73 |
+
"eval_samples_per_second": 0.0,
|
74 |
+
"eval_sequential_score": 0.5361903606133726,
|
75 |
+
"eval_steps_per_second": 0.0,
|
76 |
+
"step": 200
|
77 |
+
},
|
78 |
+
{
|
79 |
+
"epoch": 0.33482142857142855,
|
80 |
+
"grad_norm": 16.260934829711914,
|
81 |
+
"learning_rate": 4.915413533834587e-05,
|
82 |
+
"loss": 2.3611,
|
83 |
+
"step": 300
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"epoch": 0.44642857142857145,
|
87 |
+
"grad_norm": 13.242988586425781,
|
88 |
+
"learning_rate": 4.797932330827068e-05,
|
89 |
+
"loss": 2.192,
|
90 |
+
"step": 400
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"epoch": 0.44642857142857145,
|
94 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
95 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
96 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
97 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
98 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
99 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
100 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
101 |
+
"eval_full_en_cosine_map@100": 0.2088144416212806,
|
102 |
+
"eval_full_en_cosine_map@150": 0.22677217670719133,
|
103 |
+
"eval_full_en_cosine_map@20": 0.3349832137166454,
|
104 |
+
"eval_full_en_cosine_map@200": 0.245946497368659,
|
105 |
+
"eval_full_en_cosine_map@50": 0.23473921202287384,
|
106 |
+
"eval_full_en_cosine_map@500": 0.2973985707303743,
|
107 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
108 |
+
"eval_full_en_cosine_mrr@100": 0.8394156306336016,
|
109 |
+
"eval_full_en_cosine_mrr@150": 0.8394156306336016,
|
110 |
+
"eval_full_en_cosine_mrr@20": 0.8392713554720135,
|
111 |
+
"eval_full_en_cosine_mrr@200": 0.8394156306336016,
|
112 |
+
"eval_full_en_cosine_mrr@50": 0.8393810045948205,
|
113 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
114 |
+
"eval_full_en_cosine_ndcg@100": 0.43855475512592684,
|
115 |
+
"eval_full_en_cosine_ndcg@150": 0.48609390907359196,
|
116 |
+
"eval_full_en_cosine_ndcg@20": 0.5288083416910968,
|
117 |
+
"eval_full_en_cosine_ndcg@200": 0.5318117937684201,
|
118 |
+
"eval_full_en_cosine_ndcg@50": 0.4453338982563473,
|
119 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
120 |
+
"eval_full_en_cosine_precision@100": 0.3088157894736842,
|
121 |
+
"eval_full_en_cosine_precision@150": 0.2644517543859649,
|
122 |
+
"eval_full_en_cosine_precision@20": 0.4875,
|
123 |
+
"eval_full_en_cosine_precision@200": 0.23172697368421055,
|
124 |
+
"eval_full_en_cosine_precision@50": 0.38782894736842105,
|
125 |
+
"eval_full_en_cosine_recall@1": 0.010619007443519193,
|
126 |
+
"eval_full_en_cosine_recall@100": 0.3902042311088277,
|
127 |
+
"eval_full_en_cosine_recall@150": 0.4925745165667779,
|
128 |
+
"eval_full_en_cosine_recall@20": 0.1301764615450556,
|
129 |
+
"eval_full_en_cosine_recall@200": 0.5696006364444781,
|
130 |
+
"eval_full_en_cosine_recall@50": 0.2518199886564403,
|
131 |
+
"eval_runtime": 1.5596,
|
132 |
+
"eval_samples_per_second": 0.0,
|
133 |
+
"eval_sequential_score": 0.5318117937684201,
|
134 |
+
"eval_steps_per_second": 0.0,
|
135 |
+
"step": 400
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.5580357142857143,
|
139 |
+
"grad_norm": 13.307888984680176,
|
140 |
+
"learning_rate": 4.680451127819549e-05,
|
141 |
+
"loss": 2.0338,
|
142 |
+
"step": 500
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.6696428571428571,
|
146 |
+
"grad_norm": 12.763930320739746,
|
147 |
+
"learning_rate": 4.56296992481203e-05,
|
148 |
+
"loss": 1.9009,
|
149 |
+
"step": 600
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.6696428571428571,
|
153 |
+
"eval_full_en_cosine_accuracy@1": 0.7302631578947368,
|
154 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
155 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
156 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
157 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
158 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
159 |
+
"eval_full_en_cosine_map@1": 0.7302631578947368,
|
160 |
+
"eval_full_en_cosine_map@100": 0.2146410944227793,
|
161 |
+
"eval_full_en_cosine_map@150": 0.23271596511985665,
|
162 |
+
"eval_full_en_cosine_map@20": 0.3429678297332613,
|
163 |
+
"eval_full_en_cosine_map@200": 0.2520997707361607,
|
164 |
+
"eval_full_en_cosine_map@50": 0.2404899713826549,
|
165 |
+
"eval_full_en_cosine_map@500": 0.302904619520322,
|
166 |
+
"eval_full_en_cosine_mrr@1": 0.7302631578947368,
|
167 |
+
"eval_full_en_cosine_mrr@100": 0.8306572094298247,
|
168 |
+
"eval_full_en_cosine_mrr@150": 0.8306572094298247,
|
169 |
+
"eval_full_en_cosine_mrr@20": 0.8304491697994989,
|
170 |
+
"eval_full_en_cosine_mrr@200": 0.8306572094298247,
|
171 |
+
"eval_full_en_cosine_mrr@50": 0.8306058114035089,
|
172 |
+
"eval_full_en_cosine_ndcg@1": 0.7302631578947368,
|
173 |
+
"eval_full_en_cosine_ndcg@100": 0.4445617284976941,
|
174 |
+
"eval_full_en_cosine_ndcg@150": 0.4922393935902775,
|
175 |
+
"eval_full_en_cosine_ndcg@20": 0.5357880041966661,
|
176 |
+
"eval_full_en_cosine_ndcg@200": 0.5383209000398446,
|
177 |
+
"eval_full_en_cosine_ndcg@50": 0.4504820590447715,
|
178 |
+
"eval_full_en_cosine_precision@1": 0.7302631578947368,
|
179 |
+
"eval_full_en_cosine_precision@100": 0.31358552631578945,
|
180 |
+
"eval_full_en_cosine_precision@150": 0.2677412280701754,
|
181 |
+
"eval_full_en_cosine_precision@20": 0.49720394736842105,
|
182 |
+
"eval_full_en_cosine_precision@200": 0.23452302631578953,
|
183 |
+
"eval_full_en_cosine_precision@50": 0.3932894736842105,
|
184 |
+
"eval_full_en_cosine_recall@1": 0.010303516134180577,
|
185 |
+
"eval_full_en_cosine_recall@100": 0.3970033142271577,
|
186 |
+
"eval_full_en_cosine_recall@150": 0.5001101850184368,
|
187 |
+
"eval_full_en_cosine_recall@20": 0.13302896177814508,
|
188 |
+
"eval_full_en_cosine_recall@200": 0.5777429812058247,
|
189 |
+
"eval_full_en_cosine_recall@50": 0.254528957048419,
|
190 |
+
"eval_runtime": 1.5616,
|
191 |
+
"eval_samples_per_second": 0.0,
|
192 |
+
"eval_sequential_score": 0.5383209000398446,
|
193 |
+
"eval_steps_per_second": 0.0,
|
194 |
+
"step": 600
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"epoch": 0.78125,
|
198 |
+
"grad_norm": 13.439990997314453,
|
199 |
+
"learning_rate": 4.4454887218045117e-05,
|
200 |
+
"loss": 1.8404,
|
201 |
+
"step": 700
|
202 |
+
},
|
203 |
+
{
|
204 |
+
"epoch": 0.8928571428571429,
|
205 |
+
"grad_norm": 12.594465255737305,
|
206 |
+
"learning_rate": 4.3280075187969924e-05,
|
207 |
+
"loss": 1.7692,
|
208 |
+
"step": 800
|
209 |
+
},
|
210 |
+
{
|
211 |
+
"epoch": 0.8928571428571429,
|
212 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
213 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
214 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
215 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
216 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
217 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
218 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
219 |
+
"eval_full_en_cosine_map@100": 0.21030614519224017,
|
220 |
+
"eval_full_en_cosine_map@150": 0.22737063252522982,
|
221 |
+
"eval_full_en_cosine_map@20": 0.3442880676713117,
|
222 |
+
"eval_full_en_cosine_map@200": 0.24764067563282596,
|
223 |
+
"eval_full_en_cosine_map@50": 0.23827484272575025,
|
224 |
+
"eval_full_en_cosine_map@500": 0.2987091429260604,
|
225 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
226 |
+
"eval_full_en_cosine_mrr@100": 0.8404268619187053,
|
227 |
+
"eval_full_en_cosine_mrr@150": 0.8404268619187053,
|
228 |
+
"eval_full_en_cosine_mrr@20": 0.8402307852965749,
|
229 |
+
"eval_full_en_cosine_mrr@200": 0.8404268619187053,
|
230 |
+
"eval_full_en_cosine_mrr@50": 0.8403738058915406,
|
231 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
232 |
+
"eval_full_en_cosine_ndcg@100": 0.440670430732987,
|
233 |
+
"eval_full_en_cosine_ndcg@150": 0.486778222456143,
|
234 |
+
"eval_full_en_cosine_ndcg@20": 0.5383903905850532,
|
235 |
+
"eval_full_en_cosine_ndcg@200": 0.5352292016764449,
|
236 |
+
"eval_full_en_cosine_ndcg@50": 0.45046850998342597,
|
237 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
238 |
+
"eval_full_en_cosine_precision@100": 0.3099342105263158,
|
239 |
+
"eval_full_en_cosine_precision@150": 0.26390350877192986,
|
240 |
+
"eval_full_en_cosine_precision@20": 0.5,
|
241 |
+
"eval_full_en_cosine_precision@200": 0.23320723684210526,
|
242 |
+
"eval_full_en_cosine_precision@50": 0.39335526315789476,
|
243 |
+
"eval_full_en_cosine_recall@1": 0.01051277780149725,
|
244 |
+
"eval_full_en_cosine_recall@100": 0.39158535797000443,
|
245 |
+
"eval_full_en_cosine_recall@150": 0.4917399858788313,
|
246 |
+
"eval_full_en_cosine_recall@20": 0.13328036442285973,
|
247 |
+
"eval_full_en_cosine_recall@200": 0.5734492892933252,
|
248 |
+
"eval_full_en_cosine_recall@50": 0.254129727850083,
|
249 |
+
"eval_runtime": 1.5752,
|
250 |
+
"eval_samples_per_second": 0.0,
|
251 |
+
"eval_sequential_score": 0.5352292016764449,
|
252 |
+
"eval_steps_per_second": 0.0,
|
253 |
+
"step": 800
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"epoch": 1.0044642857142858,
|
257 |
+
"grad_norm": 13.140974998474121,
|
258 |
+
"learning_rate": 4.212875939849624e-05,
|
259 |
+
"loss": 1.6921,
|
260 |
+
"step": 900
|
261 |
+
},
|
262 |
+
{
|
263 |
+
"epoch": 1.1160714285714286,
|
264 |
+
"grad_norm": 12.160736083984375,
|
265 |
+
"learning_rate": 4.096569548872181e-05,
|
266 |
+
"loss": 1.3861,
|
267 |
+
"step": 1000
|
268 |
+
},
|
269 |
+
{
|
270 |
+
"epoch": 1.1160714285714286,
|
271 |
+
"eval_full_en_cosine_accuracy@1": 0.7401315789473685,
|
272 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
273 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
274 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
275 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
276 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
277 |
+
"eval_full_en_cosine_map@1": 0.7401315789473685,
|
278 |
+
"eval_full_en_cosine_map@100": 0.21155466872463927,
|
279 |
+
"eval_full_en_cosine_map@150": 0.2291636549745022,
|
280 |
+
"eval_full_en_cosine_map@20": 0.3373673798048492,
|
281 |
+
"eval_full_en_cosine_map@200": 0.24905074192004603,
|
282 |
+
"eval_full_en_cosine_map@50": 0.2376950112180141,
|
283 |
+
"eval_full_en_cosine_map@500": 0.3006802538137734,
|
284 |
+
"eval_full_en_cosine_mrr@1": 0.7401315789473685,
|
285 |
+
"eval_full_en_cosine_mrr@100": 0.8405236576289212,
|
286 |
+
"eval_full_en_cosine_mrr@150": 0.8405236576289212,
|
287 |
+
"eval_full_en_cosine_mrr@20": 0.8403143274853806,
|
288 |
+
"eval_full_en_cosine_mrr@200": 0.8405236576289212,
|
289 |
+
"eval_full_en_cosine_mrr@50": 0.840463849016481,
|
290 |
+
"eval_full_en_cosine_ndcg@1": 0.7401315789473685,
|
291 |
+
"eval_full_en_cosine_ndcg@100": 0.44212858816477746,
|
292 |
+
"eval_full_en_cosine_ndcg@150": 0.48946706445562127,
|
293 |
+
"eval_full_en_cosine_ndcg@20": 0.5332180756481385,
|
294 |
+
"eval_full_en_cosine_ndcg@200": 0.5367929588661781,
|
295 |
+
"eval_full_en_cosine_ndcg@50": 0.44979391873656477,
|
296 |
+
"eval_full_en_cosine_precision@1": 0.7401315789473685,
|
297 |
+
"eval_full_en_cosine_precision@100": 0.3114473684210526,
|
298 |
+
"eval_full_en_cosine_precision@150": 0.266469298245614,
|
299 |
+
"eval_full_en_cosine_precision@20": 0.49243421052631575,
|
300 |
+
"eval_full_en_cosine_precision@200": 0.2345888157894737,
|
301 |
+
"eval_full_en_cosine_precision@50": 0.3921052631578947,
|
302 |
+
"eval_full_en_cosine_recall@1": 0.010392607884295562,
|
303 |
+
"eval_full_en_cosine_recall@100": 0.3933254279416559,
|
304 |
+
"eval_full_en_cosine_recall@150": 0.4957503189606009,
|
305 |
+
"eval_full_en_cosine_recall@20": 0.13107623492706288,
|
306 |
+
"eval_full_en_cosine_recall@200": 0.5753954619760326,
|
307 |
+
"eval_full_en_cosine_recall@50": 0.2539746341397596,
|
308 |
+
"eval_runtime": 1.6397,
|
309 |
+
"eval_samples_per_second": 0.0,
|
310 |
+
"eval_sequential_score": 0.5367929588661781,
|
311 |
+
"eval_steps_per_second": 0.0,
|
312 |
+
"step": 1000
|
313 |
+
},
|
314 |
+
{
|
315 |
+
"epoch": 1.2276785714285714,
|
316 |
+
"grad_norm": 13.078369140625,
|
317 |
+
"learning_rate": 3.9790883458646615e-05,
|
318 |
+
"loss": 1.3863,
|
319 |
+
"step": 1100
|
320 |
+
},
|
321 |
+
{
|
322 |
+
"epoch": 1.3392857142857144,
|
323 |
+
"grad_norm": 11.990692138671875,
|
324 |
+
"learning_rate": 3.861607142857143e-05,
|
325 |
+
"loss": 1.3546,
|
326 |
+
"step": 1200
|
327 |
+
},
|
328 |
+
{
|
329 |
+
"epoch": 1.3392857142857144,
|
330 |
+
"eval_full_en_cosine_accuracy@1": 0.7203947368421053,
|
331 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
332 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
333 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
334 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
335 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
336 |
+
"eval_full_en_cosine_map@1": 0.7203947368421053,
|
337 |
+
"eval_full_en_cosine_map@100": 0.20552277525856266,
|
338 |
+
"eval_full_en_cosine_map@150": 0.22274311961933413,
|
339 |
+
"eval_full_en_cosine_map@20": 0.3363904557549852,
|
340 |
+
"eval_full_en_cosine_map@200": 0.24106738760441354,
|
341 |
+
"eval_full_en_cosine_map@50": 0.23370113464760453,
|
342 |
+
"eval_full_en_cosine_map@500": 0.28981293048421486,
|
343 |
+
"eval_full_en_cosine_mrr@1": 0.7203947368421053,
|
344 |
+
"eval_full_en_cosine_mrr@100": 0.8322617799738206,
|
345 |
+
"eval_full_en_cosine_mrr@150": 0.8322617799738206,
|
346 |
+
"eval_full_en_cosine_mrr@20": 0.8320620443153339,
|
347 |
+
"eval_full_en_cosine_mrr@200": 0.8322617799738206,
|
348 |
+
"eval_full_en_cosine_mrr@50": 0.8322050649102997,
|
349 |
+
"eval_full_en_cosine_ndcg@1": 0.7203947368421053,
|
350 |
+
"eval_full_en_cosine_ndcg@100": 0.43445871937106545,
|
351 |
+
"eval_full_en_cosine_ndcg@150": 0.48130417146010107,
|
352 |
+
"eval_full_en_cosine_ndcg@20": 0.531477407982968,
|
353 |
+
"eval_full_en_cosine_ndcg@200": 0.5259375639543232,
|
354 |
+
"eval_full_en_cosine_ndcg@50": 0.4444057356887903,
|
355 |
+
"eval_full_en_cosine_precision@1": 0.7203947368421053,
|
356 |
+
"eval_full_en_cosine_precision@100": 0.3039802631578947,
|
357 |
+
"eval_full_en_cosine_precision@150": 0.25999999999999995,
|
358 |
+
"eval_full_en_cosine_precision@20": 0.4925986842105263,
|
359 |
+
"eval_full_en_cosine_precision@200": 0.22763157894736838,
|
360 |
+
"eval_full_en_cosine_precision@50": 0.3867105263157895,
|
361 |
+
"eval_full_en_cosine_recall@1": 0.010318104890368607,
|
362 |
+
"eval_full_en_cosine_recall@100": 0.385615965839615,
|
363 |
+
"eval_full_en_cosine_recall@150": 0.48656381032984825,
|
364 |
+
"eval_full_en_cosine_recall@20": 0.13139326985918445,
|
365 |
+
"eval_full_en_cosine_recall@200": 0.5617757383007209,
|
366 |
+
"eval_full_en_cosine_recall@50": 0.2506285703289517,
|
367 |
+
"eval_runtime": 1.5585,
|
368 |
+
"eval_samples_per_second": 0.0,
|
369 |
+
"eval_sequential_score": 0.5259375639543232,
|
370 |
+
"eval_steps_per_second": 0.0,
|
371 |
+
"step": 1200
|
372 |
+
},
|
373 |
+
{
|
374 |
+
"epoch": 1.4508928571428572,
|
375 |
+
"grad_norm": 15.019533157348633,
|
376 |
+
"learning_rate": 3.744125939849624e-05,
|
377 |
+
"loss": 1.373,
|
378 |
+
"step": 1300
|
379 |
+
},
|
380 |
+
{
|
381 |
+
"epoch": 1.5625,
|
382 |
+
"grad_norm": 10.545878410339355,
|
383 |
+
"learning_rate": 3.626644736842105e-05,
|
384 |
+
"loss": 1.3364,
|
385 |
+
"step": 1400
|
386 |
+
},
|
387 |
+
{
|
388 |
+
"epoch": 1.5625,
|
389 |
+
"eval_full_en_cosine_accuracy@1": 0.7171052631578947,
|
390 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
391 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
392 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
393 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
394 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
395 |
+
"eval_full_en_cosine_map@1": 0.7171052631578947,
|
396 |
+
"eval_full_en_cosine_map@100": 0.20833018055660496,
|
397 |
+
"eval_full_en_cosine_map@150": 0.22583322401021033,
|
398 |
+
"eval_full_en_cosine_map@20": 0.34006318172507877,
|
399 |
+
"eval_full_en_cosine_map@200": 0.24462161151730188,
|
400 |
+
"eval_full_en_cosine_map@50": 0.23483789231739935,
|
401 |
+
"eval_full_en_cosine_map@500": 0.2946124561805931,
|
402 |
+
"eval_full_en_cosine_mrr@1": 0.7171052631578947,
|
403 |
+
"eval_full_en_cosine_mrr@100": 0.8267713172687238,
|
404 |
+
"eval_full_en_cosine_mrr@150": 0.8267713172687238,
|
405 |
+
"eval_full_en_cosine_mrr@20": 0.8265913362952838,
|
406 |
+
"eval_full_en_cosine_mrr@200": 0.8267713172687238,
|
407 |
+
"eval_full_en_cosine_mrr@50": 0.8267343568902494,
|
408 |
+
"eval_full_en_cosine_ndcg@1": 0.7171052631578947,
|
409 |
+
"eval_full_en_cosine_ndcg@100": 0.4377486787968229,
|
410 |
+
"eval_full_en_cosine_ndcg@150": 0.4850669425848544,
|
411 |
+
"eval_full_en_cosine_ndcg@20": 0.5331724259953773,
|
412 |
+
"eval_full_en_cosine_ndcg@200": 0.5302927064126869,
|
413 |
+
"eval_full_en_cosine_ndcg@50": 0.4451308688476405,
|
414 |
+
"eval_full_en_cosine_precision@1": 0.7171052631578947,
|
415 |
+
"eval_full_en_cosine_precision@100": 0.3074671052631579,
|
416 |
+
"eval_full_en_cosine_precision@150": 0.2625657894736842,
|
417 |
+
"eval_full_en_cosine_precision@20": 0.4947368421052632,
|
418 |
+
"eval_full_en_cosine_precision@200": 0.23016447368421053,
|
419 |
+
"eval_full_en_cosine_precision@50": 0.38769736842105257,
|
420 |
+
"eval_full_en_cosine_recall@1": 0.010208074045806198,
|
421 |
+
"eval_full_en_cosine_recall@100": 0.3902466549235702,
|
422 |
+
"eval_full_en_cosine_recall@150": 0.49226776551348056,
|
423 |
+
"eval_full_en_cosine_recall@20": 0.13255572846134298,
|
424 |
+
"eval_full_en_cosine_recall@200": 0.5680994353864672,
|
425 |
+
"eval_full_en_cosine_recall@50": 0.25126941591084845,
|
426 |
+
"eval_runtime": 1.5595,
|
427 |
+
"eval_samples_per_second": 0.0,
|
428 |
+
"eval_sequential_score": 0.5302927064126869,
|
429 |
+
"eval_steps_per_second": 0.0,
|
430 |
+
"step": 1400
|
431 |
+
},
|
432 |
+
{
|
433 |
+
"epoch": 1.6741071428571428,
|
434 |
+
"grad_norm": 18.495975494384766,
|
435 |
+
"learning_rate": 3.509163533834587e-05,
|
436 |
+
"loss": 1.2876,
|
437 |
+
"step": 1500
|
438 |
+
},
|
439 |
+
{
|
440 |
+
"epoch": 1.7857142857142856,
|
441 |
+
"grad_norm": 12.646751403808594,
|
442 |
+
"learning_rate": 3.391682330827068e-05,
|
443 |
+
"loss": 1.3094,
|
444 |
+
"step": 1600
|
445 |
+
},
|
446 |
+
{
|
447 |
+
"epoch": 1.7857142857142856,
|
448 |
+
"eval_full_en_cosine_accuracy@1": 0.7072368421052632,
|
449 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
450 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
451 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
452 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
453 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
454 |
+
"eval_full_en_cosine_map@1": 0.7072368421052632,
|
455 |
+
"eval_full_en_cosine_map@100": 0.20923239071614674,
|
456 |
+
"eval_full_en_cosine_map@150": 0.225604138471006,
|
457 |
+
"eval_full_en_cosine_map@20": 0.34034356587585846,
|
458 |
+
"eval_full_en_cosine_map@200": 0.24539737099429304,
|
459 |
+
"eval_full_en_cosine_map@50": 0.23464702413938254,
|
460 |
+
"eval_full_en_cosine_map@500": 0.29597166286299953,
|
461 |
+
"eval_full_en_cosine_mrr@1": 0.7072368421052632,
|
462 |
+
"eval_full_en_cosine_mrr@100": 0.8214137967940215,
|
463 |
+
"eval_full_en_cosine_mrr@150": 0.8214137967940215,
|
464 |
+
"eval_full_en_cosine_mrr@20": 0.8213699371448987,
|
465 |
+
"eval_full_en_cosine_mrr@200": 0.8214137967940215,
|
466 |
+
"eval_full_en_cosine_mrr@50": 0.8213699371448987,
|
467 |
+
"eval_full_en_cosine_ndcg@1": 0.7072368421052632,
|
468 |
+
"eval_full_en_cosine_ndcg@100": 0.4396726832556684,
|
469 |
+
"eval_full_en_cosine_ndcg@150": 0.4847816359827512,
|
470 |
+
"eval_full_en_cosine_ndcg@20": 0.532792025753163,
|
471 |
+
"eval_full_en_cosine_ndcg@200": 0.5323403273572274,
|
472 |
+
"eval_full_en_cosine_ndcg@50": 0.4452189433184465,
|
473 |
+
"eval_full_en_cosine_precision@1": 0.7072368421052632,
|
474 |
+
"eval_full_en_cosine_precision@100": 0.3098026315789474,
|
475 |
+
"eval_full_en_cosine_precision@150": 0.26274122807017547,
|
476 |
+
"eval_full_en_cosine_precision@20": 0.4935855263157895,
|
477 |
+
"eval_full_en_cosine_precision@200": 0.23192434210526314,
|
478 |
+
"eval_full_en_cosine_precision@50": 0.38763157894736844,
|
479 |
+
"eval_full_en_cosine_recall@1": 0.010122149362902188,
|
480 |
+
"eval_full_en_cosine_recall@100": 0.39236988612007834,
|
481 |
+
"eval_full_en_cosine_recall@150": 0.4910778378543689,
|
482 |
+
"eval_full_en_cosine_recall@20": 0.13108496301513997,
|
483 |
+
"eval_full_en_cosine_recall@200": 0.5709689534914331,
|
484 |
+
"eval_full_en_cosine_recall@50": 0.25093448303772187,
|
485 |
+
"eval_runtime": 1.5873,
|
486 |
+
"eval_samples_per_second": 0.0,
|
487 |
+
"eval_sequential_score": 0.5323403273572274,
|
488 |
+
"eval_steps_per_second": 0.0,
|
489 |
+
"step": 1600
|
490 |
+
},
|
491 |
+
{
|
492 |
+
"epoch": 1.8973214285714286,
|
493 |
+
"grad_norm": 11.858412742614746,
|
494 |
+
"learning_rate": 3.274201127819549e-05,
|
495 |
+
"loss": 1.2784,
|
496 |
+
"step": 1700
|
497 |
+
},
|
498 |
+
{
|
499 |
+
"epoch": 2.0089285714285716,
|
500 |
+
"grad_norm": 11.152688026428223,
|
501 |
+
"learning_rate": 3.1567199248120306e-05,
|
502 |
+
"loss": 1.2204,
|
503 |
+
"step": 1800
|
504 |
+
},
|
505 |
+
{
|
506 |
+
"epoch": 2.0089285714285716,
|
507 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
508 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
509 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
510 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
511 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
512 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
513 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
514 |
+
"eval_full_en_cosine_map@100": 0.2098412194483687,
|
515 |
+
"eval_full_en_cosine_map@150": 0.22663911455304064,
|
516 |
+
"eval_full_en_cosine_map@20": 0.3433147887298301,
|
517 |
+
"eval_full_en_cosine_map@200": 0.24620266722190678,
|
518 |
+
"eval_full_en_cosine_map@50": 0.23714915519951082,
|
519 |
+
"eval_full_en_cosine_map@500": 0.29690932859887553,
|
520 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
521 |
+
"eval_full_en_cosine_mrr@100": 0.8394024772357531,
|
522 |
+
"eval_full_en_cosine_mrr@150": 0.8394024772357531,
|
523 |
+
"eval_full_en_cosine_mrr@20": 0.8393426686233129,
|
524 |
+
"eval_full_en_cosine_mrr@200": 0.8394024772357531,
|
525 |
+
"eval_full_en_cosine_mrr@50": 0.8393426686233129,
|
526 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
527 |
+
"eval_full_en_cosine_ndcg@100": 0.4396519841053572,
|
528 |
+
"eval_full_en_cosine_ndcg@150": 0.4856325134708184,
|
529 |
+
"eval_full_en_cosine_ndcg@20": 0.5375317893335387,
|
530 |
+
"eval_full_en_cosine_ndcg@200": 0.533015167774829,
|
531 |
+
"eval_full_en_cosine_ndcg@50": 0.44810398395306655,
|
532 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
533 |
+
"eval_full_en_cosine_precision@100": 0.3084539473684211,
|
534 |
+
"eval_full_en_cosine_precision@150": 0.2627631578947368,
|
535 |
+
"eval_full_en_cosine_precision@20": 0.49769736842105267,
|
536 |
+
"eval_full_en_cosine_precision@200": 0.2314309210526316,
|
537 |
+
"eval_full_en_cosine_precision@50": 0.3891447368421053,
|
538 |
+
"eval_full_en_cosine_recall@1": 0.010440810366523372,
|
539 |
+
"eval_full_en_cosine_recall@100": 0.39036009395952986,
|
540 |
+
"eval_full_en_cosine_recall@150": 0.49041982254882954,
|
541 |
+
"eval_full_en_cosine_recall@20": 0.13228070304056636,
|
542 |
+
"eval_full_en_cosine_recall@200": 0.5704962189819233,
|
543 |
+
"eval_full_en_cosine_recall@50": 0.25248213212752935,
|
544 |
+
"eval_runtime": 1.6049,
|
545 |
+
"eval_samples_per_second": 0.0,
|
546 |
+
"eval_sequential_score": 0.533015167774829,
|
547 |
+
"eval_steps_per_second": 0.0,
|
548 |
+
"step": 1800
|
549 |
+
},
|
550 |
+
{
|
551 |
+
"epoch": 2.1205357142857144,
|
552 |
+
"grad_norm": 12.91015625,
|
553 |
+
"learning_rate": 3.0392387218045114e-05,
|
554 |
+
"loss": 0.9617,
|
555 |
+
"step": 1900
|
556 |
+
},
|
557 |
+
{
|
558 |
+
"epoch": 2.232142857142857,
|
559 |
+
"grad_norm": 11.646313667297363,
|
560 |
+
"learning_rate": 2.9217575187969924e-05,
|
561 |
+
"loss": 1.0004,
|
562 |
+
"step": 2000
|
563 |
+
},
|
564 |
+
{
|
565 |
+
"epoch": 2.232142857142857,
|
566 |
+
"eval_full_en_cosine_accuracy@1": 0.7236842105263158,
|
567 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
568 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
569 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
570 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
571 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
572 |
+
"eval_full_en_cosine_map@1": 0.7236842105263158,
|
573 |
+
"eval_full_en_cosine_map@100": 0.20775225168018954,
|
574 |
+
"eval_full_en_cosine_map@150": 0.22393096419950168,
|
575 |
+
"eval_full_en_cosine_map@20": 0.3380596885262807,
|
576 |
+
"eval_full_en_cosine_map@200": 0.24259765295506924,
|
577 |
+
"eval_full_en_cosine_map@50": 0.23452814948810471,
|
578 |
+
"eval_full_en_cosine_map@500": 0.2920026964508484,
|
579 |
+
"eval_full_en_cosine_mrr@1": 0.7236842105263158,
|
580 |
+
"eval_full_en_cosine_mrr@100": 0.8325452625382137,
|
581 |
+
"eval_full_en_cosine_mrr@150": 0.8325452625382137,
|
582 |
+
"eval_full_en_cosine_mrr@20": 0.8324781304222094,
|
583 |
+
"eval_full_en_cosine_mrr@200": 0.8325452625382137,
|
584 |
+
"eval_full_en_cosine_mrr@50": 0.8325452625382137,
|
585 |
+
"eval_full_en_cosine_ndcg@1": 0.7236842105263158,
|
586 |
+
"eval_full_en_cosine_ndcg@100": 0.4376001104057169,
|
587 |
+
"eval_full_en_cosine_ndcg@150": 0.48181431955382,
|
588 |
+
"eval_full_en_cosine_ndcg@20": 0.5323035546433559,
|
589 |
+
"eval_full_en_cosine_ndcg@200": 0.5276663014224582,
|
590 |
+
"eval_full_en_cosine_ndcg@50": 0.44660441452063837,
|
591 |
+
"eval_full_en_cosine_precision@1": 0.7236842105263158,
|
592 |
+
"eval_full_en_cosine_precision@100": 0.30644736842105263,
|
593 |
+
"eval_full_en_cosine_precision@150": 0.259890350877193,
|
594 |
+
"eval_full_en_cosine_precision@20": 0.4916118421052632,
|
595 |
+
"eval_full_en_cosine_precision@200": 0.2280921052631579,
|
596 |
+
"eval_full_en_cosine_precision@50": 0.3886842105263158,
|
597 |
+
"eval_full_en_cosine_recall@1": 0.010329446437905086,
|
598 |
+
"eval_full_en_cosine_recall@100": 0.38885062846601265,
|
599 |
+
"eval_full_en_cosine_recall@150": 0.4854595951837256,
|
600 |
+
"eval_full_en_cosine_recall@20": 0.131078016933875,
|
601 |
+
"eval_full_en_cosine_recall@200": 0.5630724982932908,
|
602 |
+
"eval_full_en_cosine_recall@50": 0.252357645205228,
|
603 |
+
"eval_runtime": 1.5613,
|
604 |
+
"eval_samples_per_second": 0.0,
|
605 |
+
"eval_sequential_score": 0.5276663014224582,
|
606 |
+
"eval_steps_per_second": 0.0,
|
607 |
+
"step": 2000
|
608 |
+
},
|
609 |
+
{
|
610 |
+
"epoch": 2.34375,
|
611 |
+
"grad_norm": 12.087961196899414,
|
612 |
+
"learning_rate": 2.8042763157894735e-05,
|
613 |
+
"loss": 0.9694,
|
614 |
+
"step": 2100
|
615 |
+
},
|
616 |
+
{
|
617 |
+
"epoch": 2.455357142857143,
|
618 |
+
"grad_norm": 8.181659698486328,
|
619 |
+
"learning_rate": 2.6867951127819552e-05,
|
620 |
+
"loss": 0.9843,
|
621 |
+
"step": 2200
|
622 |
+
},
|
623 |
+
{
|
624 |
+
"epoch": 2.455357142857143,
|
625 |
+
"eval_full_en_cosine_accuracy@1": 0.7236842105263158,
|
626 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
627 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
628 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
629 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
630 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
631 |
+
"eval_full_en_cosine_map@1": 0.7236842105263158,
|
632 |
+
"eval_full_en_cosine_map@100": 0.209953160245849,
|
633 |
+
"eval_full_en_cosine_map@150": 0.22760030144833215,
|
634 |
+
"eval_full_en_cosine_map@20": 0.34078157961918865,
|
635 |
+
"eval_full_en_cosine_map@200": 0.24749824184265867,
|
636 |
+
"eval_full_en_cosine_map@50": 0.2365248444512811,
|
637 |
+
"eval_full_en_cosine_map@500": 0.29789431690676116,
|
638 |
+
"eval_full_en_cosine_mrr@1": 0.7236842105263158,
|
639 |
+
"eval_full_en_cosine_mrr@100": 0.8318935359231412,
|
640 |
+
"eval_full_en_cosine_mrr@150": 0.8318935359231412,
|
641 |
+
"eval_full_en_cosine_mrr@20": 0.8316833751044278,
|
642 |
+
"eval_full_en_cosine_mrr@200": 0.8318935359231412,
|
643 |
+
"eval_full_en_cosine_mrr@50": 0.8318935359231412,
|
644 |
+
"eval_full_en_cosine_ndcg@1": 0.7236842105263158,
|
645 |
+
"eval_full_en_cosine_ndcg@100": 0.44076958126493176,
|
646 |
+
"eval_full_en_cosine_ndcg@150": 0.48838061313116793,
|
647 |
+
"eval_full_en_cosine_ndcg@20": 0.5350320556020238,
|
648 |
+
"eval_full_en_cosine_ndcg@200": 0.5355574509263721,
|
649 |
+
"eval_full_en_cosine_ndcg@50": 0.44803994906340594,
|
650 |
+
"eval_full_en_cosine_precision@1": 0.7236842105263158,
|
651 |
+
"eval_full_en_cosine_precision@100": 0.3099671052631579,
|
652 |
+
"eval_full_en_cosine_precision@150": 0.2648464912280702,
|
653 |
+
"eval_full_en_cosine_precision@20": 0.49588815789473684,
|
654 |
+
"eval_full_en_cosine_precision@200": 0.23342105263157892,
|
655 |
+
"eval_full_en_cosine_precision@50": 0.39052631578947367,
|
656 |
+
"eval_full_en_cosine_recall@1": 0.010284539147879572,
|
657 |
+
"eval_full_en_cosine_recall@100": 0.39296182819932773,
|
658 |
+
"eval_full_en_cosine_recall@150": 0.4959148528891931,
|
659 |
+
"eval_full_en_cosine_recall@20": 0.13200577828629578,
|
660 |
+
"eval_full_en_cosine_recall@200": 0.5749370249014907,
|
661 |
+
"eval_full_en_cosine_recall@50": 0.25310992970173135,
|
662 |
+
"eval_runtime": 1.8632,
|
663 |
+
"eval_samples_per_second": 0.0,
|
664 |
+
"eval_sequential_score": 0.5355574509263721,
|
665 |
+
"eval_steps_per_second": 0.0,
|
666 |
+
"step": 2200
|
667 |
+
},
|
668 |
+
{
|
669 |
+
"epoch": 2.5669642857142856,
|
670 |
+
"grad_norm": 13.6882905960083,
|
671 |
+
"learning_rate": 2.5693139097744363e-05,
|
672 |
+
"loss": 0.9743,
|
673 |
+
"step": 2300
|
674 |
+
},
|
675 |
+
{
|
676 |
+
"epoch": 2.678571428571429,
|
677 |
+
"grad_norm": 11.966975212097168,
|
678 |
+
"learning_rate": 2.4518327067669177e-05,
|
679 |
+
"loss": 0.9252,
|
680 |
+
"step": 2400
|
681 |
+
},
|
682 |
+
{
|
683 |
+
"epoch": 2.678571428571429,
|
684 |
+
"eval_full_en_cosine_accuracy@1": 0.7335526315789473,
|
685 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
686 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
687 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
688 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
689 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
690 |
+
"eval_full_en_cosine_map@1": 0.7335526315789473,
|
691 |
+
"eval_full_en_cosine_map@100": 0.20983286336268822,
|
692 |
+
"eval_full_en_cosine_map@150": 0.22675852672419078,
|
693 |
+
"eval_full_en_cosine_map@20": 0.34004090105732804,
|
694 |
+
"eval_full_en_cosine_map@200": 0.24584993568226646,
|
695 |
+
"eval_full_en_cosine_map@50": 0.23672594782424658,
|
696 |
+
"eval_full_en_cosine_map@500": 0.29632183596698103,
|
697 |
+
"eval_full_en_cosine_mrr@1": 0.7335526315789473,
|
698 |
+
"eval_full_en_cosine_mrr@100": 0.83135268727374,
|
699 |
+
"eval_full_en_cosine_mrr@150": 0.83135268727374,
|
700 |
+
"eval_full_en_cosine_mrr@20": 0.8311351294903929,
|
701 |
+
"eval_full_en_cosine_mrr@200": 0.83135268727374,
|
702 |
+
"eval_full_en_cosine_mrr@50": 0.8312917710944029,
|
703 |
+
"eval_full_en_cosine_ndcg@1": 0.7335526315789473,
|
704 |
+
"eval_full_en_cosine_ndcg@100": 0.4400577813719261,
|
705 |
+
"eval_full_en_cosine_ndcg@150": 0.4859220111165228,
|
706 |
+
"eval_full_en_cosine_ndcg@20": 0.5344170691501652,
|
707 |
+
"eval_full_en_cosine_ndcg@200": 0.5320416498978522,
|
708 |
+
"eval_full_en_cosine_ndcg@50": 0.4485020943766835,
|
709 |
+
"eval_full_en_cosine_precision@1": 0.7335526315789473,
|
710 |
+
"eval_full_en_cosine_precision@100": 0.30907894736842106,
|
711 |
+
"eval_full_en_cosine_precision@150": 0.26278508771929826,
|
712 |
+
"eval_full_en_cosine_precision@20": 0.4960526315789474,
|
713 |
+
"eval_full_en_cosine_precision@200": 0.23090460526315787,
|
714 |
+
"eval_full_en_cosine_precision@50": 0.39151315789473684,
|
715 |
+
"eval_full_en_cosine_recall@1": 0.010402156873475942,
|
716 |
+
"eval_full_en_cosine_recall@100": 0.39206565501916524,
|
717 |
+
"eval_full_en_cosine_recall@150": 0.49176955829136443,
|
718 |
+
"eval_full_en_cosine_recall@20": 0.1321996647113643,
|
719 |
+
"eval_full_en_cosine_recall@200": 0.569344104113959,
|
720 |
+
"eval_full_en_cosine_recall@50": 0.2535254041631645,
|
721 |
+
"eval_runtime": 1.5826,
|
722 |
+
"eval_samples_per_second": 0.0,
|
723 |
+
"eval_sequential_score": 0.5320416498978522,
|
724 |
+
"eval_steps_per_second": 0.0,
|
725 |
+
"step": 2400
|
726 |
+
},
|
727 |
+
{
|
728 |
+
"epoch": 2.790178571428571,
|
729 |
+
"grad_norm": 11.857823371887207,
|
730 |
+
"learning_rate": 2.3343515037593984e-05,
|
731 |
+
"loss": 0.9272,
|
732 |
+
"step": 2500
|
733 |
+
},
|
734 |
+
{
|
735 |
+
"epoch": 2.9017857142857144,
|
736 |
+
"grad_norm": 12.297764778137207,
|
737 |
+
"learning_rate": 2.2168703007518798e-05,
|
738 |
+
"loss": 0.9279,
|
739 |
+
"step": 2600
|
740 |
+
},
|
741 |
+
{
|
742 |
+
"epoch": 2.9017857142857144,
|
743 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
744 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
745 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
746 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
747 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
748 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
749 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
750 |
+
"eval_full_en_cosine_map@100": 0.20939105710550232,
|
751 |
+
"eval_full_en_cosine_map@150": 0.22725165687553775,
|
752 |
+
"eval_full_en_cosine_map@20": 0.3403680329074837,
|
753 |
+
"eval_full_en_cosine_map@200": 0.24658865195474836,
|
754 |
+
"eval_full_en_cosine_map@50": 0.23612691752121232,
|
755 |
+
"eval_full_en_cosine_map@500": 0.29718900909315255,
|
756 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
757 |
+
"eval_full_en_cosine_mrr@100": 0.8391709003546018,
|
758 |
+
"eval_full_en_cosine_mrr@150": 0.8391709003546018,
|
759 |
+
"eval_full_en_cosine_mrr@20": 0.8391064008705977,
|
760 |
+
"eval_full_en_cosine_mrr@200": 0.8391709003546018,
|
761 |
+
"eval_full_en_cosine_mrr@50": 0.8391064008705977,
|
762 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
763 |
+
"eval_full_en_cosine_ndcg@100": 0.4389185422351881,
|
764 |
+
"eval_full_en_cosine_ndcg@150": 0.4868646893605612,
|
765 |
+
"eval_full_en_cosine_ndcg@20": 0.5359014833764041,
|
766 |
+
"eval_full_en_cosine_ndcg@200": 0.5332804255738979,
|
767 |
+
"eval_full_en_cosine_ndcg@50": 0.44749591453362436,
|
768 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
769 |
+
"eval_full_en_cosine_precision@100": 0.30779605263157894,
|
770 |
+
"eval_full_en_cosine_precision@150": 0.26355263157894737,
|
771 |
+
"eval_full_en_cosine_precision@20": 0.49588815789473684,
|
772 |
+
"eval_full_en_cosine_precision@200": 0.2316282894736842,
|
773 |
+
"eval_full_en_cosine_precision@50": 0.38901315789473684,
|
774 |
+
"eval_full_en_cosine_recall@1": 0.010425572953236805,
|
775 |
+
"eval_full_en_cosine_recall@100": 0.3892001066901767,
|
776 |
+
"eval_full_en_cosine_recall@150": 0.492569756570653,
|
777 |
+
"eval_full_en_cosine_recall@20": 0.13284603422933672,
|
778 |
+
"eval_full_en_cosine_recall@200": 0.5706210722984945,
|
779 |
+
"eval_full_en_cosine_recall@50": 0.2518705529759721,
|
780 |
+
"eval_runtime": 1.577,
|
781 |
+
"eval_samples_per_second": 0.0,
|
782 |
+
"eval_sequential_score": 0.5332804255738979,
|
783 |
+
"eval_steps_per_second": 0.0,
|
784 |
+
"step": 2600
|
785 |
+
},
|
786 |
+
{
|
787 |
+
"epoch": 3.013392857142857,
|
788 |
+
"grad_norm": 12.120986938476562,
|
789 |
+
"learning_rate": 2.099389097744361e-05,
|
790 |
+
"loss": 0.857,
|
791 |
+
"step": 2700
|
792 |
+
},
|
793 |
+
{
|
794 |
+
"epoch": 3.125,
|
795 |
+
"grad_norm": 14.276410102844238,
|
796 |
+
"learning_rate": 1.9819078947368423e-05,
|
797 |
+
"loss": 0.7313,
|
798 |
+
"step": 2800
|
799 |
+
},
|
800 |
+
{
|
801 |
+
"epoch": 3.125,
|
802 |
+
"eval_full_en_cosine_accuracy@1": 0.7269736842105263,
|
803 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
804 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
805 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
806 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
807 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
808 |
+
"eval_full_en_cosine_map@1": 0.7269736842105263,
|
809 |
+
"eval_full_en_cosine_map@100": 0.20830025965749158,
|
810 |
+
"eval_full_en_cosine_map@150": 0.22525408557521698,
|
811 |
+
"eval_full_en_cosine_map@20": 0.34094306993307805,
|
812 |
+
"eval_full_en_cosine_map@200": 0.24400549054611867,
|
813 |
+
"eval_full_en_cosine_map@50": 0.23400685602624646,
|
814 |
+
"eval_full_en_cosine_map@500": 0.29401532392219154,
|
815 |
+
"eval_full_en_cosine_mrr@1": 0.7269736842105263,
|
816 |
+
"eval_full_en_cosine_mrr@100": 0.8315051952798665,
|
817 |
+
"eval_full_en_cosine_mrr@150": 0.8315051952798665,
|
818 |
+
"eval_full_en_cosine_mrr@20": 0.8314268744778616,
|
819 |
+
"eval_full_en_cosine_mrr@200": 0.8315051952798665,
|
820 |
+
"eval_full_en_cosine_mrr@50": 0.8315051952798665,
|
821 |
+
"eval_full_en_cosine_ndcg@1": 0.7269736842105263,
|
822 |
+
"eval_full_en_cosine_ndcg@100": 0.43885977048304636,
|
823 |
+
"eval_full_en_cosine_ndcg@150": 0.48486671483618976,
|
824 |
+
"eval_full_en_cosine_ndcg@20": 0.5365677326031855,
|
825 |
+
"eval_full_en_cosine_ndcg@200": 0.5299990147795507,
|
826 |
+
"eval_full_en_cosine_ndcg@50": 0.44591298214905706,
|
827 |
+
"eval_full_en_cosine_precision@1": 0.7269736842105263,
|
828 |
+
"eval_full_en_cosine_precision@100": 0.308125,
|
829 |
+
"eval_full_en_cosine_precision@150": 0.2621052631578948,
|
830 |
+
"eval_full_en_cosine_precision@20": 0.49786184210526313,
|
831 |
+
"eval_full_en_cosine_precision@200": 0.22980263157894737,
|
832 |
+
"eval_full_en_cosine_precision@50": 0.3870394736842105,
|
833 |
+
"eval_full_en_cosine_recall@1": 0.010317820884117123,
|
834 |
+
"eval_full_en_cosine_recall@100": 0.38998825691236244,
|
835 |
+
"eval_full_en_cosine_recall@150": 0.4900687458798103,
|
836 |
+
"eval_full_en_cosine_recall@20": 0.13271573138828288,
|
837 |
+
"eval_full_en_cosine_recall@200": 0.5659226272090475,
|
838 |
+
"eval_full_en_cosine_recall@50": 0.25218483369820577,
|
839 |
+
"eval_runtime": 1.607,
|
840 |
+
"eval_samples_per_second": 0.0,
|
841 |
+
"eval_sequential_score": 0.5299990147795507,
|
842 |
+
"eval_steps_per_second": 0.0,
|
843 |
+
"step": 2800
|
844 |
+
},
|
845 |
+
{
|
846 |
+
"epoch": 3.236607142857143,
|
847 |
+
"grad_norm": 8.85190486907959,
|
848 |
+
"learning_rate": 1.8644266917293237e-05,
|
849 |
+
"loss": 0.7103,
|
850 |
+
"step": 2900
|
851 |
+
},
|
852 |
+
{
|
853 |
+
"epoch": 3.3482142857142856,
|
854 |
+
"grad_norm": 8.932626724243164,
|
855 |
+
"learning_rate": 1.7469454887218044e-05,
|
856 |
+
"loss": 0.7187,
|
857 |
+
"step": 3000
|
858 |
+
},
|
859 |
+
{
|
860 |
+
"epoch": 3.3482142857142856,
|
861 |
+
"eval_full_en_cosine_accuracy@1": 0.7269736842105263,
|
862 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
863 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
864 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
865 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
866 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
867 |
+
"eval_full_en_cosine_map@1": 0.7269736842105263,
|
868 |
+
"eval_full_en_cosine_map@100": 0.20842370079433947,
|
869 |
+
"eval_full_en_cosine_map@150": 0.22608431932756923,
|
870 |
+
"eval_full_en_cosine_map@20": 0.34026464907579207,
|
871 |
+
"eval_full_en_cosine_map@200": 0.2451065024940476,
|
872 |
+
"eval_full_en_cosine_map@50": 0.23418777403622906,
|
873 |
+
"eval_full_en_cosine_map@500": 0.2945476002258968,
|
874 |
+
"eval_full_en_cosine_mrr@1": 0.7269736842105263,
|
875 |
+
"eval_full_en_cosine_mrr@100": 0.8303256958684593,
|
876 |
+
"eval_full_en_cosine_mrr@150": 0.8303256958684593,
|
877 |
+
"eval_full_en_cosine_mrr@20": 0.830265887256019,
|
878 |
+
"eval_full_en_cosine_mrr@200": 0.8303256958684593,
|
879 |
+
"eval_full_en_cosine_mrr@50": 0.830265887256019,
|
880 |
+
"eval_full_en_cosine_ndcg@1": 0.7269736842105263,
|
881 |
+
"eval_full_en_cosine_ndcg@100": 0.4379203478644915,
|
882 |
+
"eval_full_en_cosine_ndcg@150": 0.4860723616469748,
|
883 |
+
"eval_full_en_cosine_ndcg@20": 0.534483012777908,
|
884 |
+
"eval_full_en_cosine_ndcg@200": 0.5318565059446251,
|
885 |
+
"eval_full_en_cosine_ndcg@50": 0.4443024102705765,
|
886 |
+
"eval_full_en_cosine_precision@1": 0.7269736842105263,
|
887 |
+
"eval_full_en_cosine_precision@100": 0.30750000000000005,
|
888 |
+
"eval_full_en_cosine_precision@150": 0.26370614035087714,
|
889 |
+
"eval_full_en_cosine_precision@20": 0.49588815789473684,
|
890 |
+
"eval_full_en_cosine_precision@200": 0.23116776315789475,
|
891 |
+
"eval_full_en_cosine_precision@50": 0.38539473684210523,
|
892 |
+
"eval_full_en_cosine_recall@1": 0.010298189290703101,
|
893 |
+
"eval_full_en_cosine_recall@100": 0.38891472258186655,
|
894 |
+
"eval_full_en_cosine_recall@150": 0.4925623824521817,
|
895 |
+
"eval_full_en_cosine_recall@20": 0.13215936080151625,
|
896 |
+
"eval_full_en_cosine_recall@200": 0.5698259119139981,
|
897 |
+
"eval_full_en_cosine_recall@50": 0.2502092759755724,
|
898 |
+
"eval_runtime": 1.6179,
|
899 |
+
"eval_samples_per_second": 0.0,
|
900 |
+
"eval_sequential_score": 0.5318565059446251,
|
901 |
+
"eval_steps_per_second": 0.0,
|
902 |
+
"step": 3000
|
903 |
+
},
|
904 |
+
{
|
905 |
+
"epoch": 3.4598214285714284,
|
906 |
+
"grad_norm": 12.761665344238281,
|
907 |
+
"learning_rate": 1.6294642857142858e-05,
|
908 |
+
"loss": 0.7067,
|
909 |
+
"step": 3100
|
910 |
+
},
|
911 |
+
{
|
912 |
+
"epoch": 3.571428571428571,
|
913 |
+
"grad_norm": 12.318887710571289,
|
914 |
+
"learning_rate": 1.5119830827067668e-05,
|
915 |
+
"loss": 0.7157,
|
916 |
+
"step": 3200
|
917 |
+
},
|
918 |
+
{
|
919 |
+
"epoch": 3.571428571428571,
|
920 |
+
"eval_full_en_cosine_accuracy@1": 0.7072368421052632,
|
921 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
922 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
923 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
924 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
925 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
926 |
+
"eval_full_en_cosine_map@1": 0.7072368421052632,
|
927 |
+
"eval_full_en_cosine_map@100": 0.21126096647489126,
|
928 |
+
"eval_full_en_cosine_map@150": 0.22897332387217115,
|
929 |
+
"eval_full_en_cosine_map@20": 0.34020926250086975,
|
930 |
+
"eval_full_en_cosine_map@200": 0.24883265008518762,
|
931 |
+
"eval_full_en_cosine_map@50": 0.2366562995235259,
|
932 |
+
"eval_full_en_cosine_map@500": 0.30009134506130936,
|
933 |
+
"eval_full_en_cosine_mrr@1": 0.7072368421052632,
|
934 |
+
"eval_full_en_cosine_mrr@100": 0.8208446325794724,
|
935 |
+
"eval_full_en_cosine_mrr@150": 0.8208446325794724,
|
936 |
+
"eval_full_en_cosine_mrr@20": 0.8206285125693021,
|
937 |
+
"eval_full_en_cosine_mrr@200": 0.8208446325794724,
|
938 |
+
"eval_full_en_cosine_mrr@50": 0.8208446325794724,
|
939 |
+
"eval_full_en_cosine_ndcg@1": 0.7072368421052632,
|
940 |
+
"eval_full_en_cosine_ndcg@100": 0.4420871692985379,
|
941 |
+
"eval_full_en_cosine_ndcg@150": 0.48983718804719595,
|
942 |
+
"eval_full_en_cosine_ndcg@20": 0.5349182539944062,
|
943 |
+
"eval_full_en_cosine_ndcg@200": 0.5368995914478877,
|
944 |
+
"eval_full_en_cosine_ndcg@50": 0.4481578438397021,
|
945 |
+
"eval_full_en_cosine_precision@1": 0.7072368421052632,
|
946 |
+
"eval_full_en_cosine_precision@100": 0.3118421052631579,
|
947 |
+
"eval_full_en_cosine_precision@150": 0.26625,
|
948 |
+
"eval_full_en_cosine_precision@20": 0.49786184210526313,
|
949 |
+
"eval_full_en_cosine_precision@200": 0.2341282894736842,
|
950 |
+
"eval_full_en_cosine_precision@50": 0.39125,
|
951 |
+
"eval_full_en_cosine_recall@1": 0.010071368365416018,
|
952 |
+
"eval_full_en_cosine_recall@100": 0.39435465355460575,
|
953 |
+
"eval_full_en_cosine_recall@150": 0.49776297598034985,
|
954 |
+
"eval_full_en_cosine_recall@20": 0.1332224887798492,
|
955 |
+
"eval_full_en_cosine_recall@200": 0.5769437157052201,
|
956 |
+
"eval_full_en_cosine_recall@50": 0.25406609475829245,
|
957 |
+
"eval_runtime": 1.5833,
|
958 |
+
"eval_samples_per_second": 0.0,
|
959 |
+
"eval_sequential_score": 0.5368995914478877,
|
960 |
+
"eval_steps_per_second": 0.0,
|
961 |
+
"step": 3200
|
962 |
+
},
|
963 |
+
{
|
964 |
+
"epoch": 3.6830357142857144,
|
965 |
+
"grad_norm": 10.974320411682129,
|
966 |
+
"learning_rate": 1.3945018796992482e-05,
|
967 |
+
"loss": 0.7113,
|
968 |
+
"step": 3300
|
969 |
+
},
|
970 |
+
{
|
971 |
+
"epoch": 3.794642857142857,
|
972 |
+
"grad_norm": 11.004631042480469,
|
973 |
+
"learning_rate": 1.2770206766917295e-05,
|
974 |
+
"loss": 0.7013,
|
975 |
+
"step": 3400
|
976 |
+
},
|
977 |
+
{
|
978 |
+
"epoch": 3.794642857142857,
|
979 |
+
"eval_full_en_cosine_accuracy@1": 0.7269736842105263,
|
980 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
981 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
982 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
983 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
984 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
985 |
+
"eval_full_en_cosine_map@1": 0.7269736842105263,
|
986 |
+
"eval_full_en_cosine_map@100": 0.20998333195374114,
|
987 |
+
"eval_full_en_cosine_map@150": 0.22683318021248486,
|
988 |
+
"eval_full_en_cosine_map@20": 0.34034679376659244,
|
989 |
+
"eval_full_en_cosine_map@200": 0.24654495691213385,
|
990 |
+
"eval_full_en_cosine_map@50": 0.23617479010012724,
|
991 |
+
"eval_full_en_cosine_map@500": 0.29617185416029185,
|
992 |
+
"eval_full_en_cosine_mrr@1": 0.7269736842105263,
|
993 |
+
"eval_full_en_cosine_mrr@100": 0.8291805255603549,
|
994 |
+
"eval_full_en_cosine_mrr@150": 0.8291805255603549,
|
995 |
+
"eval_full_en_cosine_mrr@20": 0.8291105367585632,
|
996 |
+
"eval_full_en_cosine_mrr@200": 0.8291805255603549,
|
997 |
+
"eval_full_en_cosine_mrr@50": 0.8291805255603549,
|
998 |
+
"eval_full_en_cosine_ndcg@1": 0.7269736842105263,
|
999 |
+
"eval_full_en_cosine_ndcg@100": 0.4407299508694298,
|
1000 |
+
"eval_full_en_cosine_ndcg@150": 0.48655314671133576,
|
1001 |
+
"eval_full_en_cosine_ndcg@20": 0.5349966588302529,
|
1002 |
+
"eval_full_en_cosine_ndcg@200": 0.5341334488223752,
|
1003 |
+
"eval_full_en_cosine_ndcg@50": 0.448065635044085,
|
1004 |
+
"eval_full_en_cosine_precision@1": 0.7269736842105263,
|
1005 |
+
"eval_full_en_cosine_precision@100": 0.30973684210526314,
|
1006 |
+
"eval_full_en_cosine_precision@150": 0.26320175438596494,
|
1007 |
+
"eval_full_en_cosine_precision@20": 0.4965460526315789,
|
1008 |
+
"eval_full_en_cosine_precision@200": 0.23210526315789473,
|
1009 |
+
"eval_full_en_cosine_precision@50": 0.3907894736842106,
|
1010 |
+
"eval_full_en_cosine_recall@1": 0.010311461817674684,
|
1011 |
+
"eval_full_en_cosine_recall@100": 0.3931693265429022,
|
1012 |
+
"eval_full_en_cosine_recall@150": 0.49300140763214356,
|
1013 |
+
"eval_full_en_cosine_recall@20": 0.1329270784727238,
|
1014 |
+
"eval_full_en_cosine_recall@200": 0.573228327517634,
|
1015 |
+
"eval_full_en_cosine_recall@50": 0.25330386821616296,
|
1016 |
+
"eval_runtime": 1.577,
|
1017 |
+
"eval_samples_per_second": 0.0,
|
1018 |
+
"eval_sequential_score": 0.5341334488223752,
|
1019 |
+
"eval_steps_per_second": 0.0,
|
1020 |
+
"step": 3400
|
1021 |
+
},
|
1022 |
+
{
|
1023 |
+
"epoch": 3.90625,
|
1024 |
+
"grad_norm": 12.102640151977539,
|
1025 |
+
"learning_rate": 1.1595394736842107e-05,
|
1026 |
+
"loss": 0.6903,
|
1027 |
+
"step": 3500
|
1028 |
+
},
|
1029 |
+
{
|
1030 |
+
"epoch": 4.017857142857143,
|
1031 |
+
"grad_norm": 7.348757743835449,
|
1032 |
+
"learning_rate": 1.0420582706766918e-05,
|
1033 |
+
"loss": 0.6462,
|
1034 |
+
"step": 3600
|
1035 |
+
},
|
1036 |
+
{
|
1037 |
+
"epoch": 4.017857142857143,
|
1038 |
+
"eval_full_en_cosine_accuracy@1": 0.7203947368421053,
|
1039 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
1040 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
1041 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
1042 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
1043 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
1044 |
+
"eval_full_en_cosine_map@1": 0.7203947368421053,
|
1045 |
+
"eval_full_en_cosine_map@100": 0.2102732775077637,
|
1046 |
+
"eval_full_en_cosine_map@150": 0.22767943965852241,
|
1047 |
+
"eval_full_en_cosine_map@20": 0.338502447126724,
|
1048 |
+
"eval_full_en_cosine_map@200": 0.24667619158922902,
|
1049 |
+
"eval_full_en_cosine_map@50": 0.23576300870587916,
|
1050 |
+
"eval_full_en_cosine_map@500": 0.2971463650911015,
|
1051 |
+
"eval_full_en_cosine_mrr@1": 0.7203947368421053,
|
1052 |
+
"eval_full_en_cosine_mrr@100": 0.8263833835420962,
|
1053 |
+
"eval_full_en_cosine_mrr@150": 0.8263833835420962,
|
1054 |
+
"eval_full_en_cosine_mrr@20": 0.8263213180008847,
|
1055 |
+
"eval_full_en_cosine_mrr@200": 0.8263833835420962,
|
1056 |
+
"eval_full_en_cosine_mrr@50": 0.8263213180008847,
|
1057 |
+
"eval_full_en_cosine_ndcg@1": 0.7203947368421053,
|
1058 |
+
"eval_full_en_cosine_ndcg@100": 0.44114478517461736,
|
1059 |
+
"eval_full_en_cosine_ndcg@150": 0.4883455168714466,
|
1060 |
+
"eval_full_en_cosine_ndcg@20": 0.53288860900767,
|
1061 |
+
"eval_full_en_cosine_ndcg@200": 0.5334866046140189,
|
1062 |
+
"eval_full_en_cosine_ndcg@50": 0.4473951526251337,
|
1063 |
+
"eval_full_en_cosine_precision@1": 0.7203947368421053,
|
1064 |
+
"eval_full_en_cosine_precision@100": 0.31078947368421056,
|
1065 |
+
"eval_full_en_cosine_precision@150": 0.265219298245614,
|
1066 |
+
"eval_full_en_cosine_precision@20": 0.4934210526315789,
|
1067 |
+
"eval_full_en_cosine_precision@200": 0.23212171052631578,
|
1068 |
+
"eval_full_en_cosine_precision@50": 0.3899342105263158,
|
1069 |
+
"eval_full_en_cosine_recall@1": 0.01018155854728512,
|
1070 |
+
"eval_full_en_cosine_recall@100": 0.3935816727444405,
|
1071 |
+
"eval_full_en_cosine_recall@150": 0.4958028561341766,
|
1072 |
+
"eval_full_en_cosine_recall@20": 0.13181077303144853,
|
1073 |
+
"eval_full_en_cosine_recall@200": 0.5716317929962068,
|
1074 |
+
"eval_full_en_cosine_recall@50": 0.25274553753777246,
|
1075 |
+
"eval_runtime": 1.6024,
|
1076 |
+
"eval_samples_per_second": 0.0,
|
1077 |
+
"eval_sequential_score": 0.5334866046140189,
|
1078 |
+
"eval_steps_per_second": 0.0,
|
1079 |
+
"step": 3600
|
1080 |
+
},
|
1081 |
+
{
|
1082 |
+
"epoch": 4.129464285714286,
|
1083 |
+
"grad_norm": 8.786450386047363,
|
1084 |
+
"learning_rate": 9.24577067669173e-06,
|
1085 |
+
"loss": 0.5162,
|
1086 |
+
"step": 3700
|
1087 |
+
},
|
1088 |
+
{
|
1089 |
+
"epoch": 4.241071428571429,
|
1090 |
+
"grad_norm": 10.602435111999512,
|
1091 |
+
"learning_rate": 8.070958646616542e-06,
|
1092 |
+
"loss": 0.524,
|
1093 |
+
"step": 3800
|
1094 |
+
},
|
1095 |
+
{
|
1096 |
+
"epoch": 4.241071428571429,
|
1097 |
+
"eval_full_en_cosine_accuracy@1": 0.7302631578947368,
|
1098 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
1099 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
1100 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
1101 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
1102 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
1103 |
+
"eval_full_en_cosine_map@1": 0.7302631578947368,
|
1104 |
+
"eval_full_en_cosine_map@100": 0.21150798737582682,
|
1105 |
+
"eval_full_en_cosine_map@150": 0.22868847990327232,
|
1106 |
+
"eval_full_en_cosine_map@20": 0.3411525812655742,
|
1107 |
+
"eval_full_en_cosine_map@200": 0.2480155691306444,
|
1108 |
+
"eval_full_en_cosine_map@50": 0.23814436251631807,
|
1109 |
+
"eval_full_en_cosine_map@500": 0.29792672341621373,
|
1110 |
+
"eval_full_en_cosine_mrr@1": 0.7302631578947368,
|
1111 |
+
"eval_full_en_cosine_mrr@100": 0.8323485085820613,
|
1112 |
+
"eval_full_en_cosine_mrr@150": 0.8323485085820613,
|
1113 |
+
"eval_full_en_cosine_mrr@20": 0.8321467731829576,
|
1114 |
+
"eval_full_en_cosine_mrr@200": 0.8323485085820613,
|
1115 |
+
"eval_full_en_cosine_mrr@50": 0.832296294714058,
|
1116 |
+
"eval_full_en_cosine_ndcg@1": 0.7302631578947368,
|
1117 |
+
"eval_full_en_cosine_ndcg@100": 0.44247378999755477,
|
1118 |
+
"eval_full_en_cosine_ndcg@150": 0.48886293038433404,
|
1119 |
+
"eval_full_en_cosine_ndcg@20": 0.5351701323930714,
|
1120 |
+
"eval_full_en_cosine_ndcg@200": 0.5352268343210608,
|
1121 |
+
"eval_full_en_cosine_ndcg@50": 0.4502625298651447,
|
1122 |
+
"eval_full_en_cosine_precision@1": 0.7302631578947368,
|
1123 |
+
"eval_full_en_cosine_precision@100": 0.311546052631579,
|
1124 |
+
"eval_full_en_cosine_precision@150": 0.265219298245614,
|
1125 |
+
"eval_full_en_cosine_precision@20": 0.49588815789473684,
|
1126 |
+
"eval_full_en_cosine_precision@200": 0.23268092105263163,
|
1127 |
+
"eval_full_en_cosine_precision@50": 0.3930921052631579,
|
1128 |
+
"eval_full_en_cosine_recall@1": 0.010244630514181254,
|
1129 |
+
"eval_full_en_cosine_recall@100": 0.39498767852245736,
|
1130 |
+
"eval_full_en_cosine_recall@150": 0.49574169519464223,
|
1131 |
+
"eval_full_en_cosine_recall@20": 0.1324589336710221,
|
1132 |
+
"eval_full_en_cosine_recall@200": 0.574019804020236,
|
1133 |
+
"eval_full_en_cosine_recall@50": 0.2548099607629461,
|
1134 |
+
"eval_runtime": 1.5919,
|
1135 |
+
"eval_samples_per_second": 0.0,
|
1136 |
+
"eval_sequential_score": 0.5352268343210608,
|
1137 |
+
"eval_steps_per_second": 0.0,
|
1138 |
+
"step": 3800
|
1139 |
+
},
|
1140 |
+
{
|
1141 |
+
"epoch": 4.352678571428571,
|
1142 |
+
"grad_norm": 11.65066909790039,
|
1143 |
+
"learning_rate": 6.896146616541354e-06,
|
1144 |
+
"loss": 0.5303,
|
1145 |
+
"step": 3900
|
1146 |
+
},
|
1147 |
+
{
|
1148 |
+
"epoch": 4.464285714285714,
|
1149 |
+
"grad_norm": 10.764215469360352,
|
1150 |
+
"learning_rate": 5.721334586466166e-06,
|
1151 |
+
"loss": 0.5269,
|
1152 |
+
"step": 4000
|
1153 |
+
},
|
1154 |
+
{
|
1155 |
+
"epoch": 4.464285714285714,
|
1156 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
1157 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
1158 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
1159 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
1160 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
1161 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
1162 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
1163 |
+
"eval_full_en_cosine_map@100": 0.2101198919267321,
|
1164 |
+
"eval_full_en_cosine_map@150": 0.2276536266469315,
|
1165 |
+
"eval_full_en_cosine_map@20": 0.34076177455520346,
|
1166 |
+
"eval_full_en_cosine_map@200": 0.24678319516569472,
|
1167 |
+
"eval_full_en_cosine_map@50": 0.23677969810249233,
|
1168 |
+
"eval_full_en_cosine_map@500": 0.297249372287514,
|
1169 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
1170 |
+
"eval_full_en_cosine_mrr@100": 0.8373899157616261,
|
1171 |
+
"eval_full_en_cosine_mrr@150": 0.8373899157616261,
|
1172 |
+
"eval_full_en_cosine_mrr@20": 0.837172357978279,
|
1173 |
+
"eval_full_en_cosine_mrr@200": 0.8373899157616261,
|
1174 |
+
"eval_full_en_cosine_mrr@50": 0.837328999582289,
|
1175 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
1176 |
+
"eval_full_en_cosine_ndcg@100": 0.4408521323246635,
|
1177 |
+
"eval_full_en_cosine_ndcg@150": 0.48834055710549873,
|
1178 |
+
"eval_full_en_cosine_ndcg@20": 0.5353264293739176,
|
1179 |
+
"eval_full_en_cosine_ndcg@200": 0.5341206282180626,
|
1180 |
+
"eval_full_en_cosine_ndcg@50": 0.44939083758113085,
|
1181 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
1182 |
+
"eval_full_en_cosine_precision@100": 0.30953947368421053,
|
1183 |
+
"eval_full_en_cosine_precision@150": 0.26460526315789473,
|
1184 |
+
"eval_full_en_cosine_precision@20": 0.4947368421052632,
|
1185 |
+
"eval_full_en_cosine_precision@200": 0.23187500000000003,
|
1186 |
+
"eval_full_en_cosine_precision@50": 0.3913815789473684,
|
1187 |
+
"eval_full_en_cosine_recall@1": 0.010305566449078924,
|
1188 |
+
"eval_full_en_cosine_recall@100": 0.3922740640225546,
|
1189 |
+
"eval_full_en_cosine_recall@150": 0.4949163913773604,
|
1190 |
+
"eval_full_en_cosine_recall@20": 0.13233275450376297,
|
1191 |
+
"eval_full_en_cosine_recall@200": 0.572041877895568,
|
1192 |
+
"eval_full_en_cosine_recall@50": 0.2535655251683108,
|
1193 |
+
"eval_runtime": 1.5798,
|
1194 |
+
"eval_samples_per_second": 0.0,
|
1195 |
+
"eval_sequential_score": 0.5341206282180626,
|
1196 |
+
"eval_steps_per_second": 0.0,
|
1197 |
+
"step": 4000
|
1198 |
+
}
|
1199 |
+
],
|
1200 |
+
"logging_steps": 100,
|
1201 |
+
"max_steps": 4480,
|
1202 |
+
"num_input_tokens_seen": 0,
|
1203 |
+
"num_train_epochs": 5,
|
1204 |
+
"save_steps": 200,
|
1205 |
+
"stateful_callbacks": {
|
1206 |
+
"TrainerControl": {
|
1207 |
+
"args": {
|
1208 |
+
"should_epoch_stop": false,
|
1209 |
+
"should_evaluate": false,
|
1210 |
+
"should_log": false,
|
1211 |
+
"should_save": true,
|
1212 |
+
"should_training_stop": false
|
1213 |
+
},
|
1214 |
+
"attributes": {}
|
1215 |
+
}
|
1216 |
+
},
|
1217 |
+
"total_flos": 0.0,
|
1218 |
+
"train_batch_size": 64,
|
1219 |
+
"trial_name": null,
|
1220 |
+
"trial_params": null
|
1221 |
+
}
|
checkpoint-4000/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab218b8b16eec7c543d0bc5420b88d78608af29dcbf60891a693f97306d3aba4
|
3 |
+
size 5560
|
checkpoint-4000/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-4200/README.md
ADDED
@@ -0,0 +1,674 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- sentence-transformers
|
4 |
+
- sentence-similarity
|
5 |
+
- feature-extraction
|
6 |
+
- generated_from_trainer
|
7 |
+
- dataset_size:114699
|
8 |
+
- loss:CachedGISTEmbedLoss
|
9 |
+
base_model: BAAI/bge-large-en-v1.5
|
10 |
+
widget:
|
11 |
+
- source_sentence: For roles such as 'physiotherapist', 'neuromusculoskeletal physiotherapist',
|
12 |
+
'osteopath', and 'chiropractor', the skills needed include a deep understanding
|
13 |
+
of human anatomy and physiology, strong diagnostic skills, and the ability to
|
14 |
+
apply manual therapy techniques to treat musculoskeletal issues. Additionally,
|
15 |
+
effective communication skills are crucial for explaining treatments and exercises
|
16 |
+
to patients, while adaptability and problem-solving skills are essential for tailoring
|
17 |
+
treatments to individual patient needs.
|
18 |
+
sentences:
|
19 |
+
- Job roles such as insulation installers, HVAC technicians, and construction engineers
|
20 |
+
require knowledge of various types and characteristics of insulation materials
|
21 |
+
to effectively reduce heat transfer and improve energy efficiency in buildings
|
22 |
+
and systems. Understanding the typology of insulation materials, including their
|
23 |
+
thermal properties, durability, and environmental impact, is crucial for these
|
24 |
+
professionals to select the most appropriate materials for specific applications.
|
25 |
+
- Job roles such as Contract Managers, Legal Analysts, and Compliance Officers require
|
26 |
+
the skill of reviewing or auditing completed contracts to ensure legal accuracy,
|
27 |
+
compliance with regulations, and alignment with organizational goals.
|
28 |
+
- Job roles that require skills in dealing with emergency care situations include
|
29 |
+
emergency medical technicians (EMTs), paramedics, and emergency room nurses or
|
30 |
+
doctors, all of whom must quickly and effectively manage critical health situations
|
31 |
+
to save lives.
|
32 |
+
- source_sentence: Bus drivers, including those operating in various sectors like
|
33 |
+
public transit, intercity, private, or school services, need strong driving skills,
|
34 |
+
knowledge of traffic laws, and the ability to operate safely in diverse conditions.
|
35 |
+
Additionally, effective communication skills and the ability to handle passenger
|
36 |
+
inquiries and emergencies are crucial.
|
37 |
+
sentences:
|
38 |
+
- Job roles that require the skill to calibrate electronic instruments include calibration
|
39 |
+
technicians, quality control engineers, and instrumentation specialists. These
|
40 |
+
professionals ensure the accuracy and reliability of various electronic devices
|
41 |
+
and systems across different industries such as manufacturing, aerospace, and
|
42 |
+
automotive.
|
43 |
+
- Job roles such as Building Engineer, Architect, and Construction Specialist require
|
44 |
+
skills in designing, engineering, or developing air-tight building structures
|
45 |
+
to ensure energy efficiency and environmental control within the building.
|
46 |
+
- Job roles such as customer service representatives, flight attendants, and hotel
|
47 |
+
concierges require a strong focus on passengers or customers, ensuring their needs
|
48 |
+
and comfort are prioritized to provide excellent service and support.
|
49 |
+
- source_sentence: A mine surveyor, also known as a mining surveyor or mine planning
|
50 |
+
surveyor, requires expertise in geomatics and mining engineering to accurately
|
51 |
+
map and plan mine operations, ensuring safety and efficiency. They must also possess
|
52 |
+
strong analytical skills and the ability to use specialized software for creating
|
53 |
+
detailed mine plans and maintaining accurate records.
|
54 |
+
sentences:
|
55 |
+
- Job roles such as data analysts, business analysts, and financial analysts require
|
56 |
+
the skill to present reports or prepare statistical reports, as they often need
|
57 |
+
to communicate complex data insights clearly and effectively to stakeholders.
|
58 |
+
- Job roles that require monitoring flour unloading equipment include Quality Control
|
59 |
+
Technicians, Process Operators, and Mill Supervisors, who ensure the efficient
|
60 |
+
and safe operation of flour processing systems and the proper unloading of flour
|
61 |
+
from transport vehicles.
|
62 |
+
- Job roles that require skills in the manufacturing of made-up textile articles
|
63 |
+
include textile production managers, machinery operators, and quality control
|
64 |
+
inspectors, all of whom utilize specific technology and machinery to produce finished
|
65 |
+
textile products such as clothing, home textiles, and industrial fabrics.
|
66 |
+
- source_sentence: An insulation supervisor, regardless of the specific type of insulation
|
67 |
+
material or installation area, requires strong project management skills, knowledge
|
68 |
+
of building codes and safety regulations, and expertise in insulation techniques
|
69 |
+
to oversee the installation process effectively and ensure quality standards are
|
70 |
+
met.
|
71 |
+
sentences:
|
72 |
+
- Job roles that require skills in energy efficiency, such as promoting energy efficiency
|
73 |
+
or efficient energy use, include Energy Managers, Sustainability Specialists,
|
74 |
+
and Building Engineers, who focus on reducing energy consumption and improving
|
75 |
+
energy use in various settings. Additionally, roles like Battery Technicians or
|
76 |
+
Engineers involve battery benchmarking to enhance energy storage and efficiency
|
77 |
+
in technological devices and systems.
|
78 |
+
- The skill of applying or installing waterproofing and damp-proofing membranes
|
79 |
+
is primarily required by construction workers such as waterproofing specialists,
|
80 |
+
roofers, and building envelope technicians, who use these membranes to prevent
|
81 |
+
water damage in buildings and structures.
|
82 |
+
- Job roles such as laboratory technicians, chemists, and materials scientists require
|
83 |
+
skills in laboratory techniques, including electronic and thermic methods, gas
|
84 |
+
chromatography, and gravimetric analysis, to conduct precise experiments and analyze
|
85 |
+
materials. These professionals must apply natural science techniques and use various
|
86 |
+
lab techniques to ensure accurate and reliable results in their research or quality
|
87 |
+
control processes.
|
88 |
+
- source_sentence: For roles such as import/export manager, graduate export manager,
|
89 |
+
senior export manager, and other related positions in meat and meat products,
|
90 |
+
the key skills include a strong understanding of international trade regulations,
|
91 |
+
meat product knowledge, customs compliance, and excellent negotiation and communication
|
92 |
+
skills to manage global supply chains effectively. Additionally, proficiency in
|
93 |
+
relevant trade software and languages can be highly beneficial.
|
94 |
+
sentences:
|
95 |
+
- Job roles that require skills such as managing staff, coordinating employees,
|
96 |
+
and performing HR activities include Human Resources Managers, Team Leaders, Supervisors,
|
97 |
+
and Department Heads, all of whom are responsible for overseeing personnel, implementing
|
98 |
+
HR policies, and ensuring efficient team operations.
|
99 |
+
- Job roles such as Control Systems Engineer, Automation Engineer, and Systems Designer
|
100 |
+
require skills in designing, planning, and developing control systems to manage
|
101 |
+
and optimize the performance of various technological processes and machinery.
|
102 |
+
These professionals are tasked with creating efficient and reliable systems that
|
103 |
+
can operate autonomously or with minimal human intervention.
|
104 |
+
- Job roles such as Performance Analyst, Quality Assurance Engineer, and Test Manager
|
105 |
+
require skills in conducting performance measurement and organizing or managing
|
106 |
+
conversion testing to ensure software and systems meet performance standards and
|
107 |
+
function correctly in real-world scenarios.
|
108 |
+
pipeline_tag: sentence-similarity
|
109 |
+
library_name: sentence-transformers
|
110 |
+
metrics:
|
111 |
+
- cosine_accuracy@1
|
112 |
+
- cosine_accuracy@20
|
113 |
+
- cosine_accuracy@50
|
114 |
+
- cosine_accuracy@100
|
115 |
+
- cosine_accuracy@150
|
116 |
+
- cosine_accuracy@200
|
117 |
+
- cosine_precision@1
|
118 |
+
- cosine_precision@20
|
119 |
+
- cosine_precision@50
|
120 |
+
- cosine_precision@100
|
121 |
+
- cosine_precision@150
|
122 |
+
- cosine_precision@200
|
123 |
+
- cosine_recall@1
|
124 |
+
- cosine_recall@20
|
125 |
+
- cosine_recall@50
|
126 |
+
- cosine_recall@100
|
127 |
+
- cosine_recall@150
|
128 |
+
- cosine_recall@200
|
129 |
+
- cosine_ndcg@1
|
130 |
+
- cosine_ndcg@20
|
131 |
+
- cosine_ndcg@50
|
132 |
+
- cosine_ndcg@100
|
133 |
+
- cosine_ndcg@150
|
134 |
+
- cosine_ndcg@200
|
135 |
+
- cosine_mrr@1
|
136 |
+
- cosine_mrr@20
|
137 |
+
- cosine_mrr@50
|
138 |
+
- cosine_mrr@100
|
139 |
+
- cosine_mrr@150
|
140 |
+
- cosine_mrr@200
|
141 |
+
- cosine_map@1
|
142 |
+
- cosine_map@20
|
143 |
+
- cosine_map@50
|
144 |
+
- cosine_map@100
|
145 |
+
- cosine_map@150
|
146 |
+
- cosine_map@200
|
147 |
+
- cosine_map@500
|
148 |
+
model-index:
|
149 |
+
- name: SentenceTransformer based on BAAI/bge-large-en-v1.5
|
150 |
+
results:
|
151 |
+
- task:
|
152 |
+
type: information-retrieval
|
153 |
+
name: Information Retrieval
|
154 |
+
dataset:
|
155 |
+
name: full en
|
156 |
+
type: full_en
|
157 |
+
metrics:
|
158 |
+
- type: cosine_accuracy@1
|
159 |
+
value: 0.7302631578947368
|
160 |
+
name: Cosine Accuracy@1
|
161 |
+
- type: cosine_accuracy@20
|
162 |
+
value: 0.993421052631579
|
163 |
+
name: Cosine Accuracy@20
|
164 |
+
- type: cosine_accuracy@50
|
165 |
+
value: 0.9967105263157895
|
166 |
+
name: Cosine Accuracy@50
|
167 |
+
- type: cosine_accuracy@100
|
168 |
+
value: 1.0
|
169 |
+
name: Cosine Accuracy@100
|
170 |
+
- type: cosine_accuracy@150
|
171 |
+
value: 1.0
|
172 |
+
name: Cosine Accuracy@150
|
173 |
+
- type: cosine_accuracy@200
|
174 |
+
value: 1.0
|
175 |
+
name: Cosine Accuracy@200
|
176 |
+
- type: cosine_precision@1
|
177 |
+
value: 0.7302631578947368
|
178 |
+
name: Cosine Precision@1
|
179 |
+
- type: cosine_precision@20
|
180 |
+
value: 0.49786184210526313
|
181 |
+
name: Cosine Precision@20
|
182 |
+
- type: cosine_precision@50
|
183 |
+
value: 0.3921710526315789
|
184 |
+
name: Cosine Precision@50
|
185 |
+
- type: cosine_precision@100
|
186 |
+
value: 0.3101973684210526
|
187 |
+
name: Cosine Precision@100
|
188 |
+
- type: cosine_precision@150
|
189 |
+
value: 0.2649780701754386
|
190 |
+
name: Cosine Precision@150
|
191 |
+
- type: cosine_precision@200
|
192 |
+
value: 0.23199013157894738
|
193 |
+
name: Cosine Precision@200
|
194 |
+
- type: cosine_recall@1
|
195 |
+
value: 0.01024195976751409
|
196 |
+
name: Cosine Recall@1
|
197 |
+
- type: cosine_recall@20
|
198 |
+
value: 0.13293505289394864
|
199 |
+
name: Cosine Recall@20
|
200 |
+
- type: cosine_recall@50
|
201 |
+
value: 0.2542542782427721
|
202 |
+
name: Cosine Recall@50
|
203 |
+
- type: cosine_recall@100
|
204 |
+
value: 0.3937143686320033
|
205 |
+
name: Cosine Recall@100
|
206 |
+
- type: cosine_recall@150
|
207 |
+
value: 0.4954717634968576
|
208 |
+
name: Cosine Recall@150
|
209 |
+
- type: cosine_recall@200
|
210 |
+
value: 0.5715937768635994
|
211 |
+
name: Cosine Recall@200
|
212 |
+
- type: cosine_ndcg@1
|
213 |
+
value: 0.7302631578947368
|
214 |
+
name: Cosine Ndcg@1
|
215 |
+
- type: cosine_ndcg@20
|
216 |
+
value: 0.5372965681233445
|
217 |
+
name: Cosine Ndcg@20
|
218 |
+
- type: cosine_ndcg@50
|
219 |
+
value: 0.4501724823363586
|
220 |
+
name: Cosine Ndcg@50
|
221 |
+
- type: cosine_ndcg@100
|
222 |
+
value: 0.44188546614809043
|
223 |
+
name: Cosine Ndcg@100
|
224 |
+
- type: cosine_ndcg@150
|
225 |
+
value: 0.48899866366733713
|
226 |
+
name: Cosine Ndcg@150
|
227 |
+
- type: cosine_ndcg@200
|
228 |
+
value: 0.5342393130950145
|
229 |
+
name: Cosine Ndcg@200
|
230 |
+
- type: cosine_mrr@1
|
231 |
+
value: 0.7302631578947368
|
232 |
+
name: Cosine Mrr@1
|
233 |
+
- type: cosine_mrr@20
|
234 |
+
value: 0.8354127506265665
|
235 |
+
name: Cosine Mrr@20
|
236 |
+
- type: cosine_mrr@50
|
237 |
+
value: 0.8355693922305765
|
238 |
+
name: Cosine Mrr@50
|
239 |
+
- type: cosine_mrr@100
|
240 |
+
value: 0.8356281328320803
|
241 |
+
name: Cosine Mrr@100
|
242 |
+
- type: cosine_mrr@150
|
243 |
+
value: 0.8356281328320803
|
244 |
+
name: Cosine Mrr@150
|
245 |
+
- type: cosine_mrr@200
|
246 |
+
value: 0.8356281328320803
|
247 |
+
name: Cosine Mrr@200
|
248 |
+
- type: cosine_map@1
|
249 |
+
value: 0.7302631578947368
|
250 |
+
name: Cosine Map@1
|
251 |
+
- type: cosine_map@20
|
252 |
+
value: 0.342326318294358
|
253 |
+
name: Cosine Map@20
|
254 |
+
- type: cosine_map@50
|
255 |
+
value: 0.23774626029530496
|
256 |
+
name: Cosine Map@50
|
257 |
+
- type: cosine_map@100
|
258 |
+
value: 0.21090472549603356
|
259 |
+
name: Cosine Map@100
|
260 |
+
- type: cosine_map@150
|
261 |
+
value: 0.22845220292726734
|
262 |
+
name: Cosine Map@150
|
263 |
+
- type: cosine_map@200
|
264 |
+
value: 0.24733168088568283
|
265 |
+
name: Cosine Map@200
|
266 |
+
- type: cosine_map@500
|
267 |
+
value: 0.2977609786459198
|
268 |
+
name: Cosine Map@500
|
269 |
+
---
|
270 |
+
|
271 |
+
# SentenceTransformer based on BAAI/bge-large-en-v1.5
|
272 |
+
|
273 |
+
This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [BAAI/bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5). It maps sentences & paragraphs to a 1024-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
|
274 |
+
|
275 |
+
## Model Details
|
276 |
+
|
277 |
+
### Model Description
|
278 |
+
- **Model Type:** Sentence Transformer
|
279 |
+
- **Base model:** [BAAI/bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5) <!-- at revision d4aa6901d3a41ba39fb536a557fa166f842b0e09 -->
|
280 |
+
- **Maximum Sequence Length:** 256 tokens
|
281 |
+
- **Output Dimensionality:** 1024 dimensions
|
282 |
+
- **Similarity Function:** Cosine Similarity
|
283 |
+
<!-- - **Training Dataset:** Unknown -->
|
284 |
+
<!-- - **Language:** Unknown -->
|
285 |
+
<!-- - **License:** Unknown -->
|
286 |
+
|
287 |
+
### Model Sources
|
288 |
+
|
289 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
290 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
291 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
292 |
+
|
293 |
+
### Full Model Architecture
|
294 |
+
|
295 |
+
```
|
296 |
+
SentenceTransformer(
|
297 |
+
(0): Transformer({'max_seq_length': 256, 'do_lower_case': True}) with Transformer model: BertModel
|
298 |
+
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
299 |
+
(2): Normalize()
|
300 |
+
)
|
301 |
+
```
|
302 |
+
|
303 |
+
## Usage
|
304 |
+
|
305 |
+
### Direct Usage (Sentence Transformers)
|
306 |
+
|
307 |
+
First install the Sentence Transformers library:
|
308 |
+
|
309 |
+
```bash
|
310 |
+
pip install -U sentence-transformers
|
311 |
+
```
|
312 |
+
|
313 |
+
Then you can load this model and run inference.
|
314 |
+
```python
|
315 |
+
from sentence_transformers import SentenceTransformer
|
316 |
+
|
317 |
+
# Download from the 🤗 Hub
|
318 |
+
model = SentenceTransformer("sentence_transformers_model_id")
|
319 |
+
# Run inference
|
320 |
+
sentences = [
|
321 |
+
'For roles such as import/export manager, graduate export manager, senior export manager, and other related positions in meat and meat products, the key skills include a strong understanding of international trade regulations, meat product knowledge, customs compliance, and excellent negotiation and communication skills to manage global supply chains effectively. Additionally, proficiency in relevant trade software and languages can be highly beneficial.',
|
322 |
+
'Job roles such as Performance Analyst, Quality Assurance Engineer, and Test Manager require skills in conducting performance measurement and organizing or managing conversion testing to ensure software and systems meet performance standards and function correctly in real-world scenarios.',
|
323 |
+
'Job roles that require skills such as managing staff, coordinating employees, and performing HR activities include Human Resources Managers, Team Leaders, Supervisors, and Department Heads, all of whom are responsible for overseeing personnel, implementing HR policies, and ensuring efficient team operations.',
|
324 |
+
]
|
325 |
+
embeddings = model.encode(sentences)
|
326 |
+
print(embeddings.shape)
|
327 |
+
# [3, 1024]
|
328 |
+
|
329 |
+
# Get the similarity scores for the embeddings
|
330 |
+
similarities = model.similarity(embeddings, embeddings)
|
331 |
+
print(similarities.shape)
|
332 |
+
# [3, 3]
|
333 |
+
```
|
334 |
+
|
335 |
+
<!--
|
336 |
+
### Direct Usage (Transformers)
|
337 |
+
|
338 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
339 |
+
|
340 |
+
</details>
|
341 |
+
-->
|
342 |
+
|
343 |
+
<!--
|
344 |
+
### Downstream Usage (Sentence Transformers)
|
345 |
+
|
346 |
+
You can finetune this model on your own dataset.
|
347 |
+
|
348 |
+
<details><summary>Click to expand</summary>
|
349 |
+
|
350 |
+
</details>
|
351 |
+
-->
|
352 |
+
|
353 |
+
<!--
|
354 |
+
### Out-of-Scope Use
|
355 |
+
|
356 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
357 |
+
-->
|
358 |
+
|
359 |
+
## Evaluation
|
360 |
+
|
361 |
+
### Metrics
|
362 |
+
|
363 |
+
#### Information Retrieval
|
364 |
+
|
365 |
+
* Dataset: `full_en`
|
366 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
367 |
+
|
368 |
+
| Metric | Value |
|
369 |
+
|:---------------------|:-----------|
|
370 |
+
| cosine_accuracy@1 | 0.7303 |
|
371 |
+
| cosine_accuracy@20 | 0.9934 |
|
372 |
+
| cosine_accuracy@50 | 0.9967 |
|
373 |
+
| cosine_accuracy@100 | 1.0 |
|
374 |
+
| cosine_accuracy@150 | 1.0 |
|
375 |
+
| cosine_accuracy@200 | 1.0 |
|
376 |
+
| cosine_precision@1 | 0.7303 |
|
377 |
+
| cosine_precision@20 | 0.4979 |
|
378 |
+
| cosine_precision@50 | 0.3922 |
|
379 |
+
| cosine_precision@100 | 0.3102 |
|
380 |
+
| cosine_precision@150 | 0.265 |
|
381 |
+
| cosine_precision@200 | 0.232 |
|
382 |
+
| cosine_recall@1 | 0.0102 |
|
383 |
+
| cosine_recall@20 | 0.1329 |
|
384 |
+
| cosine_recall@50 | 0.2543 |
|
385 |
+
| cosine_recall@100 | 0.3937 |
|
386 |
+
| cosine_recall@150 | 0.4955 |
|
387 |
+
| cosine_recall@200 | 0.5716 |
|
388 |
+
| cosine_ndcg@1 | 0.7303 |
|
389 |
+
| cosine_ndcg@20 | 0.5373 |
|
390 |
+
| cosine_ndcg@50 | 0.4502 |
|
391 |
+
| cosine_ndcg@100 | 0.4419 |
|
392 |
+
| cosine_ndcg@150 | 0.489 |
|
393 |
+
| **cosine_ndcg@200** | **0.5342** |
|
394 |
+
| cosine_mrr@1 | 0.7303 |
|
395 |
+
| cosine_mrr@20 | 0.8354 |
|
396 |
+
| cosine_mrr@50 | 0.8356 |
|
397 |
+
| cosine_mrr@100 | 0.8356 |
|
398 |
+
| cosine_mrr@150 | 0.8356 |
|
399 |
+
| cosine_mrr@200 | 0.8356 |
|
400 |
+
| cosine_map@1 | 0.7303 |
|
401 |
+
| cosine_map@20 | 0.3423 |
|
402 |
+
| cosine_map@50 | 0.2377 |
|
403 |
+
| cosine_map@100 | 0.2109 |
|
404 |
+
| cosine_map@150 | 0.2285 |
|
405 |
+
| cosine_map@200 | 0.2473 |
|
406 |
+
| cosine_map@500 | 0.2978 |
|
407 |
+
|
408 |
+
<!--
|
409 |
+
## Bias, Risks and Limitations
|
410 |
+
|
411 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
412 |
+
-->
|
413 |
+
|
414 |
+
<!--
|
415 |
+
### Recommendations
|
416 |
+
|
417 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
418 |
+
-->
|
419 |
+
|
420 |
+
## Training Details
|
421 |
+
|
422 |
+
### Training Dataset
|
423 |
+
|
424 |
+
#### Unnamed Dataset
|
425 |
+
|
426 |
+
* Size: 114,699 training samples
|
427 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
428 |
+
* Approximate statistics based on the first 1000 samples:
|
429 |
+
| | anchor | positive |
|
430 |
+
|:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
431 |
+
| type | string | string |
|
432 |
+
| details | <ul><li>min: 43 tokens</li><li>mean: 65.45 tokens</li><li>max: 116 tokens</li></ul> | <ul><li>min: 34 tokens</li><li>mean: 55.34 tokens</li><li>max: 162 tokens</li></ul> |
|
433 |
+
* Samples:
|
434 |
+
| anchor | positive |
|
435 |
+
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
436 |
+
| <code>A technical director or any of its synonyms requires a strong blend of technical expertise and leadership skills, including the ability to oversee technical operations, manage teams, and ensure the successful execution of technical projects while maintaining operational efficiency and innovation.</code> | <code>Job roles that require promoting health and safety include occupational health and safety specialists, safety managers, and public health educators, all of whom work to ensure safe and healthy environments in workplaces and communities.</code> |
|
437 |
+
| <code>A technical director or any of its synonyms requires a strong blend of technical expertise and leadership skills, including the ability to oversee technical operations, manage teams, and ensure the successful execution of technical projects while maintaining operational efficiency and innovation.</code> | <code>Job roles that require organizing rehearsals include directors, choreographers, and conductors in theater, dance, and music ensembles, who must efficiently plan and schedule practice sessions to prepare performers for a successful final performance.</code> |
|
438 |
+
| <code>A technical director or any of its synonyms requires a strong blend of technical expertise and leadership skills, including the ability to oversee technical operations, manage teams, and ensure the successful execution of technical projects while maintaining operational efficiency and innovation.</code> | <code>Job roles such as Health and Safety Managers, Environmental Health Officers, and Risk Management Specialists often require the skill of negotiating health and safety issues with third parties to ensure compliance and protection standards are met across different organizations and sites.</code> |
|
439 |
+
* Loss: [<code>CachedGISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedgistembedloss) with these parameters:
|
440 |
+
```
|
441 |
+
{'guide': SentenceTransformer(
|
442 |
+
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
|
443 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
444 |
+
(2): Normalize()
|
445 |
+
), 'temperature': 0.01, 'mini_batch_size': 32, 'margin_strategy': 'absolute', 'margin': 0.0}
|
446 |
+
```
|
447 |
+
|
448 |
+
### Training Hyperparameters
|
449 |
+
#### Non-Default Hyperparameters
|
450 |
+
|
451 |
+
- `eval_strategy`: steps
|
452 |
+
- `per_device_train_batch_size`: 64
|
453 |
+
- `per_device_eval_batch_size`: 128
|
454 |
+
- `gradient_accumulation_steps`: 2
|
455 |
+
- `num_train_epochs`: 5
|
456 |
+
- `warmup_ratio`: 0.05
|
457 |
+
- `log_on_each_node`: False
|
458 |
+
- `fp16`: True
|
459 |
+
- `dataloader_num_workers`: 4
|
460 |
+
- `ddp_find_unused_parameters`: True
|
461 |
+
- `batch_sampler`: no_duplicates
|
462 |
+
|
463 |
+
#### All Hyperparameters
|
464 |
+
<details><summary>Click to expand</summary>
|
465 |
+
|
466 |
+
- `overwrite_output_dir`: False
|
467 |
+
- `do_predict`: False
|
468 |
+
- `eval_strategy`: steps
|
469 |
+
- `prediction_loss_only`: True
|
470 |
+
- `per_device_train_batch_size`: 64
|
471 |
+
- `per_device_eval_batch_size`: 128
|
472 |
+
- `per_gpu_train_batch_size`: None
|
473 |
+
- `per_gpu_eval_batch_size`: None
|
474 |
+
- `gradient_accumulation_steps`: 2
|
475 |
+
- `eval_accumulation_steps`: None
|
476 |
+
- `torch_empty_cache_steps`: None
|
477 |
+
- `learning_rate`: 5e-05
|
478 |
+
- `weight_decay`: 0.0
|
479 |
+
- `adam_beta1`: 0.9
|
480 |
+
- `adam_beta2`: 0.999
|
481 |
+
- `adam_epsilon`: 1e-08
|
482 |
+
- `max_grad_norm`: 1.0
|
483 |
+
- `num_train_epochs`: 5
|
484 |
+
- `max_steps`: -1
|
485 |
+
- `lr_scheduler_type`: linear
|
486 |
+
- `lr_scheduler_kwargs`: {}
|
487 |
+
- `warmup_ratio`: 0.05
|
488 |
+
- `warmup_steps`: 0
|
489 |
+
- `log_level`: passive
|
490 |
+
- `log_level_replica`: warning
|
491 |
+
- `log_on_each_node`: False
|
492 |
+
- `logging_nan_inf_filter`: True
|
493 |
+
- `save_safetensors`: True
|
494 |
+
- `save_on_each_node`: False
|
495 |
+
- `save_only_model`: False
|
496 |
+
- `restore_callback_states_from_checkpoint`: False
|
497 |
+
- `no_cuda`: False
|
498 |
+
- `use_cpu`: False
|
499 |
+
- `use_mps_device`: False
|
500 |
+
- `seed`: 42
|
501 |
+
- `data_seed`: None
|
502 |
+
- `jit_mode_eval`: False
|
503 |
+
- `use_ipex`: False
|
504 |
+
- `bf16`: False
|
505 |
+
- `fp16`: True
|
506 |
+
- `fp16_opt_level`: O1
|
507 |
+
- `half_precision_backend`: auto
|
508 |
+
- `bf16_full_eval`: False
|
509 |
+
- `fp16_full_eval`: False
|
510 |
+
- `tf32`: None
|
511 |
+
- `local_rank`: 0
|
512 |
+
- `ddp_backend`: None
|
513 |
+
- `tpu_num_cores`: None
|
514 |
+
- `tpu_metrics_debug`: False
|
515 |
+
- `debug`: []
|
516 |
+
- `dataloader_drop_last`: True
|
517 |
+
- `dataloader_num_workers`: 4
|
518 |
+
- `dataloader_prefetch_factor`: None
|
519 |
+
- `past_index`: -1
|
520 |
+
- `disable_tqdm`: False
|
521 |
+
- `remove_unused_columns`: True
|
522 |
+
- `label_names`: None
|
523 |
+
- `load_best_model_at_end`: False
|
524 |
+
- `ignore_data_skip`: False
|
525 |
+
- `fsdp`: []
|
526 |
+
- `fsdp_min_num_params`: 0
|
527 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
528 |
+
- `tp_size`: 0
|
529 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
530 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
531 |
+
- `deepspeed`: None
|
532 |
+
- `label_smoothing_factor`: 0.0
|
533 |
+
- `optim`: adamw_torch
|
534 |
+
- `optim_args`: None
|
535 |
+
- `adafactor`: False
|
536 |
+
- `group_by_length`: False
|
537 |
+
- `length_column_name`: length
|
538 |
+
- `ddp_find_unused_parameters`: True
|
539 |
+
- `ddp_bucket_cap_mb`: None
|
540 |
+
- `ddp_broadcast_buffers`: False
|
541 |
+
- `dataloader_pin_memory`: True
|
542 |
+
- `dataloader_persistent_workers`: False
|
543 |
+
- `skip_memory_metrics`: True
|
544 |
+
- `use_legacy_prediction_loop`: False
|
545 |
+
- `push_to_hub`: False
|
546 |
+
- `resume_from_checkpoint`: None
|
547 |
+
- `hub_model_id`: None
|
548 |
+
- `hub_strategy`: every_save
|
549 |
+
- `hub_private_repo`: None
|
550 |
+
- `hub_always_push`: False
|
551 |
+
- `gradient_checkpointing`: False
|
552 |
+
- `gradient_checkpointing_kwargs`: None
|
553 |
+
- `include_inputs_for_metrics`: False
|
554 |
+
- `include_for_metrics`: []
|
555 |
+
- `eval_do_concat_batches`: True
|
556 |
+
- `fp16_backend`: auto
|
557 |
+
- `push_to_hub_model_id`: None
|
558 |
+
- `push_to_hub_organization`: None
|
559 |
+
- `mp_parameters`:
|
560 |
+
- `auto_find_batch_size`: False
|
561 |
+
- `full_determinism`: False
|
562 |
+
- `torchdynamo`: None
|
563 |
+
- `ray_scope`: last
|
564 |
+
- `ddp_timeout`: 1800
|
565 |
+
- `torch_compile`: False
|
566 |
+
- `torch_compile_backend`: None
|
567 |
+
- `torch_compile_mode`: None
|
568 |
+
- `include_tokens_per_second`: False
|
569 |
+
- `include_num_input_tokens_seen`: False
|
570 |
+
- `neftune_noise_alpha`: None
|
571 |
+
- `optim_target_modules`: None
|
572 |
+
- `batch_eval_metrics`: False
|
573 |
+
- `eval_on_start`: False
|
574 |
+
- `use_liger_kernel`: False
|
575 |
+
- `eval_use_gather_object`: False
|
576 |
+
- `average_tokens_across_devices`: False
|
577 |
+
- `prompts`: None
|
578 |
+
- `batch_sampler`: no_duplicates
|
579 |
+
- `multi_dataset_batch_sampler`: proportional
|
580 |
+
|
581 |
+
</details>
|
582 |
+
|
583 |
+
### Training Logs
|
584 |
+
| Epoch | Step | Training Loss | full_en_cosine_ndcg@200 |
|
585 |
+
|:------:|:----:|:-------------:|:-----------------------:|
|
586 |
+
| -1 | -1 | - | 0.4784 |
|
587 |
+
| 0.0011 | 1 | 9.119 | - |
|
588 |
+
| 0.1116 | 100 | 4.1469 | - |
|
589 |
+
| 0.2232 | 200 | 2.5294 | 0.5362 |
|
590 |
+
| 0.3348 | 300 | 2.3611 | - |
|
591 |
+
| 0.4464 | 400 | 2.192 | 0.5318 |
|
592 |
+
| 0.5580 | 500 | 2.0338 | - |
|
593 |
+
| 0.6696 | 600 | 1.9009 | 0.5383 |
|
594 |
+
| 0.7812 | 700 | 1.8404 | - |
|
595 |
+
| 0.8929 | 800 | 1.7692 | 0.5352 |
|
596 |
+
| 1.0045 | 900 | 1.6921 | - |
|
597 |
+
| 1.1161 | 1000 | 1.3861 | 0.5368 |
|
598 |
+
| 1.2277 | 1100 | 1.3863 | - |
|
599 |
+
| 1.3393 | 1200 | 1.3546 | 0.5259 |
|
600 |
+
| 1.4509 | 1300 | 1.373 | - |
|
601 |
+
| 1.5625 | 1400 | 1.3364 | 0.5303 |
|
602 |
+
| 1.6741 | 1500 | 1.2876 | - |
|
603 |
+
| 1.7857 | 1600 | 1.3094 | 0.5323 |
|
604 |
+
| 1.8973 | 1700 | 1.2784 | - |
|
605 |
+
| 2.0089 | 1800 | 1.2204 | 0.5330 |
|
606 |
+
| 2.1205 | 1900 | 0.9617 | - |
|
607 |
+
| 2.2321 | 2000 | 1.0004 | 0.5277 |
|
608 |
+
| 2.3438 | 2100 | 0.9694 | - |
|
609 |
+
| 2.4554 | 2200 | 0.9843 | 0.5356 |
|
610 |
+
| 2.5670 | 2300 | 0.9743 | - |
|
611 |
+
| 2.6786 | 2400 | 0.9252 | 0.5320 |
|
612 |
+
| 2.7902 | 2500 | 0.9272 | - |
|
613 |
+
| 2.9018 | 2600 | 0.9279 | 0.5333 |
|
614 |
+
| 3.0134 | 2700 | 0.857 | - |
|
615 |
+
| 3.125 | 2800 | 0.7313 | 0.5300 |
|
616 |
+
| 3.2366 | 2900 | 0.7103 | - |
|
617 |
+
| 3.3482 | 3000 | 0.7187 | 0.5319 |
|
618 |
+
| 3.4598 | 3100 | 0.7067 | - |
|
619 |
+
| 3.5714 | 3200 | 0.7157 | 0.5369 |
|
620 |
+
| 3.6830 | 3300 | 0.7113 | - |
|
621 |
+
| 3.7946 | 3400 | 0.7013 | 0.5341 |
|
622 |
+
| 3.9062 | 3500 | 0.6903 | - |
|
623 |
+
| 4.0179 | 3600 | 0.6462 | 0.5335 |
|
624 |
+
| 4.1295 | 3700 | 0.5162 | - |
|
625 |
+
| 4.2411 | 3800 | 0.524 | 0.5352 |
|
626 |
+
| 4.3527 | 3900 | 0.5303 | - |
|
627 |
+
| 4.4643 | 4000 | 0.5269 | 0.5341 |
|
628 |
+
| 4.5759 | 4100 | 0.4824 | - |
|
629 |
+
| 4.6875 | 4200 | 0.5222 | 0.5342 |
|
630 |
+
|
631 |
+
|
632 |
+
### Framework Versions
|
633 |
+
- Python: 3.11.11
|
634 |
+
- Sentence Transformers: 4.1.0
|
635 |
+
- Transformers: 4.51.2
|
636 |
+
- PyTorch: 2.6.0+cu124
|
637 |
+
- Accelerate: 1.6.0
|
638 |
+
- Datasets: 3.5.0
|
639 |
+
- Tokenizers: 0.21.1
|
640 |
+
|
641 |
+
## Citation
|
642 |
+
|
643 |
+
### BibTeX
|
644 |
+
|
645 |
+
#### Sentence Transformers
|
646 |
+
```bibtex
|
647 |
+
@inproceedings{reimers-2019-sentence-bert,
|
648 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
649 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
650 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
651 |
+
month = "11",
|
652 |
+
year = "2019",
|
653 |
+
publisher = "Association for Computational Linguistics",
|
654 |
+
url = "https://arxiv.org/abs/1908.10084",
|
655 |
+
}
|
656 |
+
```
|
657 |
+
|
658 |
+
<!--
|
659 |
+
## Glossary
|
660 |
+
|
661 |
+
*Clearly define terms in order to be accessible across audiences.*
|
662 |
+
-->
|
663 |
+
|
664 |
+
<!--
|
665 |
+
## Model Card Authors
|
666 |
+
|
667 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
668 |
+
-->
|
669 |
+
|
670 |
+
<!--
|
671 |
+
## Model Card Contact
|
672 |
+
|
673 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
674 |
+
-->
|
checkpoint-4200/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c27e6b0c36040d1118cc767b2e1ff5fb54d9331aa992e0b369ad429c3bb25e97
|
3 |
+
size 15894
|
checkpoint-4200/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2a5c19b12a3edb7a8ea7e02ec7aa23d7cff707b35d8ade6103eb94a891f7e75e
|
3 |
+
size 988
|
checkpoint-4200/trainer_state.json
ADDED
@@ -0,0 +1,1280 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_global_step": null,
|
3 |
+
"best_metric": null,
|
4 |
+
"best_model_checkpoint": null,
|
5 |
+
"epoch": 4.6875,
|
6 |
+
"eval_steps": 200,
|
7 |
+
"global_step": 4200,
|
8 |
+
"is_hyper_param_search": false,
|
9 |
+
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": true,
|
11 |
+
"log_history": [
|
12 |
+
{
|
13 |
+
"epoch": 0.0011160714285714285,
|
14 |
+
"grad_norm": NaN,
|
15 |
+
"learning_rate": 0.0,
|
16 |
+
"loss": 9.119,
|
17 |
+
"step": 1
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"epoch": 0.11160714285714286,
|
21 |
+
"grad_norm": 17.632238388061523,
|
22 |
+
"learning_rate": 2.1428571428571428e-05,
|
23 |
+
"loss": 4.1469,
|
24 |
+
"step": 100
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"epoch": 0.22321428571428573,
|
28 |
+
"grad_norm": 14.409270286560059,
|
29 |
+
"learning_rate": 4.375e-05,
|
30 |
+
"loss": 2.5294,
|
31 |
+
"step": 200
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"epoch": 0.22321428571428573,
|
35 |
+
"eval_full_en_cosine_accuracy@1": 0.7467105263157895,
|
36 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
37 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
38 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
39 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
40 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
41 |
+
"eval_full_en_cosine_map@1": 0.7467105263157895,
|
42 |
+
"eval_full_en_cosine_map@100": 0.2121058701298033,
|
43 |
+
"eval_full_en_cosine_map@150": 0.2294109301872967,
|
44 |
+
"eval_full_en_cosine_map@20": 0.34167650006204187,
|
45 |
+
"eval_full_en_cosine_map@200": 0.2492171685943861,
|
46 |
+
"eval_full_en_cosine_map@50": 0.237336657426832,
|
47 |
+
"eval_full_en_cosine_map@500": 0.3000288940307502,
|
48 |
+
"eval_full_en_cosine_mrr@1": 0.7467105263157895,
|
49 |
+
"eval_full_en_cosine_mrr@100": 0.8460592769803298,
|
50 |
+
"eval_full_en_cosine_mrr@150": 0.8460592769803298,
|
51 |
+
"eval_full_en_cosine_mrr@20": 0.8458948032961192,
|
52 |
+
"eval_full_en_cosine_mrr@200": 0.8460592769803298,
|
53 |
+
"eval_full_en_cosine_mrr@50": 0.8460122844991269,
|
54 |
+
"eval_full_en_cosine_ndcg@1": 0.7467105263157895,
|
55 |
+
"eval_full_en_cosine_ndcg@100": 0.4430509248084704,
|
56 |
+
"eval_full_en_cosine_ndcg@150": 0.4894828917681416,
|
57 |
+
"eval_full_en_cosine_ndcg@20": 0.5367541274871807,
|
58 |
+
"eval_full_en_cosine_ndcg@200": 0.5361903606133726,
|
59 |
+
"eval_full_en_cosine_ndcg@50": 0.448683811733402,
|
60 |
+
"eval_full_en_cosine_precision@1": 0.7467105263157895,
|
61 |
+
"eval_full_en_cosine_precision@100": 0.31240131578947367,
|
62 |
+
"eval_full_en_cosine_precision@150": 0.26592105263157895,
|
63 |
+
"eval_full_en_cosine_precision@20": 0.4965460526315789,
|
64 |
+
"eval_full_en_cosine_precision@200": 0.23370065789473685,
|
65 |
+
"eval_full_en_cosine_precision@50": 0.3904605263157895,
|
66 |
+
"eval_full_en_cosine_recall@1": 0.010753343030902496,
|
67 |
+
"eval_full_en_cosine_recall@100": 0.39446255566624855,
|
68 |
+
"eval_full_en_cosine_recall@150": 0.49544823712709557,
|
69 |
+
"eval_full_en_cosine_recall@20": 0.13279013317825217,
|
70 |
+
"eval_full_en_cosine_recall@200": 0.5739614992682516,
|
71 |
+
"eval_full_en_cosine_recall@50": 0.25254843470147753,
|
72 |
+
"eval_runtime": 1.5828,
|
73 |
+
"eval_samples_per_second": 0.0,
|
74 |
+
"eval_sequential_score": 0.5361903606133726,
|
75 |
+
"eval_steps_per_second": 0.0,
|
76 |
+
"step": 200
|
77 |
+
},
|
78 |
+
{
|
79 |
+
"epoch": 0.33482142857142855,
|
80 |
+
"grad_norm": 16.260934829711914,
|
81 |
+
"learning_rate": 4.915413533834587e-05,
|
82 |
+
"loss": 2.3611,
|
83 |
+
"step": 300
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"epoch": 0.44642857142857145,
|
87 |
+
"grad_norm": 13.242988586425781,
|
88 |
+
"learning_rate": 4.797932330827068e-05,
|
89 |
+
"loss": 2.192,
|
90 |
+
"step": 400
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"epoch": 0.44642857142857145,
|
94 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
95 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
96 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
97 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
98 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
99 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
100 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
101 |
+
"eval_full_en_cosine_map@100": 0.2088144416212806,
|
102 |
+
"eval_full_en_cosine_map@150": 0.22677217670719133,
|
103 |
+
"eval_full_en_cosine_map@20": 0.3349832137166454,
|
104 |
+
"eval_full_en_cosine_map@200": 0.245946497368659,
|
105 |
+
"eval_full_en_cosine_map@50": 0.23473921202287384,
|
106 |
+
"eval_full_en_cosine_map@500": 0.2973985707303743,
|
107 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
108 |
+
"eval_full_en_cosine_mrr@100": 0.8394156306336016,
|
109 |
+
"eval_full_en_cosine_mrr@150": 0.8394156306336016,
|
110 |
+
"eval_full_en_cosine_mrr@20": 0.8392713554720135,
|
111 |
+
"eval_full_en_cosine_mrr@200": 0.8394156306336016,
|
112 |
+
"eval_full_en_cosine_mrr@50": 0.8393810045948205,
|
113 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
114 |
+
"eval_full_en_cosine_ndcg@100": 0.43855475512592684,
|
115 |
+
"eval_full_en_cosine_ndcg@150": 0.48609390907359196,
|
116 |
+
"eval_full_en_cosine_ndcg@20": 0.5288083416910968,
|
117 |
+
"eval_full_en_cosine_ndcg@200": 0.5318117937684201,
|
118 |
+
"eval_full_en_cosine_ndcg@50": 0.4453338982563473,
|
119 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
120 |
+
"eval_full_en_cosine_precision@100": 0.3088157894736842,
|
121 |
+
"eval_full_en_cosine_precision@150": 0.2644517543859649,
|
122 |
+
"eval_full_en_cosine_precision@20": 0.4875,
|
123 |
+
"eval_full_en_cosine_precision@200": 0.23172697368421055,
|
124 |
+
"eval_full_en_cosine_precision@50": 0.38782894736842105,
|
125 |
+
"eval_full_en_cosine_recall@1": 0.010619007443519193,
|
126 |
+
"eval_full_en_cosine_recall@100": 0.3902042311088277,
|
127 |
+
"eval_full_en_cosine_recall@150": 0.4925745165667779,
|
128 |
+
"eval_full_en_cosine_recall@20": 0.1301764615450556,
|
129 |
+
"eval_full_en_cosine_recall@200": 0.5696006364444781,
|
130 |
+
"eval_full_en_cosine_recall@50": 0.2518199886564403,
|
131 |
+
"eval_runtime": 1.5596,
|
132 |
+
"eval_samples_per_second": 0.0,
|
133 |
+
"eval_sequential_score": 0.5318117937684201,
|
134 |
+
"eval_steps_per_second": 0.0,
|
135 |
+
"step": 400
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.5580357142857143,
|
139 |
+
"grad_norm": 13.307888984680176,
|
140 |
+
"learning_rate": 4.680451127819549e-05,
|
141 |
+
"loss": 2.0338,
|
142 |
+
"step": 500
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.6696428571428571,
|
146 |
+
"grad_norm": 12.763930320739746,
|
147 |
+
"learning_rate": 4.56296992481203e-05,
|
148 |
+
"loss": 1.9009,
|
149 |
+
"step": 600
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.6696428571428571,
|
153 |
+
"eval_full_en_cosine_accuracy@1": 0.7302631578947368,
|
154 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
155 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
156 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
157 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
158 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
159 |
+
"eval_full_en_cosine_map@1": 0.7302631578947368,
|
160 |
+
"eval_full_en_cosine_map@100": 0.2146410944227793,
|
161 |
+
"eval_full_en_cosine_map@150": 0.23271596511985665,
|
162 |
+
"eval_full_en_cosine_map@20": 0.3429678297332613,
|
163 |
+
"eval_full_en_cosine_map@200": 0.2520997707361607,
|
164 |
+
"eval_full_en_cosine_map@50": 0.2404899713826549,
|
165 |
+
"eval_full_en_cosine_map@500": 0.302904619520322,
|
166 |
+
"eval_full_en_cosine_mrr@1": 0.7302631578947368,
|
167 |
+
"eval_full_en_cosine_mrr@100": 0.8306572094298247,
|
168 |
+
"eval_full_en_cosine_mrr@150": 0.8306572094298247,
|
169 |
+
"eval_full_en_cosine_mrr@20": 0.8304491697994989,
|
170 |
+
"eval_full_en_cosine_mrr@200": 0.8306572094298247,
|
171 |
+
"eval_full_en_cosine_mrr@50": 0.8306058114035089,
|
172 |
+
"eval_full_en_cosine_ndcg@1": 0.7302631578947368,
|
173 |
+
"eval_full_en_cosine_ndcg@100": 0.4445617284976941,
|
174 |
+
"eval_full_en_cosine_ndcg@150": 0.4922393935902775,
|
175 |
+
"eval_full_en_cosine_ndcg@20": 0.5357880041966661,
|
176 |
+
"eval_full_en_cosine_ndcg@200": 0.5383209000398446,
|
177 |
+
"eval_full_en_cosine_ndcg@50": 0.4504820590447715,
|
178 |
+
"eval_full_en_cosine_precision@1": 0.7302631578947368,
|
179 |
+
"eval_full_en_cosine_precision@100": 0.31358552631578945,
|
180 |
+
"eval_full_en_cosine_precision@150": 0.2677412280701754,
|
181 |
+
"eval_full_en_cosine_precision@20": 0.49720394736842105,
|
182 |
+
"eval_full_en_cosine_precision@200": 0.23452302631578953,
|
183 |
+
"eval_full_en_cosine_precision@50": 0.3932894736842105,
|
184 |
+
"eval_full_en_cosine_recall@1": 0.010303516134180577,
|
185 |
+
"eval_full_en_cosine_recall@100": 0.3970033142271577,
|
186 |
+
"eval_full_en_cosine_recall@150": 0.5001101850184368,
|
187 |
+
"eval_full_en_cosine_recall@20": 0.13302896177814508,
|
188 |
+
"eval_full_en_cosine_recall@200": 0.5777429812058247,
|
189 |
+
"eval_full_en_cosine_recall@50": 0.254528957048419,
|
190 |
+
"eval_runtime": 1.5616,
|
191 |
+
"eval_samples_per_second": 0.0,
|
192 |
+
"eval_sequential_score": 0.5383209000398446,
|
193 |
+
"eval_steps_per_second": 0.0,
|
194 |
+
"step": 600
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"epoch": 0.78125,
|
198 |
+
"grad_norm": 13.439990997314453,
|
199 |
+
"learning_rate": 4.4454887218045117e-05,
|
200 |
+
"loss": 1.8404,
|
201 |
+
"step": 700
|
202 |
+
},
|
203 |
+
{
|
204 |
+
"epoch": 0.8928571428571429,
|
205 |
+
"grad_norm": 12.594465255737305,
|
206 |
+
"learning_rate": 4.3280075187969924e-05,
|
207 |
+
"loss": 1.7692,
|
208 |
+
"step": 800
|
209 |
+
},
|
210 |
+
{
|
211 |
+
"epoch": 0.8928571428571429,
|
212 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
213 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
214 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
215 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
216 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
217 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
218 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
219 |
+
"eval_full_en_cosine_map@100": 0.21030614519224017,
|
220 |
+
"eval_full_en_cosine_map@150": 0.22737063252522982,
|
221 |
+
"eval_full_en_cosine_map@20": 0.3442880676713117,
|
222 |
+
"eval_full_en_cosine_map@200": 0.24764067563282596,
|
223 |
+
"eval_full_en_cosine_map@50": 0.23827484272575025,
|
224 |
+
"eval_full_en_cosine_map@500": 0.2987091429260604,
|
225 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
226 |
+
"eval_full_en_cosine_mrr@100": 0.8404268619187053,
|
227 |
+
"eval_full_en_cosine_mrr@150": 0.8404268619187053,
|
228 |
+
"eval_full_en_cosine_mrr@20": 0.8402307852965749,
|
229 |
+
"eval_full_en_cosine_mrr@200": 0.8404268619187053,
|
230 |
+
"eval_full_en_cosine_mrr@50": 0.8403738058915406,
|
231 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
232 |
+
"eval_full_en_cosine_ndcg@100": 0.440670430732987,
|
233 |
+
"eval_full_en_cosine_ndcg@150": 0.486778222456143,
|
234 |
+
"eval_full_en_cosine_ndcg@20": 0.5383903905850532,
|
235 |
+
"eval_full_en_cosine_ndcg@200": 0.5352292016764449,
|
236 |
+
"eval_full_en_cosine_ndcg@50": 0.45046850998342597,
|
237 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
238 |
+
"eval_full_en_cosine_precision@100": 0.3099342105263158,
|
239 |
+
"eval_full_en_cosine_precision@150": 0.26390350877192986,
|
240 |
+
"eval_full_en_cosine_precision@20": 0.5,
|
241 |
+
"eval_full_en_cosine_precision@200": 0.23320723684210526,
|
242 |
+
"eval_full_en_cosine_precision@50": 0.39335526315789476,
|
243 |
+
"eval_full_en_cosine_recall@1": 0.01051277780149725,
|
244 |
+
"eval_full_en_cosine_recall@100": 0.39158535797000443,
|
245 |
+
"eval_full_en_cosine_recall@150": 0.4917399858788313,
|
246 |
+
"eval_full_en_cosine_recall@20": 0.13328036442285973,
|
247 |
+
"eval_full_en_cosine_recall@200": 0.5734492892933252,
|
248 |
+
"eval_full_en_cosine_recall@50": 0.254129727850083,
|
249 |
+
"eval_runtime": 1.5752,
|
250 |
+
"eval_samples_per_second": 0.0,
|
251 |
+
"eval_sequential_score": 0.5352292016764449,
|
252 |
+
"eval_steps_per_second": 0.0,
|
253 |
+
"step": 800
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"epoch": 1.0044642857142858,
|
257 |
+
"grad_norm": 13.140974998474121,
|
258 |
+
"learning_rate": 4.212875939849624e-05,
|
259 |
+
"loss": 1.6921,
|
260 |
+
"step": 900
|
261 |
+
},
|
262 |
+
{
|
263 |
+
"epoch": 1.1160714285714286,
|
264 |
+
"grad_norm": 12.160736083984375,
|
265 |
+
"learning_rate": 4.096569548872181e-05,
|
266 |
+
"loss": 1.3861,
|
267 |
+
"step": 1000
|
268 |
+
},
|
269 |
+
{
|
270 |
+
"epoch": 1.1160714285714286,
|
271 |
+
"eval_full_en_cosine_accuracy@1": 0.7401315789473685,
|
272 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
273 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
274 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
275 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
276 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
277 |
+
"eval_full_en_cosine_map@1": 0.7401315789473685,
|
278 |
+
"eval_full_en_cosine_map@100": 0.21155466872463927,
|
279 |
+
"eval_full_en_cosine_map@150": 0.2291636549745022,
|
280 |
+
"eval_full_en_cosine_map@20": 0.3373673798048492,
|
281 |
+
"eval_full_en_cosine_map@200": 0.24905074192004603,
|
282 |
+
"eval_full_en_cosine_map@50": 0.2376950112180141,
|
283 |
+
"eval_full_en_cosine_map@500": 0.3006802538137734,
|
284 |
+
"eval_full_en_cosine_mrr@1": 0.7401315789473685,
|
285 |
+
"eval_full_en_cosine_mrr@100": 0.8405236576289212,
|
286 |
+
"eval_full_en_cosine_mrr@150": 0.8405236576289212,
|
287 |
+
"eval_full_en_cosine_mrr@20": 0.8403143274853806,
|
288 |
+
"eval_full_en_cosine_mrr@200": 0.8405236576289212,
|
289 |
+
"eval_full_en_cosine_mrr@50": 0.840463849016481,
|
290 |
+
"eval_full_en_cosine_ndcg@1": 0.7401315789473685,
|
291 |
+
"eval_full_en_cosine_ndcg@100": 0.44212858816477746,
|
292 |
+
"eval_full_en_cosine_ndcg@150": 0.48946706445562127,
|
293 |
+
"eval_full_en_cosine_ndcg@20": 0.5332180756481385,
|
294 |
+
"eval_full_en_cosine_ndcg@200": 0.5367929588661781,
|
295 |
+
"eval_full_en_cosine_ndcg@50": 0.44979391873656477,
|
296 |
+
"eval_full_en_cosine_precision@1": 0.7401315789473685,
|
297 |
+
"eval_full_en_cosine_precision@100": 0.3114473684210526,
|
298 |
+
"eval_full_en_cosine_precision@150": 0.266469298245614,
|
299 |
+
"eval_full_en_cosine_precision@20": 0.49243421052631575,
|
300 |
+
"eval_full_en_cosine_precision@200": 0.2345888157894737,
|
301 |
+
"eval_full_en_cosine_precision@50": 0.3921052631578947,
|
302 |
+
"eval_full_en_cosine_recall@1": 0.010392607884295562,
|
303 |
+
"eval_full_en_cosine_recall@100": 0.3933254279416559,
|
304 |
+
"eval_full_en_cosine_recall@150": 0.4957503189606009,
|
305 |
+
"eval_full_en_cosine_recall@20": 0.13107623492706288,
|
306 |
+
"eval_full_en_cosine_recall@200": 0.5753954619760326,
|
307 |
+
"eval_full_en_cosine_recall@50": 0.2539746341397596,
|
308 |
+
"eval_runtime": 1.6397,
|
309 |
+
"eval_samples_per_second": 0.0,
|
310 |
+
"eval_sequential_score": 0.5367929588661781,
|
311 |
+
"eval_steps_per_second": 0.0,
|
312 |
+
"step": 1000
|
313 |
+
},
|
314 |
+
{
|
315 |
+
"epoch": 1.2276785714285714,
|
316 |
+
"grad_norm": 13.078369140625,
|
317 |
+
"learning_rate": 3.9790883458646615e-05,
|
318 |
+
"loss": 1.3863,
|
319 |
+
"step": 1100
|
320 |
+
},
|
321 |
+
{
|
322 |
+
"epoch": 1.3392857142857144,
|
323 |
+
"grad_norm": 11.990692138671875,
|
324 |
+
"learning_rate": 3.861607142857143e-05,
|
325 |
+
"loss": 1.3546,
|
326 |
+
"step": 1200
|
327 |
+
},
|
328 |
+
{
|
329 |
+
"epoch": 1.3392857142857144,
|
330 |
+
"eval_full_en_cosine_accuracy@1": 0.7203947368421053,
|
331 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
332 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
333 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
334 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
335 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
336 |
+
"eval_full_en_cosine_map@1": 0.7203947368421053,
|
337 |
+
"eval_full_en_cosine_map@100": 0.20552277525856266,
|
338 |
+
"eval_full_en_cosine_map@150": 0.22274311961933413,
|
339 |
+
"eval_full_en_cosine_map@20": 0.3363904557549852,
|
340 |
+
"eval_full_en_cosine_map@200": 0.24106738760441354,
|
341 |
+
"eval_full_en_cosine_map@50": 0.23370113464760453,
|
342 |
+
"eval_full_en_cosine_map@500": 0.28981293048421486,
|
343 |
+
"eval_full_en_cosine_mrr@1": 0.7203947368421053,
|
344 |
+
"eval_full_en_cosine_mrr@100": 0.8322617799738206,
|
345 |
+
"eval_full_en_cosine_mrr@150": 0.8322617799738206,
|
346 |
+
"eval_full_en_cosine_mrr@20": 0.8320620443153339,
|
347 |
+
"eval_full_en_cosine_mrr@200": 0.8322617799738206,
|
348 |
+
"eval_full_en_cosine_mrr@50": 0.8322050649102997,
|
349 |
+
"eval_full_en_cosine_ndcg@1": 0.7203947368421053,
|
350 |
+
"eval_full_en_cosine_ndcg@100": 0.43445871937106545,
|
351 |
+
"eval_full_en_cosine_ndcg@150": 0.48130417146010107,
|
352 |
+
"eval_full_en_cosine_ndcg@20": 0.531477407982968,
|
353 |
+
"eval_full_en_cosine_ndcg@200": 0.5259375639543232,
|
354 |
+
"eval_full_en_cosine_ndcg@50": 0.4444057356887903,
|
355 |
+
"eval_full_en_cosine_precision@1": 0.7203947368421053,
|
356 |
+
"eval_full_en_cosine_precision@100": 0.3039802631578947,
|
357 |
+
"eval_full_en_cosine_precision@150": 0.25999999999999995,
|
358 |
+
"eval_full_en_cosine_precision@20": 0.4925986842105263,
|
359 |
+
"eval_full_en_cosine_precision@200": 0.22763157894736838,
|
360 |
+
"eval_full_en_cosine_precision@50": 0.3867105263157895,
|
361 |
+
"eval_full_en_cosine_recall@1": 0.010318104890368607,
|
362 |
+
"eval_full_en_cosine_recall@100": 0.385615965839615,
|
363 |
+
"eval_full_en_cosine_recall@150": 0.48656381032984825,
|
364 |
+
"eval_full_en_cosine_recall@20": 0.13139326985918445,
|
365 |
+
"eval_full_en_cosine_recall@200": 0.5617757383007209,
|
366 |
+
"eval_full_en_cosine_recall@50": 0.2506285703289517,
|
367 |
+
"eval_runtime": 1.5585,
|
368 |
+
"eval_samples_per_second": 0.0,
|
369 |
+
"eval_sequential_score": 0.5259375639543232,
|
370 |
+
"eval_steps_per_second": 0.0,
|
371 |
+
"step": 1200
|
372 |
+
},
|
373 |
+
{
|
374 |
+
"epoch": 1.4508928571428572,
|
375 |
+
"grad_norm": 15.019533157348633,
|
376 |
+
"learning_rate": 3.744125939849624e-05,
|
377 |
+
"loss": 1.373,
|
378 |
+
"step": 1300
|
379 |
+
},
|
380 |
+
{
|
381 |
+
"epoch": 1.5625,
|
382 |
+
"grad_norm": 10.545878410339355,
|
383 |
+
"learning_rate": 3.626644736842105e-05,
|
384 |
+
"loss": 1.3364,
|
385 |
+
"step": 1400
|
386 |
+
},
|
387 |
+
{
|
388 |
+
"epoch": 1.5625,
|
389 |
+
"eval_full_en_cosine_accuracy@1": 0.7171052631578947,
|
390 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
391 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
392 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
393 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
394 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
395 |
+
"eval_full_en_cosine_map@1": 0.7171052631578947,
|
396 |
+
"eval_full_en_cosine_map@100": 0.20833018055660496,
|
397 |
+
"eval_full_en_cosine_map@150": 0.22583322401021033,
|
398 |
+
"eval_full_en_cosine_map@20": 0.34006318172507877,
|
399 |
+
"eval_full_en_cosine_map@200": 0.24462161151730188,
|
400 |
+
"eval_full_en_cosine_map@50": 0.23483789231739935,
|
401 |
+
"eval_full_en_cosine_map@500": 0.2946124561805931,
|
402 |
+
"eval_full_en_cosine_mrr@1": 0.7171052631578947,
|
403 |
+
"eval_full_en_cosine_mrr@100": 0.8267713172687238,
|
404 |
+
"eval_full_en_cosine_mrr@150": 0.8267713172687238,
|
405 |
+
"eval_full_en_cosine_mrr@20": 0.8265913362952838,
|
406 |
+
"eval_full_en_cosine_mrr@200": 0.8267713172687238,
|
407 |
+
"eval_full_en_cosine_mrr@50": 0.8267343568902494,
|
408 |
+
"eval_full_en_cosine_ndcg@1": 0.7171052631578947,
|
409 |
+
"eval_full_en_cosine_ndcg@100": 0.4377486787968229,
|
410 |
+
"eval_full_en_cosine_ndcg@150": 0.4850669425848544,
|
411 |
+
"eval_full_en_cosine_ndcg@20": 0.5331724259953773,
|
412 |
+
"eval_full_en_cosine_ndcg@200": 0.5302927064126869,
|
413 |
+
"eval_full_en_cosine_ndcg@50": 0.4451308688476405,
|
414 |
+
"eval_full_en_cosine_precision@1": 0.7171052631578947,
|
415 |
+
"eval_full_en_cosine_precision@100": 0.3074671052631579,
|
416 |
+
"eval_full_en_cosine_precision@150": 0.2625657894736842,
|
417 |
+
"eval_full_en_cosine_precision@20": 0.4947368421052632,
|
418 |
+
"eval_full_en_cosine_precision@200": 0.23016447368421053,
|
419 |
+
"eval_full_en_cosine_precision@50": 0.38769736842105257,
|
420 |
+
"eval_full_en_cosine_recall@1": 0.010208074045806198,
|
421 |
+
"eval_full_en_cosine_recall@100": 0.3902466549235702,
|
422 |
+
"eval_full_en_cosine_recall@150": 0.49226776551348056,
|
423 |
+
"eval_full_en_cosine_recall@20": 0.13255572846134298,
|
424 |
+
"eval_full_en_cosine_recall@200": 0.5680994353864672,
|
425 |
+
"eval_full_en_cosine_recall@50": 0.25126941591084845,
|
426 |
+
"eval_runtime": 1.5595,
|
427 |
+
"eval_samples_per_second": 0.0,
|
428 |
+
"eval_sequential_score": 0.5302927064126869,
|
429 |
+
"eval_steps_per_second": 0.0,
|
430 |
+
"step": 1400
|
431 |
+
},
|
432 |
+
{
|
433 |
+
"epoch": 1.6741071428571428,
|
434 |
+
"grad_norm": 18.495975494384766,
|
435 |
+
"learning_rate": 3.509163533834587e-05,
|
436 |
+
"loss": 1.2876,
|
437 |
+
"step": 1500
|
438 |
+
},
|
439 |
+
{
|
440 |
+
"epoch": 1.7857142857142856,
|
441 |
+
"grad_norm": 12.646751403808594,
|
442 |
+
"learning_rate": 3.391682330827068e-05,
|
443 |
+
"loss": 1.3094,
|
444 |
+
"step": 1600
|
445 |
+
},
|
446 |
+
{
|
447 |
+
"epoch": 1.7857142857142856,
|
448 |
+
"eval_full_en_cosine_accuracy@1": 0.7072368421052632,
|
449 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
450 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
451 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
452 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
453 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
454 |
+
"eval_full_en_cosine_map@1": 0.7072368421052632,
|
455 |
+
"eval_full_en_cosine_map@100": 0.20923239071614674,
|
456 |
+
"eval_full_en_cosine_map@150": 0.225604138471006,
|
457 |
+
"eval_full_en_cosine_map@20": 0.34034356587585846,
|
458 |
+
"eval_full_en_cosine_map@200": 0.24539737099429304,
|
459 |
+
"eval_full_en_cosine_map@50": 0.23464702413938254,
|
460 |
+
"eval_full_en_cosine_map@500": 0.29597166286299953,
|
461 |
+
"eval_full_en_cosine_mrr@1": 0.7072368421052632,
|
462 |
+
"eval_full_en_cosine_mrr@100": 0.8214137967940215,
|
463 |
+
"eval_full_en_cosine_mrr@150": 0.8214137967940215,
|
464 |
+
"eval_full_en_cosine_mrr@20": 0.8213699371448987,
|
465 |
+
"eval_full_en_cosine_mrr@200": 0.8214137967940215,
|
466 |
+
"eval_full_en_cosine_mrr@50": 0.8213699371448987,
|
467 |
+
"eval_full_en_cosine_ndcg@1": 0.7072368421052632,
|
468 |
+
"eval_full_en_cosine_ndcg@100": 0.4396726832556684,
|
469 |
+
"eval_full_en_cosine_ndcg@150": 0.4847816359827512,
|
470 |
+
"eval_full_en_cosine_ndcg@20": 0.532792025753163,
|
471 |
+
"eval_full_en_cosine_ndcg@200": 0.5323403273572274,
|
472 |
+
"eval_full_en_cosine_ndcg@50": 0.4452189433184465,
|
473 |
+
"eval_full_en_cosine_precision@1": 0.7072368421052632,
|
474 |
+
"eval_full_en_cosine_precision@100": 0.3098026315789474,
|
475 |
+
"eval_full_en_cosine_precision@150": 0.26274122807017547,
|
476 |
+
"eval_full_en_cosine_precision@20": 0.4935855263157895,
|
477 |
+
"eval_full_en_cosine_precision@200": 0.23192434210526314,
|
478 |
+
"eval_full_en_cosine_precision@50": 0.38763157894736844,
|
479 |
+
"eval_full_en_cosine_recall@1": 0.010122149362902188,
|
480 |
+
"eval_full_en_cosine_recall@100": 0.39236988612007834,
|
481 |
+
"eval_full_en_cosine_recall@150": 0.4910778378543689,
|
482 |
+
"eval_full_en_cosine_recall@20": 0.13108496301513997,
|
483 |
+
"eval_full_en_cosine_recall@200": 0.5709689534914331,
|
484 |
+
"eval_full_en_cosine_recall@50": 0.25093448303772187,
|
485 |
+
"eval_runtime": 1.5873,
|
486 |
+
"eval_samples_per_second": 0.0,
|
487 |
+
"eval_sequential_score": 0.5323403273572274,
|
488 |
+
"eval_steps_per_second": 0.0,
|
489 |
+
"step": 1600
|
490 |
+
},
|
491 |
+
{
|
492 |
+
"epoch": 1.8973214285714286,
|
493 |
+
"grad_norm": 11.858412742614746,
|
494 |
+
"learning_rate": 3.274201127819549e-05,
|
495 |
+
"loss": 1.2784,
|
496 |
+
"step": 1700
|
497 |
+
},
|
498 |
+
{
|
499 |
+
"epoch": 2.0089285714285716,
|
500 |
+
"grad_norm": 11.152688026428223,
|
501 |
+
"learning_rate": 3.1567199248120306e-05,
|
502 |
+
"loss": 1.2204,
|
503 |
+
"step": 1800
|
504 |
+
},
|
505 |
+
{
|
506 |
+
"epoch": 2.0089285714285716,
|
507 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
508 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
509 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
510 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
511 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
512 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
513 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
514 |
+
"eval_full_en_cosine_map@100": 0.2098412194483687,
|
515 |
+
"eval_full_en_cosine_map@150": 0.22663911455304064,
|
516 |
+
"eval_full_en_cosine_map@20": 0.3433147887298301,
|
517 |
+
"eval_full_en_cosine_map@200": 0.24620266722190678,
|
518 |
+
"eval_full_en_cosine_map@50": 0.23714915519951082,
|
519 |
+
"eval_full_en_cosine_map@500": 0.29690932859887553,
|
520 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
521 |
+
"eval_full_en_cosine_mrr@100": 0.8394024772357531,
|
522 |
+
"eval_full_en_cosine_mrr@150": 0.8394024772357531,
|
523 |
+
"eval_full_en_cosine_mrr@20": 0.8393426686233129,
|
524 |
+
"eval_full_en_cosine_mrr@200": 0.8394024772357531,
|
525 |
+
"eval_full_en_cosine_mrr@50": 0.8393426686233129,
|
526 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
527 |
+
"eval_full_en_cosine_ndcg@100": 0.4396519841053572,
|
528 |
+
"eval_full_en_cosine_ndcg@150": 0.4856325134708184,
|
529 |
+
"eval_full_en_cosine_ndcg@20": 0.5375317893335387,
|
530 |
+
"eval_full_en_cosine_ndcg@200": 0.533015167774829,
|
531 |
+
"eval_full_en_cosine_ndcg@50": 0.44810398395306655,
|
532 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
533 |
+
"eval_full_en_cosine_precision@100": 0.3084539473684211,
|
534 |
+
"eval_full_en_cosine_precision@150": 0.2627631578947368,
|
535 |
+
"eval_full_en_cosine_precision@20": 0.49769736842105267,
|
536 |
+
"eval_full_en_cosine_precision@200": 0.2314309210526316,
|
537 |
+
"eval_full_en_cosine_precision@50": 0.3891447368421053,
|
538 |
+
"eval_full_en_cosine_recall@1": 0.010440810366523372,
|
539 |
+
"eval_full_en_cosine_recall@100": 0.39036009395952986,
|
540 |
+
"eval_full_en_cosine_recall@150": 0.49041982254882954,
|
541 |
+
"eval_full_en_cosine_recall@20": 0.13228070304056636,
|
542 |
+
"eval_full_en_cosine_recall@200": 0.5704962189819233,
|
543 |
+
"eval_full_en_cosine_recall@50": 0.25248213212752935,
|
544 |
+
"eval_runtime": 1.6049,
|
545 |
+
"eval_samples_per_second": 0.0,
|
546 |
+
"eval_sequential_score": 0.533015167774829,
|
547 |
+
"eval_steps_per_second": 0.0,
|
548 |
+
"step": 1800
|
549 |
+
},
|
550 |
+
{
|
551 |
+
"epoch": 2.1205357142857144,
|
552 |
+
"grad_norm": 12.91015625,
|
553 |
+
"learning_rate": 3.0392387218045114e-05,
|
554 |
+
"loss": 0.9617,
|
555 |
+
"step": 1900
|
556 |
+
},
|
557 |
+
{
|
558 |
+
"epoch": 2.232142857142857,
|
559 |
+
"grad_norm": 11.646313667297363,
|
560 |
+
"learning_rate": 2.9217575187969924e-05,
|
561 |
+
"loss": 1.0004,
|
562 |
+
"step": 2000
|
563 |
+
},
|
564 |
+
{
|
565 |
+
"epoch": 2.232142857142857,
|
566 |
+
"eval_full_en_cosine_accuracy@1": 0.7236842105263158,
|
567 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
568 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
569 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
570 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
571 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
572 |
+
"eval_full_en_cosine_map@1": 0.7236842105263158,
|
573 |
+
"eval_full_en_cosine_map@100": 0.20775225168018954,
|
574 |
+
"eval_full_en_cosine_map@150": 0.22393096419950168,
|
575 |
+
"eval_full_en_cosine_map@20": 0.3380596885262807,
|
576 |
+
"eval_full_en_cosine_map@200": 0.24259765295506924,
|
577 |
+
"eval_full_en_cosine_map@50": 0.23452814948810471,
|
578 |
+
"eval_full_en_cosine_map@500": 0.2920026964508484,
|
579 |
+
"eval_full_en_cosine_mrr@1": 0.7236842105263158,
|
580 |
+
"eval_full_en_cosine_mrr@100": 0.8325452625382137,
|
581 |
+
"eval_full_en_cosine_mrr@150": 0.8325452625382137,
|
582 |
+
"eval_full_en_cosine_mrr@20": 0.8324781304222094,
|
583 |
+
"eval_full_en_cosine_mrr@200": 0.8325452625382137,
|
584 |
+
"eval_full_en_cosine_mrr@50": 0.8325452625382137,
|
585 |
+
"eval_full_en_cosine_ndcg@1": 0.7236842105263158,
|
586 |
+
"eval_full_en_cosine_ndcg@100": 0.4376001104057169,
|
587 |
+
"eval_full_en_cosine_ndcg@150": 0.48181431955382,
|
588 |
+
"eval_full_en_cosine_ndcg@20": 0.5323035546433559,
|
589 |
+
"eval_full_en_cosine_ndcg@200": 0.5276663014224582,
|
590 |
+
"eval_full_en_cosine_ndcg@50": 0.44660441452063837,
|
591 |
+
"eval_full_en_cosine_precision@1": 0.7236842105263158,
|
592 |
+
"eval_full_en_cosine_precision@100": 0.30644736842105263,
|
593 |
+
"eval_full_en_cosine_precision@150": 0.259890350877193,
|
594 |
+
"eval_full_en_cosine_precision@20": 0.4916118421052632,
|
595 |
+
"eval_full_en_cosine_precision@200": 0.2280921052631579,
|
596 |
+
"eval_full_en_cosine_precision@50": 0.3886842105263158,
|
597 |
+
"eval_full_en_cosine_recall@1": 0.010329446437905086,
|
598 |
+
"eval_full_en_cosine_recall@100": 0.38885062846601265,
|
599 |
+
"eval_full_en_cosine_recall@150": 0.4854595951837256,
|
600 |
+
"eval_full_en_cosine_recall@20": 0.131078016933875,
|
601 |
+
"eval_full_en_cosine_recall@200": 0.5630724982932908,
|
602 |
+
"eval_full_en_cosine_recall@50": 0.252357645205228,
|
603 |
+
"eval_runtime": 1.5613,
|
604 |
+
"eval_samples_per_second": 0.0,
|
605 |
+
"eval_sequential_score": 0.5276663014224582,
|
606 |
+
"eval_steps_per_second": 0.0,
|
607 |
+
"step": 2000
|
608 |
+
},
|
609 |
+
{
|
610 |
+
"epoch": 2.34375,
|
611 |
+
"grad_norm": 12.087961196899414,
|
612 |
+
"learning_rate": 2.8042763157894735e-05,
|
613 |
+
"loss": 0.9694,
|
614 |
+
"step": 2100
|
615 |
+
},
|
616 |
+
{
|
617 |
+
"epoch": 2.455357142857143,
|
618 |
+
"grad_norm": 8.181659698486328,
|
619 |
+
"learning_rate": 2.6867951127819552e-05,
|
620 |
+
"loss": 0.9843,
|
621 |
+
"step": 2200
|
622 |
+
},
|
623 |
+
{
|
624 |
+
"epoch": 2.455357142857143,
|
625 |
+
"eval_full_en_cosine_accuracy@1": 0.7236842105263158,
|
626 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
627 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
628 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
629 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
630 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
631 |
+
"eval_full_en_cosine_map@1": 0.7236842105263158,
|
632 |
+
"eval_full_en_cosine_map@100": 0.209953160245849,
|
633 |
+
"eval_full_en_cosine_map@150": 0.22760030144833215,
|
634 |
+
"eval_full_en_cosine_map@20": 0.34078157961918865,
|
635 |
+
"eval_full_en_cosine_map@200": 0.24749824184265867,
|
636 |
+
"eval_full_en_cosine_map@50": 0.2365248444512811,
|
637 |
+
"eval_full_en_cosine_map@500": 0.29789431690676116,
|
638 |
+
"eval_full_en_cosine_mrr@1": 0.7236842105263158,
|
639 |
+
"eval_full_en_cosine_mrr@100": 0.8318935359231412,
|
640 |
+
"eval_full_en_cosine_mrr@150": 0.8318935359231412,
|
641 |
+
"eval_full_en_cosine_mrr@20": 0.8316833751044278,
|
642 |
+
"eval_full_en_cosine_mrr@200": 0.8318935359231412,
|
643 |
+
"eval_full_en_cosine_mrr@50": 0.8318935359231412,
|
644 |
+
"eval_full_en_cosine_ndcg@1": 0.7236842105263158,
|
645 |
+
"eval_full_en_cosine_ndcg@100": 0.44076958126493176,
|
646 |
+
"eval_full_en_cosine_ndcg@150": 0.48838061313116793,
|
647 |
+
"eval_full_en_cosine_ndcg@20": 0.5350320556020238,
|
648 |
+
"eval_full_en_cosine_ndcg@200": 0.5355574509263721,
|
649 |
+
"eval_full_en_cosine_ndcg@50": 0.44803994906340594,
|
650 |
+
"eval_full_en_cosine_precision@1": 0.7236842105263158,
|
651 |
+
"eval_full_en_cosine_precision@100": 0.3099671052631579,
|
652 |
+
"eval_full_en_cosine_precision@150": 0.2648464912280702,
|
653 |
+
"eval_full_en_cosine_precision@20": 0.49588815789473684,
|
654 |
+
"eval_full_en_cosine_precision@200": 0.23342105263157892,
|
655 |
+
"eval_full_en_cosine_precision@50": 0.39052631578947367,
|
656 |
+
"eval_full_en_cosine_recall@1": 0.010284539147879572,
|
657 |
+
"eval_full_en_cosine_recall@100": 0.39296182819932773,
|
658 |
+
"eval_full_en_cosine_recall@150": 0.4959148528891931,
|
659 |
+
"eval_full_en_cosine_recall@20": 0.13200577828629578,
|
660 |
+
"eval_full_en_cosine_recall@200": 0.5749370249014907,
|
661 |
+
"eval_full_en_cosine_recall@50": 0.25310992970173135,
|
662 |
+
"eval_runtime": 1.8632,
|
663 |
+
"eval_samples_per_second": 0.0,
|
664 |
+
"eval_sequential_score": 0.5355574509263721,
|
665 |
+
"eval_steps_per_second": 0.0,
|
666 |
+
"step": 2200
|
667 |
+
},
|
668 |
+
{
|
669 |
+
"epoch": 2.5669642857142856,
|
670 |
+
"grad_norm": 13.6882905960083,
|
671 |
+
"learning_rate": 2.5693139097744363e-05,
|
672 |
+
"loss": 0.9743,
|
673 |
+
"step": 2300
|
674 |
+
},
|
675 |
+
{
|
676 |
+
"epoch": 2.678571428571429,
|
677 |
+
"grad_norm": 11.966975212097168,
|
678 |
+
"learning_rate": 2.4518327067669177e-05,
|
679 |
+
"loss": 0.9252,
|
680 |
+
"step": 2400
|
681 |
+
},
|
682 |
+
{
|
683 |
+
"epoch": 2.678571428571429,
|
684 |
+
"eval_full_en_cosine_accuracy@1": 0.7335526315789473,
|
685 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
686 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
687 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
688 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
689 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
690 |
+
"eval_full_en_cosine_map@1": 0.7335526315789473,
|
691 |
+
"eval_full_en_cosine_map@100": 0.20983286336268822,
|
692 |
+
"eval_full_en_cosine_map@150": 0.22675852672419078,
|
693 |
+
"eval_full_en_cosine_map@20": 0.34004090105732804,
|
694 |
+
"eval_full_en_cosine_map@200": 0.24584993568226646,
|
695 |
+
"eval_full_en_cosine_map@50": 0.23672594782424658,
|
696 |
+
"eval_full_en_cosine_map@500": 0.29632183596698103,
|
697 |
+
"eval_full_en_cosine_mrr@1": 0.7335526315789473,
|
698 |
+
"eval_full_en_cosine_mrr@100": 0.83135268727374,
|
699 |
+
"eval_full_en_cosine_mrr@150": 0.83135268727374,
|
700 |
+
"eval_full_en_cosine_mrr@20": 0.8311351294903929,
|
701 |
+
"eval_full_en_cosine_mrr@200": 0.83135268727374,
|
702 |
+
"eval_full_en_cosine_mrr@50": 0.8312917710944029,
|
703 |
+
"eval_full_en_cosine_ndcg@1": 0.7335526315789473,
|
704 |
+
"eval_full_en_cosine_ndcg@100": 0.4400577813719261,
|
705 |
+
"eval_full_en_cosine_ndcg@150": 0.4859220111165228,
|
706 |
+
"eval_full_en_cosine_ndcg@20": 0.5344170691501652,
|
707 |
+
"eval_full_en_cosine_ndcg@200": 0.5320416498978522,
|
708 |
+
"eval_full_en_cosine_ndcg@50": 0.4485020943766835,
|
709 |
+
"eval_full_en_cosine_precision@1": 0.7335526315789473,
|
710 |
+
"eval_full_en_cosine_precision@100": 0.30907894736842106,
|
711 |
+
"eval_full_en_cosine_precision@150": 0.26278508771929826,
|
712 |
+
"eval_full_en_cosine_precision@20": 0.4960526315789474,
|
713 |
+
"eval_full_en_cosine_precision@200": 0.23090460526315787,
|
714 |
+
"eval_full_en_cosine_precision@50": 0.39151315789473684,
|
715 |
+
"eval_full_en_cosine_recall@1": 0.010402156873475942,
|
716 |
+
"eval_full_en_cosine_recall@100": 0.39206565501916524,
|
717 |
+
"eval_full_en_cosine_recall@150": 0.49176955829136443,
|
718 |
+
"eval_full_en_cosine_recall@20": 0.1321996647113643,
|
719 |
+
"eval_full_en_cosine_recall@200": 0.569344104113959,
|
720 |
+
"eval_full_en_cosine_recall@50": 0.2535254041631645,
|
721 |
+
"eval_runtime": 1.5826,
|
722 |
+
"eval_samples_per_second": 0.0,
|
723 |
+
"eval_sequential_score": 0.5320416498978522,
|
724 |
+
"eval_steps_per_second": 0.0,
|
725 |
+
"step": 2400
|
726 |
+
},
|
727 |
+
{
|
728 |
+
"epoch": 2.790178571428571,
|
729 |
+
"grad_norm": 11.857823371887207,
|
730 |
+
"learning_rate": 2.3343515037593984e-05,
|
731 |
+
"loss": 0.9272,
|
732 |
+
"step": 2500
|
733 |
+
},
|
734 |
+
{
|
735 |
+
"epoch": 2.9017857142857144,
|
736 |
+
"grad_norm": 12.297764778137207,
|
737 |
+
"learning_rate": 2.2168703007518798e-05,
|
738 |
+
"loss": 0.9279,
|
739 |
+
"step": 2600
|
740 |
+
},
|
741 |
+
{
|
742 |
+
"epoch": 2.9017857142857144,
|
743 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
744 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
745 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
746 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
747 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
748 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
749 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
750 |
+
"eval_full_en_cosine_map@100": 0.20939105710550232,
|
751 |
+
"eval_full_en_cosine_map@150": 0.22725165687553775,
|
752 |
+
"eval_full_en_cosine_map@20": 0.3403680329074837,
|
753 |
+
"eval_full_en_cosine_map@200": 0.24658865195474836,
|
754 |
+
"eval_full_en_cosine_map@50": 0.23612691752121232,
|
755 |
+
"eval_full_en_cosine_map@500": 0.29718900909315255,
|
756 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
757 |
+
"eval_full_en_cosine_mrr@100": 0.8391709003546018,
|
758 |
+
"eval_full_en_cosine_mrr@150": 0.8391709003546018,
|
759 |
+
"eval_full_en_cosine_mrr@20": 0.8391064008705977,
|
760 |
+
"eval_full_en_cosine_mrr@200": 0.8391709003546018,
|
761 |
+
"eval_full_en_cosine_mrr@50": 0.8391064008705977,
|
762 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
763 |
+
"eval_full_en_cosine_ndcg@100": 0.4389185422351881,
|
764 |
+
"eval_full_en_cosine_ndcg@150": 0.4868646893605612,
|
765 |
+
"eval_full_en_cosine_ndcg@20": 0.5359014833764041,
|
766 |
+
"eval_full_en_cosine_ndcg@200": 0.5332804255738979,
|
767 |
+
"eval_full_en_cosine_ndcg@50": 0.44749591453362436,
|
768 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
769 |
+
"eval_full_en_cosine_precision@100": 0.30779605263157894,
|
770 |
+
"eval_full_en_cosine_precision@150": 0.26355263157894737,
|
771 |
+
"eval_full_en_cosine_precision@20": 0.49588815789473684,
|
772 |
+
"eval_full_en_cosine_precision@200": 0.2316282894736842,
|
773 |
+
"eval_full_en_cosine_precision@50": 0.38901315789473684,
|
774 |
+
"eval_full_en_cosine_recall@1": 0.010425572953236805,
|
775 |
+
"eval_full_en_cosine_recall@100": 0.3892001066901767,
|
776 |
+
"eval_full_en_cosine_recall@150": 0.492569756570653,
|
777 |
+
"eval_full_en_cosine_recall@20": 0.13284603422933672,
|
778 |
+
"eval_full_en_cosine_recall@200": 0.5706210722984945,
|
779 |
+
"eval_full_en_cosine_recall@50": 0.2518705529759721,
|
780 |
+
"eval_runtime": 1.577,
|
781 |
+
"eval_samples_per_second": 0.0,
|
782 |
+
"eval_sequential_score": 0.5332804255738979,
|
783 |
+
"eval_steps_per_second": 0.0,
|
784 |
+
"step": 2600
|
785 |
+
},
|
786 |
+
{
|
787 |
+
"epoch": 3.013392857142857,
|
788 |
+
"grad_norm": 12.120986938476562,
|
789 |
+
"learning_rate": 2.099389097744361e-05,
|
790 |
+
"loss": 0.857,
|
791 |
+
"step": 2700
|
792 |
+
},
|
793 |
+
{
|
794 |
+
"epoch": 3.125,
|
795 |
+
"grad_norm": 14.276410102844238,
|
796 |
+
"learning_rate": 1.9819078947368423e-05,
|
797 |
+
"loss": 0.7313,
|
798 |
+
"step": 2800
|
799 |
+
},
|
800 |
+
{
|
801 |
+
"epoch": 3.125,
|
802 |
+
"eval_full_en_cosine_accuracy@1": 0.7269736842105263,
|
803 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
804 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
805 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
806 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
807 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
808 |
+
"eval_full_en_cosine_map@1": 0.7269736842105263,
|
809 |
+
"eval_full_en_cosine_map@100": 0.20830025965749158,
|
810 |
+
"eval_full_en_cosine_map@150": 0.22525408557521698,
|
811 |
+
"eval_full_en_cosine_map@20": 0.34094306993307805,
|
812 |
+
"eval_full_en_cosine_map@200": 0.24400549054611867,
|
813 |
+
"eval_full_en_cosine_map@50": 0.23400685602624646,
|
814 |
+
"eval_full_en_cosine_map@500": 0.29401532392219154,
|
815 |
+
"eval_full_en_cosine_mrr@1": 0.7269736842105263,
|
816 |
+
"eval_full_en_cosine_mrr@100": 0.8315051952798665,
|
817 |
+
"eval_full_en_cosine_mrr@150": 0.8315051952798665,
|
818 |
+
"eval_full_en_cosine_mrr@20": 0.8314268744778616,
|
819 |
+
"eval_full_en_cosine_mrr@200": 0.8315051952798665,
|
820 |
+
"eval_full_en_cosine_mrr@50": 0.8315051952798665,
|
821 |
+
"eval_full_en_cosine_ndcg@1": 0.7269736842105263,
|
822 |
+
"eval_full_en_cosine_ndcg@100": 0.43885977048304636,
|
823 |
+
"eval_full_en_cosine_ndcg@150": 0.48486671483618976,
|
824 |
+
"eval_full_en_cosine_ndcg@20": 0.5365677326031855,
|
825 |
+
"eval_full_en_cosine_ndcg@200": 0.5299990147795507,
|
826 |
+
"eval_full_en_cosine_ndcg@50": 0.44591298214905706,
|
827 |
+
"eval_full_en_cosine_precision@1": 0.7269736842105263,
|
828 |
+
"eval_full_en_cosine_precision@100": 0.308125,
|
829 |
+
"eval_full_en_cosine_precision@150": 0.2621052631578948,
|
830 |
+
"eval_full_en_cosine_precision@20": 0.49786184210526313,
|
831 |
+
"eval_full_en_cosine_precision@200": 0.22980263157894737,
|
832 |
+
"eval_full_en_cosine_precision@50": 0.3870394736842105,
|
833 |
+
"eval_full_en_cosine_recall@1": 0.010317820884117123,
|
834 |
+
"eval_full_en_cosine_recall@100": 0.38998825691236244,
|
835 |
+
"eval_full_en_cosine_recall@150": 0.4900687458798103,
|
836 |
+
"eval_full_en_cosine_recall@20": 0.13271573138828288,
|
837 |
+
"eval_full_en_cosine_recall@200": 0.5659226272090475,
|
838 |
+
"eval_full_en_cosine_recall@50": 0.25218483369820577,
|
839 |
+
"eval_runtime": 1.607,
|
840 |
+
"eval_samples_per_second": 0.0,
|
841 |
+
"eval_sequential_score": 0.5299990147795507,
|
842 |
+
"eval_steps_per_second": 0.0,
|
843 |
+
"step": 2800
|
844 |
+
},
|
845 |
+
{
|
846 |
+
"epoch": 3.236607142857143,
|
847 |
+
"grad_norm": 8.85190486907959,
|
848 |
+
"learning_rate": 1.8644266917293237e-05,
|
849 |
+
"loss": 0.7103,
|
850 |
+
"step": 2900
|
851 |
+
},
|
852 |
+
{
|
853 |
+
"epoch": 3.3482142857142856,
|
854 |
+
"grad_norm": 8.932626724243164,
|
855 |
+
"learning_rate": 1.7469454887218044e-05,
|
856 |
+
"loss": 0.7187,
|
857 |
+
"step": 3000
|
858 |
+
},
|
859 |
+
{
|
860 |
+
"epoch": 3.3482142857142856,
|
861 |
+
"eval_full_en_cosine_accuracy@1": 0.7269736842105263,
|
862 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
863 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
864 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
865 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
866 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
867 |
+
"eval_full_en_cosine_map@1": 0.7269736842105263,
|
868 |
+
"eval_full_en_cosine_map@100": 0.20842370079433947,
|
869 |
+
"eval_full_en_cosine_map@150": 0.22608431932756923,
|
870 |
+
"eval_full_en_cosine_map@20": 0.34026464907579207,
|
871 |
+
"eval_full_en_cosine_map@200": 0.2451065024940476,
|
872 |
+
"eval_full_en_cosine_map@50": 0.23418777403622906,
|
873 |
+
"eval_full_en_cosine_map@500": 0.2945476002258968,
|
874 |
+
"eval_full_en_cosine_mrr@1": 0.7269736842105263,
|
875 |
+
"eval_full_en_cosine_mrr@100": 0.8303256958684593,
|
876 |
+
"eval_full_en_cosine_mrr@150": 0.8303256958684593,
|
877 |
+
"eval_full_en_cosine_mrr@20": 0.830265887256019,
|
878 |
+
"eval_full_en_cosine_mrr@200": 0.8303256958684593,
|
879 |
+
"eval_full_en_cosine_mrr@50": 0.830265887256019,
|
880 |
+
"eval_full_en_cosine_ndcg@1": 0.7269736842105263,
|
881 |
+
"eval_full_en_cosine_ndcg@100": 0.4379203478644915,
|
882 |
+
"eval_full_en_cosine_ndcg@150": 0.4860723616469748,
|
883 |
+
"eval_full_en_cosine_ndcg@20": 0.534483012777908,
|
884 |
+
"eval_full_en_cosine_ndcg@200": 0.5318565059446251,
|
885 |
+
"eval_full_en_cosine_ndcg@50": 0.4443024102705765,
|
886 |
+
"eval_full_en_cosine_precision@1": 0.7269736842105263,
|
887 |
+
"eval_full_en_cosine_precision@100": 0.30750000000000005,
|
888 |
+
"eval_full_en_cosine_precision@150": 0.26370614035087714,
|
889 |
+
"eval_full_en_cosine_precision@20": 0.49588815789473684,
|
890 |
+
"eval_full_en_cosine_precision@200": 0.23116776315789475,
|
891 |
+
"eval_full_en_cosine_precision@50": 0.38539473684210523,
|
892 |
+
"eval_full_en_cosine_recall@1": 0.010298189290703101,
|
893 |
+
"eval_full_en_cosine_recall@100": 0.38891472258186655,
|
894 |
+
"eval_full_en_cosine_recall@150": 0.4925623824521817,
|
895 |
+
"eval_full_en_cosine_recall@20": 0.13215936080151625,
|
896 |
+
"eval_full_en_cosine_recall@200": 0.5698259119139981,
|
897 |
+
"eval_full_en_cosine_recall@50": 0.2502092759755724,
|
898 |
+
"eval_runtime": 1.6179,
|
899 |
+
"eval_samples_per_second": 0.0,
|
900 |
+
"eval_sequential_score": 0.5318565059446251,
|
901 |
+
"eval_steps_per_second": 0.0,
|
902 |
+
"step": 3000
|
903 |
+
},
|
904 |
+
{
|
905 |
+
"epoch": 3.4598214285714284,
|
906 |
+
"grad_norm": 12.761665344238281,
|
907 |
+
"learning_rate": 1.6294642857142858e-05,
|
908 |
+
"loss": 0.7067,
|
909 |
+
"step": 3100
|
910 |
+
},
|
911 |
+
{
|
912 |
+
"epoch": 3.571428571428571,
|
913 |
+
"grad_norm": 12.318887710571289,
|
914 |
+
"learning_rate": 1.5119830827067668e-05,
|
915 |
+
"loss": 0.7157,
|
916 |
+
"step": 3200
|
917 |
+
},
|
918 |
+
{
|
919 |
+
"epoch": 3.571428571428571,
|
920 |
+
"eval_full_en_cosine_accuracy@1": 0.7072368421052632,
|
921 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
922 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
923 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
924 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
925 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
926 |
+
"eval_full_en_cosine_map@1": 0.7072368421052632,
|
927 |
+
"eval_full_en_cosine_map@100": 0.21126096647489126,
|
928 |
+
"eval_full_en_cosine_map@150": 0.22897332387217115,
|
929 |
+
"eval_full_en_cosine_map@20": 0.34020926250086975,
|
930 |
+
"eval_full_en_cosine_map@200": 0.24883265008518762,
|
931 |
+
"eval_full_en_cosine_map@50": 0.2366562995235259,
|
932 |
+
"eval_full_en_cosine_map@500": 0.30009134506130936,
|
933 |
+
"eval_full_en_cosine_mrr@1": 0.7072368421052632,
|
934 |
+
"eval_full_en_cosine_mrr@100": 0.8208446325794724,
|
935 |
+
"eval_full_en_cosine_mrr@150": 0.8208446325794724,
|
936 |
+
"eval_full_en_cosine_mrr@20": 0.8206285125693021,
|
937 |
+
"eval_full_en_cosine_mrr@200": 0.8208446325794724,
|
938 |
+
"eval_full_en_cosine_mrr@50": 0.8208446325794724,
|
939 |
+
"eval_full_en_cosine_ndcg@1": 0.7072368421052632,
|
940 |
+
"eval_full_en_cosine_ndcg@100": 0.4420871692985379,
|
941 |
+
"eval_full_en_cosine_ndcg@150": 0.48983718804719595,
|
942 |
+
"eval_full_en_cosine_ndcg@20": 0.5349182539944062,
|
943 |
+
"eval_full_en_cosine_ndcg@200": 0.5368995914478877,
|
944 |
+
"eval_full_en_cosine_ndcg@50": 0.4481578438397021,
|
945 |
+
"eval_full_en_cosine_precision@1": 0.7072368421052632,
|
946 |
+
"eval_full_en_cosine_precision@100": 0.3118421052631579,
|
947 |
+
"eval_full_en_cosine_precision@150": 0.26625,
|
948 |
+
"eval_full_en_cosine_precision@20": 0.49786184210526313,
|
949 |
+
"eval_full_en_cosine_precision@200": 0.2341282894736842,
|
950 |
+
"eval_full_en_cosine_precision@50": 0.39125,
|
951 |
+
"eval_full_en_cosine_recall@1": 0.010071368365416018,
|
952 |
+
"eval_full_en_cosine_recall@100": 0.39435465355460575,
|
953 |
+
"eval_full_en_cosine_recall@150": 0.49776297598034985,
|
954 |
+
"eval_full_en_cosine_recall@20": 0.1332224887798492,
|
955 |
+
"eval_full_en_cosine_recall@200": 0.5769437157052201,
|
956 |
+
"eval_full_en_cosine_recall@50": 0.25406609475829245,
|
957 |
+
"eval_runtime": 1.5833,
|
958 |
+
"eval_samples_per_second": 0.0,
|
959 |
+
"eval_sequential_score": 0.5368995914478877,
|
960 |
+
"eval_steps_per_second": 0.0,
|
961 |
+
"step": 3200
|
962 |
+
},
|
963 |
+
{
|
964 |
+
"epoch": 3.6830357142857144,
|
965 |
+
"grad_norm": 10.974320411682129,
|
966 |
+
"learning_rate": 1.3945018796992482e-05,
|
967 |
+
"loss": 0.7113,
|
968 |
+
"step": 3300
|
969 |
+
},
|
970 |
+
{
|
971 |
+
"epoch": 3.794642857142857,
|
972 |
+
"grad_norm": 11.004631042480469,
|
973 |
+
"learning_rate": 1.2770206766917295e-05,
|
974 |
+
"loss": 0.7013,
|
975 |
+
"step": 3400
|
976 |
+
},
|
977 |
+
{
|
978 |
+
"epoch": 3.794642857142857,
|
979 |
+
"eval_full_en_cosine_accuracy@1": 0.7269736842105263,
|
980 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
981 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
982 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
983 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
984 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
985 |
+
"eval_full_en_cosine_map@1": 0.7269736842105263,
|
986 |
+
"eval_full_en_cosine_map@100": 0.20998333195374114,
|
987 |
+
"eval_full_en_cosine_map@150": 0.22683318021248486,
|
988 |
+
"eval_full_en_cosine_map@20": 0.34034679376659244,
|
989 |
+
"eval_full_en_cosine_map@200": 0.24654495691213385,
|
990 |
+
"eval_full_en_cosine_map@50": 0.23617479010012724,
|
991 |
+
"eval_full_en_cosine_map@500": 0.29617185416029185,
|
992 |
+
"eval_full_en_cosine_mrr@1": 0.7269736842105263,
|
993 |
+
"eval_full_en_cosine_mrr@100": 0.8291805255603549,
|
994 |
+
"eval_full_en_cosine_mrr@150": 0.8291805255603549,
|
995 |
+
"eval_full_en_cosine_mrr@20": 0.8291105367585632,
|
996 |
+
"eval_full_en_cosine_mrr@200": 0.8291805255603549,
|
997 |
+
"eval_full_en_cosine_mrr@50": 0.8291805255603549,
|
998 |
+
"eval_full_en_cosine_ndcg@1": 0.7269736842105263,
|
999 |
+
"eval_full_en_cosine_ndcg@100": 0.4407299508694298,
|
1000 |
+
"eval_full_en_cosine_ndcg@150": 0.48655314671133576,
|
1001 |
+
"eval_full_en_cosine_ndcg@20": 0.5349966588302529,
|
1002 |
+
"eval_full_en_cosine_ndcg@200": 0.5341334488223752,
|
1003 |
+
"eval_full_en_cosine_ndcg@50": 0.448065635044085,
|
1004 |
+
"eval_full_en_cosine_precision@1": 0.7269736842105263,
|
1005 |
+
"eval_full_en_cosine_precision@100": 0.30973684210526314,
|
1006 |
+
"eval_full_en_cosine_precision@150": 0.26320175438596494,
|
1007 |
+
"eval_full_en_cosine_precision@20": 0.4965460526315789,
|
1008 |
+
"eval_full_en_cosine_precision@200": 0.23210526315789473,
|
1009 |
+
"eval_full_en_cosine_precision@50": 0.3907894736842106,
|
1010 |
+
"eval_full_en_cosine_recall@1": 0.010311461817674684,
|
1011 |
+
"eval_full_en_cosine_recall@100": 0.3931693265429022,
|
1012 |
+
"eval_full_en_cosine_recall@150": 0.49300140763214356,
|
1013 |
+
"eval_full_en_cosine_recall@20": 0.1329270784727238,
|
1014 |
+
"eval_full_en_cosine_recall@200": 0.573228327517634,
|
1015 |
+
"eval_full_en_cosine_recall@50": 0.25330386821616296,
|
1016 |
+
"eval_runtime": 1.577,
|
1017 |
+
"eval_samples_per_second": 0.0,
|
1018 |
+
"eval_sequential_score": 0.5341334488223752,
|
1019 |
+
"eval_steps_per_second": 0.0,
|
1020 |
+
"step": 3400
|
1021 |
+
},
|
1022 |
+
{
|
1023 |
+
"epoch": 3.90625,
|
1024 |
+
"grad_norm": 12.102640151977539,
|
1025 |
+
"learning_rate": 1.1595394736842107e-05,
|
1026 |
+
"loss": 0.6903,
|
1027 |
+
"step": 3500
|
1028 |
+
},
|
1029 |
+
{
|
1030 |
+
"epoch": 4.017857142857143,
|
1031 |
+
"grad_norm": 7.348757743835449,
|
1032 |
+
"learning_rate": 1.0420582706766918e-05,
|
1033 |
+
"loss": 0.6462,
|
1034 |
+
"step": 3600
|
1035 |
+
},
|
1036 |
+
{
|
1037 |
+
"epoch": 4.017857142857143,
|
1038 |
+
"eval_full_en_cosine_accuracy@1": 0.7203947368421053,
|
1039 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
1040 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
1041 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
1042 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
1043 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
1044 |
+
"eval_full_en_cosine_map@1": 0.7203947368421053,
|
1045 |
+
"eval_full_en_cosine_map@100": 0.2102732775077637,
|
1046 |
+
"eval_full_en_cosine_map@150": 0.22767943965852241,
|
1047 |
+
"eval_full_en_cosine_map@20": 0.338502447126724,
|
1048 |
+
"eval_full_en_cosine_map@200": 0.24667619158922902,
|
1049 |
+
"eval_full_en_cosine_map@50": 0.23576300870587916,
|
1050 |
+
"eval_full_en_cosine_map@500": 0.2971463650911015,
|
1051 |
+
"eval_full_en_cosine_mrr@1": 0.7203947368421053,
|
1052 |
+
"eval_full_en_cosine_mrr@100": 0.8263833835420962,
|
1053 |
+
"eval_full_en_cosine_mrr@150": 0.8263833835420962,
|
1054 |
+
"eval_full_en_cosine_mrr@20": 0.8263213180008847,
|
1055 |
+
"eval_full_en_cosine_mrr@200": 0.8263833835420962,
|
1056 |
+
"eval_full_en_cosine_mrr@50": 0.8263213180008847,
|
1057 |
+
"eval_full_en_cosine_ndcg@1": 0.7203947368421053,
|
1058 |
+
"eval_full_en_cosine_ndcg@100": 0.44114478517461736,
|
1059 |
+
"eval_full_en_cosine_ndcg@150": 0.4883455168714466,
|
1060 |
+
"eval_full_en_cosine_ndcg@20": 0.53288860900767,
|
1061 |
+
"eval_full_en_cosine_ndcg@200": 0.5334866046140189,
|
1062 |
+
"eval_full_en_cosine_ndcg@50": 0.4473951526251337,
|
1063 |
+
"eval_full_en_cosine_precision@1": 0.7203947368421053,
|
1064 |
+
"eval_full_en_cosine_precision@100": 0.31078947368421056,
|
1065 |
+
"eval_full_en_cosine_precision@150": 0.265219298245614,
|
1066 |
+
"eval_full_en_cosine_precision@20": 0.4934210526315789,
|
1067 |
+
"eval_full_en_cosine_precision@200": 0.23212171052631578,
|
1068 |
+
"eval_full_en_cosine_precision@50": 0.3899342105263158,
|
1069 |
+
"eval_full_en_cosine_recall@1": 0.01018155854728512,
|
1070 |
+
"eval_full_en_cosine_recall@100": 0.3935816727444405,
|
1071 |
+
"eval_full_en_cosine_recall@150": 0.4958028561341766,
|
1072 |
+
"eval_full_en_cosine_recall@20": 0.13181077303144853,
|
1073 |
+
"eval_full_en_cosine_recall@200": 0.5716317929962068,
|
1074 |
+
"eval_full_en_cosine_recall@50": 0.25274553753777246,
|
1075 |
+
"eval_runtime": 1.6024,
|
1076 |
+
"eval_samples_per_second": 0.0,
|
1077 |
+
"eval_sequential_score": 0.5334866046140189,
|
1078 |
+
"eval_steps_per_second": 0.0,
|
1079 |
+
"step": 3600
|
1080 |
+
},
|
1081 |
+
{
|
1082 |
+
"epoch": 4.129464285714286,
|
1083 |
+
"grad_norm": 8.786450386047363,
|
1084 |
+
"learning_rate": 9.24577067669173e-06,
|
1085 |
+
"loss": 0.5162,
|
1086 |
+
"step": 3700
|
1087 |
+
},
|
1088 |
+
{
|
1089 |
+
"epoch": 4.241071428571429,
|
1090 |
+
"grad_norm": 10.602435111999512,
|
1091 |
+
"learning_rate": 8.070958646616542e-06,
|
1092 |
+
"loss": 0.524,
|
1093 |
+
"step": 3800
|
1094 |
+
},
|
1095 |
+
{
|
1096 |
+
"epoch": 4.241071428571429,
|
1097 |
+
"eval_full_en_cosine_accuracy@1": 0.7302631578947368,
|
1098 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
1099 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
1100 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
1101 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
1102 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
1103 |
+
"eval_full_en_cosine_map@1": 0.7302631578947368,
|
1104 |
+
"eval_full_en_cosine_map@100": 0.21150798737582682,
|
1105 |
+
"eval_full_en_cosine_map@150": 0.22868847990327232,
|
1106 |
+
"eval_full_en_cosine_map@20": 0.3411525812655742,
|
1107 |
+
"eval_full_en_cosine_map@200": 0.2480155691306444,
|
1108 |
+
"eval_full_en_cosine_map@50": 0.23814436251631807,
|
1109 |
+
"eval_full_en_cosine_map@500": 0.29792672341621373,
|
1110 |
+
"eval_full_en_cosine_mrr@1": 0.7302631578947368,
|
1111 |
+
"eval_full_en_cosine_mrr@100": 0.8323485085820613,
|
1112 |
+
"eval_full_en_cosine_mrr@150": 0.8323485085820613,
|
1113 |
+
"eval_full_en_cosine_mrr@20": 0.8321467731829576,
|
1114 |
+
"eval_full_en_cosine_mrr@200": 0.8323485085820613,
|
1115 |
+
"eval_full_en_cosine_mrr@50": 0.832296294714058,
|
1116 |
+
"eval_full_en_cosine_ndcg@1": 0.7302631578947368,
|
1117 |
+
"eval_full_en_cosine_ndcg@100": 0.44247378999755477,
|
1118 |
+
"eval_full_en_cosine_ndcg@150": 0.48886293038433404,
|
1119 |
+
"eval_full_en_cosine_ndcg@20": 0.5351701323930714,
|
1120 |
+
"eval_full_en_cosine_ndcg@200": 0.5352268343210608,
|
1121 |
+
"eval_full_en_cosine_ndcg@50": 0.4502625298651447,
|
1122 |
+
"eval_full_en_cosine_precision@1": 0.7302631578947368,
|
1123 |
+
"eval_full_en_cosine_precision@100": 0.311546052631579,
|
1124 |
+
"eval_full_en_cosine_precision@150": 0.265219298245614,
|
1125 |
+
"eval_full_en_cosine_precision@20": 0.49588815789473684,
|
1126 |
+
"eval_full_en_cosine_precision@200": 0.23268092105263163,
|
1127 |
+
"eval_full_en_cosine_precision@50": 0.3930921052631579,
|
1128 |
+
"eval_full_en_cosine_recall@1": 0.010244630514181254,
|
1129 |
+
"eval_full_en_cosine_recall@100": 0.39498767852245736,
|
1130 |
+
"eval_full_en_cosine_recall@150": 0.49574169519464223,
|
1131 |
+
"eval_full_en_cosine_recall@20": 0.1324589336710221,
|
1132 |
+
"eval_full_en_cosine_recall@200": 0.574019804020236,
|
1133 |
+
"eval_full_en_cosine_recall@50": 0.2548099607629461,
|
1134 |
+
"eval_runtime": 1.5919,
|
1135 |
+
"eval_samples_per_second": 0.0,
|
1136 |
+
"eval_sequential_score": 0.5352268343210608,
|
1137 |
+
"eval_steps_per_second": 0.0,
|
1138 |
+
"step": 3800
|
1139 |
+
},
|
1140 |
+
{
|
1141 |
+
"epoch": 4.352678571428571,
|
1142 |
+
"grad_norm": 11.65066909790039,
|
1143 |
+
"learning_rate": 6.896146616541354e-06,
|
1144 |
+
"loss": 0.5303,
|
1145 |
+
"step": 3900
|
1146 |
+
},
|
1147 |
+
{
|
1148 |
+
"epoch": 4.464285714285714,
|
1149 |
+
"grad_norm": 10.764215469360352,
|
1150 |
+
"learning_rate": 5.721334586466166e-06,
|
1151 |
+
"loss": 0.5269,
|
1152 |
+
"step": 4000
|
1153 |
+
},
|
1154 |
+
{
|
1155 |
+
"epoch": 4.464285714285714,
|
1156 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
1157 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
1158 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
1159 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
1160 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
1161 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
1162 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
1163 |
+
"eval_full_en_cosine_map@100": 0.2101198919267321,
|
1164 |
+
"eval_full_en_cosine_map@150": 0.2276536266469315,
|
1165 |
+
"eval_full_en_cosine_map@20": 0.34076177455520346,
|
1166 |
+
"eval_full_en_cosine_map@200": 0.24678319516569472,
|
1167 |
+
"eval_full_en_cosine_map@50": 0.23677969810249233,
|
1168 |
+
"eval_full_en_cosine_map@500": 0.297249372287514,
|
1169 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
1170 |
+
"eval_full_en_cosine_mrr@100": 0.8373899157616261,
|
1171 |
+
"eval_full_en_cosine_mrr@150": 0.8373899157616261,
|
1172 |
+
"eval_full_en_cosine_mrr@20": 0.837172357978279,
|
1173 |
+
"eval_full_en_cosine_mrr@200": 0.8373899157616261,
|
1174 |
+
"eval_full_en_cosine_mrr@50": 0.837328999582289,
|
1175 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
1176 |
+
"eval_full_en_cosine_ndcg@100": 0.4408521323246635,
|
1177 |
+
"eval_full_en_cosine_ndcg@150": 0.48834055710549873,
|
1178 |
+
"eval_full_en_cosine_ndcg@20": 0.5353264293739176,
|
1179 |
+
"eval_full_en_cosine_ndcg@200": 0.5341206282180626,
|
1180 |
+
"eval_full_en_cosine_ndcg@50": 0.44939083758113085,
|
1181 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
1182 |
+
"eval_full_en_cosine_precision@100": 0.30953947368421053,
|
1183 |
+
"eval_full_en_cosine_precision@150": 0.26460526315789473,
|
1184 |
+
"eval_full_en_cosine_precision@20": 0.4947368421052632,
|
1185 |
+
"eval_full_en_cosine_precision@200": 0.23187500000000003,
|
1186 |
+
"eval_full_en_cosine_precision@50": 0.3913815789473684,
|
1187 |
+
"eval_full_en_cosine_recall@1": 0.010305566449078924,
|
1188 |
+
"eval_full_en_cosine_recall@100": 0.3922740640225546,
|
1189 |
+
"eval_full_en_cosine_recall@150": 0.4949163913773604,
|
1190 |
+
"eval_full_en_cosine_recall@20": 0.13233275450376297,
|
1191 |
+
"eval_full_en_cosine_recall@200": 0.572041877895568,
|
1192 |
+
"eval_full_en_cosine_recall@50": 0.2535655251683108,
|
1193 |
+
"eval_runtime": 1.5798,
|
1194 |
+
"eval_samples_per_second": 0.0,
|
1195 |
+
"eval_sequential_score": 0.5341206282180626,
|
1196 |
+
"eval_steps_per_second": 0.0,
|
1197 |
+
"step": 4000
|
1198 |
+
},
|
1199 |
+
{
|
1200 |
+
"epoch": 4.575892857142857,
|
1201 |
+
"grad_norm": 10.328286170959473,
|
1202 |
+
"learning_rate": 4.546522556390977e-06,
|
1203 |
+
"loss": 0.4824,
|
1204 |
+
"step": 4100
|
1205 |
+
},
|
1206 |
+
{
|
1207 |
+
"epoch": 4.6875,
|
1208 |
+
"grad_norm": 10.712604522705078,
|
1209 |
+
"learning_rate": 3.3717105263157897e-06,
|
1210 |
+
"loss": 0.5222,
|
1211 |
+
"step": 4200
|
1212 |
+
},
|
1213 |
+
{
|
1214 |
+
"epoch": 4.6875,
|
1215 |
+
"eval_full_en_cosine_accuracy@1": 0.7302631578947368,
|
1216 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
1217 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
1218 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
1219 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
1220 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
1221 |
+
"eval_full_en_cosine_map@1": 0.7302631578947368,
|
1222 |
+
"eval_full_en_cosine_map@100": 0.21090472549603356,
|
1223 |
+
"eval_full_en_cosine_map@150": 0.22845220292726734,
|
1224 |
+
"eval_full_en_cosine_map@20": 0.342326318294358,
|
1225 |
+
"eval_full_en_cosine_map@200": 0.24733168088568283,
|
1226 |
+
"eval_full_en_cosine_map@50": 0.23774626029530496,
|
1227 |
+
"eval_full_en_cosine_map@500": 0.2977609786459198,
|
1228 |
+
"eval_full_en_cosine_mrr@1": 0.7302631578947368,
|
1229 |
+
"eval_full_en_cosine_mrr@100": 0.8356281328320803,
|
1230 |
+
"eval_full_en_cosine_mrr@150": 0.8356281328320803,
|
1231 |
+
"eval_full_en_cosine_mrr@20": 0.8354127506265665,
|
1232 |
+
"eval_full_en_cosine_mrr@200": 0.8356281328320803,
|
1233 |
+
"eval_full_en_cosine_mrr@50": 0.8355693922305765,
|
1234 |
+
"eval_full_en_cosine_ndcg@1": 0.7302631578947368,
|
1235 |
+
"eval_full_en_cosine_ndcg@100": 0.44188546614809043,
|
1236 |
+
"eval_full_en_cosine_ndcg@150": 0.48899866366733713,
|
1237 |
+
"eval_full_en_cosine_ndcg@20": 0.5372965681233445,
|
1238 |
+
"eval_full_en_cosine_ndcg@200": 0.5342393130950145,
|
1239 |
+
"eval_full_en_cosine_ndcg@50": 0.4501724823363586,
|
1240 |
+
"eval_full_en_cosine_precision@1": 0.7302631578947368,
|
1241 |
+
"eval_full_en_cosine_precision@100": 0.3101973684210526,
|
1242 |
+
"eval_full_en_cosine_precision@150": 0.2649780701754386,
|
1243 |
+
"eval_full_en_cosine_precision@20": 0.49786184210526313,
|
1244 |
+
"eval_full_en_cosine_precision@200": 0.23199013157894738,
|
1245 |
+
"eval_full_en_cosine_precision@50": 0.3921710526315789,
|
1246 |
+
"eval_full_en_cosine_recall@1": 0.01024195976751409,
|
1247 |
+
"eval_full_en_cosine_recall@100": 0.3937143686320033,
|
1248 |
+
"eval_full_en_cosine_recall@150": 0.4954717634968576,
|
1249 |
+
"eval_full_en_cosine_recall@20": 0.13293505289394864,
|
1250 |
+
"eval_full_en_cosine_recall@200": 0.5715937768635994,
|
1251 |
+
"eval_full_en_cosine_recall@50": 0.2542542782427721,
|
1252 |
+
"eval_runtime": 1.579,
|
1253 |
+
"eval_samples_per_second": 0.0,
|
1254 |
+
"eval_sequential_score": 0.5342393130950145,
|
1255 |
+
"eval_steps_per_second": 0.0,
|
1256 |
+
"step": 4200
|
1257 |
+
}
|
1258 |
+
],
|
1259 |
+
"logging_steps": 100,
|
1260 |
+
"max_steps": 4480,
|
1261 |
+
"num_input_tokens_seen": 0,
|
1262 |
+
"num_train_epochs": 5,
|
1263 |
+
"save_steps": 200,
|
1264 |
+
"stateful_callbacks": {
|
1265 |
+
"TrainerControl": {
|
1266 |
+
"args": {
|
1267 |
+
"should_epoch_stop": false,
|
1268 |
+
"should_evaluate": false,
|
1269 |
+
"should_log": false,
|
1270 |
+
"should_save": true,
|
1271 |
+
"should_training_stop": false
|
1272 |
+
},
|
1273 |
+
"attributes": {}
|
1274 |
+
}
|
1275 |
+
},
|
1276 |
+
"total_flos": 0.0,
|
1277 |
+
"train_batch_size": 64,
|
1278 |
+
"trial_name": null,
|
1279 |
+
"trial_params": null
|
1280 |
+
}
|
checkpoint-4200/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab218b8b16eec7c543d0bc5420b88d78608af29dcbf60891a693f97306d3aba4
|
3 |
+
size 5560
|
checkpoint-4200/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-4400/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"BertModel"
|
4 |
+
],
|
5 |
+
"attention_probs_dropout_prob": 0.1,
|
6 |
+
"classifier_dropout": null,
|
7 |
+
"gradient_checkpointing": false,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 1024,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0"
|
13 |
+
},
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 4096,
|
16 |
+
"label2id": {
|
17 |
+
"LABEL_0": 0
|
18 |
+
},
|
19 |
+
"layer_norm_eps": 1e-12,
|
20 |
+
"max_position_embeddings": 512,
|
21 |
+
"model_type": "bert",
|
22 |
+
"num_attention_heads": 16,
|
23 |
+
"num_hidden_layers": 24,
|
24 |
+
"pad_token_id": 0,
|
25 |
+
"position_embedding_type": "absolute",
|
26 |
+
"torch_dtype": "float32",
|
27 |
+
"transformers_version": "4.51.2",
|
28 |
+
"type_vocab_size": 2,
|
29 |
+
"use_cache": true,
|
30 |
+
"vocab_size": 30522
|
31 |
+
}
|
checkpoint-4400/config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "4.1.0",
|
4 |
+
"transformers": "4.51.2",
|
5 |
+
"pytorch": "2.6.0+cu124"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": "cosine"
|
10 |
+
}
|
checkpoint-4400/modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
checkpoint-4400/sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 256,
|
3 |
+
"do_lower_case": true
|
4 |
+
}
|
checkpoint-4480/1_Pooling/config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"word_embedding_dimension": 1024,
|
3 |
+
"pooling_mode_cls_token": true,
|
4 |
+
"pooling_mode_mean_tokens": false,
|
5 |
+
"pooling_mode_max_tokens": false,
|
6 |
+
"pooling_mode_mean_sqrt_len_tokens": false,
|
7 |
+
"pooling_mode_weightedmean_tokens": false,
|
8 |
+
"pooling_mode_lasttoken": false,
|
9 |
+
"include_prompt": true
|
10 |
+
}
|
checkpoint-4480/README.md
ADDED
@@ -0,0 +1,676 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- sentence-transformers
|
4 |
+
- sentence-similarity
|
5 |
+
- feature-extraction
|
6 |
+
- generated_from_trainer
|
7 |
+
- dataset_size:114699
|
8 |
+
- loss:CachedGISTEmbedLoss
|
9 |
+
base_model: BAAI/bge-large-en-v1.5
|
10 |
+
widget:
|
11 |
+
- source_sentence: For roles such as 'physiotherapist', 'neuromusculoskeletal physiotherapist',
|
12 |
+
'osteopath', and 'chiropractor', the skills needed include a deep understanding
|
13 |
+
of human anatomy and physiology, strong diagnostic skills, and the ability to
|
14 |
+
apply manual therapy techniques to treat musculoskeletal issues. Additionally,
|
15 |
+
effective communication skills are crucial for explaining treatments and exercises
|
16 |
+
to patients, while adaptability and problem-solving skills are essential for tailoring
|
17 |
+
treatments to individual patient needs.
|
18 |
+
sentences:
|
19 |
+
- Job roles such as insulation installers, HVAC technicians, and construction engineers
|
20 |
+
require knowledge of various types and characteristics of insulation materials
|
21 |
+
to effectively reduce heat transfer and improve energy efficiency in buildings
|
22 |
+
and systems. Understanding the typology of insulation materials, including their
|
23 |
+
thermal properties, durability, and environmental impact, is crucial for these
|
24 |
+
professionals to select the most appropriate materials for specific applications.
|
25 |
+
- Job roles such as Contract Managers, Legal Analysts, and Compliance Officers require
|
26 |
+
the skill of reviewing or auditing completed contracts to ensure legal accuracy,
|
27 |
+
compliance with regulations, and alignment with organizational goals.
|
28 |
+
- Job roles that require skills in dealing with emergency care situations include
|
29 |
+
emergency medical technicians (EMTs), paramedics, and emergency room nurses or
|
30 |
+
doctors, all of whom must quickly and effectively manage critical health situations
|
31 |
+
to save lives.
|
32 |
+
- source_sentence: Bus drivers, including those operating in various sectors like
|
33 |
+
public transit, intercity, private, or school services, need strong driving skills,
|
34 |
+
knowledge of traffic laws, and the ability to operate safely in diverse conditions.
|
35 |
+
Additionally, effective communication skills and the ability to handle passenger
|
36 |
+
inquiries and emergencies are crucial.
|
37 |
+
sentences:
|
38 |
+
- Job roles that require the skill to calibrate electronic instruments include calibration
|
39 |
+
technicians, quality control engineers, and instrumentation specialists. These
|
40 |
+
professionals ensure the accuracy and reliability of various electronic devices
|
41 |
+
and systems across different industries such as manufacturing, aerospace, and
|
42 |
+
automotive.
|
43 |
+
- Job roles such as Building Engineer, Architect, and Construction Specialist require
|
44 |
+
skills in designing, engineering, or developing air-tight building structures
|
45 |
+
to ensure energy efficiency and environmental control within the building.
|
46 |
+
- Job roles such as customer service representatives, flight attendants, and hotel
|
47 |
+
concierges require a strong focus on passengers or customers, ensuring their needs
|
48 |
+
and comfort are prioritized to provide excellent service and support.
|
49 |
+
- source_sentence: A mine surveyor, also known as a mining surveyor or mine planning
|
50 |
+
surveyor, requires expertise in geomatics and mining engineering to accurately
|
51 |
+
map and plan mine operations, ensuring safety and efficiency. They must also possess
|
52 |
+
strong analytical skills and the ability to use specialized software for creating
|
53 |
+
detailed mine plans and maintaining accurate records.
|
54 |
+
sentences:
|
55 |
+
- Job roles such as data analysts, business analysts, and financial analysts require
|
56 |
+
the skill to present reports or prepare statistical reports, as they often need
|
57 |
+
to communicate complex data insights clearly and effectively to stakeholders.
|
58 |
+
- Job roles that require monitoring flour unloading equipment include Quality Control
|
59 |
+
Technicians, Process Operators, and Mill Supervisors, who ensure the efficient
|
60 |
+
and safe operation of flour processing systems and the proper unloading of flour
|
61 |
+
from transport vehicles.
|
62 |
+
- Job roles that require skills in the manufacturing of made-up textile articles
|
63 |
+
include textile production managers, machinery operators, and quality control
|
64 |
+
inspectors, all of whom utilize specific technology and machinery to produce finished
|
65 |
+
textile products such as clothing, home textiles, and industrial fabrics.
|
66 |
+
- source_sentence: An insulation supervisor, regardless of the specific type of insulation
|
67 |
+
material or installation area, requires strong project management skills, knowledge
|
68 |
+
of building codes and safety regulations, and expertise in insulation techniques
|
69 |
+
to oversee the installation process effectively and ensure quality standards are
|
70 |
+
met.
|
71 |
+
sentences:
|
72 |
+
- Job roles that require skills in energy efficiency, such as promoting energy efficiency
|
73 |
+
or efficient energy use, include Energy Managers, Sustainability Specialists,
|
74 |
+
and Building Engineers, who focus on reducing energy consumption and improving
|
75 |
+
energy use in various settings. Additionally, roles like Battery Technicians or
|
76 |
+
Engineers involve battery benchmarking to enhance energy storage and efficiency
|
77 |
+
in technological devices and systems.
|
78 |
+
- The skill of applying or installing waterproofing and damp-proofing membranes
|
79 |
+
is primarily required by construction workers such as waterproofing specialists,
|
80 |
+
roofers, and building envelope technicians, who use these membranes to prevent
|
81 |
+
water damage in buildings and structures.
|
82 |
+
- Job roles such as laboratory technicians, chemists, and materials scientists require
|
83 |
+
skills in laboratory techniques, including electronic and thermic methods, gas
|
84 |
+
chromatography, and gravimetric analysis, to conduct precise experiments and analyze
|
85 |
+
materials. These professionals must apply natural science techniques and use various
|
86 |
+
lab techniques to ensure accurate and reliable results in their research or quality
|
87 |
+
control processes.
|
88 |
+
- source_sentence: For roles such as import/export manager, graduate export manager,
|
89 |
+
senior export manager, and other related positions in meat and meat products,
|
90 |
+
the key skills include a strong understanding of international trade regulations,
|
91 |
+
meat product knowledge, customs compliance, and excellent negotiation and communication
|
92 |
+
skills to manage global supply chains effectively. Additionally, proficiency in
|
93 |
+
relevant trade software and languages can be highly beneficial.
|
94 |
+
sentences:
|
95 |
+
- Job roles that require skills such as managing staff, coordinating employees,
|
96 |
+
and performing HR activities include Human Resources Managers, Team Leaders, Supervisors,
|
97 |
+
and Department Heads, all of whom are responsible for overseeing personnel, implementing
|
98 |
+
HR policies, and ensuring efficient team operations.
|
99 |
+
- Job roles such as Control Systems Engineer, Automation Engineer, and Systems Designer
|
100 |
+
require skills in designing, planning, and developing control systems to manage
|
101 |
+
and optimize the performance of various technological processes and machinery.
|
102 |
+
These professionals are tasked with creating efficient and reliable systems that
|
103 |
+
can operate autonomously or with minimal human intervention.
|
104 |
+
- Job roles such as Performance Analyst, Quality Assurance Engineer, and Test Manager
|
105 |
+
require skills in conducting performance measurement and organizing or managing
|
106 |
+
conversion testing to ensure software and systems meet performance standards and
|
107 |
+
function correctly in real-world scenarios.
|
108 |
+
pipeline_tag: sentence-similarity
|
109 |
+
library_name: sentence-transformers
|
110 |
+
metrics:
|
111 |
+
- cosine_accuracy@1
|
112 |
+
- cosine_accuracy@20
|
113 |
+
- cosine_accuracy@50
|
114 |
+
- cosine_accuracy@100
|
115 |
+
- cosine_accuracy@150
|
116 |
+
- cosine_accuracy@200
|
117 |
+
- cosine_precision@1
|
118 |
+
- cosine_precision@20
|
119 |
+
- cosine_precision@50
|
120 |
+
- cosine_precision@100
|
121 |
+
- cosine_precision@150
|
122 |
+
- cosine_precision@200
|
123 |
+
- cosine_recall@1
|
124 |
+
- cosine_recall@20
|
125 |
+
- cosine_recall@50
|
126 |
+
- cosine_recall@100
|
127 |
+
- cosine_recall@150
|
128 |
+
- cosine_recall@200
|
129 |
+
- cosine_ndcg@1
|
130 |
+
- cosine_ndcg@20
|
131 |
+
- cosine_ndcg@50
|
132 |
+
- cosine_ndcg@100
|
133 |
+
- cosine_ndcg@150
|
134 |
+
- cosine_ndcg@200
|
135 |
+
- cosine_mrr@1
|
136 |
+
- cosine_mrr@20
|
137 |
+
- cosine_mrr@50
|
138 |
+
- cosine_mrr@100
|
139 |
+
- cosine_mrr@150
|
140 |
+
- cosine_mrr@200
|
141 |
+
- cosine_map@1
|
142 |
+
- cosine_map@20
|
143 |
+
- cosine_map@50
|
144 |
+
- cosine_map@100
|
145 |
+
- cosine_map@150
|
146 |
+
- cosine_map@200
|
147 |
+
- cosine_map@500
|
148 |
+
model-index:
|
149 |
+
- name: SentenceTransformer based on BAAI/bge-large-en-v1.5
|
150 |
+
results:
|
151 |
+
- task:
|
152 |
+
type: information-retrieval
|
153 |
+
name: Information Retrieval
|
154 |
+
dataset:
|
155 |
+
name: full en
|
156 |
+
type: full_en
|
157 |
+
metrics:
|
158 |
+
- type: cosine_accuracy@1
|
159 |
+
value: 0.7302631578947368
|
160 |
+
name: Cosine Accuracy@1
|
161 |
+
- type: cosine_accuracy@20
|
162 |
+
value: 0.993421052631579
|
163 |
+
name: Cosine Accuracy@20
|
164 |
+
- type: cosine_accuracy@50
|
165 |
+
value: 0.9967105263157895
|
166 |
+
name: Cosine Accuracy@50
|
167 |
+
- type: cosine_accuracy@100
|
168 |
+
value: 1.0
|
169 |
+
name: Cosine Accuracy@100
|
170 |
+
- type: cosine_accuracy@150
|
171 |
+
value: 1.0
|
172 |
+
name: Cosine Accuracy@150
|
173 |
+
- type: cosine_accuracy@200
|
174 |
+
value: 1.0
|
175 |
+
name: Cosine Accuracy@200
|
176 |
+
- type: cosine_precision@1
|
177 |
+
value: 0.7302631578947368
|
178 |
+
name: Cosine Precision@1
|
179 |
+
- type: cosine_precision@20
|
180 |
+
value: 0.4998355263157894
|
181 |
+
name: Cosine Precision@20
|
182 |
+
- type: cosine_precision@50
|
183 |
+
value: 0.39184210526315794
|
184 |
+
name: Cosine Precision@50
|
185 |
+
- type: cosine_precision@100
|
186 |
+
value: 0.3111842105263158
|
187 |
+
name: Cosine Precision@100
|
188 |
+
- type: cosine_precision@150
|
189 |
+
value: 0.2652412280701754
|
190 |
+
name: Cosine Precision@150
|
191 |
+
- type: cosine_precision@200
|
192 |
+
value: 0.232171052631579
|
193 |
+
name: Cosine Precision@200
|
194 |
+
- type: cosine_recall@1
|
195 |
+
value: 0.010227350724729817
|
196 |
+
name: Cosine Recall@1
|
197 |
+
- type: cosine_recall@20
|
198 |
+
value: 0.13368254620254577
|
199 |
+
name: Cosine Recall@20
|
200 |
+
- type: cosine_recall@50
|
201 |
+
value: 0.2541249933594102
|
202 |
+
name: Cosine Recall@50
|
203 |
+
- type: cosine_recall@100
|
204 |
+
value: 0.3948435268881245
|
205 |
+
name: Cosine Recall@100
|
206 |
+
- type: cosine_recall@150
|
207 |
+
value: 0.49626849018850344
|
208 |
+
name: Cosine Recall@150
|
209 |
+
- type: cosine_recall@200
|
210 |
+
value: 0.5720837677245543
|
211 |
+
name: Cosine Recall@200
|
212 |
+
- type: cosine_ndcg@1
|
213 |
+
value: 0.7302631578947368
|
214 |
+
name: Cosine Ndcg@1
|
215 |
+
- type: cosine_ndcg@20
|
216 |
+
value: 0.5384654647855256
|
217 |
+
name: Cosine Ndcg@20
|
218 |
+
- type: cosine_ndcg@50
|
219 |
+
value: 0.44986527953229877
|
220 |
+
name: Cosine Ndcg@50
|
221 |
+
- type: cosine_ndcg@100
|
222 |
+
value: 0.44277699637488865
|
223 |
+
name: Cosine Ndcg@100
|
224 |
+
- type: cosine_ndcg@150
|
225 |
+
value: 0.4895063673734854
|
226 |
+
name: Cosine Ndcg@150
|
227 |
+
- type: cosine_ndcg@200
|
228 |
+
value: 0.5346148440105628
|
229 |
+
name: Cosine Ndcg@200
|
230 |
+
- type: cosine_mrr@1
|
231 |
+
value: 0.7302631578947368
|
232 |
+
name: Cosine Mrr@1
|
233 |
+
- type: cosine_mrr@20
|
234 |
+
value: 0.8341772399749373
|
235 |
+
name: Cosine Mrr@20
|
236 |
+
- type: cosine_mrr@50
|
237 |
+
value: 0.8343338815789473
|
238 |
+
name: Cosine Mrr@50
|
239 |
+
- type: cosine_mrr@100
|
240 |
+
value: 0.8343905966424682
|
241 |
+
name: Cosine Mrr@100
|
242 |
+
- type: cosine_mrr@150
|
243 |
+
value: 0.8343905966424682
|
244 |
+
name: Cosine Mrr@150
|
245 |
+
- type: cosine_mrr@200
|
246 |
+
value: 0.8343905966424682
|
247 |
+
name: Cosine Mrr@200
|
248 |
+
- type: cosine_map@1
|
249 |
+
value: 0.7302631578947368
|
250 |
+
name: Cosine Map@1
|
251 |
+
- type: cosine_map@20
|
252 |
+
value: 0.3434603918412553
|
253 |
+
name: Cosine Map@20
|
254 |
+
- type: cosine_map@50
|
255 |
+
value: 0.23779270403918282
|
256 |
+
name: Cosine Map@50
|
257 |
+
- type: cosine_map@100
|
258 |
+
value: 0.21161540263537876
|
259 |
+
name: Cosine Map@100
|
260 |
+
- type: cosine_map@150
|
261 |
+
value: 0.22899252179487295
|
262 |
+
name: Cosine Map@150
|
263 |
+
- type: cosine_map@200
|
264 |
+
value: 0.24784282323083537
|
265 |
+
name: Cosine Map@200
|
266 |
+
- type: cosine_map@500
|
267 |
+
value: 0.298154972004029
|
268 |
+
name: Cosine Map@500
|
269 |
+
---
|
270 |
+
|
271 |
+
# Job-Skill matching fine-tuned BAAI/bge-large-en-v1.5
|
272 |
+
|
273 |
+
Top-performing model on [TalentCLEF 2025](https://talentclef.github.io/talentclef/) Task B. Use it for job title <-> skill set matching.
|
274 |
+
|
275 |
+
## Model Details
|
276 |
+
|
277 |
+
### Model Description
|
278 |
+
- **Model Type:** Sentence Transformer
|
279 |
+
- **Base model:** [BAAI/bge-large-en-v1.5](https://huggingface.co/BAAI/bge-large-en-v1.5) <!-- at revision d4aa6901d3a41ba39fb536a557fa166f842b0e09 -->
|
280 |
+
- **Maximum Sequence Length:** 256 tokens
|
281 |
+
- **Output Dimensionality:** 1024 dimensions
|
282 |
+
- **Similarity Function:** Cosine Similarity
|
283 |
+
<!-- - **Training Dataset:** Unknown -->
|
284 |
+
<!-- - **Language:** Unknown -->
|
285 |
+
<!-- - **License:** Unknown -->
|
286 |
+
|
287 |
+
### Model Sources
|
288 |
+
|
289 |
+
- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
|
290 |
+
- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
|
291 |
+
- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
|
292 |
+
|
293 |
+
### Full Model Architecture
|
294 |
+
|
295 |
+
```
|
296 |
+
SentenceTransformer(
|
297 |
+
(0): Transformer({'max_seq_length': 256, 'do_lower_case': True}) with Transformer model: BertModel
|
298 |
+
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
299 |
+
(2): Normalize()
|
300 |
+
)
|
301 |
+
```
|
302 |
+
|
303 |
+
## Usage
|
304 |
+
|
305 |
+
### Direct Usage (Sentence Transformers)
|
306 |
+
|
307 |
+
First install the Sentence Transformers library:
|
308 |
+
|
309 |
+
```bash
|
310 |
+
pip install -U sentence-transformers
|
311 |
+
```
|
312 |
+
|
313 |
+
Then you can load this model and run inference.
|
314 |
+
```python
|
315 |
+
from sentence_transformers import SentenceTransformer
|
316 |
+
|
317 |
+
# Download from the 🤗 Hub
|
318 |
+
model = SentenceTransformer("pj-mathematician/JobSkillBGE-large-en-v1.5")
|
319 |
+
# Run inference
|
320 |
+
sentences = [
|
321 |
+
'For roles such as import/export manager, graduate export manager, senior export manager, and other related positions in meat and meat products, the key skills include a strong understanding of international trade regulations, meat product knowledge, customs compliance, and excellent negotiation and communication skills to manage global supply chains effectively. Additionally, proficiency in relevant trade software and languages can be highly beneficial.',
|
322 |
+
'Job roles such as Performance Analyst, Quality Assurance Engineer, and Test Manager require skills in conducting performance measurement and organizing or managing conversion testing to ensure software and systems meet performance standards and function correctly in real-world scenarios.',
|
323 |
+
'Job roles that require skills such as managing staff, coordinating employees, and performing HR activities include Human Resources Managers, Team Leaders, Supervisors, and Department Heads, all of whom are responsible for overseeing personnel, implementing HR policies, and ensuring efficient team operations.',
|
324 |
+
]
|
325 |
+
embeddings = model.encode(sentences)
|
326 |
+
print(embeddings.shape)
|
327 |
+
# [3, 1024]
|
328 |
+
|
329 |
+
# Get the similarity scores for the embeddings
|
330 |
+
similarities = model.similarity(embeddings, embeddings)
|
331 |
+
print(similarities.shape)
|
332 |
+
# [3, 3]
|
333 |
+
```
|
334 |
+
|
335 |
+
<!--
|
336 |
+
### Direct Usage (Transformers)
|
337 |
+
|
338 |
+
<details><summary>Click to see the direct usage in Transformers</summary>
|
339 |
+
|
340 |
+
</details>
|
341 |
+
-->
|
342 |
+
|
343 |
+
<!--
|
344 |
+
### Downstream Usage (Sentence Transformers)
|
345 |
+
|
346 |
+
You can finetune this model on your own dataset.
|
347 |
+
|
348 |
+
<details><summary>Click to expand</summary>
|
349 |
+
|
350 |
+
</details>
|
351 |
+
-->
|
352 |
+
|
353 |
+
<!--
|
354 |
+
### Out-of-Scope Use
|
355 |
+
|
356 |
+
*List how the model may foreseeably be misused and address what users ought not to do with the model.*
|
357 |
+
-->
|
358 |
+
|
359 |
+
## Evaluation
|
360 |
+
|
361 |
+
### Metrics
|
362 |
+
|
363 |
+
#### Information Retrieval
|
364 |
+
|
365 |
+
* Dataset: `full_en`
|
366 |
+
* Evaluated with [<code>InformationRetrievalEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.InformationRetrievalEvaluator)
|
367 |
+
|
368 |
+
| Metric | Value |
|
369 |
+
|:---------------------|:-----------|
|
370 |
+
| cosine_accuracy@1 | 0.7303 |
|
371 |
+
| cosine_accuracy@20 | 0.9934 |
|
372 |
+
| cosine_accuracy@50 | 0.9967 |
|
373 |
+
| cosine_accuracy@100 | 1.0 |
|
374 |
+
| cosine_accuracy@150 | 1.0 |
|
375 |
+
| cosine_accuracy@200 | 1.0 |
|
376 |
+
| cosine_precision@1 | 0.7303 |
|
377 |
+
| cosine_precision@20 | 0.4998 |
|
378 |
+
| cosine_precision@50 | 0.3918 |
|
379 |
+
| cosine_precision@100 | 0.3112 |
|
380 |
+
| cosine_precision@150 | 0.2652 |
|
381 |
+
| cosine_precision@200 | 0.2322 |
|
382 |
+
| cosine_recall@1 | 0.0102 |
|
383 |
+
| cosine_recall@20 | 0.1337 |
|
384 |
+
| cosine_recall@50 | 0.2541 |
|
385 |
+
| cosine_recall@100 | 0.3948 |
|
386 |
+
| cosine_recall@150 | 0.4963 |
|
387 |
+
| cosine_recall@200 | 0.5721 |
|
388 |
+
| cosine_ndcg@1 | 0.7303 |
|
389 |
+
| cosine_ndcg@20 | 0.5385 |
|
390 |
+
| cosine_ndcg@50 | 0.4499 |
|
391 |
+
| cosine_ndcg@100 | 0.4428 |
|
392 |
+
| cosine_ndcg@150 | 0.4895 |
|
393 |
+
| **cosine_ndcg@200** | **0.5346** |
|
394 |
+
| cosine_mrr@1 | 0.7303 |
|
395 |
+
| cosine_mrr@20 | 0.8342 |
|
396 |
+
| cosine_mrr@50 | 0.8343 |
|
397 |
+
| cosine_mrr@100 | 0.8344 |
|
398 |
+
| cosine_mrr@150 | 0.8344 |
|
399 |
+
| cosine_mrr@200 | 0.8344 |
|
400 |
+
| cosine_map@1 | 0.7303 |
|
401 |
+
| cosine_map@20 | 0.3435 |
|
402 |
+
| cosine_map@50 | 0.2378 |
|
403 |
+
| cosine_map@100 | 0.2116 |
|
404 |
+
| cosine_map@150 | 0.229 |
|
405 |
+
| cosine_map@200 | 0.2478 |
|
406 |
+
| cosine_map@500 | 0.2982 |
|
407 |
+
|
408 |
+
<!--
|
409 |
+
## Bias, Risks and Limitations
|
410 |
+
|
411 |
+
*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
|
412 |
+
-->
|
413 |
+
|
414 |
+
<!--
|
415 |
+
### Recommendations
|
416 |
+
|
417 |
+
*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
|
418 |
+
-->
|
419 |
+
|
420 |
+
## Training Details
|
421 |
+
|
422 |
+
### Training Dataset
|
423 |
+
|
424 |
+
#### Unnamed Dataset
|
425 |
+
|
426 |
+
* Size: 114,699 training samples
|
427 |
+
* Columns: <code>anchor</code> and <code>positive</code>
|
428 |
+
* Approximate statistics based on the first 1000 samples:
|
429 |
+
| | anchor | positive |
|
430 |
+
|:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
|
431 |
+
| type | string | string |
|
432 |
+
| details | <ul><li>min: 43 tokens</li><li>mean: 65.45 tokens</li><li>max: 116 tokens</li></ul> | <ul><li>min: 34 tokens</li><li>mean: 55.34 tokens</li><li>max: 162 tokens</li></ul> |
|
433 |
+
* Samples:
|
434 |
+
| anchor | positive |
|
435 |
+
|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
|
436 |
+
| <code>A technical director or any of its synonyms requires a strong blend of technical expertise and leadership skills, including the ability to oversee technical operations, manage teams, and ensure the successful execution of technical projects while maintaining operational efficiency and innovation.</code> | <code>Job roles that require promoting health and safety include occupational health and safety specialists, safety managers, and public health educators, all of whom work to ensure safe and healthy environments in workplaces and communities.</code> |
|
437 |
+
| <code>A technical director or any of its synonyms requires a strong blend of technical expertise and leadership skills, including the ability to oversee technical operations, manage teams, and ensure the successful execution of technical projects while maintaining operational efficiency and innovation.</code> | <code>Job roles that require organizing rehearsals include directors, choreographers, and conductors in theater, dance, and music ensembles, who must efficiently plan and schedule practice sessions to prepare performers for a successful final performance.</code> |
|
438 |
+
| <code>A technical director or any of its synonyms requires a strong blend of technical expertise and leadership skills, including the ability to oversee technical operations, manage teams, and ensure the successful execution of technical projects while maintaining operational efficiency and innovation.</code> | <code>Job roles such as Health and Safety Managers, Environmental Health Officers, and Risk Management Specialists often require the skill of negotiating health and safety issues with third parties to ensure compliance and protection standards are met across different organizations and sites.</code> |
|
439 |
+
* Loss: [<code>CachedGISTEmbedLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cachedgistembedloss) with these parameters:
|
440 |
+
```json
|
441 |
+
{'guide': SentenceTransformer(
|
442 |
+
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel
|
443 |
+
(1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
|
444 |
+
(2): Normalize()
|
445 |
+
), 'temperature': 0.01, 'mini_batch_size': 32, 'margin_strategy': 'absolute', 'margin': 0.0}
|
446 |
+
```
|
447 |
+
|
448 |
+
### Training Hyperparameters
|
449 |
+
#### Non-Default Hyperparameters
|
450 |
+
|
451 |
+
- `eval_strategy`: steps
|
452 |
+
- `per_device_train_batch_size`: 64
|
453 |
+
- `per_device_eval_batch_size`: 128
|
454 |
+
- `gradient_accumulation_steps`: 2
|
455 |
+
- `num_train_epochs`: 5
|
456 |
+
- `warmup_ratio`: 0.05
|
457 |
+
- `log_on_each_node`: False
|
458 |
+
- `fp16`: True
|
459 |
+
- `dataloader_num_workers`: 4
|
460 |
+
- `ddp_find_unused_parameters`: True
|
461 |
+
- `batch_sampler`: no_duplicates
|
462 |
+
|
463 |
+
#### All Hyperparameters
|
464 |
+
<details><summary>Click to expand</summary>
|
465 |
+
|
466 |
+
- `overwrite_output_dir`: False
|
467 |
+
- `do_predict`: False
|
468 |
+
- `eval_strategy`: steps
|
469 |
+
- `prediction_loss_only`: True
|
470 |
+
- `per_device_train_batch_size`: 64
|
471 |
+
- `per_device_eval_batch_size`: 128
|
472 |
+
- `per_gpu_train_batch_size`: None
|
473 |
+
- `per_gpu_eval_batch_size`: None
|
474 |
+
- `gradient_accumulation_steps`: 2
|
475 |
+
- `eval_accumulation_steps`: None
|
476 |
+
- `torch_empty_cache_steps`: None
|
477 |
+
- `learning_rate`: 5e-05
|
478 |
+
- `weight_decay`: 0.0
|
479 |
+
- `adam_beta1`: 0.9
|
480 |
+
- `adam_beta2`: 0.999
|
481 |
+
- `adam_epsilon`: 1e-08
|
482 |
+
- `max_grad_norm`: 1.0
|
483 |
+
- `num_train_epochs`: 5
|
484 |
+
- `max_steps`: -1
|
485 |
+
- `lr_scheduler_type`: linear
|
486 |
+
- `lr_scheduler_kwargs`: {}
|
487 |
+
- `warmup_ratio`: 0.05
|
488 |
+
- `warmup_steps`: 0
|
489 |
+
- `log_level`: passive
|
490 |
+
- `log_level_replica`: warning
|
491 |
+
- `log_on_each_node`: False
|
492 |
+
- `logging_nan_inf_filter`: True
|
493 |
+
- `save_safetensors`: True
|
494 |
+
- `save_on_each_node`: False
|
495 |
+
- `save_only_model`: False
|
496 |
+
- `restore_callback_states_from_checkpoint`: False
|
497 |
+
- `no_cuda`: False
|
498 |
+
- `use_cpu`: False
|
499 |
+
- `use_mps_device`: False
|
500 |
+
- `seed`: 42
|
501 |
+
- `data_seed`: None
|
502 |
+
- `jit_mode_eval`: False
|
503 |
+
- `use_ipex`: False
|
504 |
+
- `bf16`: False
|
505 |
+
- `fp16`: True
|
506 |
+
- `fp16_opt_level`: O1
|
507 |
+
- `half_precision_backend`: auto
|
508 |
+
- `bf16_full_eval`: False
|
509 |
+
- `fp16_full_eval`: False
|
510 |
+
- `tf32`: None
|
511 |
+
- `local_rank`: 0
|
512 |
+
- `ddp_backend`: None
|
513 |
+
- `tpu_num_cores`: None
|
514 |
+
- `tpu_metrics_debug`: False
|
515 |
+
- `debug`: []
|
516 |
+
- `dataloader_drop_last`: True
|
517 |
+
- `dataloader_num_workers`: 4
|
518 |
+
- `dataloader_prefetch_factor`: None
|
519 |
+
- `past_index`: -1
|
520 |
+
- `disable_tqdm`: False
|
521 |
+
- `remove_unused_columns`: True
|
522 |
+
- `label_names`: None
|
523 |
+
- `load_best_model_at_end`: False
|
524 |
+
- `ignore_data_skip`: False
|
525 |
+
- `fsdp`: []
|
526 |
+
- `fsdp_min_num_params`: 0
|
527 |
+
- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
|
528 |
+
- `tp_size`: 0
|
529 |
+
- `fsdp_transformer_layer_cls_to_wrap`: None
|
530 |
+
- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
|
531 |
+
- `deepspeed`: None
|
532 |
+
- `label_smoothing_factor`: 0.0
|
533 |
+
- `optim`: adamw_torch
|
534 |
+
- `optim_args`: None
|
535 |
+
- `adafactor`: False
|
536 |
+
- `group_by_length`: False
|
537 |
+
- `length_column_name`: length
|
538 |
+
- `ddp_find_unused_parameters`: True
|
539 |
+
- `ddp_bucket_cap_mb`: None
|
540 |
+
- `ddp_broadcast_buffers`: False
|
541 |
+
- `dataloader_pin_memory`: True
|
542 |
+
- `dataloader_persistent_workers`: False
|
543 |
+
- `skip_memory_metrics`: True
|
544 |
+
- `use_legacy_prediction_loop`: False
|
545 |
+
- `push_to_hub`: False
|
546 |
+
- `resume_from_checkpoint`: None
|
547 |
+
- `hub_model_id`: None
|
548 |
+
- `hub_strategy`: every_save
|
549 |
+
- `hub_private_repo`: None
|
550 |
+
- `hub_always_push`: False
|
551 |
+
- `gradient_checkpointing`: False
|
552 |
+
- `gradient_checkpointing_kwargs`: None
|
553 |
+
- `include_inputs_for_metrics`: False
|
554 |
+
- `include_for_metrics`: []
|
555 |
+
- `eval_do_concat_batches`: True
|
556 |
+
- `fp16_backend`: auto
|
557 |
+
- `push_to_hub_model_id`: None
|
558 |
+
- `push_to_hub_organization`: None
|
559 |
+
- `mp_parameters`:
|
560 |
+
- `auto_find_batch_size`: False
|
561 |
+
- `full_determinism`: False
|
562 |
+
- `torchdynamo`: None
|
563 |
+
- `ray_scope`: last
|
564 |
+
- `ddp_timeout`: 1800
|
565 |
+
- `torch_compile`: False
|
566 |
+
- `torch_compile_backend`: None
|
567 |
+
- `torch_compile_mode`: None
|
568 |
+
- `include_tokens_per_second`: False
|
569 |
+
- `include_num_input_tokens_seen`: False
|
570 |
+
- `neftune_noise_alpha`: None
|
571 |
+
- `optim_target_modules`: None
|
572 |
+
- `batch_eval_metrics`: False
|
573 |
+
- `eval_on_start`: False
|
574 |
+
- `use_liger_kernel`: False
|
575 |
+
- `eval_use_gather_object`: False
|
576 |
+
- `average_tokens_across_devices`: False
|
577 |
+
- `prompts`: None
|
578 |
+
- `batch_sampler`: no_duplicates
|
579 |
+
- `multi_dataset_batch_sampler`: proportional
|
580 |
+
|
581 |
+
</details>
|
582 |
+
|
583 |
+
### Training Logs
|
584 |
+
| Epoch | Step | Training Loss | full_en_cosine_ndcg@200 |
|
585 |
+
|:------:|:----:|:-------------:|:-----------------------:|
|
586 |
+
| -1 | -1 | - | 0.4784 |
|
587 |
+
| 0.0011 | 1 | 9.119 | - |
|
588 |
+
| 0.1116 | 100 | 4.1469 | - |
|
589 |
+
| 0.2232 | 200 | 2.5294 | 0.5362 |
|
590 |
+
| 0.3348 | 300 | 2.3611 | - |
|
591 |
+
| 0.4464 | 400 | 2.192 | 0.5318 |
|
592 |
+
| 0.5580 | 500 | 2.0338 | - |
|
593 |
+
| 0.6696 | 600 | 1.9009 | 0.5383 |
|
594 |
+
| 0.7812 | 700 | 1.8404 | - |
|
595 |
+
| 0.8929 | 800 | 1.7692 | 0.5352 |
|
596 |
+
| 1.0045 | 900 | 1.6921 | - |
|
597 |
+
| 1.1161 | 1000 | 1.3861 | 0.5368 |
|
598 |
+
| 1.2277 | 1100 | 1.3863 | - |
|
599 |
+
| 1.3393 | 1200 | 1.3546 | 0.5259 |
|
600 |
+
| 1.4509 | 1300 | 1.373 | - |
|
601 |
+
| 1.5625 | 1400 | 1.3364 | 0.5303 |
|
602 |
+
| 1.6741 | 1500 | 1.2876 | - |
|
603 |
+
| 1.7857 | 1600 | 1.3094 | 0.5323 |
|
604 |
+
| 1.8973 | 1700 | 1.2784 | - |
|
605 |
+
| 2.0089 | 1800 | 1.2204 | 0.5330 |
|
606 |
+
| 2.1205 | 1900 | 0.9617 | - |
|
607 |
+
| 2.2321 | 2000 | 1.0004 | 0.5277 |
|
608 |
+
| 2.3438 | 2100 | 0.9694 | - |
|
609 |
+
| 2.4554 | 2200 | 0.9843 | 0.5356 |
|
610 |
+
| 2.5670 | 2300 | 0.9743 | - |
|
611 |
+
| 2.6786 | 2400 | 0.9252 | 0.5320 |
|
612 |
+
| 2.7902 | 2500 | 0.9272 | - |
|
613 |
+
| 2.9018 | 2600 | 0.9279 | 0.5333 |
|
614 |
+
| 3.0134 | 2700 | 0.857 | - |
|
615 |
+
| 3.125 | 2800 | 0.7313 | 0.5300 |
|
616 |
+
| 3.2366 | 2900 | 0.7103 | - |
|
617 |
+
| 3.3482 | 3000 | 0.7187 | 0.5319 |
|
618 |
+
| 3.4598 | 3100 | 0.7067 | - |
|
619 |
+
| 3.5714 | 3200 | 0.7157 | 0.5369 |
|
620 |
+
| 3.6830 | 3300 | 0.7113 | - |
|
621 |
+
| 3.7946 | 3400 | 0.7013 | 0.5341 |
|
622 |
+
| 3.9062 | 3500 | 0.6903 | - |
|
623 |
+
| 4.0179 | 3600 | 0.6462 | 0.5335 |
|
624 |
+
| 4.1295 | 3700 | 0.5162 | - |
|
625 |
+
| 4.2411 | 3800 | 0.524 | 0.5352 |
|
626 |
+
| 4.3527 | 3900 | 0.5303 | - |
|
627 |
+
| 4.4643 | 4000 | 0.5269 | 0.5341 |
|
628 |
+
| 4.5759 | 4100 | 0.4824 | - |
|
629 |
+
| 4.6875 | 4200 | 0.5222 | 0.5342 |
|
630 |
+
| 4.7991 | 4300 | 0.5104 | - |
|
631 |
+
| 4.9107 | 4400 | 0.5002 | 0.5346 |
|
632 |
+
|
633 |
+
|
634 |
+
### Framework Versions
|
635 |
+
- Python: 3.11.11
|
636 |
+
- Sentence Transformers: 4.1.0
|
637 |
+
- Transformers: 4.51.2
|
638 |
+
- PyTorch: 2.6.0+cu124
|
639 |
+
- Accelerate: 1.6.0
|
640 |
+
- Datasets: 3.5.0
|
641 |
+
- Tokenizers: 0.21.1
|
642 |
+
|
643 |
+
## Citation
|
644 |
+
|
645 |
+
### BibTeX
|
646 |
+
|
647 |
+
#### Sentence Transformers
|
648 |
+
```bibtex
|
649 |
+
@inproceedings{reimers-2019-sentence-bert,
|
650 |
+
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
|
651 |
+
author = "Reimers, Nils and Gurevych, Iryna",
|
652 |
+
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
|
653 |
+
month = "11",
|
654 |
+
year = "2019",
|
655 |
+
publisher = "Association for Computational Linguistics",
|
656 |
+
url = "https://arxiv.org/abs/1908.10084",
|
657 |
+
}
|
658 |
+
```
|
659 |
+
|
660 |
+
<!--
|
661 |
+
## Glossary
|
662 |
+
|
663 |
+
*Clearly define terms in order to be accessible across audiences.*
|
664 |
+
-->
|
665 |
+
|
666 |
+
<!--
|
667 |
+
## Model Card Authors
|
668 |
+
|
669 |
+
*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
|
670 |
+
-->
|
671 |
+
|
672 |
+
<!--
|
673 |
+
## Model Card Contact
|
674 |
+
|
675 |
+
*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
|
676 |
+
-->
|
checkpoint-4480/config.json
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"BertModel"
|
4 |
+
],
|
5 |
+
"attention_probs_dropout_prob": 0.1,
|
6 |
+
"classifier_dropout": null,
|
7 |
+
"gradient_checkpointing": false,
|
8 |
+
"hidden_act": "gelu",
|
9 |
+
"hidden_dropout_prob": 0.1,
|
10 |
+
"hidden_size": 1024,
|
11 |
+
"id2label": {
|
12 |
+
"0": "LABEL_0"
|
13 |
+
},
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"intermediate_size": 4096,
|
16 |
+
"label2id": {
|
17 |
+
"LABEL_0": 0
|
18 |
+
},
|
19 |
+
"layer_norm_eps": 1e-12,
|
20 |
+
"max_position_embeddings": 512,
|
21 |
+
"model_type": "bert",
|
22 |
+
"num_attention_heads": 16,
|
23 |
+
"num_hidden_layers": 24,
|
24 |
+
"pad_token_id": 0,
|
25 |
+
"position_embedding_type": "absolute",
|
26 |
+
"torch_dtype": "float32",
|
27 |
+
"transformers_version": "4.51.2",
|
28 |
+
"type_vocab_size": 2,
|
29 |
+
"use_cache": true,
|
30 |
+
"vocab_size": 30522
|
31 |
+
}
|
checkpoint-4480/config_sentence_transformers.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"__version__": {
|
3 |
+
"sentence_transformers": "4.1.0",
|
4 |
+
"transformers": "4.51.2",
|
5 |
+
"pytorch": "2.6.0+cu124"
|
6 |
+
},
|
7 |
+
"prompts": {},
|
8 |
+
"default_prompt_name": null,
|
9 |
+
"similarity_fn_name": "cosine"
|
10 |
+
}
|
checkpoint-4480/modules.json
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[
|
2 |
+
{
|
3 |
+
"idx": 0,
|
4 |
+
"name": "0",
|
5 |
+
"path": "",
|
6 |
+
"type": "sentence_transformers.models.Transformer"
|
7 |
+
},
|
8 |
+
{
|
9 |
+
"idx": 1,
|
10 |
+
"name": "1",
|
11 |
+
"path": "1_Pooling",
|
12 |
+
"type": "sentence_transformers.models.Pooling"
|
13 |
+
},
|
14 |
+
{
|
15 |
+
"idx": 2,
|
16 |
+
"name": "2",
|
17 |
+
"path": "2_Normalize",
|
18 |
+
"type": "sentence_transformers.models.Normalize"
|
19 |
+
}
|
20 |
+
]
|
checkpoint-4480/sentence_bert_config.json
ADDED
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"max_seq_length": 256,
|
3 |
+
"do_lower_case": true
|
4 |
+
}
|
checkpoint-4480/special_tokens_map.json
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cls_token": {
|
3 |
+
"content": "[CLS]",
|
4 |
+
"lstrip": false,
|
5 |
+
"normalized": false,
|
6 |
+
"rstrip": false,
|
7 |
+
"single_word": false
|
8 |
+
},
|
9 |
+
"mask_token": {
|
10 |
+
"content": "[MASK]",
|
11 |
+
"lstrip": false,
|
12 |
+
"normalized": false,
|
13 |
+
"rstrip": false,
|
14 |
+
"single_word": false
|
15 |
+
},
|
16 |
+
"pad_token": {
|
17 |
+
"content": "[PAD]",
|
18 |
+
"lstrip": false,
|
19 |
+
"normalized": false,
|
20 |
+
"rstrip": false,
|
21 |
+
"single_word": false
|
22 |
+
},
|
23 |
+
"sep_token": {
|
24 |
+
"content": "[SEP]",
|
25 |
+
"lstrip": false,
|
26 |
+
"normalized": false,
|
27 |
+
"rstrip": false,
|
28 |
+
"single_word": false
|
29 |
+
},
|
30 |
+
"unk_token": {
|
31 |
+
"content": "[UNK]",
|
32 |
+
"lstrip": false,
|
33 |
+
"normalized": false,
|
34 |
+
"rstrip": false,
|
35 |
+
"single_word": false
|
36 |
+
}
|
37 |
+
}
|
checkpoint-4480/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-4480/tokenizer_config.json
ADDED
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"added_tokens_decoder": {
|
3 |
+
"0": {
|
4 |
+
"content": "[PAD]",
|
5 |
+
"lstrip": false,
|
6 |
+
"normalized": false,
|
7 |
+
"rstrip": false,
|
8 |
+
"single_word": false,
|
9 |
+
"special": true
|
10 |
+
},
|
11 |
+
"100": {
|
12 |
+
"content": "[UNK]",
|
13 |
+
"lstrip": false,
|
14 |
+
"normalized": false,
|
15 |
+
"rstrip": false,
|
16 |
+
"single_word": false,
|
17 |
+
"special": true
|
18 |
+
},
|
19 |
+
"101": {
|
20 |
+
"content": "[CLS]",
|
21 |
+
"lstrip": false,
|
22 |
+
"normalized": false,
|
23 |
+
"rstrip": false,
|
24 |
+
"single_word": false,
|
25 |
+
"special": true
|
26 |
+
},
|
27 |
+
"102": {
|
28 |
+
"content": "[SEP]",
|
29 |
+
"lstrip": false,
|
30 |
+
"normalized": false,
|
31 |
+
"rstrip": false,
|
32 |
+
"single_word": false,
|
33 |
+
"special": true
|
34 |
+
},
|
35 |
+
"103": {
|
36 |
+
"content": "[MASK]",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false,
|
41 |
+
"special": true
|
42 |
+
}
|
43 |
+
},
|
44 |
+
"clean_up_tokenization_spaces": true,
|
45 |
+
"cls_token": "[CLS]",
|
46 |
+
"do_basic_tokenize": true,
|
47 |
+
"do_lower_case": true,
|
48 |
+
"extra_special_tokens": {},
|
49 |
+
"mask_token": "[MASK]",
|
50 |
+
"model_max_length": 512,
|
51 |
+
"never_split": null,
|
52 |
+
"pad_token": "[PAD]",
|
53 |
+
"sep_token": "[SEP]",
|
54 |
+
"strip_accents": null,
|
55 |
+
"tokenize_chinese_chars": true,
|
56 |
+
"tokenizer_class": "BertTokenizer",
|
57 |
+
"unk_token": "[UNK]"
|
58 |
+
}
|
checkpoint-4480/trainer_state.json
ADDED
@@ -0,0 +1,1339 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_global_step": null,
|
3 |
+
"best_metric": null,
|
4 |
+
"best_model_checkpoint": null,
|
5 |
+
"epoch": 5.0,
|
6 |
+
"eval_steps": 200,
|
7 |
+
"global_step": 4480,
|
8 |
+
"is_hyper_param_search": false,
|
9 |
+
"is_local_process_zero": true,
|
10 |
+
"is_world_process_zero": true,
|
11 |
+
"log_history": [
|
12 |
+
{
|
13 |
+
"epoch": 0.0011160714285714285,
|
14 |
+
"grad_norm": NaN,
|
15 |
+
"learning_rate": 0.0,
|
16 |
+
"loss": 9.119,
|
17 |
+
"step": 1
|
18 |
+
},
|
19 |
+
{
|
20 |
+
"epoch": 0.11160714285714286,
|
21 |
+
"grad_norm": 17.632238388061523,
|
22 |
+
"learning_rate": 2.1428571428571428e-05,
|
23 |
+
"loss": 4.1469,
|
24 |
+
"step": 100
|
25 |
+
},
|
26 |
+
{
|
27 |
+
"epoch": 0.22321428571428573,
|
28 |
+
"grad_norm": 14.409270286560059,
|
29 |
+
"learning_rate": 4.375e-05,
|
30 |
+
"loss": 2.5294,
|
31 |
+
"step": 200
|
32 |
+
},
|
33 |
+
{
|
34 |
+
"epoch": 0.22321428571428573,
|
35 |
+
"eval_full_en_cosine_accuracy@1": 0.7467105263157895,
|
36 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
37 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
38 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
39 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
40 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
41 |
+
"eval_full_en_cosine_map@1": 0.7467105263157895,
|
42 |
+
"eval_full_en_cosine_map@100": 0.2121058701298033,
|
43 |
+
"eval_full_en_cosine_map@150": 0.2294109301872967,
|
44 |
+
"eval_full_en_cosine_map@20": 0.34167650006204187,
|
45 |
+
"eval_full_en_cosine_map@200": 0.2492171685943861,
|
46 |
+
"eval_full_en_cosine_map@50": 0.237336657426832,
|
47 |
+
"eval_full_en_cosine_map@500": 0.3000288940307502,
|
48 |
+
"eval_full_en_cosine_mrr@1": 0.7467105263157895,
|
49 |
+
"eval_full_en_cosine_mrr@100": 0.8460592769803298,
|
50 |
+
"eval_full_en_cosine_mrr@150": 0.8460592769803298,
|
51 |
+
"eval_full_en_cosine_mrr@20": 0.8458948032961192,
|
52 |
+
"eval_full_en_cosine_mrr@200": 0.8460592769803298,
|
53 |
+
"eval_full_en_cosine_mrr@50": 0.8460122844991269,
|
54 |
+
"eval_full_en_cosine_ndcg@1": 0.7467105263157895,
|
55 |
+
"eval_full_en_cosine_ndcg@100": 0.4430509248084704,
|
56 |
+
"eval_full_en_cosine_ndcg@150": 0.4894828917681416,
|
57 |
+
"eval_full_en_cosine_ndcg@20": 0.5367541274871807,
|
58 |
+
"eval_full_en_cosine_ndcg@200": 0.5361903606133726,
|
59 |
+
"eval_full_en_cosine_ndcg@50": 0.448683811733402,
|
60 |
+
"eval_full_en_cosine_precision@1": 0.7467105263157895,
|
61 |
+
"eval_full_en_cosine_precision@100": 0.31240131578947367,
|
62 |
+
"eval_full_en_cosine_precision@150": 0.26592105263157895,
|
63 |
+
"eval_full_en_cosine_precision@20": 0.4965460526315789,
|
64 |
+
"eval_full_en_cosine_precision@200": 0.23370065789473685,
|
65 |
+
"eval_full_en_cosine_precision@50": 0.3904605263157895,
|
66 |
+
"eval_full_en_cosine_recall@1": 0.010753343030902496,
|
67 |
+
"eval_full_en_cosine_recall@100": 0.39446255566624855,
|
68 |
+
"eval_full_en_cosine_recall@150": 0.49544823712709557,
|
69 |
+
"eval_full_en_cosine_recall@20": 0.13279013317825217,
|
70 |
+
"eval_full_en_cosine_recall@200": 0.5739614992682516,
|
71 |
+
"eval_full_en_cosine_recall@50": 0.25254843470147753,
|
72 |
+
"eval_runtime": 1.5828,
|
73 |
+
"eval_samples_per_second": 0.0,
|
74 |
+
"eval_sequential_score": 0.5361903606133726,
|
75 |
+
"eval_steps_per_second": 0.0,
|
76 |
+
"step": 200
|
77 |
+
},
|
78 |
+
{
|
79 |
+
"epoch": 0.33482142857142855,
|
80 |
+
"grad_norm": 16.260934829711914,
|
81 |
+
"learning_rate": 4.915413533834587e-05,
|
82 |
+
"loss": 2.3611,
|
83 |
+
"step": 300
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"epoch": 0.44642857142857145,
|
87 |
+
"grad_norm": 13.242988586425781,
|
88 |
+
"learning_rate": 4.797932330827068e-05,
|
89 |
+
"loss": 2.192,
|
90 |
+
"step": 400
|
91 |
+
},
|
92 |
+
{
|
93 |
+
"epoch": 0.44642857142857145,
|
94 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
95 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
96 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
97 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
98 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
99 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
100 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
101 |
+
"eval_full_en_cosine_map@100": 0.2088144416212806,
|
102 |
+
"eval_full_en_cosine_map@150": 0.22677217670719133,
|
103 |
+
"eval_full_en_cosine_map@20": 0.3349832137166454,
|
104 |
+
"eval_full_en_cosine_map@200": 0.245946497368659,
|
105 |
+
"eval_full_en_cosine_map@50": 0.23473921202287384,
|
106 |
+
"eval_full_en_cosine_map@500": 0.2973985707303743,
|
107 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
108 |
+
"eval_full_en_cosine_mrr@100": 0.8394156306336016,
|
109 |
+
"eval_full_en_cosine_mrr@150": 0.8394156306336016,
|
110 |
+
"eval_full_en_cosine_mrr@20": 0.8392713554720135,
|
111 |
+
"eval_full_en_cosine_mrr@200": 0.8394156306336016,
|
112 |
+
"eval_full_en_cosine_mrr@50": 0.8393810045948205,
|
113 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
114 |
+
"eval_full_en_cosine_ndcg@100": 0.43855475512592684,
|
115 |
+
"eval_full_en_cosine_ndcg@150": 0.48609390907359196,
|
116 |
+
"eval_full_en_cosine_ndcg@20": 0.5288083416910968,
|
117 |
+
"eval_full_en_cosine_ndcg@200": 0.5318117937684201,
|
118 |
+
"eval_full_en_cosine_ndcg@50": 0.4453338982563473,
|
119 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
120 |
+
"eval_full_en_cosine_precision@100": 0.3088157894736842,
|
121 |
+
"eval_full_en_cosine_precision@150": 0.2644517543859649,
|
122 |
+
"eval_full_en_cosine_precision@20": 0.4875,
|
123 |
+
"eval_full_en_cosine_precision@200": 0.23172697368421055,
|
124 |
+
"eval_full_en_cosine_precision@50": 0.38782894736842105,
|
125 |
+
"eval_full_en_cosine_recall@1": 0.010619007443519193,
|
126 |
+
"eval_full_en_cosine_recall@100": 0.3902042311088277,
|
127 |
+
"eval_full_en_cosine_recall@150": 0.4925745165667779,
|
128 |
+
"eval_full_en_cosine_recall@20": 0.1301764615450556,
|
129 |
+
"eval_full_en_cosine_recall@200": 0.5696006364444781,
|
130 |
+
"eval_full_en_cosine_recall@50": 0.2518199886564403,
|
131 |
+
"eval_runtime": 1.5596,
|
132 |
+
"eval_samples_per_second": 0.0,
|
133 |
+
"eval_sequential_score": 0.5318117937684201,
|
134 |
+
"eval_steps_per_second": 0.0,
|
135 |
+
"step": 400
|
136 |
+
},
|
137 |
+
{
|
138 |
+
"epoch": 0.5580357142857143,
|
139 |
+
"grad_norm": 13.307888984680176,
|
140 |
+
"learning_rate": 4.680451127819549e-05,
|
141 |
+
"loss": 2.0338,
|
142 |
+
"step": 500
|
143 |
+
},
|
144 |
+
{
|
145 |
+
"epoch": 0.6696428571428571,
|
146 |
+
"grad_norm": 12.763930320739746,
|
147 |
+
"learning_rate": 4.56296992481203e-05,
|
148 |
+
"loss": 1.9009,
|
149 |
+
"step": 600
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 0.6696428571428571,
|
153 |
+
"eval_full_en_cosine_accuracy@1": 0.7302631578947368,
|
154 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
155 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
156 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
157 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
158 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
159 |
+
"eval_full_en_cosine_map@1": 0.7302631578947368,
|
160 |
+
"eval_full_en_cosine_map@100": 0.2146410944227793,
|
161 |
+
"eval_full_en_cosine_map@150": 0.23271596511985665,
|
162 |
+
"eval_full_en_cosine_map@20": 0.3429678297332613,
|
163 |
+
"eval_full_en_cosine_map@200": 0.2520997707361607,
|
164 |
+
"eval_full_en_cosine_map@50": 0.2404899713826549,
|
165 |
+
"eval_full_en_cosine_map@500": 0.302904619520322,
|
166 |
+
"eval_full_en_cosine_mrr@1": 0.7302631578947368,
|
167 |
+
"eval_full_en_cosine_mrr@100": 0.8306572094298247,
|
168 |
+
"eval_full_en_cosine_mrr@150": 0.8306572094298247,
|
169 |
+
"eval_full_en_cosine_mrr@20": 0.8304491697994989,
|
170 |
+
"eval_full_en_cosine_mrr@200": 0.8306572094298247,
|
171 |
+
"eval_full_en_cosine_mrr@50": 0.8306058114035089,
|
172 |
+
"eval_full_en_cosine_ndcg@1": 0.7302631578947368,
|
173 |
+
"eval_full_en_cosine_ndcg@100": 0.4445617284976941,
|
174 |
+
"eval_full_en_cosine_ndcg@150": 0.4922393935902775,
|
175 |
+
"eval_full_en_cosine_ndcg@20": 0.5357880041966661,
|
176 |
+
"eval_full_en_cosine_ndcg@200": 0.5383209000398446,
|
177 |
+
"eval_full_en_cosine_ndcg@50": 0.4504820590447715,
|
178 |
+
"eval_full_en_cosine_precision@1": 0.7302631578947368,
|
179 |
+
"eval_full_en_cosine_precision@100": 0.31358552631578945,
|
180 |
+
"eval_full_en_cosine_precision@150": 0.2677412280701754,
|
181 |
+
"eval_full_en_cosine_precision@20": 0.49720394736842105,
|
182 |
+
"eval_full_en_cosine_precision@200": 0.23452302631578953,
|
183 |
+
"eval_full_en_cosine_precision@50": 0.3932894736842105,
|
184 |
+
"eval_full_en_cosine_recall@1": 0.010303516134180577,
|
185 |
+
"eval_full_en_cosine_recall@100": 0.3970033142271577,
|
186 |
+
"eval_full_en_cosine_recall@150": 0.5001101850184368,
|
187 |
+
"eval_full_en_cosine_recall@20": 0.13302896177814508,
|
188 |
+
"eval_full_en_cosine_recall@200": 0.5777429812058247,
|
189 |
+
"eval_full_en_cosine_recall@50": 0.254528957048419,
|
190 |
+
"eval_runtime": 1.5616,
|
191 |
+
"eval_samples_per_second": 0.0,
|
192 |
+
"eval_sequential_score": 0.5383209000398446,
|
193 |
+
"eval_steps_per_second": 0.0,
|
194 |
+
"step": 600
|
195 |
+
},
|
196 |
+
{
|
197 |
+
"epoch": 0.78125,
|
198 |
+
"grad_norm": 13.439990997314453,
|
199 |
+
"learning_rate": 4.4454887218045117e-05,
|
200 |
+
"loss": 1.8404,
|
201 |
+
"step": 700
|
202 |
+
},
|
203 |
+
{
|
204 |
+
"epoch": 0.8928571428571429,
|
205 |
+
"grad_norm": 12.594465255737305,
|
206 |
+
"learning_rate": 4.3280075187969924e-05,
|
207 |
+
"loss": 1.7692,
|
208 |
+
"step": 800
|
209 |
+
},
|
210 |
+
{
|
211 |
+
"epoch": 0.8928571428571429,
|
212 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
213 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
214 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
215 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
216 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
217 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
218 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
219 |
+
"eval_full_en_cosine_map@100": 0.21030614519224017,
|
220 |
+
"eval_full_en_cosine_map@150": 0.22737063252522982,
|
221 |
+
"eval_full_en_cosine_map@20": 0.3442880676713117,
|
222 |
+
"eval_full_en_cosine_map@200": 0.24764067563282596,
|
223 |
+
"eval_full_en_cosine_map@50": 0.23827484272575025,
|
224 |
+
"eval_full_en_cosine_map@500": 0.2987091429260604,
|
225 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
226 |
+
"eval_full_en_cosine_mrr@100": 0.8404268619187053,
|
227 |
+
"eval_full_en_cosine_mrr@150": 0.8404268619187053,
|
228 |
+
"eval_full_en_cosine_mrr@20": 0.8402307852965749,
|
229 |
+
"eval_full_en_cosine_mrr@200": 0.8404268619187053,
|
230 |
+
"eval_full_en_cosine_mrr@50": 0.8403738058915406,
|
231 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
232 |
+
"eval_full_en_cosine_ndcg@100": 0.440670430732987,
|
233 |
+
"eval_full_en_cosine_ndcg@150": 0.486778222456143,
|
234 |
+
"eval_full_en_cosine_ndcg@20": 0.5383903905850532,
|
235 |
+
"eval_full_en_cosine_ndcg@200": 0.5352292016764449,
|
236 |
+
"eval_full_en_cosine_ndcg@50": 0.45046850998342597,
|
237 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
238 |
+
"eval_full_en_cosine_precision@100": 0.3099342105263158,
|
239 |
+
"eval_full_en_cosine_precision@150": 0.26390350877192986,
|
240 |
+
"eval_full_en_cosine_precision@20": 0.5,
|
241 |
+
"eval_full_en_cosine_precision@200": 0.23320723684210526,
|
242 |
+
"eval_full_en_cosine_precision@50": 0.39335526315789476,
|
243 |
+
"eval_full_en_cosine_recall@1": 0.01051277780149725,
|
244 |
+
"eval_full_en_cosine_recall@100": 0.39158535797000443,
|
245 |
+
"eval_full_en_cosine_recall@150": 0.4917399858788313,
|
246 |
+
"eval_full_en_cosine_recall@20": 0.13328036442285973,
|
247 |
+
"eval_full_en_cosine_recall@200": 0.5734492892933252,
|
248 |
+
"eval_full_en_cosine_recall@50": 0.254129727850083,
|
249 |
+
"eval_runtime": 1.5752,
|
250 |
+
"eval_samples_per_second": 0.0,
|
251 |
+
"eval_sequential_score": 0.5352292016764449,
|
252 |
+
"eval_steps_per_second": 0.0,
|
253 |
+
"step": 800
|
254 |
+
},
|
255 |
+
{
|
256 |
+
"epoch": 1.0044642857142858,
|
257 |
+
"grad_norm": 13.140974998474121,
|
258 |
+
"learning_rate": 4.212875939849624e-05,
|
259 |
+
"loss": 1.6921,
|
260 |
+
"step": 900
|
261 |
+
},
|
262 |
+
{
|
263 |
+
"epoch": 1.1160714285714286,
|
264 |
+
"grad_norm": 12.160736083984375,
|
265 |
+
"learning_rate": 4.096569548872181e-05,
|
266 |
+
"loss": 1.3861,
|
267 |
+
"step": 1000
|
268 |
+
},
|
269 |
+
{
|
270 |
+
"epoch": 1.1160714285714286,
|
271 |
+
"eval_full_en_cosine_accuracy@1": 0.7401315789473685,
|
272 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
273 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
274 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
275 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
276 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
277 |
+
"eval_full_en_cosine_map@1": 0.7401315789473685,
|
278 |
+
"eval_full_en_cosine_map@100": 0.21155466872463927,
|
279 |
+
"eval_full_en_cosine_map@150": 0.2291636549745022,
|
280 |
+
"eval_full_en_cosine_map@20": 0.3373673798048492,
|
281 |
+
"eval_full_en_cosine_map@200": 0.24905074192004603,
|
282 |
+
"eval_full_en_cosine_map@50": 0.2376950112180141,
|
283 |
+
"eval_full_en_cosine_map@500": 0.3006802538137734,
|
284 |
+
"eval_full_en_cosine_mrr@1": 0.7401315789473685,
|
285 |
+
"eval_full_en_cosine_mrr@100": 0.8405236576289212,
|
286 |
+
"eval_full_en_cosine_mrr@150": 0.8405236576289212,
|
287 |
+
"eval_full_en_cosine_mrr@20": 0.8403143274853806,
|
288 |
+
"eval_full_en_cosine_mrr@200": 0.8405236576289212,
|
289 |
+
"eval_full_en_cosine_mrr@50": 0.840463849016481,
|
290 |
+
"eval_full_en_cosine_ndcg@1": 0.7401315789473685,
|
291 |
+
"eval_full_en_cosine_ndcg@100": 0.44212858816477746,
|
292 |
+
"eval_full_en_cosine_ndcg@150": 0.48946706445562127,
|
293 |
+
"eval_full_en_cosine_ndcg@20": 0.5332180756481385,
|
294 |
+
"eval_full_en_cosine_ndcg@200": 0.5367929588661781,
|
295 |
+
"eval_full_en_cosine_ndcg@50": 0.44979391873656477,
|
296 |
+
"eval_full_en_cosine_precision@1": 0.7401315789473685,
|
297 |
+
"eval_full_en_cosine_precision@100": 0.3114473684210526,
|
298 |
+
"eval_full_en_cosine_precision@150": 0.266469298245614,
|
299 |
+
"eval_full_en_cosine_precision@20": 0.49243421052631575,
|
300 |
+
"eval_full_en_cosine_precision@200": 0.2345888157894737,
|
301 |
+
"eval_full_en_cosine_precision@50": 0.3921052631578947,
|
302 |
+
"eval_full_en_cosine_recall@1": 0.010392607884295562,
|
303 |
+
"eval_full_en_cosine_recall@100": 0.3933254279416559,
|
304 |
+
"eval_full_en_cosine_recall@150": 0.4957503189606009,
|
305 |
+
"eval_full_en_cosine_recall@20": 0.13107623492706288,
|
306 |
+
"eval_full_en_cosine_recall@200": 0.5753954619760326,
|
307 |
+
"eval_full_en_cosine_recall@50": 0.2539746341397596,
|
308 |
+
"eval_runtime": 1.6397,
|
309 |
+
"eval_samples_per_second": 0.0,
|
310 |
+
"eval_sequential_score": 0.5367929588661781,
|
311 |
+
"eval_steps_per_second": 0.0,
|
312 |
+
"step": 1000
|
313 |
+
},
|
314 |
+
{
|
315 |
+
"epoch": 1.2276785714285714,
|
316 |
+
"grad_norm": 13.078369140625,
|
317 |
+
"learning_rate": 3.9790883458646615e-05,
|
318 |
+
"loss": 1.3863,
|
319 |
+
"step": 1100
|
320 |
+
},
|
321 |
+
{
|
322 |
+
"epoch": 1.3392857142857144,
|
323 |
+
"grad_norm": 11.990692138671875,
|
324 |
+
"learning_rate": 3.861607142857143e-05,
|
325 |
+
"loss": 1.3546,
|
326 |
+
"step": 1200
|
327 |
+
},
|
328 |
+
{
|
329 |
+
"epoch": 1.3392857142857144,
|
330 |
+
"eval_full_en_cosine_accuracy@1": 0.7203947368421053,
|
331 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
332 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
333 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
334 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
335 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
336 |
+
"eval_full_en_cosine_map@1": 0.7203947368421053,
|
337 |
+
"eval_full_en_cosine_map@100": 0.20552277525856266,
|
338 |
+
"eval_full_en_cosine_map@150": 0.22274311961933413,
|
339 |
+
"eval_full_en_cosine_map@20": 0.3363904557549852,
|
340 |
+
"eval_full_en_cosine_map@200": 0.24106738760441354,
|
341 |
+
"eval_full_en_cosine_map@50": 0.23370113464760453,
|
342 |
+
"eval_full_en_cosine_map@500": 0.28981293048421486,
|
343 |
+
"eval_full_en_cosine_mrr@1": 0.7203947368421053,
|
344 |
+
"eval_full_en_cosine_mrr@100": 0.8322617799738206,
|
345 |
+
"eval_full_en_cosine_mrr@150": 0.8322617799738206,
|
346 |
+
"eval_full_en_cosine_mrr@20": 0.8320620443153339,
|
347 |
+
"eval_full_en_cosine_mrr@200": 0.8322617799738206,
|
348 |
+
"eval_full_en_cosine_mrr@50": 0.8322050649102997,
|
349 |
+
"eval_full_en_cosine_ndcg@1": 0.7203947368421053,
|
350 |
+
"eval_full_en_cosine_ndcg@100": 0.43445871937106545,
|
351 |
+
"eval_full_en_cosine_ndcg@150": 0.48130417146010107,
|
352 |
+
"eval_full_en_cosine_ndcg@20": 0.531477407982968,
|
353 |
+
"eval_full_en_cosine_ndcg@200": 0.5259375639543232,
|
354 |
+
"eval_full_en_cosine_ndcg@50": 0.4444057356887903,
|
355 |
+
"eval_full_en_cosine_precision@1": 0.7203947368421053,
|
356 |
+
"eval_full_en_cosine_precision@100": 0.3039802631578947,
|
357 |
+
"eval_full_en_cosine_precision@150": 0.25999999999999995,
|
358 |
+
"eval_full_en_cosine_precision@20": 0.4925986842105263,
|
359 |
+
"eval_full_en_cosine_precision@200": 0.22763157894736838,
|
360 |
+
"eval_full_en_cosine_precision@50": 0.3867105263157895,
|
361 |
+
"eval_full_en_cosine_recall@1": 0.010318104890368607,
|
362 |
+
"eval_full_en_cosine_recall@100": 0.385615965839615,
|
363 |
+
"eval_full_en_cosine_recall@150": 0.48656381032984825,
|
364 |
+
"eval_full_en_cosine_recall@20": 0.13139326985918445,
|
365 |
+
"eval_full_en_cosine_recall@200": 0.5617757383007209,
|
366 |
+
"eval_full_en_cosine_recall@50": 0.2506285703289517,
|
367 |
+
"eval_runtime": 1.5585,
|
368 |
+
"eval_samples_per_second": 0.0,
|
369 |
+
"eval_sequential_score": 0.5259375639543232,
|
370 |
+
"eval_steps_per_second": 0.0,
|
371 |
+
"step": 1200
|
372 |
+
},
|
373 |
+
{
|
374 |
+
"epoch": 1.4508928571428572,
|
375 |
+
"grad_norm": 15.019533157348633,
|
376 |
+
"learning_rate": 3.744125939849624e-05,
|
377 |
+
"loss": 1.373,
|
378 |
+
"step": 1300
|
379 |
+
},
|
380 |
+
{
|
381 |
+
"epoch": 1.5625,
|
382 |
+
"grad_norm": 10.545878410339355,
|
383 |
+
"learning_rate": 3.626644736842105e-05,
|
384 |
+
"loss": 1.3364,
|
385 |
+
"step": 1400
|
386 |
+
},
|
387 |
+
{
|
388 |
+
"epoch": 1.5625,
|
389 |
+
"eval_full_en_cosine_accuracy@1": 0.7171052631578947,
|
390 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
391 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
392 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
393 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
394 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
395 |
+
"eval_full_en_cosine_map@1": 0.7171052631578947,
|
396 |
+
"eval_full_en_cosine_map@100": 0.20833018055660496,
|
397 |
+
"eval_full_en_cosine_map@150": 0.22583322401021033,
|
398 |
+
"eval_full_en_cosine_map@20": 0.34006318172507877,
|
399 |
+
"eval_full_en_cosine_map@200": 0.24462161151730188,
|
400 |
+
"eval_full_en_cosine_map@50": 0.23483789231739935,
|
401 |
+
"eval_full_en_cosine_map@500": 0.2946124561805931,
|
402 |
+
"eval_full_en_cosine_mrr@1": 0.7171052631578947,
|
403 |
+
"eval_full_en_cosine_mrr@100": 0.8267713172687238,
|
404 |
+
"eval_full_en_cosine_mrr@150": 0.8267713172687238,
|
405 |
+
"eval_full_en_cosine_mrr@20": 0.8265913362952838,
|
406 |
+
"eval_full_en_cosine_mrr@200": 0.8267713172687238,
|
407 |
+
"eval_full_en_cosine_mrr@50": 0.8267343568902494,
|
408 |
+
"eval_full_en_cosine_ndcg@1": 0.7171052631578947,
|
409 |
+
"eval_full_en_cosine_ndcg@100": 0.4377486787968229,
|
410 |
+
"eval_full_en_cosine_ndcg@150": 0.4850669425848544,
|
411 |
+
"eval_full_en_cosine_ndcg@20": 0.5331724259953773,
|
412 |
+
"eval_full_en_cosine_ndcg@200": 0.5302927064126869,
|
413 |
+
"eval_full_en_cosine_ndcg@50": 0.4451308688476405,
|
414 |
+
"eval_full_en_cosine_precision@1": 0.7171052631578947,
|
415 |
+
"eval_full_en_cosine_precision@100": 0.3074671052631579,
|
416 |
+
"eval_full_en_cosine_precision@150": 0.2625657894736842,
|
417 |
+
"eval_full_en_cosine_precision@20": 0.4947368421052632,
|
418 |
+
"eval_full_en_cosine_precision@200": 0.23016447368421053,
|
419 |
+
"eval_full_en_cosine_precision@50": 0.38769736842105257,
|
420 |
+
"eval_full_en_cosine_recall@1": 0.010208074045806198,
|
421 |
+
"eval_full_en_cosine_recall@100": 0.3902466549235702,
|
422 |
+
"eval_full_en_cosine_recall@150": 0.49226776551348056,
|
423 |
+
"eval_full_en_cosine_recall@20": 0.13255572846134298,
|
424 |
+
"eval_full_en_cosine_recall@200": 0.5680994353864672,
|
425 |
+
"eval_full_en_cosine_recall@50": 0.25126941591084845,
|
426 |
+
"eval_runtime": 1.5595,
|
427 |
+
"eval_samples_per_second": 0.0,
|
428 |
+
"eval_sequential_score": 0.5302927064126869,
|
429 |
+
"eval_steps_per_second": 0.0,
|
430 |
+
"step": 1400
|
431 |
+
},
|
432 |
+
{
|
433 |
+
"epoch": 1.6741071428571428,
|
434 |
+
"grad_norm": 18.495975494384766,
|
435 |
+
"learning_rate": 3.509163533834587e-05,
|
436 |
+
"loss": 1.2876,
|
437 |
+
"step": 1500
|
438 |
+
},
|
439 |
+
{
|
440 |
+
"epoch": 1.7857142857142856,
|
441 |
+
"grad_norm": 12.646751403808594,
|
442 |
+
"learning_rate": 3.391682330827068e-05,
|
443 |
+
"loss": 1.3094,
|
444 |
+
"step": 1600
|
445 |
+
},
|
446 |
+
{
|
447 |
+
"epoch": 1.7857142857142856,
|
448 |
+
"eval_full_en_cosine_accuracy@1": 0.7072368421052632,
|
449 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
450 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
451 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
452 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
453 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
454 |
+
"eval_full_en_cosine_map@1": 0.7072368421052632,
|
455 |
+
"eval_full_en_cosine_map@100": 0.20923239071614674,
|
456 |
+
"eval_full_en_cosine_map@150": 0.225604138471006,
|
457 |
+
"eval_full_en_cosine_map@20": 0.34034356587585846,
|
458 |
+
"eval_full_en_cosine_map@200": 0.24539737099429304,
|
459 |
+
"eval_full_en_cosine_map@50": 0.23464702413938254,
|
460 |
+
"eval_full_en_cosine_map@500": 0.29597166286299953,
|
461 |
+
"eval_full_en_cosine_mrr@1": 0.7072368421052632,
|
462 |
+
"eval_full_en_cosine_mrr@100": 0.8214137967940215,
|
463 |
+
"eval_full_en_cosine_mrr@150": 0.8214137967940215,
|
464 |
+
"eval_full_en_cosine_mrr@20": 0.8213699371448987,
|
465 |
+
"eval_full_en_cosine_mrr@200": 0.8214137967940215,
|
466 |
+
"eval_full_en_cosine_mrr@50": 0.8213699371448987,
|
467 |
+
"eval_full_en_cosine_ndcg@1": 0.7072368421052632,
|
468 |
+
"eval_full_en_cosine_ndcg@100": 0.4396726832556684,
|
469 |
+
"eval_full_en_cosine_ndcg@150": 0.4847816359827512,
|
470 |
+
"eval_full_en_cosine_ndcg@20": 0.532792025753163,
|
471 |
+
"eval_full_en_cosine_ndcg@200": 0.5323403273572274,
|
472 |
+
"eval_full_en_cosine_ndcg@50": 0.4452189433184465,
|
473 |
+
"eval_full_en_cosine_precision@1": 0.7072368421052632,
|
474 |
+
"eval_full_en_cosine_precision@100": 0.3098026315789474,
|
475 |
+
"eval_full_en_cosine_precision@150": 0.26274122807017547,
|
476 |
+
"eval_full_en_cosine_precision@20": 0.4935855263157895,
|
477 |
+
"eval_full_en_cosine_precision@200": 0.23192434210526314,
|
478 |
+
"eval_full_en_cosine_precision@50": 0.38763157894736844,
|
479 |
+
"eval_full_en_cosine_recall@1": 0.010122149362902188,
|
480 |
+
"eval_full_en_cosine_recall@100": 0.39236988612007834,
|
481 |
+
"eval_full_en_cosine_recall@150": 0.4910778378543689,
|
482 |
+
"eval_full_en_cosine_recall@20": 0.13108496301513997,
|
483 |
+
"eval_full_en_cosine_recall@200": 0.5709689534914331,
|
484 |
+
"eval_full_en_cosine_recall@50": 0.25093448303772187,
|
485 |
+
"eval_runtime": 1.5873,
|
486 |
+
"eval_samples_per_second": 0.0,
|
487 |
+
"eval_sequential_score": 0.5323403273572274,
|
488 |
+
"eval_steps_per_second": 0.0,
|
489 |
+
"step": 1600
|
490 |
+
},
|
491 |
+
{
|
492 |
+
"epoch": 1.8973214285714286,
|
493 |
+
"grad_norm": 11.858412742614746,
|
494 |
+
"learning_rate": 3.274201127819549e-05,
|
495 |
+
"loss": 1.2784,
|
496 |
+
"step": 1700
|
497 |
+
},
|
498 |
+
{
|
499 |
+
"epoch": 2.0089285714285716,
|
500 |
+
"grad_norm": 11.152688026428223,
|
501 |
+
"learning_rate": 3.1567199248120306e-05,
|
502 |
+
"loss": 1.2204,
|
503 |
+
"step": 1800
|
504 |
+
},
|
505 |
+
{
|
506 |
+
"epoch": 2.0089285714285716,
|
507 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
508 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
509 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
510 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
511 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
512 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
513 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
514 |
+
"eval_full_en_cosine_map@100": 0.2098412194483687,
|
515 |
+
"eval_full_en_cosine_map@150": 0.22663911455304064,
|
516 |
+
"eval_full_en_cosine_map@20": 0.3433147887298301,
|
517 |
+
"eval_full_en_cosine_map@200": 0.24620266722190678,
|
518 |
+
"eval_full_en_cosine_map@50": 0.23714915519951082,
|
519 |
+
"eval_full_en_cosine_map@500": 0.29690932859887553,
|
520 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
521 |
+
"eval_full_en_cosine_mrr@100": 0.8394024772357531,
|
522 |
+
"eval_full_en_cosine_mrr@150": 0.8394024772357531,
|
523 |
+
"eval_full_en_cosine_mrr@20": 0.8393426686233129,
|
524 |
+
"eval_full_en_cosine_mrr@200": 0.8394024772357531,
|
525 |
+
"eval_full_en_cosine_mrr@50": 0.8393426686233129,
|
526 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
527 |
+
"eval_full_en_cosine_ndcg@100": 0.4396519841053572,
|
528 |
+
"eval_full_en_cosine_ndcg@150": 0.4856325134708184,
|
529 |
+
"eval_full_en_cosine_ndcg@20": 0.5375317893335387,
|
530 |
+
"eval_full_en_cosine_ndcg@200": 0.533015167774829,
|
531 |
+
"eval_full_en_cosine_ndcg@50": 0.44810398395306655,
|
532 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
533 |
+
"eval_full_en_cosine_precision@100": 0.3084539473684211,
|
534 |
+
"eval_full_en_cosine_precision@150": 0.2627631578947368,
|
535 |
+
"eval_full_en_cosine_precision@20": 0.49769736842105267,
|
536 |
+
"eval_full_en_cosine_precision@200": 0.2314309210526316,
|
537 |
+
"eval_full_en_cosine_precision@50": 0.3891447368421053,
|
538 |
+
"eval_full_en_cosine_recall@1": 0.010440810366523372,
|
539 |
+
"eval_full_en_cosine_recall@100": 0.39036009395952986,
|
540 |
+
"eval_full_en_cosine_recall@150": 0.49041982254882954,
|
541 |
+
"eval_full_en_cosine_recall@20": 0.13228070304056636,
|
542 |
+
"eval_full_en_cosine_recall@200": 0.5704962189819233,
|
543 |
+
"eval_full_en_cosine_recall@50": 0.25248213212752935,
|
544 |
+
"eval_runtime": 1.6049,
|
545 |
+
"eval_samples_per_second": 0.0,
|
546 |
+
"eval_sequential_score": 0.533015167774829,
|
547 |
+
"eval_steps_per_second": 0.0,
|
548 |
+
"step": 1800
|
549 |
+
},
|
550 |
+
{
|
551 |
+
"epoch": 2.1205357142857144,
|
552 |
+
"grad_norm": 12.91015625,
|
553 |
+
"learning_rate": 3.0392387218045114e-05,
|
554 |
+
"loss": 0.9617,
|
555 |
+
"step": 1900
|
556 |
+
},
|
557 |
+
{
|
558 |
+
"epoch": 2.232142857142857,
|
559 |
+
"grad_norm": 11.646313667297363,
|
560 |
+
"learning_rate": 2.9217575187969924e-05,
|
561 |
+
"loss": 1.0004,
|
562 |
+
"step": 2000
|
563 |
+
},
|
564 |
+
{
|
565 |
+
"epoch": 2.232142857142857,
|
566 |
+
"eval_full_en_cosine_accuracy@1": 0.7236842105263158,
|
567 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
568 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
569 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
570 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
571 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
572 |
+
"eval_full_en_cosine_map@1": 0.7236842105263158,
|
573 |
+
"eval_full_en_cosine_map@100": 0.20775225168018954,
|
574 |
+
"eval_full_en_cosine_map@150": 0.22393096419950168,
|
575 |
+
"eval_full_en_cosine_map@20": 0.3380596885262807,
|
576 |
+
"eval_full_en_cosine_map@200": 0.24259765295506924,
|
577 |
+
"eval_full_en_cosine_map@50": 0.23452814948810471,
|
578 |
+
"eval_full_en_cosine_map@500": 0.2920026964508484,
|
579 |
+
"eval_full_en_cosine_mrr@1": 0.7236842105263158,
|
580 |
+
"eval_full_en_cosine_mrr@100": 0.8325452625382137,
|
581 |
+
"eval_full_en_cosine_mrr@150": 0.8325452625382137,
|
582 |
+
"eval_full_en_cosine_mrr@20": 0.8324781304222094,
|
583 |
+
"eval_full_en_cosine_mrr@200": 0.8325452625382137,
|
584 |
+
"eval_full_en_cosine_mrr@50": 0.8325452625382137,
|
585 |
+
"eval_full_en_cosine_ndcg@1": 0.7236842105263158,
|
586 |
+
"eval_full_en_cosine_ndcg@100": 0.4376001104057169,
|
587 |
+
"eval_full_en_cosine_ndcg@150": 0.48181431955382,
|
588 |
+
"eval_full_en_cosine_ndcg@20": 0.5323035546433559,
|
589 |
+
"eval_full_en_cosine_ndcg@200": 0.5276663014224582,
|
590 |
+
"eval_full_en_cosine_ndcg@50": 0.44660441452063837,
|
591 |
+
"eval_full_en_cosine_precision@1": 0.7236842105263158,
|
592 |
+
"eval_full_en_cosine_precision@100": 0.30644736842105263,
|
593 |
+
"eval_full_en_cosine_precision@150": 0.259890350877193,
|
594 |
+
"eval_full_en_cosine_precision@20": 0.4916118421052632,
|
595 |
+
"eval_full_en_cosine_precision@200": 0.2280921052631579,
|
596 |
+
"eval_full_en_cosine_precision@50": 0.3886842105263158,
|
597 |
+
"eval_full_en_cosine_recall@1": 0.010329446437905086,
|
598 |
+
"eval_full_en_cosine_recall@100": 0.38885062846601265,
|
599 |
+
"eval_full_en_cosine_recall@150": 0.4854595951837256,
|
600 |
+
"eval_full_en_cosine_recall@20": 0.131078016933875,
|
601 |
+
"eval_full_en_cosine_recall@200": 0.5630724982932908,
|
602 |
+
"eval_full_en_cosine_recall@50": 0.252357645205228,
|
603 |
+
"eval_runtime": 1.5613,
|
604 |
+
"eval_samples_per_second": 0.0,
|
605 |
+
"eval_sequential_score": 0.5276663014224582,
|
606 |
+
"eval_steps_per_second": 0.0,
|
607 |
+
"step": 2000
|
608 |
+
},
|
609 |
+
{
|
610 |
+
"epoch": 2.34375,
|
611 |
+
"grad_norm": 12.087961196899414,
|
612 |
+
"learning_rate": 2.8042763157894735e-05,
|
613 |
+
"loss": 0.9694,
|
614 |
+
"step": 2100
|
615 |
+
},
|
616 |
+
{
|
617 |
+
"epoch": 2.455357142857143,
|
618 |
+
"grad_norm": 8.181659698486328,
|
619 |
+
"learning_rate": 2.6867951127819552e-05,
|
620 |
+
"loss": 0.9843,
|
621 |
+
"step": 2200
|
622 |
+
},
|
623 |
+
{
|
624 |
+
"epoch": 2.455357142857143,
|
625 |
+
"eval_full_en_cosine_accuracy@1": 0.7236842105263158,
|
626 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
627 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
628 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
629 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
630 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
631 |
+
"eval_full_en_cosine_map@1": 0.7236842105263158,
|
632 |
+
"eval_full_en_cosine_map@100": 0.209953160245849,
|
633 |
+
"eval_full_en_cosine_map@150": 0.22760030144833215,
|
634 |
+
"eval_full_en_cosine_map@20": 0.34078157961918865,
|
635 |
+
"eval_full_en_cosine_map@200": 0.24749824184265867,
|
636 |
+
"eval_full_en_cosine_map@50": 0.2365248444512811,
|
637 |
+
"eval_full_en_cosine_map@500": 0.29789431690676116,
|
638 |
+
"eval_full_en_cosine_mrr@1": 0.7236842105263158,
|
639 |
+
"eval_full_en_cosine_mrr@100": 0.8318935359231412,
|
640 |
+
"eval_full_en_cosine_mrr@150": 0.8318935359231412,
|
641 |
+
"eval_full_en_cosine_mrr@20": 0.8316833751044278,
|
642 |
+
"eval_full_en_cosine_mrr@200": 0.8318935359231412,
|
643 |
+
"eval_full_en_cosine_mrr@50": 0.8318935359231412,
|
644 |
+
"eval_full_en_cosine_ndcg@1": 0.7236842105263158,
|
645 |
+
"eval_full_en_cosine_ndcg@100": 0.44076958126493176,
|
646 |
+
"eval_full_en_cosine_ndcg@150": 0.48838061313116793,
|
647 |
+
"eval_full_en_cosine_ndcg@20": 0.5350320556020238,
|
648 |
+
"eval_full_en_cosine_ndcg@200": 0.5355574509263721,
|
649 |
+
"eval_full_en_cosine_ndcg@50": 0.44803994906340594,
|
650 |
+
"eval_full_en_cosine_precision@1": 0.7236842105263158,
|
651 |
+
"eval_full_en_cosine_precision@100": 0.3099671052631579,
|
652 |
+
"eval_full_en_cosine_precision@150": 0.2648464912280702,
|
653 |
+
"eval_full_en_cosine_precision@20": 0.49588815789473684,
|
654 |
+
"eval_full_en_cosine_precision@200": 0.23342105263157892,
|
655 |
+
"eval_full_en_cosine_precision@50": 0.39052631578947367,
|
656 |
+
"eval_full_en_cosine_recall@1": 0.010284539147879572,
|
657 |
+
"eval_full_en_cosine_recall@100": 0.39296182819932773,
|
658 |
+
"eval_full_en_cosine_recall@150": 0.4959148528891931,
|
659 |
+
"eval_full_en_cosine_recall@20": 0.13200577828629578,
|
660 |
+
"eval_full_en_cosine_recall@200": 0.5749370249014907,
|
661 |
+
"eval_full_en_cosine_recall@50": 0.25310992970173135,
|
662 |
+
"eval_runtime": 1.8632,
|
663 |
+
"eval_samples_per_second": 0.0,
|
664 |
+
"eval_sequential_score": 0.5355574509263721,
|
665 |
+
"eval_steps_per_second": 0.0,
|
666 |
+
"step": 2200
|
667 |
+
},
|
668 |
+
{
|
669 |
+
"epoch": 2.5669642857142856,
|
670 |
+
"grad_norm": 13.6882905960083,
|
671 |
+
"learning_rate": 2.5693139097744363e-05,
|
672 |
+
"loss": 0.9743,
|
673 |
+
"step": 2300
|
674 |
+
},
|
675 |
+
{
|
676 |
+
"epoch": 2.678571428571429,
|
677 |
+
"grad_norm": 11.966975212097168,
|
678 |
+
"learning_rate": 2.4518327067669177e-05,
|
679 |
+
"loss": 0.9252,
|
680 |
+
"step": 2400
|
681 |
+
},
|
682 |
+
{
|
683 |
+
"epoch": 2.678571428571429,
|
684 |
+
"eval_full_en_cosine_accuracy@1": 0.7335526315789473,
|
685 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
686 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
687 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
688 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
689 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
690 |
+
"eval_full_en_cosine_map@1": 0.7335526315789473,
|
691 |
+
"eval_full_en_cosine_map@100": 0.20983286336268822,
|
692 |
+
"eval_full_en_cosine_map@150": 0.22675852672419078,
|
693 |
+
"eval_full_en_cosine_map@20": 0.34004090105732804,
|
694 |
+
"eval_full_en_cosine_map@200": 0.24584993568226646,
|
695 |
+
"eval_full_en_cosine_map@50": 0.23672594782424658,
|
696 |
+
"eval_full_en_cosine_map@500": 0.29632183596698103,
|
697 |
+
"eval_full_en_cosine_mrr@1": 0.7335526315789473,
|
698 |
+
"eval_full_en_cosine_mrr@100": 0.83135268727374,
|
699 |
+
"eval_full_en_cosine_mrr@150": 0.83135268727374,
|
700 |
+
"eval_full_en_cosine_mrr@20": 0.8311351294903929,
|
701 |
+
"eval_full_en_cosine_mrr@200": 0.83135268727374,
|
702 |
+
"eval_full_en_cosine_mrr@50": 0.8312917710944029,
|
703 |
+
"eval_full_en_cosine_ndcg@1": 0.7335526315789473,
|
704 |
+
"eval_full_en_cosine_ndcg@100": 0.4400577813719261,
|
705 |
+
"eval_full_en_cosine_ndcg@150": 0.4859220111165228,
|
706 |
+
"eval_full_en_cosine_ndcg@20": 0.5344170691501652,
|
707 |
+
"eval_full_en_cosine_ndcg@200": 0.5320416498978522,
|
708 |
+
"eval_full_en_cosine_ndcg@50": 0.4485020943766835,
|
709 |
+
"eval_full_en_cosine_precision@1": 0.7335526315789473,
|
710 |
+
"eval_full_en_cosine_precision@100": 0.30907894736842106,
|
711 |
+
"eval_full_en_cosine_precision@150": 0.26278508771929826,
|
712 |
+
"eval_full_en_cosine_precision@20": 0.4960526315789474,
|
713 |
+
"eval_full_en_cosine_precision@200": 0.23090460526315787,
|
714 |
+
"eval_full_en_cosine_precision@50": 0.39151315789473684,
|
715 |
+
"eval_full_en_cosine_recall@1": 0.010402156873475942,
|
716 |
+
"eval_full_en_cosine_recall@100": 0.39206565501916524,
|
717 |
+
"eval_full_en_cosine_recall@150": 0.49176955829136443,
|
718 |
+
"eval_full_en_cosine_recall@20": 0.1321996647113643,
|
719 |
+
"eval_full_en_cosine_recall@200": 0.569344104113959,
|
720 |
+
"eval_full_en_cosine_recall@50": 0.2535254041631645,
|
721 |
+
"eval_runtime": 1.5826,
|
722 |
+
"eval_samples_per_second": 0.0,
|
723 |
+
"eval_sequential_score": 0.5320416498978522,
|
724 |
+
"eval_steps_per_second": 0.0,
|
725 |
+
"step": 2400
|
726 |
+
},
|
727 |
+
{
|
728 |
+
"epoch": 2.790178571428571,
|
729 |
+
"grad_norm": 11.857823371887207,
|
730 |
+
"learning_rate": 2.3343515037593984e-05,
|
731 |
+
"loss": 0.9272,
|
732 |
+
"step": 2500
|
733 |
+
},
|
734 |
+
{
|
735 |
+
"epoch": 2.9017857142857144,
|
736 |
+
"grad_norm": 12.297764778137207,
|
737 |
+
"learning_rate": 2.2168703007518798e-05,
|
738 |
+
"loss": 0.9279,
|
739 |
+
"step": 2600
|
740 |
+
},
|
741 |
+
{
|
742 |
+
"epoch": 2.9017857142857144,
|
743 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
744 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
745 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
746 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
747 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
748 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
749 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
750 |
+
"eval_full_en_cosine_map@100": 0.20939105710550232,
|
751 |
+
"eval_full_en_cosine_map@150": 0.22725165687553775,
|
752 |
+
"eval_full_en_cosine_map@20": 0.3403680329074837,
|
753 |
+
"eval_full_en_cosine_map@200": 0.24658865195474836,
|
754 |
+
"eval_full_en_cosine_map@50": 0.23612691752121232,
|
755 |
+
"eval_full_en_cosine_map@500": 0.29718900909315255,
|
756 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
757 |
+
"eval_full_en_cosine_mrr@100": 0.8391709003546018,
|
758 |
+
"eval_full_en_cosine_mrr@150": 0.8391709003546018,
|
759 |
+
"eval_full_en_cosine_mrr@20": 0.8391064008705977,
|
760 |
+
"eval_full_en_cosine_mrr@200": 0.8391709003546018,
|
761 |
+
"eval_full_en_cosine_mrr@50": 0.8391064008705977,
|
762 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
763 |
+
"eval_full_en_cosine_ndcg@100": 0.4389185422351881,
|
764 |
+
"eval_full_en_cosine_ndcg@150": 0.4868646893605612,
|
765 |
+
"eval_full_en_cosine_ndcg@20": 0.5359014833764041,
|
766 |
+
"eval_full_en_cosine_ndcg@200": 0.5332804255738979,
|
767 |
+
"eval_full_en_cosine_ndcg@50": 0.44749591453362436,
|
768 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
769 |
+
"eval_full_en_cosine_precision@100": 0.30779605263157894,
|
770 |
+
"eval_full_en_cosine_precision@150": 0.26355263157894737,
|
771 |
+
"eval_full_en_cosine_precision@20": 0.49588815789473684,
|
772 |
+
"eval_full_en_cosine_precision@200": 0.2316282894736842,
|
773 |
+
"eval_full_en_cosine_precision@50": 0.38901315789473684,
|
774 |
+
"eval_full_en_cosine_recall@1": 0.010425572953236805,
|
775 |
+
"eval_full_en_cosine_recall@100": 0.3892001066901767,
|
776 |
+
"eval_full_en_cosine_recall@150": 0.492569756570653,
|
777 |
+
"eval_full_en_cosine_recall@20": 0.13284603422933672,
|
778 |
+
"eval_full_en_cosine_recall@200": 0.5706210722984945,
|
779 |
+
"eval_full_en_cosine_recall@50": 0.2518705529759721,
|
780 |
+
"eval_runtime": 1.577,
|
781 |
+
"eval_samples_per_second": 0.0,
|
782 |
+
"eval_sequential_score": 0.5332804255738979,
|
783 |
+
"eval_steps_per_second": 0.0,
|
784 |
+
"step": 2600
|
785 |
+
},
|
786 |
+
{
|
787 |
+
"epoch": 3.013392857142857,
|
788 |
+
"grad_norm": 12.120986938476562,
|
789 |
+
"learning_rate": 2.099389097744361e-05,
|
790 |
+
"loss": 0.857,
|
791 |
+
"step": 2700
|
792 |
+
},
|
793 |
+
{
|
794 |
+
"epoch": 3.125,
|
795 |
+
"grad_norm": 14.276410102844238,
|
796 |
+
"learning_rate": 1.9819078947368423e-05,
|
797 |
+
"loss": 0.7313,
|
798 |
+
"step": 2800
|
799 |
+
},
|
800 |
+
{
|
801 |
+
"epoch": 3.125,
|
802 |
+
"eval_full_en_cosine_accuracy@1": 0.7269736842105263,
|
803 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
804 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
805 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
806 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
807 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
808 |
+
"eval_full_en_cosine_map@1": 0.7269736842105263,
|
809 |
+
"eval_full_en_cosine_map@100": 0.20830025965749158,
|
810 |
+
"eval_full_en_cosine_map@150": 0.22525408557521698,
|
811 |
+
"eval_full_en_cosine_map@20": 0.34094306993307805,
|
812 |
+
"eval_full_en_cosine_map@200": 0.24400549054611867,
|
813 |
+
"eval_full_en_cosine_map@50": 0.23400685602624646,
|
814 |
+
"eval_full_en_cosine_map@500": 0.29401532392219154,
|
815 |
+
"eval_full_en_cosine_mrr@1": 0.7269736842105263,
|
816 |
+
"eval_full_en_cosine_mrr@100": 0.8315051952798665,
|
817 |
+
"eval_full_en_cosine_mrr@150": 0.8315051952798665,
|
818 |
+
"eval_full_en_cosine_mrr@20": 0.8314268744778616,
|
819 |
+
"eval_full_en_cosine_mrr@200": 0.8315051952798665,
|
820 |
+
"eval_full_en_cosine_mrr@50": 0.8315051952798665,
|
821 |
+
"eval_full_en_cosine_ndcg@1": 0.7269736842105263,
|
822 |
+
"eval_full_en_cosine_ndcg@100": 0.43885977048304636,
|
823 |
+
"eval_full_en_cosine_ndcg@150": 0.48486671483618976,
|
824 |
+
"eval_full_en_cosine_ndcg@20": 0.5365677326031855,
|
825 |
+
"eval_full_en_cosine_ndcg@200": 0.5299990147795507,
|
826 |
+
"eval_full_en_cosine_ndcg@50": 0.44591298214905706,
|
827 |
+
"eval_full_en_cosine_precision@1": 0.7269736842105263,
|
828 |
+
"eval_full_en_cosine_precision@100": 0.308125,
|
829 |
+
"eval_full_en_cosine_precision@150": 0.2621052631578948,
|
830 |
+
"eval_full_en_cosine_precision@20": 0.49786184210526313,
|
831 |
+
"eval_full_en_cosine_precision@200": 0.22980263157894737,
|
832 |
+
"eval_full_en_cosine_precision@50": 0.3870394736842105,
|
833 |
+
"eval_full_en_cosine_recall@1": 0.010317820884117123,
|
834 |
+
"eval_full_en_cosine_recall@100": 0.38998825691236244,
|
835 |
+
"eval_full_en_cosine_recall@150": 0.4900687458798103,
|
836 |
+
"eval_full_en_cosine_recall@20": 0.13271573138828288,
|
837 |
+
"eval_full_en_cosine_recall@200": 0.5659226272090475,
|
838 |
+
"eval_full_en_cosine_recall@50": 0.25218483369820577,
|
839 |
+
"eval_runtime": 1.607,
|
840 |
+
"eval_samples_per_second": 0.0,
|
841 |
+
"eval_sequential_score": 0.5299990147795507,
|
842 |
+
"eval_steps_per_second": 0.0,
|
843 |
+
"step": 2800
|
844 |
+
},
|
845 |
+
{
|
846 |
+
"epoch": 3.236607142857143,
|
847 |
+
"grad_norm": 8.85190486907959,
|
848 |
+
"learning_rate": 1.8644266917293237e-05,
|
849 |
+
"loss": 0.7103,
|
850 |
+
"step": 2900
|
851 |
+
},
|
852 |
+
{
|
853 |
+
"epoch": 3.3482142857142856,
|
854 |
+
"grad_norm": 8.932626724243164,
|
855 |
+
"learning_rate": 1.7469454887218044e-05,
|
856 |
+
"loss": 0.7187,
|
857 |
+
"step": 3000
|
858 |
+
},
|
859 |
+
{
|
860 |
+
"epoch": 3.3482142857142856,
|
861 |
+
"eval_full_en_cosine_accuracy@1": 0.7269736842105263,
|
862 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
863 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
864 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
865 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
866 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
867 |
+
"eval_full_en_cosine_map@1": 0.7269736842105263,
|
868 |
+
"eval_full_en_cosine_map@100": 0.20842370079433947,
|
869 |
+
"eval_full_en_cosine_map@150": 0.22608431932756923,
|
870 |
+
"eval_full_en_cosine_map@20": 0.34026464907579207,
|
871 |
+
"eval_full_en_cosine_map@200": 0.2451065024940476,
|
872 |
+
"eval_full_en_cosine_map@50": 0.23418777403622906,
|
873 |
+
"eval_full_en_cosine_map@500": 0.2945476002258968,
|
874 |
+
"eval_full_en_cosine_mrr@1": 0.7269736842105263,
|
875 |
+
"eval_full_en_cosine_mrr@100": 0.8303256958684593,
|
876 |
+
"eval_full_en_cosine_mrr@150": 0.8303256958684593,
|
877 |
+
"eval_full_en_cosine_mrr@20": 0.830265887256019,
|
878 |
+
"eval_full_en_cosine_mrr@200": 0.8303256958684593,
|
879 |
+
"eval_full_en_cosine_mrr@50": 0.830265887256019,
|
880 |
+
"eval_full_en_cosine_ndcg@1": 0.7269736842105263,
|
881 |
+
"eval_full_en_cosine_ndcg@100": 0.4379203478644915,
|
882 |
+
"eval_full_en_cosine_ndcg@150": 0.4860723616469748,
|
883 |
+
"eval_full_en_cosine_ndcg@20": 0.534483012777908,
|
884 |
+
"eval_full_en_cosine_ndcg@200": 0.5318565059446251,
|
885 |
+
"eval_full_en_cosine_ndcg@50": 0.4443024102705765,
|
886 |
+
"eval_full_en_cosine_precision@1": 0.7269736842105263,
|
887 |
+
"eval_full_en_cosine_precision@100": 0.30750000000000005,
|
888 |
+
"eval_full_en_cosine_precision@150": 0.26370614035087714,
|
889 |
+
"eval_full_en_cosine_precision@20": 0.49588815789473684,
|
890 |
+
"eval_full_en_cosine_precision@200": 0.23116776315789475,
|
891 |
+
"eval_full_en_cosine_precision@50": 0.38539473684210523,
|
892 |
+
"eval_full_en_cosine_recall@1": 0.010298189290703101,
|
893 |
+
"eval_full_en_cosine_recall@100": 0.38891472258186655,
|
894 |
+
"eval_full_en_cosine_recall@150": 0.4925623824521817,
|
895 |
+
"eval_full_en_cosine_recall@20": 0.13215936080151625,
|
896 |
+
"eval_full_en_cosine_recall@200": 0.5698259119139981,
|
897 |
+
"eval_full_en_cosine_recall@50": 0.2502092759755724,
|
898 |
+
"eval_runtime": 1.6179,
|
899 |
+
"eval_samples_per_second": 0.0,
|
900 |
+
"eval_sequential_score": 0.5318565059446251,
|
901 |
+
"eval_steps_per_second": 0.0,
|
902 |
+
"step": 3000
|
903 |
+
},
|
904 |
+
{
|
905 |
+
"epoch": 3.4598214285714284,
|
906 |
+
"grad_norm": 12.761665344238281,
|
907 |
+
"learning_rate": 1.6294642857142858e-05,
|
908 |
+
"loss": 0.7067,
|
909 |
+
"step": 3100
|
910 |
+
},
|
911 |
+
{
|
912 |
+
"epoch": 3.571428571428571,
|
913 |
+
"grad_norm": 12.318887710571289,
|
914 |
+
"learning_rate": 1.5119830827067668e-05,
|
915 |
+
"loss": 0.7157,
|
916 |
+
"step": 3200
|
917 |
+
},
|
918 |
+
{
|
919 |
+
"epoch": 3.571428571428571,
|
920 |
+
"eval_full_en_cosine_accuracy@1": 0.7072368421052632,
|
921 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
922 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
923 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
924 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
925 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
926 |
+
"eval_full_en_cosine_map@1": 0.7072368421052632,
|
927 |
+
"eval_full_en_cosine_map@100": 0.21126096647489126,
|
928 |
+
"eval_full_en_cosine_map@150": 0.22897332387217115,
|
929 |
+
"eval_full_en_cosine_map@20": 0.34020926250086975,
|
930 |
+
"eval_full_en_cosine_map@200": 0.24883265008518762,
|
931 |
+
"eval_full_en_cosine_map@50": 0.2366562995235259,
|
932 |
+
"eval_full_en_cosine_map@500": 0.30009134506130936,
|
933 |
+
"eval_full_en_cosine_mrr@1": 0.7072368421052632,
|
934 |
+
"eval_full_en_cosine_mrr@100": 0.8208446325794724,
|
935 |
+
"eval_full_en_cosine_mrr@150": 0.8208446325794724,
|
936 |
+
"eval_full_en_cosine_mrr@20": 0.8206285125693021,
|
937 |
+
"eval_full_en_cosine_mrr@200": 0.8208446325794724,
|
938 |
+
"eval_full_en_cosine_mrr@50": 0.8208446325794724,
|
939 |
+
"eval_full_en_cosine_ndcg@1": 0.7072368421052632,
|
940 |
+
"eval_full_en_cosine_ndcg@100": 0.4420871692985379,
|
941 |
+
"eval_full_en_cosine_ndcg@150": 0.48983718804719595,
|
942 |
+
"eval_full_en_cosine_ndcg@20": 0.5349182539944062,
|
943 |
+
"eval_full_en_cosine_ndcg@200": 0.5368995914478877,
|
944 |
+
"eval_full_en_cosine_ndcg@50": 0.4481578438397021,
|
945 |
+
"eval_full_en_cosine_precision@1": 0.7072368421052632,
|
946 |
+
"eval_full_en_cosine_precision@100": 0.3118421052631579,
|
947 |
+
"eval_full_en_cosine_precision@150": 0.26625,
|
948 |
+
"eval_full_en_cosine_precision@20": 0.49786184210526313,
|
949 |
+
"eval_full_en_cosine_precision@200": 0.2341282894736842,
|
950 |
+
"eval_full_en_cosine_precision@50": 0.39125,
|
951 |
+
"eval_full_en_cosine_recall@1": 0.010071368365416018,
|
952 |
+
"eval_full_en_cosine_recall@100": 0.39435465355460575,
|
953 |
+
"eval_full_en_cosine_recall@150": 0.49776297598034985,
|
954 |
+
"eval_full_en_cosine_recall@20": 0.1332224887798492,
|
955 |
+
"eval_full_en_cosine_recall@200": 0.5769437157052201,
|
956 |
+
"eval_full_en_cosine_recall@50": 0.25406609475829245,
|
957 |
+
"eval_runtime": 1.5833,
|
958 |
+
"eval_samples_per_second": 0.0,
|
959 |
+
"eval_sequential_score": 0.5368995914478877,
|
960 |
+
"eval_steps_per_second": 0.0,
|
961 |
+
"step": 3200
|
962 |
+
},
|
963 |
+
{
|
964 |
+
"epoch": 3.6830357142857144,
|
965 |
+
"grad_norm": 10.974320411682129,
|
966 |
+
"learning_rate": 1.3945018796992482e-05,
|
967 |
+
"loss": 0.7113,
|
968 |
+
"step": 3300
|
969 |
+
},
|
970 |
+
{
|
971 |
+
"epoch": 3.794642857142857,
|
972 |
+
"grad_norm": 11.004631042480469,
|
973 |
+
"learning_rate": 1.2770206766917295e-05,
|
974 |
+
"loss": 0.7013,
|
975 |
+
"step": 3400
|
976 |
+
},
|
977 |
+
{
|
978 |
+
"epoch": 3.794642857142857,
|
979 |
+
"eval_full_en_cosine_accuracy@1": 0.7269736842105263,
|
980 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
981 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
982 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
983 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
984 |
+
"eval_full_en_cosine_accuracy@50": 1.0,
|
985 |
+
"eval_full_en_cosine_map@1": 0.7269736842105263,
|
986 |
+
"eval_full_en_cosine_map@100": 0.20998333195374114,
|
987 |
+
"eval_full_en_cosine_map@150": 0.22683318021248486,
|
988 |
+
"eval_full_en_cosine_map@20": 0.34034679376659244,
|
989 |
+
"eval_full_en_cosine_map@200": 0.24654495691213385,
|
990 |
+
"eval_full_en_cosine_map@50": 0.23617479010012724,
|
991 |
+
"eval_full_en_cosine_map@500": 0.29617185416029185,
|
992 |
+
"eval_full_en_cosine_mrr@1": 0.7269736842105263,
|
993 |
+
"eval_full_en_cosine_mrr@100": 0.8291805255603549,
|
994 |
+
"eval_full_en_cosine_mrr@150": 0.8291805255603549,
|
995 |
+
"eval_full_en_cosine_mrr@20": 0.8291105367585632,
|
996 |
+
"eval_full_en_cosine_mrr@200": 0.8291805255603549,
|
997 |
+
"eval_full_en_cosine_mrr@50": 0.8291805255603549,
|
998 |
+
"eval_full_en_cosine_ndcg@1": 0.7269736842105263,
|
999 |
+
"eval_full_en_cosine_ndcg@100": 0.4407299508694298,
|
1000 |
+
"eval_full_en_cosine_ndcg@150": 0.48655314671133576,
|
1001 |
+
"eval_full_en_cosine_ndcg@20": 0.5349966588302529,
|
1002 |
+
"eval_full_en_cosine_ndcg@200": 0.5341334488223752,
|
1003 |
+
"eval_full_en_cosine_ndcg@50": 0.448065635044085,
|
1004 |
+
"eval_full_en_cosine_precision@1": 0.7269736842105263,
|
1005 |
+
"eval_full_en_cosine_precision@100": 0.30973684210526314,
|
1006 |
+
"eval_full_en_cosine_precision@150": 0.26320175438596494,
|
1007 |
+
"eval_full_en_cosine_precision@20": 0.4965460526315789,
|
1008 |
+
"eval_full_en_cosine_precision@200": 0.23210526315789473,
|
1009 |
+
"eval_full_en_cosine_precision@50": 0.3907894736842106,
|
1010 |
+
"eval_full_en_cosine_recall@1": 0.010311461817674684,
|
1011 |
+
"eval_full_en_cosine_recall@100": 0.3931693265429022,
|
1012 |
+
"eval_full_en_cosine_recall@150": 0.49300140763214356,
|
1013 |
+
"eval_full_en_cosine_recall@20": 0.1329270784727238,
|
1014 |
+
"eval_full_en_cosine_recall@200": 0.573228327517634,
|
1015 |
+
"eval_full_en_cosine_recall@50": 0.25330386821616296,
|
1016 |
+
"eval_runtime": 1.577,
|
1017 |
+
"eval_samples_per_second": 0.0,
|
1018 |
+
"eval_sequential_score": 0.5341334488223752,
|
1019 |
+
"eval_steps_per_second": 0.0,
|
1020 |
+
"step": 3400
|
1021 |
+
},
|
1022 |
+
{
|
1023 |
+
"epoch": 3.90625,
|
1024 |
+
"grad_norm": 12.102640151977539,
|
1025 |
+
"learning_rate": 1.1595394736842107e-05,
|
1026 |
+
"loss": 0.6903,
|
1027 |
+
"step": 3500
|
1028 |
+
},
|
1029 |
+
{
|
1030 |
+
"epoch": 4.017857142857143,
|
1031 |
+
"grad_norm": 7.348757743835449,
|
1032 |
+
"learning_rate": 1.0420582706766918e-05,
|
1033 |
+
"loss": 0.6462,
|
1034 |
+
"step": 3600
|
1035 |
+
},
|
1036 |
+
{
|
1037 |
+
"epoch": 4.017857142857143,
|
1038 |
+
"eval_full_en_cosine_accuracy@1": 0.7203947368421053,
|
1039 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
1040 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
1041 |
+
"eval_full_en_cosine_accuracy@20": 0.9967105263157895,
|
1042 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
1043 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
1044 |
+
"eval_full_en_cosine_map@1": 0.7203947368421053,
|
1045 |
+
"eval_full_en_cosine_map@100": 0.2102732775077637,
|
1046 |
+
"eval_full_en_cosine_map@150": 0.22767943965852241,
|
1047 |
+
"eval_full_en_cosine_map@20": 0.338502447126724,
|
1048 |
+
"eval_full_en_cosine_map@200": 0.24667619158922902,
|
1049 |
+
"eval_full_en_cosine_map@50": 0.23576300870587916,
|
1050 |
+
"eval_full_en_cosine_map@500": 0.2971463650911015,
|
1051 |
+
"eval_full_en_cosine_mrr@1": 0.7203947368421053,
|
1052 |
+
"eval_full_en_cosine_mrr@100": 0.8263833835420962,
|
1053 |
+
"eval_full_en_cosine_mrr@150": 0.8263833835420962,
|
1054 |
+
"eval_full_en_cosine_mrr@20": 0.8263213180008847,
|
1055 |
+
"eval_full_en_cosine_mrr@200": 0.8263833835420962,
|
1056 |
+
"eval_full_en_cosine_mrr@50": 0.8263213180008847,
|
1057 |
+
"eval_full_en_cosine_ndcg@1": 0.7203947368421053,
|
1058 |
+
"eval_full_en_cosine_ndcg@100": 0.44114478517461736,
|
1059 |
+
"eval_full_en_cosine_ndcg@150": 0.4883455168714466,
|
1060 |
+
"eval_full_en_cosine_ndcg@20": 0.53288860900767,
|
1061 |
+
"eval_full_en_cosine_ndcg@200": 0.5334866046140189,
|
1062 |
+
"eval_full_en_cosine_ndcg@50": 0.4473951526251337,
|
1063 |
+
"eval_full_en_cosine_precision@1": 0.7203947368421053,
|
1064 |
+
"eval_full_en_cosine_precision@100": 0.31078947368421056,
|
1065 |
+
"eval_full_en_cosine_precision@150": 0.265219298245614,
|
1066 |
+
"eval_full_en_cosine_precision@20": 0.4934210526315789,
|
1067 |
+
"eval_full_en_cosine_precision@200": 0.23212171052631578,
|
1068 |
+
"eval_full_en_cosine_precision@50": 0.3899342105263158,
|
1069 |
+
"eval_full_en_cosine_recall@1": 0.01018155854728512,
|
1070 |
+
"eval_full_en_cosine_recall@100": 0.3935816727444405,
|
1071 |
+
"eval_full_en_cosine_recall@150": 0.4958028561341766,
|
1072 |
+
"eval_full_en_cosine_recall@20": 0.13181077303144853,
|
1073 |
+
"eval_full_en_cosine_recall@200": 0.5716317929962068,
|
1074 |
+
"eval_full_en_cosine_recall@50": 0.25274553753777246,
|
1075 |
+
"eval_runtime": 1.6024,
|
1076 |
+
"eval_samples_per_second": 0.0,
|
1077 |
+
"eval_sequential_score": 0.5334866046140189,
|
1078 |
+
"eval_steps_per_second": 0.0,
|
1079 |
+
"step": 3600
|
1080 |
+
},
|
1081 |
+
{
|
1082 |
+
"epoch": 4.129464285714286,
|
1083 |
+
"grad_norm": 8.786450386047363,
|
1084 |
+
"learning_rate": 9.24577067669173e-06,
|
1085 |
+
"loss": 0.5162,
|
1086 |
+
"step": 3700
|
1087 |
+
},
|
1088 |
+
{
|
1089 |
+
"epoch": 4.241071428571429,
|
1090 |
+
"grad_norm": 10.602435111999512,
|
1091 |
+
"learning_rate": 8.070958646616542e-06,
|
1092 |
+
"loss": 0.524,
|
1093 |
+
"step": 3800
|
1094 |
+
},
|
1095 |
+
{
|
1096 |
+
"epoch": 4.241071428571429,
|
1097 |
+
"eval_full_en_cosine_accuracy@1": 0.7302631578947368,
|
1098 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
1099 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
1100 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
1101 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
1102 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
1103 |
+
"eval_full_en_cosine_map@1": 0.7302631578947368,
|
1104 |
+
"eval_full_en_cosine_map@100": 0.21150798737582682,
|
1105 |
+
"eval_full_en_cosine_map@150": 0.22868847990327232,
|
1106 |
+
"eval_full_en_cosine_map@20": 0.3411525812655742,
|
1107 |
+
"eval_full_en_cosine_map@200": 0.2480155691306444,
|
1108 |
+
"eval_full_en_cosine_map@50": 0.23814436251631807,
|
1109 |
+
"eval_full_en_cosine_map@500": 0.29792672341621373,
|
1110 |
+
"eval_full_en_cosine_mrr@1": 0.7302631578947368,
|
1111 |
+
"eval_full_en_cosine_mrr@100": 0.8323485085820613,
|
1112 |
+
"eval_full_en_cosine_mrr@150": 0.8323485085820613,
|
1113 |
+
"eval_full_en_cosine_mrr@20": 0.8321467731829576,
|
1114 |
+
"eval_full_en_cosine_mrr@200": 0.8323485085820613,
|
1115 |
+
"eval_full_en_cosine_mrr@50": 0.832296294714058,
|
1116 |
+
"eval_full_en_cosine_ndcg@1": 0.7302631578947368,
|
1117 |
+
"eval_full_en_cosine_ndcg@100": 0.44247378999755477,
|
1118 |
+
"eval_full_en_cosine_ndcg@150": 0.48886293038433404,
|
1119 |
+
"eval_full_en_cosine_ndcg@20": 0.5351701323930714,
|
1120 |
+
"eval_full_en_cosine_ndcg@200": 0.5352268343210608,
|
1121 |
+
"eval_full_en_cosine_ndcg@50": 0.4502625298651447,
|
1122 |
+
"eval_full_en_cosine_precision@1": 0.7302631578947368,
|
1123 |
+
"eval_full_en_cosine_precision@100": 0.311546052631579,
|
1124 |
+
"eval_full_en_cosine_precision@150": 0.265219298245614,
|
1125 |
+
"eval_full_en_cosine_precision@20": 0.49588815789473684,
|
1126 |
+
"eval_full_en_cosine_precision@200": 0.23268092105263163,
|
1127 |
+
"eval_full_en_cosine_precision@50": 0.3930921052631579,
|
1128 |
+
"eval_full_en_cosine_recall@1": 0.010244630514181254,
|
1129 |
+
"eval_full_en_cosine_recall@100": 0.39498767852245736,
|
1130 |
+
"eval_full_en_cosine_recall@150": 0.49574169519464223,
|
1131 |
+
"eval_full_en_cosine_recall@20": 0.1324589336710221,
|
1132 |
+
"eval_full_en_cosine_recall@200": 0.574019804020236,
|
1133 |
+
"eval_full_en_cosine_recall@50": 0.2548099607629461,
|
1134 |
+
"eval_runtime": 1.5919,
|
1135 |
+
"eval_samples_per_second": 0.0,
|
1136 |
+
"eval_sequential_score": 0.5352268343210608,
|
1137 |
+
"eval_steps_per_second": 0.0,
|
1138 |
+
"step": 3800
|
1139 |
+
},
|
1140 |
+
{
|
1141 |
+
"epoch": 4.352678571428571,
|
1142 |
+
"grad_norm": 11.65066909790039,
|
1143 |
+
"learning_rate": 6.896146616541354e-06,
|
1144 |
+
"loss": 0.5303,
|
1145 |
+
"step": 3900
|
1146 |
+
},
|
1147 |
+
{
|
1148 |
+
"epoch": 4.464285714285714,
|
1149 |
+
"grad_norm": 10.764215469360352,
|
1150 |
+
"learning_rate": 5.721334586466166e-06,
|
1151 |
+
"loss": 0.5269,
|
1152 |
+
"step": 4000
|
1153 |
+
},
|
1154 |
+
{
|
1155 |
+
"epoch": 4.464285714285714,
|
1156 |
+
"eval_full_en_cosine_accuracy@1": 0.7368421052631579,
|
1157 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
1158 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
1159 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
1160 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
1161 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
1162 |
+
"eval_full_en_cosine_map@1": 0.7368421052631579,
|
1163 |
+
"eval_full_en_cosine_map@100": 0.2101198919267321,
|
1164 |
+
"eval_full_en_cosine_map@150": 0.2276536266469315,
|
1165 |
+
"eval_full_en_cosine_map@20": 0.34076177455520346,
|
1166 |
+
"eval_full_en_cosine_map@200": 0.24678319516569472,
|
1167 |
+
"eval_full_en_cosine_map@50": 0.23677969810249233,
|
1168 |
+
"eval_full_en_cosine_map@500": 0.297249372287514,
|
1169 |
+
"eval_full_en_cosine_mrr@1": 0.7368421052631579,
|
1170 |
+
"eval_full_en_cosine_mrr@100": 0.8373899157616261,
|
1171 |
+
"eval_full_en_cosine_mrr@150": 0.8373899157616261,
|
1172 |
+
"eval_full_en_cosine_mrr@20": 0.837172357978279,
|
1173 |
+
"eval_full_en_cosine_mrr@200": 0.8373899157616261,
|
1174 |
+
"eval_full_en_cosine_mrr@50": 0.837328999582289,
|
1175 |
+
"eval_full_en_cosine_ndcg@1": 0.7368421052631579,
|
1176 |
+
"eval_full_en_cosine_ndcg@100": 0.4408521323246635,
|
1177 |
+
"eval_full_en_cosine_ndcg@150": 0.48834055710549873,
|
1178 |
+
"eval_full_en_cosine_ndcg@20": 0.5353264293739176,
|
1179 |
+
"eval_full_en_cosine_ndcg@200": 0.5341206282180626,
|
1180 |
+
"eval_full_en_cosine_ndcg@50": 0.44939083758113085,
|
1181 |
+
"eval_full_en_cosine_precision@1": 0.7368421052631579,
|
1182 |
+
"eval_full_en_cosine_precision@100": 0.30953947368421053,
|
1183 |
+
"eval_full_en_cosine_precision@150": 0.26460526315789473,
|
1184 |
+
"eval_full_en_cosine_precision@20": 0.4947368421052632,
|
1185 |
+
"eval_full_en_cosine_precision@200": 0.23187500000000003,
|
1186 |
+
"eval_full_en_cosine_precision@50": 0.3913815789473684,
|
1187 |
+
"eval_full_en_cosine_recall@1": 0.010305566449078924,
|
1188 |
+
"eval_full_en_cosine_recall@100": 0.3922740640225546,
|
1189 |
+
"eval_full_en_cosine_recall@150": 0.4949163913773604,
|
1190 |
+
"eval_full_en_cosine_recall@20": 0.13233275450376297,
|
1191 |
+
"eval_full_en_cosine_recall@200": 0.572041877895568,
|
1192 |
+
"eval_full_en_cosine_recall@50": 0.2535655251683108,
|
1193 |
+
"eval_runtime": 1.5798,
|
1194 |
+
"eval_samples_per_second": 0.0,
|
1195 |
+
"eval_sequential_score": 0.5341206282180626,
|
1196 |
+
"eval_steps_per_second": 0.0,
|
1197 |
+
"step": 4000
|
1198 |
+
},
|
1199 |
+
{
|
1200 |
+
"epoch": 4.575892857142857,
|
1201 |
+
"grad_norm": 10.328286170959473,
|
1202 |
+
"learning_rate": 4.546522556390977e-06,
|
1203 |
+
"loss": 0.4824,
|
1204 |
+
"step": 4100
|
1205 |
+
},
|
1206 |
+
{
|
1207 |
+
"epoch": 4.6875,
|
1208 |
+
"grad_norm": 10.712604522705078,
|
1209 |
+
"learning_rate": 3.3717105263157897e-06,
|
1210 |
+
"loss": 0.5222,
|
1211 |
+
"step": 4200
|
1212 |
+
},
|
1213 |
+
{
|
1214 |
+
"epoch": 4.6875,
|
1215 |
+
"eval_full_en_cosine_accuracy@1": 0.7302631578947368,
|
1216 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
1217 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
1218 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
1219 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
1220 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
1221 |
+
"eval_full_en_cosine_map@1": 0.7302631578947368,
|
1222 |
+
"eval_full_en_cosine_map@100": 0.21090472549603356,
|
1223 |
+
"eval_full_en_cosine_map@150": 0.22845220292726734,
|
1224 |
+
"eval_full_en_cosine_map@20": 0.342326318294358,
|
1225 |
+
"eval_full_en_cosine_map@200": 0.24733168088568283,
|
1226 |
+
"eval_full_en_cosine_map@50": 0.23774626029530496,
|
1227 |
+
"eval_full_en_cosine_map@500": 0.2977609786459198,
|
1228 |
+
"eval_full_en_cosine_mrr@1": 0.7302631578947368,
|
1229 |
+
"eval_full_en_cosine_mrr@100": 0.8356281328320803,
|
1230 |
+
"eval_full_en_cosine_mrr@150": 0.8356281328320803,
|
1231 |
+
"eval_full_en_cosine_mrr@20": 0.8354127506265665,
|
1232 |
+
"eval_full_en_cosine_mrr@200": 0.8356281328320803,
|
1233 |
+
"eval_full_en_cosine_mrr@50": 0.8355693922305765,
|
1234 |
+
"eval_full_en_cosine_ndcg@1": 0.7302631578947368,
|
1235 |
+
"eval_full_en_cosine_ndcg@100": 0.44188546614809043,
|
1236 |
+
"eval_full_en_cosine_ndcg@150": 0.48899866366733713,
|
1237 |
+
"eval_full_en_cosine_ndcg@20": 0.5372965681233445,
|
1238 |
+
"eval_full_en_cosine_ndcg@200": 0.5342393130950145,
|
1239 |
+
"eval_full_en_cosine_ndcg@50": 0.4501724823363586,
|
1240 |
+
"eval_full_en_cosine_precision@1": 0.7302631578947368,
|
1241 |
+
"eval_full_en_cosine_precision@100": 0.3101973684210526,
|
1242 |
+
"eval_full_en_cosine_precision@150": 0.2649780701754386,
|
1243 |
+
"eval_full_en_cosine_precision@20": 0.49786184210526313,
|
1244 |
+
"eval_full_en_cosine_precision@200": 0.23199013157894738,
|
1245 |
+
"eval_full_en_cosine_precision@50": 0.3921710526315789,
|
1246 |
+
"eval_full_en_cosine_recall@1": 0.01024195976751409,
|
1247 |
+
"eval_full_en_cosine_recall@100": 0.3937143686320033,
|
1248 |
+
"eval_full_en_cosine_recall@150": 0.4954717634968576,
|
1249 |
+
"eval_full_en_cosine_recall@20": 0.13293505289394864,
|
1250 |
+
"eval_full_en_cosine_recall@200": 0.5715937768635994,
|
1251 |
+
"eval_full_en_cosine_recall@50": 0.2542542782427721,
|
1252 |
+
"eval_runtime": 1.579,
|
1253 |
+
"eval_samples_per_second": 0.0,
|
1254 |
+
"eval_sequential_score": 0.5342393130950145,
|
1255 |
+
"eval_steps_per_second": 0.0,
|
1256 |
+
"step": 4200
|
1257 |
+
},
|
1258 |
+
{
|
1259 |
+
"epoch": 4.799107142857143,
|
1260 |
+
"grad_norm": 12.404372215270996,
|
1261 |
+
"learning_rate": 2.1968984962406015e-06,
|
1262 |
+
"loss": 0.5104,
|
1263 |
+
"step": 4300
|
1264 |
+
},
|
1265 |
+
{
|
1266 |
+
"epoch": 4.910714285714286,
|
1267 |
+
"grad_norm": 12.05720329284668,
|
1268 |
+
"learning_rate": 1.0220864661654136e-06,
|
1269 |
+
"loss": 0.5002,
|
1270 |
+
"step": 4400
|
1271 |
+
},
|
1272 |
+
{
|
1273 |
+
"epoch": 4.910714285714286,
|
1274 |
+
"eval_full_en_cosine_accuracy@1": 0.7302631578947368,
|
1275 |
+
"eval_full_en_cosine_accuracy@100": 1.0,
|
1276 |
+
"eval_full_en_cosine_accuracy@150": 1.0,
|
1277 |
+
"eval_full_en_cosine_accuracy@20": 0.993421052631579,
|
1278 |
+
"eval_full_en_cosine_accuracy@200": 1.0,
|
1279 |
+
"eval_full_en_cosine_accuracy@50": 0.9967105263157895,
|
1280 |
+
"eval_full_en_cosine_map@1": 0.7302631578947368,
|
1281 |
+
"eval_full_en_cosine_map@100": 0.21161540263537876,
|
1282 |
+
"eval_full_en_cosine_map@150": 0.22899252179487295,
|
1283 |
+
"eval_full_en_cosine_map@20": 0.3434603918412553,
|
1284 |
+
"eval_full_en_cosine_map@200": 0.24784282323083537,
|
1285 |
+
"eval_full_en_cosine_map@50": 0.23779270403918282,
|
1286 |
+
"eval_full_en_cosine_map@500": 0.298154972004029,
|
1287 |
+
"eval_full_en_cosine_mrr@1": 0.7302631578947368,
|
1288 |
+
"eval_full_en_cosine_mrr@100": 0.8343905966424682,
|
1289 |
+
"eval_full_en_cosine_mrr@150": 0.8343905966424682,
|
1290 |
+
"eval_full_en_cosine_mrr@20": 0.8341772399749373,
|
1291 |
+
"eval_full_en_cosine_mrr@200": 0.8343905966424682,
|
1292 |
+
"eval_full_en_cosine_mrr@50": 0.8343338815789473,
|
1293 |
+
"eval_full_en_cosine_ndcg@1": 0.7302631578947368,
|
1294 |
+
"eval_full_en_cosine_ndcg@100": 0.44277699637488865,
|
1295 |
+
"eval_full_en_cosine_ndcg@150": 0.4895063673734854,
|
1296 |
+
"eval_full_en_cosine_ndcg@20": 0.5384654647855256,
|
1297 |
+
"eval_full_en_cosine_ndcg@200": 0.5346148440105628,
|
1298 |
+
"eval_full_en_cosine_ndcg@50": 0.44986527953229877,
|
1299 |
+
"eval_full_en_cosine_precision@1": 0.7302631578947368,
|
1300 |
+
"eval_full_en_cosine_precision@100": 0.3111842105263158,
|
1301 |
+
"eval_full_en_cosine_precision@150": 0.2652412280701754,
|
1302 |
+
"eval_full_en_cosine_precision@20": 0.4998355263157894,
|
1303 |
+
"eval_full_en_cosine_precision@200": 0.232171052631579,
|
1304 |
+
"eval_full_en_cosine_precision@50": 0.39184210526315794,
|
1305 |
+
"eval_full_en_cosine_recall@1": 0.010227350724729817,
|
1306 |
+
"eval_full_en_cosine_recall@100": 0.3948435268881245,
|
1307 |
+
"eval_full_en_cosine_recall@150": 0.49626849018850344,
|
1308 |
+
"eval_full_en_cosine_recall@20": 0.13368254620254577,
|
1309 |
+
"eval_full_en_cosine_recall@200": 0.5720837677245543,
|
1310 |
+
"eval_full_en_cosine_recall@50": 0.2541249933594102,
|
1311 |
+
"eval_runtime": 1.5839,
|
1312 |
+
"eval_samples_per_second": 0.0,
|
1313 |
+
"eval_sequential_score": 0.5346148440105628,
|
1314 |
+
"eval_steps_per_second": 0.0,
|
1315 |
+
"step": 4400
|
1316 |
+
}
|
1317 |
+
],
|
1318 |
+
"logging_steps": 100,
|
1319 |
+
"max_steps": 4480,
|
1320 |
+
"num_input_tokens_seen": 0,
|
1321 |
+
"num_train_epochs": 5,
|
1322 |
+
"save_steps": 200,
|
1323 |
+
"stateful_callbacks": {
|
1324 |
+
"TrainerControl": {
|
1325 |
+
"args": {
|
1326 |
+
"should_epoch_stop": false,
|
1327 |
+
"should_evaluate": false,
|
1328 |
+
"should_log": false,
|
1329 |
+
"should_save": true,
|
1330 |
+
"should_training_stop": true
|
1331 |
+
},
|
1332 |
+
"attributes": {}
|
1333 |
+
}
|
1334 |
+
},
|
1335 |
+
"total_flos": 0.0,
|
1336 |
+
"train_batch_size": 64,
|
1337 |
+
"trial_name": null,
|
1338 |
+
"trial_params": null
|
1339 |
+
}
|
checkpoint-4480/vocab.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
eval/Information-Retrieval_evaluation_full_en_results.csv
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
epoch,steps,cosine-Accuracy@1,cosine-Accuracy@20,cosine-Accuracy@50,cosine-Accuracy@100,cosine-Accuracy@150,cosine-Accuracy@200,cosine-Precision@1,cosine-Recall@1,cosine-Precision@20,cosine-Recall@20,cosine-Precision@50,cosine-Recall@50,cosine-Precision@100,cosine-Recall@100,cosine-Precision@150,cosine-Recall@150,cosine-Precision@200,cosine-Recall@200,cosine-MRR@1,cosine-MRR@20,cosine-MRR@50,cosine-MRR@100,cosine-MRR@150,cosine-MRR@200,cosine-NDCG@1,cosine-NDCG@20,cosine-NDCG@50,cosine-NDCG@100,cosine-NDCG@150,cosine-NDCG@200,cosine-MAP@1,cosine-MAP@20,cosine-MAP@50,cosine-MAP@100,cosine-MAP@150,cosine-MAP@200,cosine-MAP@500
|
2 |
+
0.22321428571428573,200,0.7467105263157895,0.993421052631579,0.9967105263157895,1.0,1.0,1.0,0.7467105263157895,0.010753343030902496,0.4965460526315789,0.13279013317825217,0.3904605263157895,0.25254843470147753,0.31240131578947367,0.39446255566624855,0.26592105263157895,0.49544823712709557,0.23370065789473685,0.5739614992682516,0.7467105263157895,0.8458948032961192,0.8460122844991269,0.8460592769803298,0.8460592769803298,0.8460592769803298,0.7467105263157895,0.5367541274871807,0.448683811733402,0.4430509248084704,0.4894828917681416,0.5361903606133726,0.7467105263157895,0.34167650006204187,0.237336657426832,0.2121058701298033,0.2294109301872967,0.2492171685943861,0.3000288940307502
|
3 |
+
0.44642857142857145,400,0.7368421052631579,0.993421052631579,0.9967105263157895,1.0,1.0,1.0,0.7368421052631579,0.010619007443519193,0.4875,0.1301764615450556,0.38782894736842105,0.2518199886564403,0.3088157894736842,0.3902042311088277,0.2644517543859649,0.4925745165667779,0.23172697368421055,0.5696006364444781,0.7368421052631579,0.8392713554720135,0.8393810045948205,0.8394156306336016,0.8394156306336016,0.8394156306336016,0.7368421052631579,0.5288083416910968,0.4453338982563473,0.43855475512592684,0.48609390907359196,0.5318117937684201,0.7368421052631579,0.3349832137166454,0.23473921202287384,0.2088144416212806,0.22677217670719133,0.245946497368659,0.2973985707303743
|
4 |
+
0.6696428571428571,600,0.7302631578947368,0.993421052631579,0.9967105263157895,1.0,1.0,1.0,0.7302631578947368,0.010303516134180577,0.49720394736842105,0.13302896177814508,0.3932894736842105,0.254528957048419,0.31358552631578945,0.3970033142271577,0.2677412280701754,0.5001101850184368,0.23452302631578953,0.5777429812058247,0.7302631578947368,0.8304491697994989,0.8306058114035089,0.8306572094298247,0.8306572094298247,0.8306572094298247,0.7302631578947368,0.5357880041966661,0.4504820590447715,0.4445617284976941,0.4922393935902775,0.5383209000398446,0.7302631578947368,0.3429678297332613,0.2404899713826549,0.2146410944227793,0.23271596511985665,0.2520997707361607,0.302904619520322
|
5 |
+
0.8928571428571429,800,0.7368421052631579,0.993421052631579,0.9967105263157895,1.0,1.0,1.0,0.7368421052631579,0.01051277780149725,0.5,0.13328036442285973,0.39335526315789476,0.254129727850083,0.3099342105263158,0.39158535797000443,0.26390350877192986,0.4917399858788313,0.23320723684210526,0.5734492892933252,0.7368421052631579,0.8402307852965749,0.8403738058915406,0.8404268619187053,0.8404268619187053,0.8404268619187053,0.7368421052631579,0.5383903905850532,0.45046850998342597,0.440670430732987,0.486778222456143,0.5352292016764449,0.7368421052631579,0.3442880676713117,0.23827484272575025,0.21030614519224017,0.22737063252522982,0.24764067563282596,0.2987091429260604
|
6 |
+
1.1160714285714286,1000,0.7401315789473685,0.993421052631579,0.9967105263157895,1.0,1.0,1.0,0.7401315789473685,0.010392607884295562,0.49243421052631575,0.13107623492706288,0.3921052631578947,0.2539746341397596,0.3114473684210526,0.3933254279416559,0.266469298245614,0.4957503189606009,0.2345888157894737,0.5753954619760326,0.7401315789473685,0.8403143274853806,0.840463849016481,0.8405236576289212,0.8405236576289212,0.8405236576289212,0.7401315789473685,0.5332180756481385,0.44979391873656477,0.44212858816477746,0.48946706445562127,0.5367929588661781,0.7401315789473685,0.3373673798048492,0.2376950112180141,0.21155466872463927,0.2291636549745022,0.24905074192004603,0.3006802538137734
|
7 |
+
1.3392857142857144,1200,0.7203947368421053,0.993421052631579,0.9967105263157895,1.0,1.0,1.0,0.7203947368421053,0.010318104890368607,0.4925986842105263,0.13139326985918445,0.3867105263157895,0.2506285703289517,0.3039802631578947,0.385615965839615,0.25999999999999995,0.48656381032984825,0.22763157894736838,0.5617757383007209,0.7203947368421053,0.8320620443153339,0.8322050649102997,0.8322617799738206,0.8322617799738206,0.8322617799738206,0.7203947368421053,0.531477407982968,0.4444057356887903,0.43445871937106545,0.48130417146010107,0.5259375639543232,0.7203947368421053,0.3363904557549852,0.23370113464760453,0.20552277525856266,0.22274311961933413,0.24106738760441354,0.28981293048421486
|
8 |
+
1.5625,1400,0.7171052631578947,0.993421052631579,0.9967105263157895,1.0,1.0,1.0,0.7171052631578947,0.010208074045806198,0.4947368421052632,0.13255572846134298,0.38769736842105257,0.25126941591084845,0.3074671052631579,0.3902466549235702,0.2625657894736842,0.49226776551348056,0.23016447368421053,0.5680994353864672,0.7171052631578947,0.8265913362952838,0.8267343568902494,0.8267713172687238,0.8267713172687238,0.8267713172687238,0.7171052631578947,0.5331724259953773,0.4451308688476405,0.4377486787968229,0.4850669425848544,0.5302927064126869,0.7171052631578947,0.34006318172507877,0.23483789231739935,0.20833018055660496,0.22583322401021033,0.24462161151730188,0.2946124561805931
|
9 |
+
1.7857142857142856,1600,0.7072368421052632,0.9967105263157895,0.9967105263157895,1.0,1.0,1.0,0.7072368421052632,0.010122149362902188,0.4935855263157895,0.13108496301513997,0.38763157894736844,0.25093448303772187,0.3098026315789474,0.39236988612007834,0.26274122807017547,0.4910778378543689,0.23192434210526314,0.5709689534914331,0.7072368421052632,0.8213699371448987,0.8213699371448987,0.8214137967940215,0.8214137967940215,0.8214137967940215,0.7072368421052632,0.532792025753163,0.4452189433184465,0.4396726832556684,0.4847816359827512,0.5323403273572274,0.7072368421052632,0.34034356587585846,0.23464702413938254,0.20923239071614674,0.225604138471006,0.24539737099429304,0.29597166286299953
|
10 |
+
2.0089285714285716,1800,0.7368421052631579,0.9967105263157895,0.9967105263157895,1.0,1.0,1.0,0.7368421052631579,0.010440810366523372,0.49769736842105267,0.13228070304056636,0.3891447368421053,0.25248213212752935,0.3084539473684211,0.39036009395952986,0.2627631578947368,0.49041982254882954,0.2314309210526316,0.5704962189819233,0.7368421052631579,0.8393426686233129,0.8393426686233129,0.8394024772357531,0.8394024772357531,0.8394024772357531,0.7368421052631579,0.5375317893335387,0.44810398395306655,0.4396519841053572,0.4856325134708184,0.533015167774829,0.7368421052631579,0.3433147887298301,0.23714915519951082,0.2098412194483687,0.22663911455304064,0.24620266722190678,0.29690932859887553
|
11 |
+
2.232142857142857,2000,0.7236842105263158,0.9967105263157895,1.0,1.0,1.0,1.0,0.7236842105263158,0.010329446437905086,0.4916118421052632,0.131078016933875,0.3886842105263158,0.252357645205228,0.30644736842105263,0.38885062846601265,0.259890350877193,0.4854595951837256,0.2280921052631579,0.5630724982932908,0.7236842105263158,0.8324781304222094,0.8325452625382137,0.8325452625382137,0.8325452625382137,0.8325452625382137,0.7236842105263158,0.5323035546433559,0.44660441452063837,0.4376001104057169,0.48181431955382,0.5276663014224582,0.7236842105263158,0.3380596885262807,0.23452814948810471,0.20775225168018954,0.22393096419950168,0.24259765295506924,0.2920026964508484
|
12 |
+
2.455357142857143,2200,0.7236842105263158,0.993421052631579,1.0,1.0,1.0,1.0,0.7236842105263158,0.010284539147879572,0.49588815789473684,0.13200577828629578,0.39052631578947367,0.25310992970173135,0.3099671052631579,0.39296182819932773,0.2648464912280702,0.4959148528891931,0.23342105263157892,0.5749370249014907,0.7236842105263158,0.8316833751044278,0.8318935359231412,0.8318935359231412,0.8318935359231412,0.8318935359231412,0.7236842105263158,0.5350320556020238,0.44803994906340594,0.44076958126493176,0.48838061313116793,0.5355574509263721,0.7236842105263158,0.34078157961918865,0.2365248444512811,0.209953160245849,0.22760030144833215,0.24749824184265867,0.29789431690676116
|
13 |
+
2.678571428571429,2400,0.7335526315789473,0.993421052631579,0.9967105263157895,1.0,1.0,1.0,0.7335526315789473,0.010402156873475942,0.4960526315789474,0.1321996647113643,0.39151315789473684,0.2535254041631645,0.30907894736842106,0.39206565501916524,0.26278508771929826,0.49176955829136443,0.23090460526315787,0.569344104113959,0.7335526315789473,0.8311351294903929,0.8312917710944029,0.83135268727374,0.83135268727374,0.83135268727374,0.7335526315789473,0.5344170691501652,0.4485020943766835,0.4400577813719261,0.4859220111165228,0.5320416498978522,0.7335526315789473,0.34004090105732804,0.23672594782424658,0.20983286336268822,0.22675852672419078,0.24584993568226646,0.29632183596698103
|
14 |
+
2.9017857142857144,2600,0.7368421052631579,0.9967105263157895,0.9967105263157895,1.0,1.0,1.0,0.7368421052631579,0.010425572953236805,0.49588815789473684,0.13284603422933672,0.38901315789473684,0.2518705529759721,0.30779605263157894,0.3892001066901767,0.26355263157894737,0.492569756570653,0.2316282894736842,0.5706210722984945,0.7368421052631579,0.8391064008705977,0.8391064008705977,0.8391709003546018,0.8391709003546018,0.8391709003546018,0.7368421052631579,0.5359014833764041,0.44749591453362436,0.4389185422351881,0.4868646893605612,0.5332804255738979,0.7368421052631579,0.3403680329074837,0.23612691752121232,0.20939105710550232,0.22725165687553775,0.24658865195474836,0.29718900909315255
|
15 |
+
3.125,2800,0.7269736842105263,0.9967105263157895,1.0,1.0,1.0,1.0,0.7269736842105263,0.010317820884117123,0.49786184210526313,0.13271573138828288,0.3870394736842105,0.25218483369820577,0.308125,0.38998825691236244,0.2621052631578948,0.4900687458798103,0.22980263157894737,0.5659226272090475,0.7269736842105263,0.8314268744778616,0.8315051952798665,0.8315051952798665,0.8315051952798665,0.8315051952798665,0.7269736842105263,0.5365677326031855,0.44591298214905706,0.43885977048304636,0.48486671483618976,0.5299990147795507,0.7269736842105263,0.34094306993307805,0.23400685602624646,0.20830025965749158,0.22525408557521698,0.24400549054611867,0.29401532392219154
|
16 |
+
3.3482142857142856,3000,0.7269736842105263,0.9967105263157895,0.9967105263157895,1.0,1.0,1.0,0.7269736842105263,0.010298189290703101,0.49588815789473684,0.13215936080151625,0.38539473684210523,0.2502092759755724,0.30750000000000005,0.38891472258186655,0.26370614035087714,0.4925623824521817,0.23116776315789475,0.5698259119139981,0.7269736842105263,0.830265887256019,0.830265887256019,0.8303256958684593,0.8303256958684593,0.8303256958684593,0.7269736842105263,0.534483012777908,0.4443024102705765,0.4379203478644915,0.4860723616469748,0.5318565059446251,0.7269736842105263,0.34026464907579207,0.23418777403622906,0.20842370079433947,0.22608431932756923,0.2451065024940476,0.2945476002258968
|
17 |
+
3.571428571428571,3200,0.7072368421052632,0.993421052631579,1.0,1.0,1.0,1.0,0.7072368421052632,0.010071368365416018,0.49786184210526313,0.1332224887798492,0.39125,0.25406609475829245,0.3118421052631579,0.39435465355460575,0.26625,0.49776297598034985,0.2341282894736842,0.5769437157052201,0.7072368421052632,0.8206285125693021,0.8208446325794724,0.8208446325794724,0.8208446325794724,0.8208446325794724,0.7072368421052632,0.5349182539944062,0.4481578438397021,0.4420871692985379,0.48983718804719595,0.5368995914478877,0.7072368421052632,0.34020926250086975,0.2366562995235259,0.21126096647489126,0.22897332387217115,0.24883265008518762,0.30009134506130936
|
18 |
+
3.794642857142857,3400,0.7269736842105263,0.9967105263157895,1.0,1.0,1.0,1.0,0.7269736842105263,0.010311461817674684,0.4965460526315789,0.1329270784727238,0.3907894736842106,0.25330386821616296,0.30973684210526314,0.3931693265429022,0.26320175438596494,0.49300140763214356,0.23210526315789473,0.573228327517634,0.7269736842105263,0.8291105367585632,0.8291805255603549,0.8291805255603549,0.8291805255603549,0.8291805255603549,0.7269736842105263,0.5349966588302529,0.448065635044085,0.4407299508694298,0.48655314671133576,0.5341334488223752,0.7269736842105263,0.34034679376659244,0.23617479010012724,0.20998333195374114,0.22683318021248486,0.24654495691213385,0.29617185416029185
|
19 |
+
4.017857142857143,3600,0.7203947368421053,0.9967105263157895,0.9967105263157895,1.0,1.0,1.0,0.7203947368421053,0.01018155854728512,0.4934210526315789,0.13181077303144853,0.3899342105263158,0.25274553753777246,0.31078947368421056,0.3935816727444405,0.265219298245614,0.4958028561341766,0.23212171052631578,0.5716317929962068,0.7203947368421053,0.8263213180008847,0.8263213180008847,0.8263833835420962,0.8263833835420962,0.8263833835420962,0.7203947368421053,0.53288860900767,0.4473951526251337,0.44114478517461736,0.4883455168714466,0.5334866046140189,0.7203947368421053,0.338502447126724,0.23576300870587916,0.2102732775077637,0.22767943965852241,0.24667619158922902,0.2971463650911015
|
20 |
+
4.241071428571429,3800,0.7302631578947368,0.993421052631579,0.9967105263157895,1.0,1.0,1.0,0.7302631578947368,0.010244630514181254,0.49588815789473684,0.1324589336710221,0.3930921052631579,0.2548099607629461,0.311546052631579,0.39498767852245736,0.265219298245614,0.49574169519464223,0.23268092105263163,0.574019804020236,0.7302631578947368,0.8321467731829576,0.832296294714058,0.8323485085820613,0.8323485085820613,0.8323485085820613,0.7302631578947368,0.5351701323930714,0.4502625298651447,0.44247378999755477,0.48886293038433404,0.5352268343210608,0.7302631578947368,0.3411525812655742,0.23814436251631807,0.21150798737582682,0.22868847990327232,0.2480155691306444,0.29792672341621373
|
21 |
+
4.464285714285714,4000,0.7368421052631579,0.993421052631579,0.9967105263157895,1.0,1.0,1.0,0.7368421052631579,0.010305566449078924,0.4947368421052632,0.13233275450376297,0.3913815789473684,0.2535655251683108,0.30953947368421053,0.3922740640225546,0.26460526315789473,0.4949163913773604,0.23187500000000003,0.572041877895568,0.7368421052631579,0.837172357978279,0.837328999582289,0.8373899157616261,0.8373899157616261,0.8373899157616261,0.7368421052631579,0.5353264293739176,0.44939083758113085,0.4408521323246635,0.48834055710549873,0.5341206282180626,0.7368421052631579,0.34076177455520346,0.23677969810249233,0.2101198919267321,0.2276536266469315,0.24678319516569472,0.297249372287514
|
22 |
+
4.6875,4200,0.7302631578947368,0.993421052631579,0.9967105263157895,1.0,1.0,1.0,0.7302631578947368,0.01024195976751409,0.49786184210526313,0.13293505289394864,0.3921710526315789,0.2542542782427721,0.3101973684210526,0.3937143686320033,0.2649780701754386,0.4954717634968576,0.23199013157894738,0.5715937768635994,0.7302631578947368,0.8354127506265665,0.8355693922305765,0.8356281328320803,0.8356281328320803,0.8356281328320803,0.7302631578947368,0.5372965681233445,0.4501724823363586,0.44188546614809043,0.48899866366733713,0.5342393130950145,0.7302631578947368,0.342326318294358,0.23774626029530496,0.21090472549603356,0.22845220292726734,0.24733168088568283,0.2977609786459198
|
23 |
+
4.910714285714286,4400,0.7302631578947368,0.993421052631579,0.9967105263157895,1.0,1.0,1.0,0.7302631578947368,0.010227350724729817,0.4998355263157894,0.13368254620254577,0.39184210526315794,0.2541249933594102,0.3111842105263158,0.3948435268881245,0.2652412280701754,0.49626849018850344,0.232171052631579,0.5720837677245543,0.7302631578947368,0.8341772399749373,0.8343338815789473,0.8343905966424682,0.8343905966424682,0.8343905966424682,0.7302631578947368,0.5384654647855256,0.44986527953229877,0.44277699637488865,0.4895063673734854,0.5346148440105628,0.7302631578947368,0.3434603918412553,0.23779270403918282,0.21161540263537876,0.22899252179487295,0.24784282323083537,0.298154972004029
|