genies-llm committed on
Commit 27bf57c · verified · 1 Parent(s): c312f78

Model save

Files changed (4)
  1. README.md +6 -5
  2. all_results.json +4 -4
  3. train_results.json +4 -4
  4. trainer_state.json +29 -29
README.md CHANGED
@@ -1,16 +1,17 @@
 ---
 base_model: Qwen/Qwen2.5-Coder-7B
-datasets: Genies/Reasoning_SQL_BIRD
 library_name: transformers
+model_name: text2sql-reasoning-sql-no-ex
 tags:
 - generated_from_trainer
-- open-r1
+- trl
+- grpo
 licence: license
 ---
 
-# Model Card for None
+# Model Card for text2sql-reasoning-sql-no-ex
 
-This model is a fine-tuned version of [Qwen/Qwen2.5-Coder-7B](https://huggingface.co/Qwen/Qwen2.5-Coder-7B) on the [Genies/Reasoning_SQL_BIRD](https://huggingface.co/datasets/Genies/Reasoning_SQL_BIRD) dataset.
+This model is a fine-tuned version of [Qwen/Qwen2.5-Coder-7B](https://huggingface.co/Qwen/Qwen2.5-Coder-7B).
 It has been trained using [TRL](https://github.com/huggingface/trl).
 
 ## Quick start
@@ -26,7 +27,7 @@ print(output["generated_text"])
 
 ## Training procedure
 
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/genies-rnd/text2sql-rl/runs/vsoi38ya)
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/genies-rnd/text2sql-rl/runs/2t79o2wj)
 
 
 This model was trained with GRPO, a method introduced in [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models](https://huggingface.co/papers/2402.03300).
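
The Quick start block itself is unchanged by this commit; only its hunk-header context line, `print(output["generated_text"])`, is visible above. For reference, a minimal sketch of loading such a checkpoint with the transformers text-generation pipeline; the repo id `Genies/text2sql-reasoning-sql-no-ex` and the example prompt are assumptions, not taken from the card:

```python
from transformers import pipeline

# Repo id inferred from the model_name added above -- adjust to the actual Hub path.
generator = pipeline("text-generation", model="Genies/text2sql-reasoning-sql-no-ex")

# Hypothetical text-to-SQL style prompt; the real card's Quick start prompt is not shown in this diff.
prompt = (
    "Schema: customers(id, name), orders(id, customer_id, order_date)\n"
    "Question: List the names of customers who placed an order in 2023.\n"
    "SQL:"
)

output = generator(prompt, max_new_tokens=256, return_full_text=False)[0]
print(output["generated_text"])
```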
all_results.json CHANGED
@@ -6,9 +6,9 @@
     "eval_samples_per_second": 1.155,
     "eval_steps_per_second": 0.024,
     "total_flos": 0.0,
-    "train_loss": -1.063280144317002e-05,
-    "train_runtime": 264.9551,
+    "train_loss": 1.3305503601408275e-05,
+    "train_runtime": 263.5351,
     "train_samples": 9428,
-    "train_samples_per_second": 106.75,
-    "train_steps_per_second": 3.34
+    "train_samples_per_second": 107.325,
+    "train_steps_per_second": 3.358
 }
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "total_flos": 0.0,
-    "train_loss": -1.063280144317002e-05,
-    "train_runtime": 264.9551,
+    "train_loss": 1.3305503601408275e-05,
+    "train_runtime": 263.5351,
     "train_samples": 9428,
-    "train_samples_per_second": 106.75,
-    "train_steps_per_second": 3.34
+    "train_samples_per_second": 107.325,
+    "train_steps_per_second": 3.358
 }
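
The updated throughput figures in both result files are internally consistent with the run length recorded in trainer_state.json below. A quick arithmetic check, assuming the usual Trainer convention of samples/sec = train_samples × num_train_epochs / runtime and steps/sec = max_steps / runtime:

```python
# Values copied from train_results.json and trainer_state.json in this commit.
train_samples = 9428
num_train_epochs = 3      # from trainer_state.json
max_steps = 885           # from trainer_state.json
train_runtime = 263.5351  # seconds

print(round(train_samples * num_train_epochs / train_runtime, 3))  # 107.325 -> matches train_samples_per_second
print(round(max_steps / train_runtime, 3))                         # 3.358   -> matches train_steps_per_second
```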
trainer_state.json CHANGED
@@ -31839,49 +31839,49 @@
       "clip_ratio/low_mean": 0.0,
       "clip_ratio/low_min": 0.0,
       "clip_ratio/region_mean": 0.0,
-      "completions/clipped_ratio": -1.21875,
+      "completions/clipped_ratio": -1.0,
       "completions/max_length": 256.0,
-      "completions/max_terminated_length": 252.0,
-      "completions/mean_length": 227.36459350585938,
-      "completions/mean_terminated_length": 178.56336975097656,
-      "completions/min_length": 98.0,
-      "completions/min_terminated_length": 98.0,
+      "completions/max_terminated_length": 255.0,
+      "completions/mean_length": 228.609375,
+      "completions/mean_terminated_length": 173.828125,
+      "completions/min_length": 79.0,
+      "completions/min_terminated_length": 79.0,
       "epoch": 2.003392705682782,
       "frac_reward_zero_std": 0.03125,
-      "grad_norm": 0.781992495059967,
-      "kl": 0.0506591796875,
+      "grad_norm": 0.9302852749824524,
+      "kl": 0.0499267578125,
       "learning_rate": 3.8941533949121694e-12,
-      "loss": -0.0094,
-      "num_tokens": 226848339.0,
-      "reward": 8.37523078918457,
-      "reward_std": 1.1983823776245117,
-      "rewards/accuracy_reward/mean": 1.25,
-      "rewards/accuracy_reward/std": 1.482886552810669,
-      "rewards/format_reward/mean": 0.9479166865348816,
-      "rewards/format_reward/std": 0.22277599573135376,
-      "rewards/judge_reward/mean": 0.9541666507720947,
-      "rewards/judge_reward/std": 0.920268177986145,
-      "rewards/ngrams_iou_reward/mean": 0.19519639015197754,
-      "rewards/ngrams_iou_reward/std": 0.1999417394399643,
-      "rewards/schema_keywords_iou_reward/mean": 0.730033814907074,
-      "rewards/schema_keywords_iou_reward/std": 0.16520489752292633,
-      "rewards/syntax_reward/mean": 0.84375,
-      "rewards/syntax_reward/std": 0.36404144763946533,
+      "loss": 0.0118,
+      "num_tokens": 226848578.0,
+      "reward": 8.244189262390137,
+      "reward_std": 1.5018832683563232,
+      "rewards/accuracy_reward/mean": 1.1875,
+      "rewards/accuracy_reward/std": 1.4709222316741943,
+      "rewards/format_reward/mean": 0.953125,
+      "rewards/format_reward/std": 0.21192367374897003,
+      "rewards/judge_reward/mean": 0.9822916984558105,
+      "rewards/judge_reward/std": 0.9181696772575378,
+      "rewards/ngrams_iou_reward/mean": 0.1794198751449585,
+      "rewards/ngrams_iou_reward/std": 0.18398059904575348,
+      "rewards/schema_keywords_iou_reward/mean": 0.7147682309150696,
+      "rewards/schema_keywords_iou_reward/std": 0.1661030650138855,
+      "rewards/syntax_reward/mean": 0.8697916865348816,
+      "rewards/syntax_reward/std": 0.3374123275279999,
       "step": 885
     },
     {
       "epoch": 2.003392705682782,
       "step": 885,
       "total_flos": 0.0,
-      "train_loss": -1.063280144317002e-05,
-      "train_runtime": 264.9551,
-      "train_samples_per_second": 106.75,
-      "train_steps_per_second": 3.34
+      "train_loss": 1.3305503601408275e-05,
+      "train_runtime": 263.5351,
+      "train_samples_per_second": 107.325,
+      "train_steps_per_second": 3.358
     }
   ],
   "logging_steps": 1,
   "max_steps": 885,
-  "num_input_tokens_seen": 226848339,
+  "num_input_tokens_seen": 226848578,
   "num_train_epochs": 3,
   "save_steps": 52,
   "stateful_callbacks": {