.PHONY: test precommit slow_tests test_examples run_rm_1 run_rm_2 run_ppo_1 run_ppo_2

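# Source directories of the project, plus the paths to the accelerate configs
# and command scripts used by the test_examples target below.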
check_dirs := examples tests trl

ACCELERATE_CONFIG_PATH = `pwd`/examples/accelerate_configs
COMMAND_FILES_PATH = `pwd`/commands

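# Fast test suite: run in parallel, skipping tests marked slow or low-priority;
# flaky failures (network errors, small numerical tolerances) are retried up to 5 times.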
test:
	pytest -n auto -m "not slow and not low-priority" -s -v --reruns 5 --reruns-delay 1 --only-rerun '(OSError|Timeout|HTTPError.*502|HTTPError.*504|not less than or equal to 0.01)' tests/

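# Add missing copyright headers and run all pre-commit hooks on the repository.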
precommit:
	python scripts/add_copyrights.py
	pre-commit run --all-files

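# Run only the tests marked "slow"; on GitHub CI, also write a pytest report log.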
slow_tests:
	pytest -m "slow" tests/ $(if $(IS_GITHUB_CI),--report-log "slow_tests.log",)

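# Launch the SFT and DPO example scripts once per accelerate config and record
# "<exit code>,<config>" for each run in a temporary results file.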
test_examples:
	touch temp_results_sft_tests.txt
	for file in $(ACCELERATE_CONFIG_PATH)/*.yaml; do \
		TRL_ACCELERATE_CONFIG=$${file} bash $(COMMAND_FILES_PATH)/run_sft.sh; \
		echo $$?','$${file} >> temp_results_sft_tests.txt; \
	done

	touch temp_results_dpo_tests.txt
	for file in $(ACCELERATE_CONFIG_PATH)/*.yaml; do \
		TRL_ACCELERATE_CONFIG=$${file} bash $(COMMAND_FILES_PATH)/run_dpo.sh; \
		echo $$?','$${file} >> temp_results_dpo_tests.txt; \
	done

# ------------------------------------------------------------------------------

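# Reward modeling example: full fine-tuning of Qwen2-0.5B-Instruct on the
# trl-lib/ultrafeedback_binarized preference dataset.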
run_rm_1:
	python examples/scripts/reward_modeling.py \
		--model_name_or_path Qwen/Qwen2-0.5B-Instruct \
		--dataset_name trl-lib/ultrafeedback_binarized \
		--output_dir Qwen2-0.5B-Reward \
		--per_device_train_batch_size 8 \
		--num_train_epochs 1 \
		--gradient_checkpointing True \
		--learning_rate 1.0e-5 \
		--logging_steps 25 \
		--eval_strategy steps \
		--eval_steps 50 \
		--max_length 2048

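# Same reward modeling example, but trained with LoRA adapters (PEFT) and a
# higher learning rate.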
run_rm_2:
	python examples/scripts/reward_modeling.py \
		--model_name_or_path Qwen/Qwen2-0.5B-Instruct \
		--dataset_name trl-lib/ultrafeedback_binarized \
		--output_dir Qwen2-0.5B-Reward-LoRA \
		--per_device_train_batch_size 8 \
		--num_train_epochs 1 \
		--gradient_checkpointing True \
		--learning_rate 1.0e-4 \
		--logging_steps 25 \
		--eval_strategy steps \
		--eval_steps 50 \
		--max_length 2048 \
		--use_peft \
		--lora_r 32 \
		--lora_alpha 16

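# PPO example: single-process run on the descriptiveness split with
# pythia-1b-deduped as the base model.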
run_ppo_1:
	python examples/scripts/ppo/ppo.py \
		--dataset_name trl-internal-testing/descriptiveness-sentiment-trl-style \
		--dataset_train_split descriptiveness \
		--learning_rate 3e-6 \
		--output_dir models/minimal/ppo \
		--per_device_train_batch_size 64 \
		--gradient_accumulation_steps 1 \
		--total_episodes 10000 \
		--model_name_or_path EleutherAI/pythia-1b-deduped \
		--missing_eos_penalty 1.0

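# Same PPO example launched via accelerate with DeepSpeed ZeRO-3, explicitly
# pointing the SFT and reward model paths at pythia-1b-deduped.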
run_ppo_2:
	accelerate launch --config_file examples/accelerate_configs/deepspeed_zero3.yaml \
		examples/scripts/ppo/ppo.py \
		--dataset_name trl-internal-testing/descriptiveness-sentiment-trl-style \
		--dataset_train_split descriptiveness \
		--output_dir models/minimal/ppo \
		--num_ppo_epochs 1 \
		--num_mini_batches 1 \
		--learning_rate 3e-6 \
		--per_device_train_batch_size 1 \
		--gradient_accumulation_steps 16 \
		--total_episodes 10000 \
		--model_name_or_path EleutherAI/pythia-1b-deduped \
		--sft_model_path EleutherAI/pythia-1b-deduped \
		--reward_model_path EleutherAI/pythia-1b-deduped \
		--local_rollout_forward_batch_size 1 \
		--missing_eos_penalty 1.0