# Root directory for this run's artifacts. Other sections of this file pull
# it in through the ${output_dir} interpolation.
output_dir: checkpoints/qwen2_5_3B/lora

# Model builder plus its LoRA hyper-parameters.
model:
  _component_: torchtune.models.qwen2_5.lora_qwen2_5_3b
  lora_rank: 8
  lora_alpha: 16  # twice lora_rank
  lora_dropout: 0.0
  # Attention projections listed for LoRA; the MLP is opted in separately.
  lora_attn_modules: [q_proj, v_proj, output_proj]
  apply_lora_to_mlp: true
# Tokenizer for Qwen2.5, built from the vocab and merges files that sit in the
# local Qwen2_5-3B-Instruct directory.
tokenizer:
  _component_: torchtune.models.qwen2_5.qwen2_5_tokenizer
  path: ./Qwen2_5-3B-Instruct/vocab.json
  merges_file: ./Qwen2_5-3B-Instruct/merges.txt
  # Do not paste the Hugging Face Jinja chat template here. torchtune treats a
  # string prompt_template as a dotted import path to a prompt-template class
  # (a role -> [prefix, suffix] mapping is the other accepted form), so raw
  # Jinja source cannot be instantiated.
  # NOTE(review): the Qwen2.5 tokenizer is expected to add the
  # <|im_start|>/<|im_end|> framing on its own - confirm against the installed
  # torchtune version. The default "You are Qwen, ..." system prompt that the
  # Hugging Face template injects is NOT reproduced with null; if it is needed,
  # put it in the data or pass it through the dataset's new_system_prompt.
  prompt_template: null
  # No sequence-length limit is configured.
  max_seq_len: null
# Checkpointer settings: the base weights are the two safetensors shards
# listed below, located under checkpoint_dir.
checkpointer:
  _component_: torchtune.training.FullModelHFCheckpointer
  model_type: QWEN2
  checkpoint_dir: ./Qwen2_5-3B-Instruct
  checkpoint_files:
  - model-00001-of-00002.safetensors
  - model-00002-of-00002.safetensors
  # Same location as the top-level output_dir (resolved by interpolation).
  output_dir: ${output_dir}
  # No recipe state file is supplied, matching resume_from_checkpoint below.
  recipe_checkpoint: null

# This run is not configured to resume from an earlier one.
resume_from_checkpoint: false
# Training data: a local JSON file whose "conversations" column holds
# ShareGPT-style chats.
dataset:
  _component_: torchtune.datasets.chat_dataset
  # Where the data comes from.
  source: json
  data_files: ./rankwogpt-data.json
  split: train
  # How each record is interpreted.
  conversation_column: conversations
  conversation_style: sharegpt
  train_on_input: false
  packed: false

# Batching and ordering.
batch_size: 2
shuffle: true
# No fixed seed is set.
seed: null
# AdamW with the fused implementation requested.
optimizer:
  _component_: torch.optim.AdamW
  lr: 0.0003
  weight_decay: 0.01
  fused: true

# Cosine learning-rate schedule preceded by 100 warmup steps.
lr_scheduler:
  _component_: torchtune.training.lr_schedulers.get_cosine_schedule_with_warmup
  num_warmup_steps: 100
# Loss component.
loss:
  _component_: torchtune.modules.loss.CEWithChunkedOutputLoss

# Training-loop length: one epoch, with no per-epoch step cap configured.
epochs: 1
max_steps_per_epoch: null

# Number of gradient-accumulation steps configured for the loop.
gradient_accumulation_steps: 8

# Compilation is switched off.
compile: false
# Execution target and numeric precision.
device: cuda
dtype: bf16

# Activation checkpointing and activation offloading are both switched off.
enable_activation_checkpointing: false
enable_activation_offloading: false

# Metrics are sent to Weights & Biases under the "torchtune" project.
metric_logger:
  _component_: torchtune.training.metric_logging.WandBLogger
  project: torchtune
log_every_n_steps: 1
log_peak_memory_stats: false
# Torch profiler settings. The profiler is disabled; the remaining keys
# presumably only take effect once enabled is set to true.
profiler:
  _component_: torchtune.training.setup_torch_profiler
  enabled: false
  # Traces are placed in a sub-folder of the top-level output_dir.
  output_dir: ${output_dir}/profiling_outputs

  # Activities to record.
  cpu: true
  cuda: true

  # Extra detail attached to the trace.
  record_shapes: true
  profile_memory: false
  with_stack: false
  with_flops: false

  # Schedule: 5 wait steps, 5 warmup steps, 2 active steps, for 1 cycle.
  wait_steps: 5
  warmup_steps: 5
  active_steps: 2
  num_cycles: 1