TinyLLaVA-Video-R1 / readme.txt
Zhang199's picture
first commit
3162f59
raw
history blame contribute delete
678 Bytes
Check QUESTION_TEMPLATE!
kl: 0.01
lr: 1e-6
epoch: 1
with cold-start data
# Perturb the advantages with random normal noise scaled by 0.02.
noise = torch.randn_like(advantages) * 0.02
advantages = advantages + noise
def format_reward(completions, **kwargs):
    """Score each completion on output format plus reasoning length.

    A completion earns 0.5 when its entire text matches
    ``<think>...</think>`` followed (after optional whitespace) by
    ``<answer>...</answer>`` and it also passes ``recheck_format``. Such a
    completion then earns up to a further 0.5, proportional to ``len()`` of
    the value returned by ``extract_first_think_answer`` and capped once that
    length reaches 1200.

    NOTE(review): the snippet this was restored from had lost its
    indentation; nesting the length bonus under the format check is a
    reconstruction -- confirm against the training script.

    Args:
        completions: Iterable of completions; the first element of each is
            a mapping with a ``"content"`` entry holding the generated text.
        **kwargs: Accepted and ignored.

    Returns:
        A list with one float per completion, in input order, each between
        0.0 and 1.0.
    """
    pattern = r"<think>.*?</think>\s*<answer>.*?</answer>"
    rewards = []
    for completion in completions:
        content = completion[0]["content"]
        score = 0.0
        # re.DOTALL lets ``.`` span newlines inside the think/answer bodies.
        if re.fullmatch(pattern, content, re.DOTALL) and recheck_format(content):
            score += 0.5
            think = extract_first_think_answer(content)
            # Length bonus grows linearly and saturates at 1200.
            score += min(len(think) / 1200, 1) * 0.5
        rewards.append(score)
    return rewards