---
library_name: transformers
tags: []
---

# Model Card for MathTutor-7B_v0.1

## Model: MathTutor RL version (Lambda = 1.0) (no Think)

## Usage:

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoConfig

# --- Configuration ---
model_weights_id = "Sandesh-Zenteiq/MathTutor-7B_v0.1"
tokenizer_id = "Qwen/Qwen2.5-7B-Instruct"
device = "cuda" if torch.cuda.is_available() else "cpu"

print(f"Loading weights from: {model_weights_id}")
print(f"Loading tokenizer from: {tokenizer_id}")
print(f"Using device: {device}")

# --- Loading Logic ---
print("\nLoading model config...")
config = AutoConfig.from_pretrained(model_weights_id, trust_remote_code=True)

print("\nLoading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(tokenizer_id, trust_remote_code=True)

print("Loading model weights...")
model = AutoModelForCausalLM.from_pretrained(
    model_weights_id,
    config=config,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)
print("Model loaded successfully!")

# --- Interactive Socratic Chat Loop ---
conversation_history = [
    {"role": "system", "content": "You are a Socratic teacher. Guide the student to solve the problem by asking heuristic questions. Do not give direct answers or calculations. Ask one question at a time."},
    {"role": "user", "content": "YOUR QUESTION HERE"},
]

print("\n--- Starting Interactive Socratic Session ---")
print("You are the student. The model is the teacher.")
print("Type 'quit' or 'exit' to end the conversation.\n")

# Generate the very first response from the teacher.
# The prompt is built manually in the ChatML format used by Qwen models:
# <|im_start|>{role}\n{content}<|im_end|>
prompt_parts = []
for message in conversation_history:
    prompt_parts.append(f"<|im_start|>{message['role']}\n{message['content']}<|im_end|>")

# Signal to the model that it's its turn to generate
prompt_parts.append("<|im_start|>assistant")
manual_prompt = "\n".join(prompt_parts)

inputs = tokenizer(manual_prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=1000, temperature=0.7, do_sample=True)
initial_response = tokenizer.decode(outputs[0], skip_special_tokens=False)

# Extract only the assistant's part of the response
teacher_response_text = initial_response.split('<|im_start|>assistant')[1].replace('<|im_end|>', '').strip()
print(f"Teacher: {teacher_response_text}")
conversation_history.append({"role": "assistant", "content": teacher_response_text})

# Now start the interactive loop for back-and-forth
while True:
    student_input = input("Student: ")
    if student_input.lower() in ["quit", "exit"]:
        print("--- Session Ended ---")
        break

    # Add the user's new message to the history
    conversation_history.append({"role": "user", "content": student_input})

    # --- Manually build the prompt with the FULL history ---
    prompt_parts = []
    for message in conversation_history:
        prompt_parts.append(f"<|im_start|>{message['role']}\n{message['content']}<|im_end|>")
    prompt_parts.append("<|im_start|>assistant")
    manual_prompt = "\n".join(prompt_parts)

    # Generate the next response based on the full history
    inputs = tokenizer(manual_prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=1000, temperature=0.7, do_sample=True)
    full_generation = tokenizer.decode(outputs[0], skip_special_tokens=False)

    # Cleanly extract only the *newest* assistant response. The decoded text should
    # begin with the prompt; if decoding does not round-trip the prompt exactly,
    # the split fails and we fall back to a canned reply.
    try:
        new_response_part = full_generation.split(manual_prompt)[1]
        teacher_response_text = new_response_part.replace('<|im_end|>', '').strip()
    except IndexError:
        # Fallback if splitting fails
"I'm sorry, I seem to have lost my train of thought. Could you please repeat your question?" print(f"\nTeacher: {teacher_response_text}") # Add the model's new response to the history for the next turn conversation_history.append({"role": "assistant", "content": teacher_response_text}) ```