```python
messages = [{"role": "user", "content": prompt}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
model_inputs = tokenizer([text], return_tensors="pt").to(device)

generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512, do_sample=True)
generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
```

## Qwen2Config

[[autodoc]] Qwen2Config

## Qwen2Tokenizer

[[autodoc]] Qwen2Tokenizer
    - save_vocabulary

## Qwen2TokenizerFast

[[autodoc]] Qwen2TokenizerFast

## Qwen2Model

[[autodoc]] Qwen2Model
    - forward

## Qwen2ForCausalLM

[[autodoc]] Qwen2ForCausalLM
    - forward

## Qwen2ForSequenceClassification

[[autodoc]] Qwen2ForSequenceClassification
    - forward