Upload GptOssForCausalLM quantized weights for compatibility with vLLM

#5
by ArthurZ HF Staff - opened
# Sanity check for the PR revision: load the quantized GptOss checkpoint
# and decode a short greedy sample to confirm the weights are usable.
import torch
from transformers import Mxfp4Config, GptOssForCausalLM, AutoTokenizer

model_name = "jxm/gpt-oss-20b-base"

# Tokenizer comes from the main branch; the model weights from PR #5.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = GptOssForCausalLM.from_pretrained(
    model_name,
    revision="refs/pr/5",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Encode a prompt, move it to the model's device, and generate a continuation.
encoded_prompt = tokenizer("Once upon a time", return_tensors="pt").to(model.device)
generated_ids = model.generate(**encoded_prompt)
print(tokenizer.batch_decode(generated_ids))

gave:

['Once upon a time, there was a young student named Lian. He was always a happy and enthusiastic individual, but']
ArthurZ changed pull request title from Upload GptOssForCausalLM to Upload GptOssForCausalLM quantized weights for compatibility with vLLM
Owner

Thank you so much

jxm changed pull request status to merged

Sign up or log in to comment