Upload GptOssForCausalLM quantized weights for compatibility with vLLM
#5
by
ArthurZ
HF Staff
- opened
"""Example: load the GPT-OSS 20B base model from the quantized-weights PR branch
and sample a short continuation from it.

NOTE: this downloads model weights from the Hugging Face Hub and requires a
device large enough to hold a 20B-parameter model in bfloat16.
"""
import torch
from transformers import GptOssForCausalLM, AutoTokenizer

model_name = "jxm/gpt-oss-20b-base"

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = GptOssForCausalLM.from_pretrained(
    model_name,
    revision="refs/pr/5",       # PR branch carrying the quantized weights
    torch_dtype=torch.bfloat16,
    device_map="auto",          # place/shard the model across available devices
)

# Tokenize the prompt, move it to the model's device, generate, and decode.
inputs = tokenizer("Once upon a time", return_tensors="pt").to(model.device)
output_ids = model.generate(**inputs)
print(tokenizer.batch_decode(output_ids))
gave:
['Once upon a time, there was a young student named Lian. He was always a happy and enthusiastic individual, but']
ArthurZ
changed pull request title from
Upload GptOssForCausalLM
to Upload GptOssForCausalLM quantized weights for compatibility with vLLM
Thank you so much
jxm
changed pull request status to
merged