Ahmadzei's picture
added 3 more tables for large emb model
5fa1a76
After conversion, the model and tokenizer can be loaded via:
thon
from transformers import LlamaForCausalLM, CodeLlamaTokenizer
tokenizer = CodeLlamaTokenizer.from_pretrained("codellama/CodeLlama-7b-hf")
model = LlamaForCausalLM.from_pretrained("codellama/CodeLlama-7b-hf")
PROMPT = '''def remove_non_ascii(s: str) -> str:
"""
return result
'''
input_ids = tokenizer(PROMPT, return_tensors="pt")["input_ids"]
generated_ids = model.generate(input_ids, max_new_tokens=128)
filling = tokenizer.batch_decode(generated_ids[:, input_ids.shape[1]:], skip_special_tokens = True)[0]
print(PROMPT.replace("", filling))
def remove_non_ascii(s: str) -> str:
""" Remove non-ASCII characters from a string.