model = PhiForCausalLM.from_pretrained("microsoft/phi-1_5", torch_dtype=torch.float16, attn_implementation="flash_attention_2").to("cuda") | |
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-1_5") | |
Feel free to change the prompt to your liking. |