Tags: Text Generation · Transformers · Safetensors · English · ddllama · conversational · custom_code
xuan luo committed (verified)
Commit 6b57240 · 1 Parent(s): 1e72e38

Upload generation.py with huggingface_hub

Files changed (1):
  generation.py +90 -0
generation.py ADDED
@@ -0,0 +1,90 @@
import torch
import transformers
from transformers.generation.streamers import BaseStreamer


class TokenStreamer(BaseStreamer):
    """
    Simple token streamer that prints each generated token on its own line,
    so it can be lined up with the per-token layer usage reported by the
    model's custom code.

    Parameters:
        tokenizer (`AutoTokenizer`):
            The tokenizer used to decode the tokens.
        skip_prompt (`bool`, *optional*, defaults to `True`):
            Whether to skip the prompt tokens in the output. Useful for chatbots.
    """

    def __init__(self, tokenizer, skip_prompt=True):
        self.tokenizer = tokenizer
        self.skip_prompt = skip_prompt
        self.next_tokens_are_prompt = True

    def put(self, value):
        """Receives token ids and prints the decoded text of each token."""
        if len(value.shape) > 1 and value.shape[0] > 1:
            raise ValueError("TokenStreamer only supports batch size 1")
        elif len(value.shape) > 1:
            value = value[0]

        if self.skip_prompt and self.next_tokens_are_prompt:
            self.next_tokens_are_prompt = False
            return

        # Decode and print each token in the received tensor as `=<repr>`
        for token_id in value.tolist():
            token_text = self.tokenizer.decode([token_id])
            print(f"={repr(token_text)}", end="\n", flush=True)

    def end(self):
        """Resets the prompt flag and prints a newline at the end of generation."""
        self.next_tokens_are_prompt = True
        print()


# Model path: the checkpoint in this repository
model_id = "./"

# The tokenizer comes from the base Llama 3 model; the checkpoint itself
# ships custom modeling code, hence trust_remote_code=True.
tokenizer = transformers.AutoTokenizer.from_pretrained(
    "meta-llama/Meta-Llama-3-8B-Instruct", trust_remote_code=True
)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
    trust_remote_code=True,
)

# The model and tokenizer are already instantiated, so the pipeline needs
# no extra loading kwargs.
pipeline = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

messages = [
    {
        "role": "user",
        "content": (
            "Jan has three times the number of pets as Marcia. Marcia has "
            "two more pets than Cindy. If Cindy has four pets, how many "
            "total pets do the three have?"
        ),
    },
]

# Stop on either the model's EOS token or Llama 3's end-of-turn token.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

streamer = TokenStreamer(tokenizer)
outputs = pipeline(
    messages,
    max_new_tokens=512,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=1.0,
    streamer=streamer,
)
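
For reference, TokenStreamer prints each token as `=` followed by the token's repr, one per line, so a run produces output along these lines (illustrative only; the actual tokens depend on sampling):

='To'
=' find'
=' the'
=' total'
=' number'
=' of'
=' pets'
...

The streamer also plugs into `model.generate` directly, bypassing the pipeline. A minimal sketch, assuming the same `model`, `tokenizer`, `messages`, and `terminators` defined above:

# Minimal sketch: streaming with model.generate instead of the pipeline.
# Builds the prompt with the base tokenizer's chat template.
input_ids = tokenizer.apply_chat_template(
    messages, add_generation_prompt=True, return_tensors="pt"
).to(model.device)

streamer = TokenStreamer(tokenizer)
model.generate(
    input_ids,
    max_new_tokens=512,
    eos_token_id=terminators,
    do_sample=True,
    temperature=0.6,
    top_p=1.0,
    streamer=streamer,
)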