from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import gradio as gr
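
# Assumed dependencies (not pinned in the original): transformers, torch,
# gradio, and accelerate, e.g. pip install transformers torch gradio accelerate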

# Load the instruction-tuned model
model = AutoModelForCausalLM.from_pretrained(
    "MediaTek-Research/Breeze-7B-Instruct-v1_0",
    device_map="auto",
    torch_dtype=torch.bfloat16,
)
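
# Note: device_map="auto" requires the accelerate package, and bfloat16
# assumes hardware with native bf16 support (e.g. an Ampere-or-newer GPU);
# on older GPUs or CPU, torch.float16 or torch.float32 is a safer choice.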

# Load the tokenizer
tokenizer = AutoTokenizer.from_pretrained("MediaTek-Research/Breeze-7B-Instruct-v1_0")

# Define the system prompt
SYS_PROMPT = "You are a helpful AI assistant built by MediaTek Research. The user you are helping speaks Traditional Chinese and comes from Taiwan."
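
# For reference, Breeze-7B's chat template renders the conversation in
# Mistral-style markup with this same default system prompt, roughly:
#   <s>SYS_PROMPT [INST] QUERY1 [/INST] RESPONSE1 [INST] QUERY2 [/INST]
# which is why no manual [INST] wrapping is done below.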

# Conversation history for the model (a list of {"role": ..., "content": ...} dicts)
chat_history = []

def generate_response(user_input):
    global chat_history
    # Append the user's message to the conversation history
    chat_history.append({"role": "user", "content": user_input})

    # ๅบ”็”จ่Šๅคฉๆจกๆฟ
    prompt = tokenizer.apply_chat_template(chat_history, tokenize=False)
    full_prompt = f"<s>{SYS_PROMPT} [INST] {prompt} [/INST]"

    # ็”Ÿๆˆๆ–‡ๆœฌ
    inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
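    # Sampling settings below are the original author's choices, not tuned
    # recommendations: temperature/top_p/top_k trade determinism for variety,
    # and repetition_penalty discourages the model from looping.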
    outputs = model.generate(
        inputs["input_ids"],
        max_new_tokens=128,
        top_p=0.95,
        top_k=50,
        repetition_penalty=1.1,
        temperature=0.7,
        do_sample=True,  # enable sampling-based generation
        attention_mask=inputs["attention_mask"],  # pass the attention mask explicitly
        pad_token_id=tokenizer.eos_token_id,  # Mistral-style tokenizers define no pad token
    )

    # Decode only the newly generated tokens; decoding outputs[0] in full
    # would return the entire prompt plus the reply
    new_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    generated_text = tokenizer.decode(new_tokens, skip_special_tokens=True)

    # Append the generated reply to the conversation history
    chat_history.append({"role": "assistant", "content": generated_text})

    # ๅชไฟ็•™ๆœ€ๆ–ฐ็š„ไบ”ไธช้—ฎ็ญ”ๅฏน่ฏ
    if len(chat_history) > 10:
        chat_history = chat_history[-10:]

    return generated_text
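
# Hypothetical smoke test (not in the original): calling the generator
# directly, e.g. print(generate_response("ไป€้บผๆ˜ฏ้ซ˜่ก€ๅฃ“๏ผŸ")), i.e. "What is
# hypertension?", should return a short Traditional Chinese answer.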

# Gradio callback: generate_response keeps the model-side history above,
# while `history` here is the Chatbot component's own message list
def chat_interface(user_input, history):
    response = generate_response(user_input)
    history.append({"role": "user", "content": user_input})
    history.append({"role": "assistant", "content": response})
    return "", history

iface = gr.Blocks()

with iface:
    # UI copy kept in Traditional Chinese for the target users; it reads:
    # "Medical Q&A Assistant: a medical Q&A assistant based on the
    # MediaTek-Research/Breeze-7B-Instruct-v1_0 model."
    gr.Markdown("# ้†ซ็™‚ๅ•็ญ”ๅŠฉๆ‰‹\n้€™ๆ˜ฏไธ€ๅ€‹ๅŸบๆ–ผ MediaTek-Research/Breeze-7B-Instruct-v1_0 ๆจกๅž‹็š„้†ซ็™‚ๅ•็ญ”ๅŠฉๆ‰‹ใ€‚")
    chatbot = gr.Chatbot(type="messages")  # OpenAI-style role/content message dicts
    with gr.Row():
        txt = gr.Textbox(
            show_label=False,
            placeholder="่ซ‹่ผธๅ…ฅไฝ ็š„ๅ•้กŒ...",  # "Please enter your question..."
            lines=1,
        )
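    # Wire the Textbox to the callback: with outputs [txt, chatbot], the
    # returned "" clears the input box and the returned history re-renders
    # the Chatbot component.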
    txt.submit(chat_interface, [txt, chatbot], [txt, chatbot])

# ๅฏๅŠจGradio็•Œ้ขๅนถๅ…ฑไบซ้“พๆŽฅ
iface.launch(share=True)
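# Note: share=True also prints a temporary public *.gradio.live URL in
# addition to the local address, which is handy when running in Colab.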