alishafique committed
Commit 4a6b784 · verified · 1 Parent(s): 4398194

Update app.py

Files changed (1)
  1. app.py +96 -25
app.py CHANGED
@@ -1,16 +1,19 @@
-import llama_cpp
-from llama_cpp import Llama
-# import llama_cpp.llama_tokenizer
+import os
+import json
+import subprocess
 import gradio as gr
-
+from threading import Thread
 from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+from datetime import datetime
 
-model_name = "large-traversaal/Alif-1.0-8B-Instruct"
-model_file = "model-Q8_0.gguf"
-model_path_file = hf_hub_download(model_name,
-                                  filename=model_file,)
+# Load model from Hugging Face Hub
+MODEL_ID = "large-traversaal/Alif-1.0-8B-Instruct"
+MODEL_FILE = "model-Q8_0.gguf"
 
+model_path_file = hf_hub_download(MODEL_ID, filename=MODEL_FILE)
 
+# Initialize Llama model
 llama = Llama(
     model_path=model_path_file,
     n_gpu_layers=40, # Adjust based on VRAM
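Note on this hunk: of the new imports, only gradio, hf_hub_download, and Llama are exercised by the committed code; os, json, subprocess, Thread, and datetime are currently unused. For reference, hf_hub_download fetches the GGUF file on first run and returns its local cache path, which is what Llama(model_path=...) expects. A minimal sketch (the printed path is illustrative):

from huggingface_hub import hf_hub_download

# Resolves to a file in the local Hugging Face cache, downloading on first use.
gguf_path = hf_hub_download("large-traversaal/Alif-1.0-8B-Instruct", filename="model-Q8_0.gguf")
print(gguf_path)  # e.g. ~/.cache/huggingface/hub/models--large-traversaal--Alif-1.0-8B-Instruct/...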
@@ -20,15 +23,17 @@ llama = Llama(
     verbose=True # Enable debug logging
 )
 
-chat_prompt = """You are Urdu Chatbot. Write approriate response for given instruction:{inp} Response:"""
+CHAT_TEMPLATE = "Alif Chat"
+CONTEXT_LENGTH = 4096
+COLOR = "blue"
+EMOJI = "💬"
+DESCRIPTION = "Urdu AI Chatbot powered by Llama.cpp"
 
-# Function to generate text with streaming output
-def chat_with_ai(prompt):
-    query = chat_prompt.format(inp=prompt)
+# Function to generate responses
+def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
+    chat_prompt = f"You are an Urdu Chatbot. Write an appropriate response for the given instruction: {message} Response:"
+    response = llama(chat_prompt, max_tokens=max_new_tokens, stop=["Q:", "\n"], echo=False, stream=True)
 
-    #response = llama(prompt, max_tokens=1024, stop=stop_tokens, echo=False, stream=True) # Enable streaming
-    response = llama(query, max_tokens=256, stop=["Q:", "\n"], echo=False, stream=True) # Enable streaming
-
     text = ""
     for chunk in response:
         content = chunk["choices"][0]["text"]
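As committed, generate_response accepts the UI's system_prompt, temperature, top_k, repetition_penalty, and top_p values but forwards only max_new_tokens to the model, and the stop list still ends generation at the first newline. llama-cpp-python's Llama.__call__ does accept matching sampling keyword arguments (temperature, top_p, top_k, repeat_penalty), so the sliders could be wired through roughly as below. This is a sketch, not part of the commit; it assumes the module-level llama instance from app.py, and the system prompt remains unused:

# Sketch (not in the commit): forward the UI sampling controls to llama_cpp.
def generate_response(message, history, system_prompt, temperature, max_new_tokens, top_k, repetition_penalty, top_p):
    chat_prompt = f"You are an Urdu Chatbot. Write an appropriate response for the given instruction: {message} Response:"
    response = llama(
        chat_prompt,
        max_tokens=max_new_tokens,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        repeat_penalty=repetition_penalty,
        stop=["Q:", "\n"],  # note: "\n" still truncates the reply at the first newline
        echo=False,
        stream=True,
    )
    text = ""
    for chunk in response:
        content = chunk["choices"][0]["text"]
        if content:
            text += content
            yield text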
@@ -36,15 +41,81 @@ def chat_with_ai(prompt):
             text += content
             yield text
 
+# Create Gradio interface
+with gr.Blocks() as demo:
+    chatbot = gr.Chatbot(label="Urdu Chatbot", likeable=True, render=False)
+    chat = gr.ChatInterface(
+        generate_response,
+        chatbot=chatbot,
+        title=EMOJI + " " + "Alif-1.0 Chatbot",
+        description=DESCRIPTION,
+        examples=[
+            ["شہر کراچی کے بارے میں بتاؤ"],
+            ["قابل تجدید توانائی کیا ہے؟"],
+            ["پاکستان کی تاریخ کے بارے میں بتائیں۔"]
+        ],
+        additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
+        additional_inputs=[
+            gr.Textbox("", label="System prompt", render=False),
+            gr.Slider(0, 1, 0.6, label="Temperature", render=False),
+            gr.Slider(128, CONTEXT_LENGTH, 1024, label="Max new tokens", render=False),
+            gr.Slider(1, 80, 40, step=1, label="Top K sampling", render=False),
+            gr.Slider(0, 2, 1.1, label="Repetition penalty", render=False),
+            gr.Slider(0, 1, 0.95, label="Top P sampling", render=False),
+        ],
+        theme=gr.themes.Soft(primary_hue=COLOR),
+    )
 
-# Gradio UI setup
-demo = gr.Interface(
-    fn=chat_with_ai, # Streaming function
-    inputs="text", # User input
-    outputs="text", # Model response
-    title="Streaming Alif-1.0-8B-Instruct Chatbot 🚀",
-    description="Enter a prompt and get a streamed response."
-)
+demo.queue(max_size=20).launch(share=True)
+
+
+# import llama_cpp
+# from llama_cpp import Llama
+# # import llama_cpp.llama_tokenizer
+# import gradio as gr
+
+# from huggingface_hub import hf_hub_download
+
+# model_name = "large-traversaal/Alif-1.0-8B-Instruct"
+# model_file = "model-Q8_0.gguf"
+# model_path_file = hf_hub_download(model_name,
+#                                   filename=model_file,)
+
+
+# llama = Llama(
+#     model_path=model_path_file,
+#     n_gpu_layers=40, # Adjust based on VRAM
+#     n_threads=8, # Match CPU cores
+#     n_batch=512, # Optimize for better VRAM usage
+#     n_ctx=4096, # Context window size
+#     verbose=True # Enable debug logging
+# )
+
+# chat_prompt = """You are Urdu Chatbot. Write approriate response for given instruction:{inp} Response:"""
+
+# # Function to generate text with streaming output
+# def chat_with_ai(prompt):
+#     query = chat_prompt.format(inp=prompt)
+
+#     #response = llama(prompt, max_tokens=1024, stop=stop_tokens, echo=False, stream=True) # Enable streaming
+#     response = llama(query, max_tokens=256, stop=["Q:", "\n"], echo=False, stream=True) # Enable streaming
+
+#     text = ""
+#     for chunk in response:
+#         content = chunk["choices"][0]["text"]
+#         if content:
+#             text += content
+#             yield text
+
+
+# # Gradio UI setup
+# demo = gr.Interface(
+#     fn=chat_with_ai, # Streaming function
+#     inputs="text", # User input
+#     outputs="text", # Model response
+#     title="Streaming Alif-1.0-8B-Instruct Chatbot 🚀",
+#     description="Enter a prompt and get a streamed response."
+# )
 
-# Launch the Gradio app
-demo.launch(share=True)
+# # Launch the Gradio app
+# demo.launch(share=True)
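A note on how this wires together: gr.ChatInterface invokes the function as fn(message, history, *additional_inputs), with the extra values passed in the order the inputs are declared, and a generator fn is what makes the response stream. That also means the generator can be exercised without the UI; a minimal smoke test, with values mirroring the slider defaults in app.py:

# Minimal check of the streaming generator outside Gradio.
final = ""
for final in generate_response(
    "شہر کراچی کے بارے میں بتاؤ",  # first example prompt from the UI
    history=[],
    system_prompt="",
    temperature=0.6,
    max_new_tokens=1024,
    top_k=40,
    repetition_penalty=1.1,
    top_p=0.95,
):
    pass
print(final)  # the last yielded value is the full accumulated response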