prithivMLmods committed on
Commit 8716c2f · verified · 1 Parent(s): c0da9a5

Update app.py

Files changed (1)
  1. app.py +160 -134
app.py CHANGED
@@ -1,138 +1,164 @@
-import os
-import time
-import spaces
-import json
-import subprocess
-from llama_cpp import Llama
-from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
-from llama_cpp_agent.providers import LlamaCppPythonProvider
-from llama_cpp_agent.chat_history import BasicChatHistory
-from llama_cpp_agent.chat_history.messages import Roles
 import gradio as gr
-from huggingface_hub import hf_hub_download
-
-# Define model details
-MODEL_REPO = "prithivMLmods/Sombrero-QwQ-32B-Elite10-Fixed-Q2_K-GGUF"
-MODEL_FILENAME = "sombrero-qwq-32b-elite10-fixed-q2_k.gguf"
-MODEL_DIR = "./models"
-MODEL_PATH = os.path.join(MODEL_DIR, MODEL_FILENAME)
-
-# Ensure the model directory exists
-os.makedirs(MODEL_DIR, exist_ok=True)
-
-# Download the model if not already present
-if not os.path.exists(MODEL_PATH):
-    print("Downloading the model... This may take some time.")
-    try:
-        hf_hub_download(
-            repo_id=MODEL_REPO,
-            filename=MODEL_FILENAME,
-            local_dir=MODEL_DIR
         )
-        print("Model downloaded successfully!")
-    except Exception as e:
-        print(f"Error downloading model: {e}")
-        exit(1)
-
-# Ensure model is fully downloaded before using
-while not os.path.exists(MODEL_PATH):
-    print("Waiting for model to be available...")
-    time.sleep(5)
-
-# Function to handle AI responses
-@spaces.GPU
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    model,
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-    top_k,
-    repeat_penalty,
-):
-    chat_template = MessagesFormatterType.GEMMA_2
-
-    llm = Llama(
-        model_path=MODEL_PATH,
-        flash_attn=True,
-        n_gpu_layers=81,
-        n_batch=1024,
-        n_ctx=8192,
-    )
-    provider = LlamaCppPythonProvider(llm)
-
-    agent = LlamaCppAgent(
-        provider,
-        system_prompt=f"{system_message}",
-        predefined_messages_formatter_type=chat_template,
-        debug_output=True
-    )
-
-    settings = provider.get_provider_default_settings()
-    settings.temperature = temperature
-    settings.top_k = top_k
-    settings.top_p = top_p
-    settings.max_tokens = max_tokens
-    settings.repeat_penalty = repeat_penalty
-    settings.stream = True
-
-    messages = BasicChatHistory()
-
-    for msn in history:
-        user = {'role': Roles.user, 'content': msn[0]}
-        assistant = {'role': Roles.assistant, 'content': msn[1]}
-        messages.add_message(user)
-        messages.add_message(assistant)
-
-    stream = agent.get_chat_response(
-        message,
-        llm_sampling_settings=settings,
-        chat_history=messages,
-        returns_streaming_generator=True,
-        print_output=False
-    )
-
-    outputs = ""
-    for output in stream:
-        outputs += output
-        yield outputs
-
-# Function to create Gradio interface
-def create_interface(model_name, description):
-    return gr.ChatInterface(
-        respond,
-        additional_inputs=[
-            gr.Textbox(value=model_name, label="Model", interactive=False),
-            gr.Textbox(
-                value="You are a helpful assistant. You should think step-by-step. You should answer all questions by prioritizing the principle of openness",
-                label="System message"
-            ),
-            gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens"),
-            gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-            gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
-            gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
-            gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
-        ],
-        retry_btn="Retry",
-        undo_btn="Undo",
-        clear_btn="Clear",
-        submit_btn="Send",
-        title=f"{model_name}",
-        description=description,
-        chatbot=gr.Chatbot(scale=1, likeable=False, show_copy_button=True)
-    )
-
-# Set interface description
-description = """<p align="center">Viper-Coder-32B-Elite13-GGUF</p>"""
-interface = create_interface(MODEL_REPO, description)
-
-# Create Gradio Blocks app
-demo = gr.Blocks()
-
-with demo:
-    interface.render()

 if __name__ == "__main__":
-    demo.launch(share=True)
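
For reference, the removed code's core pattern was: fetch a GGUF checkpoint from the Hub, load it with llama-cpp-python, and stream tokens back. A minimal sketch of that pattern (not the committed code), assuming only `huggingface_hub` and `llama-cpp-python` are installed; note that `hf_hub_download` already blocks until the file is fully on disk and returns its local path, so the existence-polling loop above is redundant:

```python
# Minimal sketch of the removed app's download-and-stream pattern.
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# hf_hub_download blocks until the download finishes and
# returns the local path, so no readiness polling is needed.
model_path = hf_hub_download(
    repo_id="prithivMLmods/Sombrero-QwQ-32B-Elite10-Fixed-Q2_K-GGUF",
    filename="sombrero-qwq-32b-elite10-fixed-q2_k.gguf",
)

llm = Llama(model_path=model_path, n_ctx=8192)

# Stream completion chunks as they are generated.
for chunk in llm.create_completion("Hello!", max_tokens=64, stream=True):
    print(chunk["choices"][0]["text"], end="", flush=True)
```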
 import gradio as gr
+import torch
+import numpy as np
+import cv2
+import matplotlib.pyplot as plt
+import random
+import time
+from PIL import Image
+from threading import Thread
+from transformers import AutoProcessor, Qwen2_5_VLForConditionalGeneration, TextIteratorStreamer
+from transformers.image_utils import load_image
+
+#####################################
+# 1. Load Qwen2.5-VL Model & Processor
+#####################################
+MODEL_ID = "Qwen/Qwen2.5-VL-7B-Instruct"  # or "Qwen/Qwen2.5-VL-3B-Instruct"
+
+processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained(
+    MODEL_ID,
+    trust_remote_code=True,
+    torch_dtype=torch.bfloat16
+).to("cuda")
+model.eval()
+
+#####################################
+# 2. Helper Function: Downsample Video
+#####################################
+def downsample_video(video_path, num_frames=10):
+    """
+    Downsamples the video file to `num_frames` evenly spaced frames.
+    Each frame is converted to a PIL Image along with its timestamp.
+    """
+    vidcap = cv2.VideoCapture(video_path)
+    total_frames = int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT))
+    fps = vidcap.get(cv2.CAP_PROP_FPS)
+    frames = []
+    if total_frames <= 0 or fps <= 0:
+        vidcap.release()
+        return frames
+
+    frame_indices = np.linspace(0, total_frames - 1, num_frames, dtype=int)
+    for i in frame_indices:
+        vidcap.set(cv2.CAP_PROP_POS_FRAMES, i)
+        success, image = vidcap.read()
+        if success:
+            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+            pil_image = Image.fromarray(image)
+            timestamp = round(i / fps, 2)
+            frames.append((pil_image, timestamp))
+    vidcap.release()
+    return frames
+
+#####################################
+# 3. The Inference Function
+#####################################
+def video_inference(video_file, duration):
+    """
+    - Takes a recorded video file and a chosen duration (string).
+    - Downsamples the video, passes frames to Qwen2.5-VL for inference.
+    - Returns model-generated text + a dummy bar chart as example analytics.
+    """
+    if video_file is None:
+        return "No video provided.", None
+
+    # 3.1: Downsample the recorded video
+    frames = downsample_video(video_file)
+    if not frames:
+        return "Could not read frames from video.", None
+
+    # 3.2: Construct Qwen2.5-VL prompt
+    # We'll do a simple prompt: "Please describe what's happening in this video."
+    messages = [
+        {
+            "role": "user",
+            "content": [{"type": "text", "text": "Please describe what's happening in this video."}]
+        }
+    ]
+
+    # Add frames (with timestamp) to the messages
+    for (image, ts) in frames:
+        messages[0]["content"].append({"type": "text", "text": f"Frame at {ts} seconds:"})
+        messages[0]["content"].append({"type": "image", "image": image})
+
+    # Prepare final prompt for the model
+    prompt = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+
+    # Qwen requires images in the same order. We'll just collect them:
+    frame_images = [img for (img, _) in frames]
+
+    inputs = processor(
+        text=[prompt],
+        images=frame_images,
+        return_tensors="pt",
+        padding=True
+    ).to("cuda")
+
+    # 3.3: Generate text output
+    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
+    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=512)
+
+    # We'll run generation in a thread to simulate streaming.
+    thread = Thread(target=model.generate, kwargs=generation_kwargs)
+    thread.start()
+
+    # Collect the streamed text
+    generated_text = ""
+    for new_text in streamer:
+        generated_text += new_text
+        # Sleep briefly to yield control
+        time.sleep(0.01)
+
+    # 3.4: Dummy bar chart for demonstration
+    fig, ax = plt.subplots()
+    categories = ["Category A", "Category B", "Category C"]
+    values = [random.randint(1, 10) for _ in categories]
+    ax.bar(categories, values, color=["#4B0082", "#9370DB", "#4B0082"])
+    ax.set_title("Example Analytics Chart")
+    ax.set_ylabel("Value")
+    ax.set_xlabel("Category")
+
+    # Return text + figure
+    return generated_text, fig
+
+#####################################
+# 4. Build a Professional Gradio UI
+#####################################
+def build_app():
+    with gr.Blocks() as demo:
+        gr.Markdown("""
+        # **Qwen2.5-VL-7B-Instruct Live Video Analysis**
+        Record your webcam for a chosen duration, then click **Stop** to finalize.
+        After that, click **Analyze** to run Qwen2.5-VL and see textual + chart outputs.
+        """)
+
+        with gr.Row():
+            with gr.Column():
+                duration = gr.Radio(
+                    choices=["5", "10", "20", "30"],
+                    value="5",
+                    label="Suggested Recording Duration (seconds)",
+                    info="Select how long you plan to record before pressing Stop."
+                )
+                video = gr.Video(
+                    source="webcam",
+                    format="mp4",
+                    label="Webcam Recording (press the Record button, then Stop)"
+                )
+                analyze_btn = gr.Button("Analyze", variant="primary")
+            with gr.Column():
+                output_text = gr.Textbox(label="Model Output")
+                output_plot = gr.Plot(label="Analytics Chart")
+
+        analyze_btn.click(
+            fn=video_inference,
+            inputs=[video, duration],
+            outputs=[output_text, output_plot]
         )
+
+    return demo

 if __name__ == "__main__":
+    app = build_app()
+    app.launch(debug=True)
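
A quick smoke test for the new pipeline outside the Gradio UI, sketched under stated assumptions: `sample.mp4` is a placeholder path for a local clip, and a CUDA device is available since the model and inputs are moved to `"cuda"`. Note that `duration` is forwarded by the UI but serves only as an informational hint; `video_inference` never reads it.

```python
# Smoke test for the helpers above; "sample.mp4" is a placeholder path.
frames = downsample_video("sample.mp4", num_frames=4)
for img, ts in frames:
    print(f"frame at {ts:.2f}s, size={img.size}")  # PIL size is (width, height)

# Full pass: Qwen2.5-VL description plus the demo bar chart.
text, fig = video_inference("sample.mp4", duration="5")
print(text)
fig.savefig("analytics.png")
```

One compatibility note: `gr.Video(source="webcam")` matches the Gradio 3.x API; if the Space is pinned to Gradio 4.x, that argument was renamed to `sources=["webcam"]` and the call would need updating.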