hjaved202 committed on
Commit a350173 · verified · 1 Parent(s): 8d0b512

Upload folder using huggingface_hub
.DS_Store ADDED
Binary file (6.15 kB)
 
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ test_output.wav filter=lfs diff=lfs merge=lfs -text
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ -----BEGIN CERTIFICATE-----
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+ -----END CERTIFICATE-----
README.md CHANGED
@@ -1,12 +1,6 @@
  ---
- title: Gencent
- emoji: 🐢
- colorFrom: indigo
- colorTo: blue
+ title: gencent
+ app_file: app/main.py
  sdk: gradio
- sdk_version: 5.15.0
- app_file: app.py
- pinned: false
+ sdk_version: 5.9.1
  ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app/.DS_Store ADDED
Binary file (6.15 kB)
 
app/.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ (contents identical to .gradio/certificate.pem above: the same 31-line ISRG Root X1 certificate)
app/__init__.py ADDED
File without changes
app/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (146 Bytes)
 
app/config.py ADDED
@@ -0,0 +1,10 @@
+ import os
+ from dotenv import load_dotenv
+ import torch
+
+ load_dotenv()
+
+ class Config:
+     SECRET_KEY = os.getenv('SECRET_KEY', 'your-secret-key')
+     MODEL_PATH = os.getenv('MODEL_PATH', 'mistralai/Mistral-7B-v0.1')
+     DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
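
For context, a minimal sketch of how this Config might be consumed elsewhere in the app; the `app.config` import path and the printed values are assumptions based on the defaults above, not code from this commit:

# Hypothetical usage sketch; assumes the repo root is on sys.path.
from app.config import Config

print(Config.DEVICE)      # 'cuda' on a CUDA machine, otherwise 'cpu'
print(Config.MODEL_PATH)  # 'mistralai/Mistral-7B-v0.1' unless MODEL_PATH is set in .env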
app/main.py ADDED
@@ -0,0 +1,80 @@
+ import gradio as gr
+ import numpy as np
+ from utils.speech_to_text import SpeechRecognizer
+ from utils.text_to_speech import TextToSpeech
+
+ class VoiceChatApp:
+     def __init__(self):
+         self.speech_recognizer = SpeechRecognizer()
+         self.tts_engine = TextToSpeech()
+         self.welcome_message = "Hello, this is GenCent AI calling. This is a follow-up call. Am I speaking to Alex?"
+         self.chat_history = []
+
+     async def welcome_audio(self):
+         """Generate and play the welcome message"""
+         sample_rate, audio_data = await self.tts_engine.synthesize(self.welcome_message)
+         audio_response = (sample_rate, audio_data.astype(np.int16))
+         self.chat_history.append((None, self.welcome_message))
+         return self.chat_history, audio_response
+
+     async def process_audio(self, audio, history):
+         """Process user audio input and generate response"""
+         if audio is None:
+             return history, (24000, np.zeros(24000, dtype=np.int16)), None
+
+         # Speech to text
+         text_input = await self.speech_recognizer.transcribe(audio)
+         if not text_input:
+             return history, (24000, np.zeros(24000, dtype=np.int16)), None
+
+         # Generate response
+         response = "This is a test response. Please confirm if you can hear this clearly."
+
+         # Text to speech
+         sample_rate, audio_data = await self.tts_engine.synthesize(response)
+         audio_response = (sample_rate, audio_data.astype(np.int16))
+
+         # Update chat history
+         history.append((text_input, response))
+
+         return history, audio_response, None
+
+     def launch(self):
+         """Launch the Gradio interface"""
+         with gr.Blocks(title="Voice-Enabled Chatbot") as interface:
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     chatbot = gr.Chatbot(label="Chat History", height=400)
+                     audio_input = gr.Audio(sources=["microphone"], type="numpy",
+                                            label="Speak Here", interactive=True)
+                     audio_output = gr.Audio(label="Assistant Response", autoplay=True, elem_classes="compact-audio")
+
+             # Initial welcome message
+             interface.load(
+                 fn=self.welcome_audio,
+                 outputs=[chatbot, audio_output]
+             )
+
+             # Audio processing chain
+             audio_input.change(
+                 fn=self.process_audio,
+                 inputs=[audio_input, chatbot],
+                 outputs=[chatbot, audio_output, audio_input],
+                 api_name="process_audio"
+             ).then(
+                 lambda: None,
+                 None,
+                 audio_input,
+                 queue=False
+             )
+
+         interface.launch(
+             server_name="127.0.0.1",
+             server_port=7860,
+             share=True,
+             debug=True
+         )
+
+ if __name__ == "__main__":
+     app = VoiceChatApp()
+     app.launch()
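
Both callbacks above exchange audio with gr.Audio as a `(sample_rate, numpy_array)` tuple. A minimal sketch of constructing such a value by hand; the 440 Hz tone is an invented stand-in for real TTS output:

# Build one second of int16 audio in the (sample_rate, samples) tuple format
# that gr.Audio(type="numpy") produces and consumes.
import numpy as np

sample_rate = 24000
t = np.linspace(0, 1.0, sample_rate, endpoint=False)
samples = (0.3 * np.iinfo(np.int16).max * np.sin(2 * np.pi * 440 * t)).astype(np.int16)
audio_value = (sample_rate, samples)  # same shape the callbacks above return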
app/main_legacy.py ADDED
@@ -0,0 +1,187 @@
+ import gradio as gr
+ from utils.speech_to_text import SpeechRecognizer
+ from utils.text_to_speech import TextToSpeech
+ import numpy as np
+ from utils.med42 import Med42
+ import time
+
+ class VoiceChatApp:
+     def __init__(self):
+         self.speech_recognizer = SpeechRecognizer()
+         self.tts_engine = TextToSpeech()
+         self.welcome_message = "Hello, this is GenCent AI calling. This is a follow-up call. Am I speaking to Aleks?"
+         self.chat_history = []  # Maintain persistent chat history
+
+     async def welcome_audio(self):
+         """Generate and play the welcome message."""
+         tts_output = await self.tts_engine.synthesize(self.welcome_message)
+
+         # Extract audio data if the TTS returns a tuple (sample_rate, data)
+         if isinstance(tts_output, tuple) and len(tts_output) == 2:
+             _, audio_data = tts_output
+         else:
+             audio_data = tts_output
+
+         audio_response = self._normalize_audio(audio_data, 24000)
+         self.chat_history.append({"role": "assistant", "content": f"🤖 {self.welcome_message}"})
+         return self.chat_history, audio_response  # Return chat history & audio
+
+     async def process_audio(self, audio, state):
+         """Process user audio, generate a response, and return updated chat history."""
+         if audio is None:
+             return state, (24000, np.zeros((24000,), dtype=np.int16)), None
+
+         # Convert speech to text
+         text_input = await self.speech_recognizer.transcribe(audio)
+         if not text_input:
+             print("Speech recogniser returned no text")
+             return state, (24000, np.zeros((24000,), dtype=np.int16)), None
+
+         # Generate response (simplified for debugging)
+         response = "This is a test response. Please confirm if you can hear this."
+
+         print(f"TTS Input Text: '{response}'")  # Debug print
+
+         try:
+             # Attempt TTS synthesis
+             tts_output = await self.tts_engine.synthesize(response)
+             print(f"Raw TTS Output Type: {type(tts_output)}")  # Debug type
+
+             # Extract audio data
+             if isinstance(tts_output, tuple):
+                 sample_rate, audio_data = tts_output
+
+                 print(f"Raw TTS data type: {type(audio_data)}")  # Check container type
+                 print(f"Raw TTS dtype: {audio_data.dtype}")  # Check numerical type
+                 print(f"Raw TTS min/max: {np.min(audio_data)}, {np.max(audio_data)}")  # Verify range
+
+                 print(f"Sample rate: {sample_rate}, Audio shape: {audio_data.shape}")
+             else:
+                 audio_data = tts_output
+
+             # Check for zeros in audio data
+             if np.all(audio_data == 0):
+                 print("Warning: TTS generated silent audio!")
+         except Exception as e:
+             print(f"TTS Synthesis Error: {e}")
+             tts_output = (24000, np.zeros((24000,), dtype=np.float32))  # fall back to silence so tts_output is always bound below
+
+         # Capture both sample rate and audio data
+         if isinstance(tts_output, tuple) and len(tts_output) == 2:
+             sample_rate, audio_data = tts_output
+         else:
+             sample_rate = 24000  # Fallback
+             audio_data = tts_output
+
+         # Normalize while preserving sample rate
+         audio_response = self._normalize_audio(audio_data, sample_rate)
+
+         # Update chat history and return
+         messages = [
+             {"role": "user", "content": f"🎤 User said: {text_input}"},
+             {"role": "assistant", "content": f"🤖 {response}"}
+         ]
+         state = state.copy() if state else []
+         state.extend(messages)
+         print(f"Final audio response - SR: {audio_response[0]}, Shape: {audio_response[1].shape}, Dtype: {audio_response[1].dtype}")
+         print(f"Final audio peaks: {np.max(np.abs(audio_response[1]))}")  # Should be > 0
+         audio_response = (audio_response[0], audio_response[1], str(time.time()))
+
+         return state, audio_response, None
+
+     def _normalize_audio(self, audio_array, sample_rate):
+         """Final format adjustment for Gradio compatibility"""
+         # Convert to int16, the dtype gr.Audio expects
+         if audio_array.dtype != np.int16:
+             audio_array = audio_array.astype(np.int16)
+
+         # Ensure 2D shape for mono audio (samples, 1)
+         if audio_array.ndim == 1:
+             audio_array = audio_array.reshape(-1, 1)
+
+         # Convert back to 1D if mono
+         if audio_array.ndim == 2 and audio_array.shape[1] == 1:
+             audio_array = audio_array.flatten()  # Shape becomes (N,)
+
+         return (sample_rate, audio_array)
+
+     def launch(self):
+         """Launch the Gradio interface with audio refresh workaround"""
+         with gr.Blocks(title="Voice-Enabled Chatbot", css=".autoplay-audio { display: none }") as interface:
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     chatbot = gr.Chatbot(
+                         label="Chat History",
+                         type="messages",
+                         height=400
+                     )
+                     audio_input = gr.Audio(
+                         sources=["microphone"],
+                         type="numpy",
+                         label="Speak Here",
+                         interactive=True  # Ensure it stays interactive
+                     )
+                     audio_output = gr.Audio(
+                         label="Assistant Response",
+                         autoplay=True,
+                         format="wav",  # Explicit format
+                         elem_id="audio-output"  # Add ID for JS control
+                     )
+
+             # Add JavaScript to force audio reload (js= in current Gradio; older releases used _js=)
+             interface.load(
+                 None,
+                 None,
+                 None,
+                 js="""
+                 () => {
+                     function reloadAudio() {
+                         const audio = document.querySelector('#audio-output audio');
+                         if (audio) {
+                             const source = audio.querySelector('source');
+                             if (source) {
+                                 source.src += '#' + Date.now();
+                                 audio.load();
+                             }
+                         }
+                     }
+                     setInterval(reloadAudio, 500);
+                 }
+                 """
+             )
+
+             # State for managing chat history
+             state = gr.State([])
+
+             # On page load, play welcome message and show initial chat history
+             interface.load(
+                 fn=self.welcome_audio,
+                 outputs=[chatbot, audio_output]
+             )
+
+             # When user speaks, process audio and update the chat
+             audio_input.change(
+                 fn=self.process_audio,
+                 inputs=[audio_input, state],
+                 outputs=[chatbot, audio_output, audio_input],
+                 api_name="process_audio"
+             ).then(
+                 lambda: None,
+                 None,
+                 audio_input,
+                 queue=False
+             )
+
+         interface.launch(
+             server_name="127.0.0.1",
+             server_port=7860,
+             share=True,
+             debug=True
+         )
+
+ if __name__ == "__main__":
+     app = VoiceChatApp()
+     app.launch()
app/scratch.py ADDED
@@ -0,0 +1,17 @@
+ import asyncio
+ from utils.text_to_speech import TextToSpeech  # Update with your actual module path
+ import soundfile as sf
+
+ async def main():
+     tts = TextToSpeech()
+     text = "This is a test voice output"
+
+     # Get audio data
+     sample_rate, audio_data = await tts.synthesize(text)
+
+     # Save to file (WAV format)
+     sf.write("test_output.wav", audio_data, sample_rate)
+     print("Saved test_output.wav")
+
+ if __name__ == "__main__":
+     asyncio.run(main())
app/utils/__init__.py ADDED
File without changes
app/utils/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (152 Bytes)
 
app/utils/__pycache__/chat_model.cpython-312.pyc ADDED
Binary file (1.61 kB)
 
app/utils/__pycache__/med42.cpython-312.pyc ADDED
Binary file (5.42 kB)
 
app/utils/__pycache__/speech_to_text.cpython-312.pyc ADDED
Binary file (5.38 kB)
 
app/utils/__pycache__/text_to_speech.cpython-312.pyc ADDED
Binary file (1.58 kB)
 
app/utils/chat_model.py ADDED
@@ -0,0 +1,22 @@
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
+
+ class ChatModel:
+     def __init__(self):
+         # NB: the tokenizer is loaded from the Instruct checkpoint while the model is the base checkpoint
+         self.tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1", token=True)
+         self.model = AutoModelForCausalLM.from_pretrained(
+             "mistralai/Mistral-7B-v0.1",
+             torch_dtype=torch.float16,
+             token=True
+         )
+
+     async def generate_response(self, input_text):
+         inputs = self.tokenizer(input_text, return_tensors="pt").to(self.model.device)
+         outputs = self.model.generate(
+             **inputs,
+             max_length=100,
+             num_return_sequences=1,
+             do_sample=True,  # required for temperature to have any effect
+             temperature=0.7
+         )
+         response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+         return response
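
A hedged smoke test for ChatModel, not part of this commit; it assumes a Hugging Face token with access to the Mistral weights and enough memory for the roughly 14 GB download:

# Hypothetical smoke test; requires HF authentication and the model weights.
import asyncio
from utils.chat_model import ChatModel

async def demo():
    chat = ChatModel()
    print(await chat.generate_response("Hello, can you hear me?"))

if __name__ == "__main__":
    asyncio.run(demo())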
app/utils/med42.py ADDED
@@ -0,0 +1,124 @@
+ # Import relevant packages
+ import requests
+ import yaml
+ from typing import List, Union
+ from pathlib import Path
+
+
+ class Med42():
+     """
+     A class for interacting with the Med42 API
+     """
+     def __init__(self, base_endpoint: str = "https://dev-openai-api.med42.ai/",
+                  prompt_filepath: Union[str, Path, None] = "./prompts.yaml"):
+         """
+         Initialise the Med42 API caller object
+
+         Args:
+             base_endpoint: URL where the API is hosted. Defaults to "https://dev-openai-api.med42.ai/".
+             prompt_filepath: Filepath to a prompts catalogue in yaml format. Defaults to "./prompts.yaml".
+         """
+         self.base_endpoint = base_endpoint
+         self.docs_endpoint = self.base_endpoint + "docs"
+         # self.available_models = self._list_model_ids()
+         self.prompt_catalogue = load_yaml(prompt_filepath) if prompt_filepath is not None else None
+
+     def __repr__(self):
+         """
+         Returns: Description of this object class, including a link to the API docs page
+         """
+         return f"Med42 API calls available, see docs at {self.docs_endpoint}"
+
+     def list_models(self) -> List[dict]:
+         """
+         Pull information on all the models currently hosted on the API
+
+         Returns:
+             List of available models and their details
+         """
+         try:
+             response = requests.get(self.base_endpoint + "v1/models")
+             response.raise_for_status()  # raise an exception for HTTP errors
+             return response.json()['data']
+         except requests.RequestException as e:
+             print("Error: ", e)
+
+     def _list_model_ids(self) -> List[str]:
+         """
+         Returns: List of currently available model ids
+         """
+         return [model_entry['id'] for model_entry in self.list_models()]
+
+     def _update_prompt_catalogue(self, prompt_filepath: Union[str, Path] = "./prompts.yaml") -> dict:
+         """
+         Read in a catalogue of prompts (recorded in a yaml file) and update the attribute
+
+         Args:
+             prompt_filepath: filepath to the catalogue of prompts to be read. Defaults to "./prompts.yaml"
+         """
+         self.prompt_catalogue = load_yaml(prompt_filepath)
+         print(f"Prompt catalogue updated to the prompts from {prompt_filepath}")
+
+     def chat_completion(self, system_instruct: str, user_instruct: str, llm_model: str = "Llama3-Med42-70B-32k",
+                         temp: float = 0.7, generate_log: bool = False) -> str:
+         """
+         Uses the chat completion endpoint
+
+         Args:
+             system_instruct: System instruction
+             user_instruct: User instruction
+             llm_model: Language model to use. Defaults to "Llama3-Med42-70B-32k"
+             temp: Temperature parameter for generating responses. Defaults to 0.7
+             generate_log: Whether to log the response or not. Defaults to False
+
+         Returns:
+             LLM output if generate_log is False, otherwise a tuple of (log dictionary, LLM output)
+         """
+         # Temporary stub while the Med42 endpoint is unavailable; everything below is unreachable
+         return "hardcoded response because Med42 is not working I don't think. Just making it longer"
+         data = {"model": llm_model, "messages": [{"role": "system", "content": system_instruct},
+                                                  {"role": "user", "content": f"{user_instruct}"}], "temperature": temp}
+
+         headers = {"Content-Type": "application/json"}
+
+         try:
+             response = requests.post(self.base_endpoint + "v1/chat/completions", json=data, headers=headers)
+             response.raise_for_status()
+             if not generate_log:
+                 return response.json()["choices"][0]["message"]["content"]  # LLM chat completion output
+             else:
+                 # Logged response plus chat completion output (returned as a tuple)
+                 return {"system_instruction": system_instruct, "user_instruction": user_instruct,
+                         "llm_call_response": response.json()}, response.json()["choices"][0]["message"]["content"]
+         except requests.RequestException as e:
+             print("Error: ", e)
+
+
+ # Util functions
+ def load_yaml(yaml_filepath: str | Path) -> dict:
+     """
+     Read in YAML files
+
+     Args:
+         yaml_filepath: filepath to the yaml file to be read
+
+     Returns:
+         Contents of the YAML file
+     """
+     try:
+         with open(yaml_filepath, 'r') as file:
+             data = yaml.safe_load(file)
+         return data
+     except FileNotFoundError:
+         raise FileNotFoundError(f"File not found: {yaml_filepath}")
+     except IOError as e:
+         raise IOError(f"Error reading file: {e}")
+     except yaml.YAMLError as e:
+         raise ValueError(f"Error parsing YAML file: {e}")
+
+
+ if __name__ == "__main__":
+     llm_caller = Med42()
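
A minimal usage sketch for the wrapper as committed, not part of this commit. Note that chat_completion currently short-circuits to the hardcoded string before any HTTP call, so no endpoint access is needed to run it; the instruction strings are invented:

# Usage sketch; prompt_filepath=None skips loading the (absent) prompts.yaml.
llm = Med42(prompt_filepath=None)
reply = llm.chat_completion(
    system_instruct="You are a helpful clinical assistant.",
    user_instruct="Confirm the purpose of this follow-up call.",
)
print(reply)  # prints the hardcoded placeholder until the stub return is removed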
app/utils/speech_to_text.py ADDED
@@ -0,0 +1,92 @@
+ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline
+ import numpy as np
+ import torch
+ import librosa
+
+ class ASRConfig:
+     """Configuration class for ASR transcription."""
+     def __init__(
+         self,
+         model_id="openai/whisper-large-v2",
+         language="english",
+         sampling_rate=16000,
+         device="cuda" if torch.cuda.is_available() else "cpu",
+         torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+     ):
+         self.model_id = model_id
+         self.language = language
+         self.sampling_rate = sampling_rate
+         self.device = device
+         self.torch_dtype = torch_dtype
+
+ class SpeechRecognizer:
+     def __init__(self, config: ASRConfig = None):
+         self.config = config if config else ASRConfig()
+         print(f"Using ASR configuration: {self.config.__dict__}")
+         self._setup_model()
+
+     def _setup_model(self):
+         """Initialize the Whisper model and processor."""
+         try:
+             self.model = AutoModelForSpeechSeq2Seq.from_pretrained(
+                 self.config.model_id,
+                 torch_dtype=self.config.torch_dtype,
+                 use_safetensors=True,
+             ).to(self.config.device)
+
+             self.processor = AutoProcessor.from_pretrained(self.config.model_id)
+             self.pipe = pipeline(
+                 "automatic-speech-recognition",
+                 model=self.model,
+                 tokenizer=self.processor.tokenizer,
+                 feature_extractor=self.processor.feature_extractor,
+                 torch_dtype=self.config.torch_dtype,
+                 device=self.config.device,
+             )
+         except Exception as e:
+             raise RuntimeError(f"Failed to set up Whisper model: {str(e)}")
+
+     async def transcribe(self, audio: tuple, prompt: str = None) -> str:
+         """
+         Transcribes the provided audio using the Whisper pipeline.
+
+         Args:
+             audio (tuple): A tuple containing (sample_rate, audio_array).
+             prompt (str): An optional text prompt to guide transcription.
+
+         Returns:
+             str: Transcription of the audio.
+         """
+         if not audio or len(audio) != 2:
+             raise ValueError("Invalid audio input. Expected a tuple (sample_rate, audio_array).")
+
+         try:
+             # Extract the raw audio data (audio_array) from the input tuple
+             sample_rate, audio_array = audio
+
+             # Ensure the audio is a numpy array
+             if not isinstance(audio_array, np.ndarray):
+                 raise TypeError(f"Expected numpy.ndarray for audio data, got {type(audio_array)}")
+
+             # Convert integer PCM to float32 in [-1, 1]; np.iinfo only applies to integer dtypes
+             if np.issubdtype(audio_array.dtype, np.integer):
+                 audio_array = audio_array.astype(np.float32) / np.iinfo(audio_array.dtype).max
+             elif audio_array.dtype != np.float32:
+                 audio_array = audio_array.astype(np.float32)
+
+             # Resample audio if the sample rate differs from the configured rate
+             if sample_rate != self.config.sampling_rate:
+                 audio_array = librosa.resample(audio_array, orig_sr=sample_rate, target_sr=self.config.sampling_rate)
+
+             # Prepare generate_kwargs for the pipeline
+             generate_kwargs = {}
+             if self.config.language:
+                 generate_kwargs["language"] = self.config.language
+             if prompt:
+                 prompt_ids = self.processor.get_prompt_ids(prompt, return_tensors="pt").to(self.config.device)
+                 generate_kwargs["prompt_ids"] = prompt_ids
+
+             # Run transcription through the pipeline
+             result = self.pipe(audio_array, generate_kwargs=generate_kwargs)
+             return result["text"].strip()
+         except Exception as e:
+             raise RuntimeError(f"Transcription failed: {str(e)}")
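
A hedged smoke test for SpeechRecognizer, not part of this commit; it assumes the whisper-large-v2 weights can be downloaded, and a one-second silent clip stands in for real microphone input:

# Hypothetical smoke test; transcribing silence exercises the full pipeline.
import asyncio
import numpy as np
from utils.speech_to_text import SpeechRecognizer

async def demo():
    recognizer = SpeechRecognizer()  # downloads whisper-large-v2 on first run
    silence = (16000, np.zeros(16000, dtype=np.int16))  # (sample_rate, samples)
    print(await recognizer.transcribe(silence))

if __name__ == "__main__":
    asyncio.run(demo())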
app/utils/text_to_speech.py ADDED
@@ -0,0 +1,24 @@
+ import edge_tts
+ import io
+ import numpy as np
+ from pydub import AudioSegment  # Install with: pip install pydub (mp3 decoding also needs ffmpeg on PATH)
+
+ class TextToSpeech:
+     def __init__(self, voice="en-US-AriaNeural"):
+         self.voice = voice
+
+     async def synthesize(self, text):
+         communicate = edge_tts.Communicate(text, self.voice)
+
+         # Collect raw audio bytes
+         audio_bytes = bytearray()
+         async for chunk in communicate.stream():
+             if chunk["type"] == "audio":
+                 audio_bytes.extend(chunk["data"])
+
+         # Convert to numpy array using pydub
+         audio = AudioSegment.from_file(io.BytesIO(audio_bytes), format="mp3")
+         samples = np.array(audio.get_array_of_samples())
+         sample_rate = audio.frame_rate
+
+         return (sample_rate, samples)
environment.yml ADDED
@@ -0,0 +1,211 @@
+ name: gencent_env
+ channels:
+   - pytorch
+   - conda-forge
+   - defaults
+ dependencies:
+   - aiohappyeyeballs=2.4.4=py312hca03da5_0
+   - aiohttp=3.11.10=py312h80987f9_0
+   - aiosignal=1.2.0=pyhd3eb1b0_0
+   - altair=5.0.1=py312hca03da5_0
+   - arrow-cpp=16.1.0=hbc20fb2_0
+   - attrs=24.2.0=py312hca03da5_0
+   - aws-c-auth=0.6.19=h80987f9_0
+   - aws-c-cal=0.5.20=h80987f9_0
+   - aws-c-common=0.8.5=h80987f9_0
+   - aws-c-compression=0.2.16=h80987f9_0
+   - aws-c-event-stream=0.2.15=h313beb8_0
+   - aws-c-http=0.6.25=h80987f9_0
+   - aws-c-io=0.13.10=h80987f9_0
+   - aws-c-mqtt=0.7.13=h80987f9_0
+   - aws-c-s3=0.1.51=h80987f9_0
+   - aws-c-sdkutils=0.1.6=h80987f9_0
+   - aws-checksums=0.1.13=h80987f9_0
+   - aws-crt-cpp=0.18.16=h313beb8_0
+   - aws-sdk-cpp=1.10.55=h313beb8_0
+   - blas=1.0=openblas
+   - blinker=1.6.2=py312hca03da5_0
+   - boost-cpp=1.82.0=h48ca7d4_2
+   - bottleneck=1.4.2=py312ha86b861_0
+   - brotli-python=1.0.9=py312h313beb8_8
+   - bzip2=1.0.8=h80987f9_6
+   - c-ares=1.19.1=h80987f9_0
+   - ca-certificates=2024.11.26=hca03da5_0
+   - cachetools=5.3.3=py312hca03da5_0
+   - certifi=2024.12.14=py312hca03da5_0
+   - cffi=1.17.1=py312h3eb5a62_0
+   - charset-normalizer=3.3.2=pyhd3eb1b0_0
+   - click=8.1.7=py312hca03da5_0
+   - datasets=3.2.0=pyhd8ed1ab_0
+   - dill=0.3.8=py312hca03da5_0
+   - filelock=3.13.1=py312hca03da5_0
+   - freetype=2.12.1=h1192e45_0
+   - frozenlist=1.5.0=py312h80987f9_0
+   - fsspec=2024.6.1=py312hca03da5_0
+   - gettext=0.22.5=h8414b35_3
+   - gettext-tools=0.22.5=h8414b35_3
+   - gflags=2.2.2=h313beb8_1
+   - gitdb=4.0.7=pyhd3eb1b0_0
+   - gitpython=3.1.43=py312hca03da5_0
+   - glog=0.5.0=h313beb8_1
+   - huggingface_hub=0.26.5=pyhd8ed1ab_1
+   - icu=73.1=h313beb8_0
+   - idna=3.7=py312hca03da5_0
+   - jinja2=3.1.4=py312hca03da5_1
+   - jpeg=9e=h80987f9_3
+   - jsonschema=4.23.0=py312hca03da5_0
+   - jsonschema-specifications=2023.7.1=py312hca03da5_0
+   - krb5=1.20.1=hf3e1bf2_1
+   - lame=3.100=h1a28f6b_0
+   - lcms2=2.12=hba8e193_0
+   - lerc=3.0=hc377ac9_0
+   - libabseil=20240116.2=cxx17_h313beb8_0
+   - libasprintf=0.22.5=h8414b35_3
+   - libasprintf-devel=0.22.5=h8414b35_3
+   - libboost=1.82.0=h0bc93f9_2
+   - libbrotlicommon=1.0.9=h80987f9_8
+   - libbrotlidec=1.0.9=h80987f9_8
+   - libbrotlienc=1.0.9=h80987f9_8
+   - libcurl=8.9.1=h3e2b118_0
+   - libcxx=19.1.6=ha82da77_1
+   - libdeflate=1.17=h80987f9_1
+   - libedit=3.1.20230828=h80987f9_0
+   - libev=4.33=h1a28f6b_1
+   - libevent=2.1.12=h02f6b3c_1
+   - libexpat=2.6.3=hf9b8971_0
+   - libffi=3.4.4=hca03da5_1
+   - libflac=1.4.3=hb765f3a_0
+   - libgettextpo=0.22.5=h8414b35_3
+   - libgettextpo-devel=0.22.5=h8414b35_3
+   - libgfortran=5.0.0=11_3_0_hca03da5_28
+   - libgfortran5=11.3.0=h009349e_28
+   - libgrpc=1.62.2=h62f6fdd_0
+   - libiconv=1.17=h0d3ecfb_2
+   - libintl=0.22.5=h8414b35_3
+   - libintl-devel=0.22.5=h8414b35_3
+   - libnghttp2=1.57.0=h62f6fdd_0
+   - libogg=1.3.5=h1a28f6b_1
+   - libopenblas=0.3.21=h269037a_0
+   - libopus=1.3.1=h80987f9_1
+   - libpng=1.6.39=h80987f9_0
+   - libprotobuf=4.25.3=h514c7bf_0
+   - libsndfile=1.2.2=h9739721_1
+   - libsqlite=3.46.0=hfb93653_0
+   - libssh2=1.11.0=h3e2b118_0
+   - libthrift=0.15.0=h73c2103_2
+   - libtiff=4.5.1=h313beb8_0
+   - libvorbis=1.3.7=h1a28f6b_0
+   - libwebp-base=1.3.2=h80987f9_1
+   - libzlib=1.2.13=hfb2fe0b_6
+   - llvm-openmp=14.0.6=hc6e5704_0
+   - lz4-c=1.9.4=h313beb8_1
+   - markdown-it-py=2.2.0=py312hca03da5_1
+   - markupsafe=2.1.3=py312h80987f9_0
+   - mdurl=0.1.0=py312hca03da5_0
+   - mpg123=1.32.9=hf642e45_0
+   - mpmath=1.3.0=py312hca03da5_0
+   - multidict=6.1.0=py312h80987f9_0
+   - multiprocess=0.70.15=py312hca03da5_0
+   - ncurses=6.4=h313beb8_0
+   - networkx=3.3=py312hca03da5_0
+   - numexpr=2.10.1=py312h5d9532f_0
+   - numpy=1.26.4=py312h7f4fdc5_0
+   - numpy-base=1.26.4=py312he047099_0
+   - openjpeg=2.5.2=h54b8e55_0
+   - openssl=3.4.0=h39f12f2_0
+   - orc=2.0.1=h937ddfc_0
+   - packaging=24.1=py312hca03da5_0
+   - pandas=2.2.2=py312hd77ebd4_0
+   - pillow=10.4.0=py312h80987f9_0
+   - pip=24.2=py312hca03da5_0
+   - portaudio=19.7.0=h5833ebf_0
+   - propcache=0.2.0=py312h80987f9_0
+   - protobuf=4.25.3=py312h8472c4a_0
+   - pyarrow=16.1.0=py312hd77ebd4_0
+   - pycparser=2.21=pyhd3eb1b0_0
+   - pydeck=0.8.0=py312hca03da5_2
+   - pygments=2.15.1=py312hca03da5_1
+   - pysocks=1.7.1=py312hca03da5_0
+   - pysoundfile=0.12.1=pyhd8ed1ab_3
+   - python=3.12.2=hdf0ec26_0_cpython
+   - python-dateutil=2.9.0post0=py312hca03da5_2
+   - python-dotenv=1.0.1=pyhd8ed1ab_1
+   - python-sounddevice=0.5.0=pyhd8ed1ab_0
+   - python-tzdata=2023.3=pyhd3eb1b0_0
+   - python-xxhash=2.0.2=py312h80987f9_1
+   - python_abi=3.12=5_cp312
+   - pytorch=2.5.1=py3.12_0
+   - pytz=2024.1=py312hca03da5_0
+   - pyyaml=6.0.2=py312h80987f9_0
+   - re2=2022.04.01=hc377ac9_0
+   - readline=8.2=h1a28f6b_0
+   - referencing=0.30.2=py312hca03da5_0
+   - regex=2024.9.11=py312h80987f9_0
+   - requests=2.32.3=py312hca03da5_1
+   - rich=13.7.1=py312hca03da5_0
+   - rpds-py=0.10.6=py312h2aea54e_1
+   - safetensors=0.4.5=py312h7805bc0_1
+   - setuptools=75.1.0=py312hca03da5_0
+   - six=1.16.0=pyhd3eb1b0_1
+   - smmap=4.0.0=pyhd3eb1b0_0
+   - snappy=1.2.1=h313beb8_0
+   - sqlite=3.45.3=h80987f9_0
+   - streamlit=1.38.0=py312hca03da5_0
+   - tenacity=8.2.3=py312hca03da5_0
+   - tk=8.6.14=h6ba3021_0
+   - tokenizers=0.21.0=py312hf3e4074_0
+   - toml=0.10.2=pyhd3eb1b0_0
+   - toolz=0.12.0=py312hca03da5_0
+   - tornado=6.4.1=py312h80987f9_0
+   - tqdm=4.66.5=py312h989b03a_0
+   - transformers=4.47.1=pyhd8ed1ab_0
+   - tzdata=2024b=h04d1e81_0
+   - urllib3=2.2.3=py312hca03da5_0
+   - utf8proc=2.6.1=h80987f9_1
+   - wheel=0.44.0=py312hca03da5_0
+   - xxhash=0.8.0=h1a28f6b_3
+   - xz=5.4.6=h80987f9_1
+   - yaml=0.2.5=h1a28f6b_0
+   - yarl=1.18.0=py312h80987f9_0
+   - zlib=1.2.13=hfb2fe0b_6
+   - zstd=1.5.6=hfb09047_0
+   - pip:
+     - aiofiles==23.2.1
+     - annotated-types==0.7.0
+     - anyio==4.7.0
+     - edge-tts==7.0.0
+     - fastapi==0.115.6
+     - ffmpy==0.5.0
+     - gradio==5.9.1
+     - gradio-client==1.5.2
+     - h11==0.14.0
+     - httpcore==1.0.7
+     - httpx==0.28.1
+     - hyperpyyaml==1.2.2
+     - joblib==1.4.2
+     - orjson==3.10.13
+     - pydantic==2.10.4
+     - pydantic-core==2.27.2
+     - pydub==0.25.1
+     - python-multipart==0.0.20
+     - ruamel-yaml==0.18.7
+     - ruamel-yaml-clib==0.2.12
+     - ruff==0.8.4
+     - safehttpx==0.1.6
+     - scipy==1.14.1
+     - semantic-version==2.10.0
+     - sentencepiece==0.2.0
+     - shellingham==1.5.4
+     - sniffio==1.3.1
+     - speechbrain==1.0.2
+     - srt==3.5.3
+     - starlette==0.41.3
+     - sympy==1.13.1
+     - tabulate==0.9.0
+     - tomlkit==0.13.2
+     - torchaudio==2.5.1
+     - typer==0.15.1
+     - typing-extensions==4.12.2
+     - uvicorn==0.34.0
+     - websockets==14.1
+ prefix: /Users/hamza/miniconda3/envs/gencent_env
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ # Not sure version numbers here are actually correct
+ gradio==3.50.2
+ speechbrain==1.0.2
+ transformers==4.21.0
+ python-dotenv==0.19.0
+ sounddevice==0.4.3
+ soundfile==0.10.3.post1
+ edge-tts==4.0.0
+ # flask==2.0.1
+ torch==2.3.1
test_output.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8f1260f6a6c0583fec3824fc8851c44c1adf2ac7b630fa09d9cb38dc65b5286c
+ size 130220
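
Because test_output.wav is tracked through Git LFS (per the .gitattributes change above), this pointer records only the SHA-256 and byte size of the real file. A small sketch of checking a fetched copy against the pointer; it assumes the actual file has already been pulled with git lfs:

# Verify a pulled LFS file against the oid/size recorded in its pointer.
import hashlib
import os

path = "test_output.wav"
oid = "8f1260f6a6c0583fec3824fc8851c44c1adf2ac7b630fa09d9cb38dc65b5286c"

with open(path, "rb") as f:
    digest = hashlib.sha256(f.read()).hexdigest()

print(digest == oid)                    # True if the content matches the pointer
print(os.path.getsize(path) == 130220)  # True if the size matches too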