Sajjo committed on
Commit
1500574
·
1 Parent(s): df50271

Add .ipynb_checkpoints to .gitignore

Browse files
Files changed (2) hide show
  1. app.py +46 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import gradio as gr
3
+ import torchaudio
4
+ from transformers import AutoModel, AutoProcessor
5
+ from quanto import qint8, quantize, freeze
6
+
7
# Load and quantize the model.
# transcribe() reads `.logits` from the model output and greedily decodes
# them, so the checkpoint has to be loaded together with its CTC head.
# A bare `AutoModel` only returns encoder hidden states (no `.logits`).
from transformers import AutoModelForCTC

model_name = "cdactvm/w2v-bert-punjabi"
model = AutoModelForCTC.from_pretrained(model_name)
processor = AutoProcessor.from_pretrained(model_name)

# Quantization: convert the weights to int8 (activations are left untouched),
# then freeze so the quantized weights replace the float ones in place.
quantize(model, weights=qint8, activations=None)
freeze(model)
15
+
16
+ # Audio transcription function
17
def transcribe(audio):
    """Transcribe an uploaded audio file with the module-level model.

    Args:
        audio: Path of the audio file to transcribe, as handed over by the
            Gradio ``Audio`` input; ``None`` when nothing was uploaded.

    Returns:
        The decoded transcription string, or an empty string when no audio
        was provided.
    """
    # Gradio passes None when the form is submitted without a file.
    if audio is None:
        return ""

    waveform, sample_rate = torchaudio.load(audio)

    # torchaudio.load yields a (channels, samples) tensor. Average the
    # channels so a stereo recording is not mistaken for a batch of two
    # utterances further down (squeeze(0) alone only handles mono input).
    if waveform.size(0) > 1:
        waveform = waveform.mean(dim=0, keepdim=True)

    # Ensure 16kHz sample rate
    if sample_rate != 16000:
        waveform = torchaudio.transforms.Resample(sample_rate, 16000)(waveform)

    # Process audio: drop the channel axis and hand the feature extractor a
    # 1-D float array.
    samples = waveform.squeeze(0).numpy()
    inputs = processor(samples, sampling_rate=16000, return_tensors="pt")

    # Run inference without tracking gradients.
    with torch.no_grad():
        logits = model(**inputs).logits

    # Decode transcription: pick the most likely token per frame and let the
    # processor turn the id sequence into text.
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = processor.batch_decode(predicted_ids)[0]

    return transcription
36
+
37
# Gradio UI
# Gradio 4 replaced the Audio component's `source="upload"` keyword with
# `sources=["upload"]`. requirements.txt does not pin a Gradio version, so
# prefer the current spelling and fall back to the old one if it is rejected.
try:
    audio_input = gr.Audio(sources=["upload"], type="filepath")
except TypeError:
    audio_input = gr.Audio(source="upload", type="filepath")

iface = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs="text",
    title="Punjabi Speech Recognition",
    description="Upload an audio file and get a Punjabi transcription using a quantized model.",
)

iface.launch()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ torchaudio
4
+ quanto
5
+ gradio