Spaces:
Running
Running
File size: 776 Bytes
7b74407 1c817fd 7b74407 1c817fd e83e49f 7b74407 1c817fd 7b74407 1c817fd 7b74407 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 |
import os, uuid
from flask import jsonify, send_file, request
from main import *
import torch, torchaudio
def speech_to_text_func(audio_path):
    """Transcribe the audio file at ``audio_path`` using the shared STT model.

    Args:
        audio_path: Path handed to ``torchaudio.load``.

    Returns:
        ``{"text": <decoded transcription>}`` on success, or
        ``{"error": "STT model not initialized."}`` when ``stt_model`` is
        ``None``.
    """
    if stt_model is None:
        return {"error": "STT model not initialized."}

    # torchaudio.load yields (waveform, sample_rate); the rate is not used here.
    audio, _ = torchaudio.load(audio_path)
    if audio.ndim > 1:
        # Average over dim 0 while keeping it as a size-1 dimension
        # (presumably dim 0 is the channel axis -- confirm against the model).
        audio = audio.mean(dim=0, keepdim=True)
    audio = audio.to(device)

    # Inference only: no gradient tracking needed.
    with torch.no_grad():
        scores = stt_model(audio)

    token_ids = torch.argmax(scores, dim=-1)
    # Only the first sequence of the prediction is decoded.
    first_sequence = token_ids[0].cpu().tolist()
    return {"text": stt_model.tokenizer.decode(first_sequence)}
def stt_api(audio_filepath):
    """Run speech-to-text on ``audio_filepath`` and return the result dict.

    When the result of ``speech_to_text_func`` contains an ``"error"`` key,
    a new dict holding only that error is returned; otherwise the result is
    returned unchanged.
    """
    result = speech_to_text_func(audio_filepath)
    if "error" not in result:
        return result
    # Strip the response down to the error message alone.
    return {"error": result["error"]}
|