Spaces:
Running
Running
import os, uuid | |
from flask import jsonify, send_file, request | |
from main import * | |
import torch, torchaudio | |
def speech_to_text_func(audio_path):
    """Transcribe the audio file at ``audio_path`` using the module-level STT model.

    Args:
        audio_path: Location of the audio file; passed straight to
            ``torchaudio.load``.

    Returns:
        ``{"text": <decoded transcription>}`` on success, or
        ``{"error": "STT model not initialized."}`` when ``stt_model`` is
        ``None``. Failures while loading or running the model are not caught
        here and propagate to the caller.
    """
    if stt_model is None:
        return {"error": "STT model not initialized."}

    # The loader also reports the sample rate; it is not used below.
    audio, _sample_rate = torchaudio.load(audio_path)

    if audio.ndim > 1:
        # Average over dim 0 while keeping it as a size-1 dimension.
        # NOTE(review): presumably dim 0 is the channel axis (mono down-mix)
        # -- confirm against the torchaudio.load output layout.
        audio = torch.mean(audio, dim=0, keepdim=True)

    audio = audio.to(device)

    # Inference only: no gradients are needed for the forward pass.
    with torch.no_grad():
        scores = stt_model(audio)

    token_ids = torch.argmax(scores, dim=-1)
    # Only the first sequence is decoded.
    first_sequence = token_ids[0].cpu().tolist()
    decoded = stt_model.tokenizer.decode(first_sequence)
    return {"text": decoded}
def stt_api(audio_filepath):
    """Run speech-to-text on ``audio_filepath`` and return the result dict.

    Args:
        audio_filepath: Path handed unchanged to ``speech_to_text_func``.

    Returns:
        The dict produced by ``speech_to_text_func`` when it has no
        ``"error"`` key; otherwise a new dict holding only that ``"error"``
        entry.
    """
    result = speech_to_text_func(audio_filepath)
    if "error" not in result:
        return result
    # Narrow failures to just the error message.
    return {"error": result["error"]}