File size: 776 Bytes
7b74407
1c817fd
 
7b74407
1c817fd
e83e49f
7b74407
 
 
1c817fd
7b74407
 
1c817fd
7b74407
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
import os, uuid
from flask import jsonify, send_file, request
from main import *
import torch, torchaudio

def speech_to_text_func(audio_path):
    """Transcribe the audio file at ``audio_path`` with the shared STT model.

    Relies on ``stt_model`` and ``device`` being available at module scope;
    neither is defined in this function.

    Args:
        audio_path: Path (or whatever ``torchaudio.load`` accepts) of the
            audio to transcribe.

    Returns:
        ``{"text": <decoded transcription>}`` on success, or
        ``{"error": "STT model not initialized."}`` when ``stt_model`` is
        ``None``.
    """
    if stt_model is None:
        return {"error": "STT model not initialized."}

    # NOTE(review): the sample rate is read but never used, so no resampling
    # happens here -- confirm the model accepts the file's native rate.
    audio, _sample_rate = torchaudio.load(audio_path)

    # Collapse dim 0 by averaging while keeping it as a size-1 axis.
    # Presumably dim 0 is the channel axis (torchaudio's default layout),
    # which makes this a mono down-mix -- TODO confirm.
    if audio.ndim > 1:
        audio = audio.mean(dim=0, keepdim=True)
    audio = audio.to(device)

    # Inference only: skip gradient tracking.
    with torch.no_grad():
        scores = stt_model(audio)

    # Pick the highest-scoring id along the last axis, then decode the first
    # entry of the result.
    token_ids = torch.argmax(scores, dim=-1)
    decode = stt_model.tokenizer.decode
    text = decode(token_ids[0].cpu().tolist())
    return {"text": text}

def stt_api(audio_filepath):
    """Run speech-to-text on ``audio_filepath`` and return the result dict.

    Args:
        audio_filepath: Path forwarded unchanged to ``speech_to_text_func``.

    Returns:
        The dict produced by ``speech_to_text_func`` when it has no
        ``"error"`` key; otherwise a new dict holding only that ``"error"``
        entry.
    """
    result = speech_to_text_func(audio_filepath)
    if "error" not in result:
        return result
    # Rebuild the dict so that nothing but the error message is passed on.
    return {"error": result["error"]}