Hhhh / stt_api.py
Hjgugugjhuhjggg's picture
Upload 27 files
1c817fd verified
raw
history blame
1.35 kB
import os
import uuid
from flask import jsonify, send_file, request
from main import *
# from main import stt_model, device
import torch
import torchaudio
def speech_to_text_func(audio_path, output_path="output_stt.txt"):
    """Transcribe an audio file and write the transcription to a text file.

    Args:
        audio_path: Path of the audio file, loaded with ``torchaudio.load``.
        output_path: Path of the text file that receives the transcription.

    Returns:
        ``output_path`` on success, or the literal string
        ``"STT model not initialized."`` when the module-level ``stt_model``
        is ``None``. Callers compare against that exact string, so it is
        kept unchanged.
    """
    if stt_model is None:
        return "STT model not initialized."

    # NOTE(review): the sample rate reported by the loader is not used, so no
    # resampling happens here -- confirm the model accepts the file's rate.
    waveform, _sample_rate = torchaudio.load(audio_path)
    if waveform.ndim > 1:
        # Average over the first dimension (presumably the channel axis --
        # confirm) while keeping that dimension with size 1.
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    waveform = waveform.to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = stt_model(waveform)
        predicted_ids = torch.argmax(logits, dim=-1)

    transcription = stt_model.tokenizer.decode(predicted_ids[0].cpu().tolist())

    # Explicit UTF-8 so non-ASCII transcriptions are written the same way
    # regardless of the platform's default locale encoding.
    with open(output_path, "w", encoding="utf-8") as file:
        file.write(transcription)
    return output_path
def stt_api():
    """Handle a speech-to-text request and return the transcription file.

    Reads the uploaded file from ``request.files['audio']``, saves it to a
    per-request temporary path, runs ``speech_to_text_func`` on it and sends
    the transcription back as a ``text/plain`` attachment named
    ``output.txt``.

    Returns:
        * ``({"error": "Audio file is required"}, 400)`` as JSON when no
          ``audio`` file part is present.
        * ``({"error": "STT failed"}, 500)`` as JSON when
          ``speech_to_text_func`` reports that the model is not initialized.
        * Otherwise the ``send_file`` response carrying the transcription.
    """
    import io  # local import: only this handler needs an in-memory buffer

    if 'audio' not in request.files:
        return jsonify({"error": "Audio file is required"}), 400

    audio_file = request.files['audio']

    # Both scratch files share one per-request id so that concurrent requests
    # never read or overwrite each other's audio or transcription.
    request_id = uuid.uuid4()
    temp_audio_path = f"temp_audio_{request_id}.wav"
    temp_output_path = f"temp_stt_{request_id}.txt"

    try:
        audio_file.save(temp_audio_path)
        output_file = speech_to_text_func(temp_audio_path, temp_output_path)
        if output_file == "STT model not initialized.":
            return jsonify({"error": "STT failed"}), 500

        # Load the result into memory so the scratch file can be removed
        # before responding, and so the response does not depend on how
        # send_file resolves a relative path.
        with open(output_file, "rb") as result:
            payload = io.BytesIO(result.read())
    finally:
        # Always clean up, including when saving or transcription raises.
        for path in (temp_audio_path, temp_output_path):
            if os.path.exists(path):
                os.remove(path)

    return send_file(payload, mimetype="text/plain", as_attachment=True, download_name="output.txt")