import io
import os
import uuid

from flask import jsonify, send_file, request
# NOTE(review): `stt_model` and `device` are not defined in this file; they are
# presumably provided by this star import -- confirm against `main`.
from main import *
import torch
import torchaudio

# Sentinel string returned by speech_to_text_func when no model is loaded.
# Kept as a plain string so existing callers comparing against it still work.
_STT_NOT_INITIALIZED = "STT model not initialized."


def speech_to_text_func(audio_path, output_path="output_stt.txt"):
    """Transcribe an audio file and write the transcription to a text file.

    Args:
        audio_path: Path of the audio file, loaded with ``torchaudio.load``.
        output_path: Path of the text file the transcription is written to.

    Returns:
        ``output_path`` on success, or the string
        ``"STT model not initialized."`` when ``stt_model`` is ``None``.

    Raises:
        Whatever ``torchaudio.load``, the model call, or the file write
        raise; nothing is caught here.
    """
    if stt_model is None:
        return _STT_NOT_INITIALIZED

    waveform, _sample_rate = torchaudio.load(audio_path)
    if waveform.ndim > 1:
        # Average over the first dimension (keeping it) to collapse
        # multi-channel audio into a single channel.
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    waveform = waveform.to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = stt_model(waveform)

    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = stt_model.tokenizer.decode(predicted_ids[0].cpu().tolist())

    # Explicit UTF-8 so non-ASCII transcriptions do not depend on the
    # platform's default locale encoding.
    with open(output_path, "w", encoding="utf-8") as file:
        file.write(transcription)
    return output_path


def _remove_quietly(path):
    """Delete *path*, ignoring the case where it does not exist."""
    try:
        os.remove(path)
    except FileNotFoundError:
        pass


def stt_api():
    """Flask view: transcribe the uploaded ``audio`` file and return the text.

    Returns:
        * ``400`` with a JSON error when the request has no ``audio`` file.
        * ``500`` with a JSON error when the STT model is not initialized.
        * Otherwise the transcription as a ``text/plain`` attachment named
          ``output.txt``.
    """
    if 'audio' not in request.files:
        return jsonify({"error": "Audio file is required"}), 400

    # Per-request file names so concurrent requests never share (and
    # overwrite) the same temporary audio or transcription file.
    request_id = uuid.uuid4()
    temp_audio_path = f"temp_audio_{request_id}.wav"
    output_path = f"output_stt_{request_id}.txt"

    try:
        request.files['audio'].save(temp_audio_path)
        output_file = speech_to_text_func(temp_audio_path, output_path)
        if output_file == _STT_NOT_INITIALIZED:
            return jsonify({"error": "STT failed"}), 500

        # Load the result into memory so the on-disk file can be removed
        # before the response is streamed.
        with open(output_file, "rb") as result_file:
            payload = io.BytesIO(result_file.read())
    finally:
        # Runs on success, on the early 500 return, and on any exception,
        # so neither temporary file is left behind.
        _remove_quietly(temp_audio_path)
        _remove_quietly(output_path)

    return send_file(
        payload,
        mimetype="text/plain",
        as_attachment=True,
        download_name="output.txt",
    )