File size: 1,351 Bytes
1c817fd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import os
import uuid
from flask import jsonify, send_file, request
from main import *
# from main import stt_model, device
import torch
import torchaudio

def speech_to_text_func(audio_path: str, output_path: str = "output_stt.txt") -> str:
    """Transcribe an audio file and write the transcription to a text file.

    Uses the module-level names ``stt_model`` and ``device``, which are
    expected to be provided by the file's ``from main import *``.

    Args:
        audio_path: Path of the audio file; it is read with
            ``torchaudio.load``.
        output_path: Path of the text file that is created or overwritten
            with the transcription.

    Returns:
        ``output_path`` on success, or the literal string
        ``"STT model not initialized."`` when ``stt_model`` is ``None``.
        Callers distinguish the two cases by comparing against that string,
        so its text must not change.
    """
    if stt_model is None:
        return "STT model not initialized."

    # NOTE(review): the sample rate is discarded and no resampling is done;
    # confirm the model accepts audio at the file's native rate.
    waveform, _sample_rate = torchaudio.load(audio_path)
    if waveform.ndim > 1:
        # Collapse dim 0 to size 1 by averaging (presumably a channel
        # down-mix to mono -- confirm against the model's expected input).
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    waveform = waveform.to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = stt_model(waveform)
    predicted_ids = torch.argmax(logits, dim=-1)
    transcription = stt_model.tokenizer.decode(predicted_ids[0].cpu().tolist())

    # Explicit UTF-8 so the output does not depend on the platform's locale
    # encoding and non-ASCII transcriptions cannot fail to encode.
    with open(output_path, "w", encoding="utf-8") as file:
        file.write(transcription)
    return output_path

def stt_api():
    """Handle a speech-to-text request for an uploaded ``audio`` file.

    The upload is read from ``request.files['audio']``, saved to a uniquely
    named temporary ``.wav`` file (relative path, so in the current working
    directory), and passed to ``speech_to_text_func``. The temporary audio
    file is removed afterwards, including when transcription raises.

    Returns:
        * JSON ``{"error": "Audio file is required"}`` with status 400 when
          the request has no ``audio`` file part.
        * JSON ``{"error": "STT failed"}`` with status 500 when
          ``speech_to_text_func`` reports that the model is not initialized.
        * Otherwise the transcription file, sent as a ``text/plain``
          attachment named ``output.txt``.
    """
    if 'audio' not in request.files:
        return jsonify({"error": "Audio file is required"}), 400

    audio_file = request.files['audio']
    temp_audio_path = f"temp_audio_{uuid.uuid4()}.wav"
    try:
        audio_file.save(temp_audio_path)
        # NOTE(review): the default output path is used, so every request
        # writes the same file; confirm this handler never runs concurrently.
        output_file = speech_to_text_func(temp_audio_path)
    finally:
        # Always clean up the upload, even if saving or transcription failed.
        if os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)

    # speech_to_text_func signals a missing model with this exact string.
    if output_file == "STT model not initialized.":
        return jsonify({"error": "STT failed"}), 500
    return send_file(output_file, mimetype="text/plain", as_attachment=True, download_name="output.txt")