Model Card for Respair/Arabic_Vocal_Dialect_Classifier
Accuracy: ~91%
Note: The model may sometimes confuse closely related dialects.
Inference
# load the model
import torch
from transformers import AutoModelForAudioClassification
from transformers import AutoFeatureExtractor
import librosa
ckpt = "Respair/Arabic_Vocal_Dialect_Classifier"
feature_extractor = AutoFeatureExtractor.from_pretrained(ckpt)

# Pick the device at runtime instead of assuming a GPU: on CUDA the model runs
# in bfloat16, otherwise it falls back to CPU in float32 so that loading does
# not crash on machines without CUDA.
if torch.cuda.is_available():
    device, dtype = "cuda", torch.bfloat16
else:
    device, dtype = "cpu", torch.float32

model = AutoModelForAudioClassification.from_pretrained(ckpt).to(device).to(dtype)
# inference function
def classify_dialect(audio) -> str:
    """Classify the Arabic dialect spoken in an audio file.

    Args:
        audio: Path to the audio file (anything ``librosa.load`` accepts).
            The audio is loaded at a 16 kHz sampling rate.

    Returns:
        The entry of ``model.config.id2label`` whose logit is highest.
    """
    wav = librosa.load(audio, sr=16_000)[0]

    # Cast and move the inputs to whatever dtype/device the model is using,
    # rather than hard-coding them, so the two can never disagree.
    inputs = (
        feature_extractor(wav, sampling_rate=16_000, return_tensors="pt")
        .to(model.dtype)
        .to(model.device)
    )

    with torch.no_grad():
        logits = model(**inputs).logits

    # One clip goes in, so the argmax over the class axis holds one index;
    # ``.item()`` already yields a plain Python int usable as a dict key.
    predicted_class_id = torch.argmax(logits, dim=-1).item()
    return model.config.id2label[predicted_class_id]
# Example: classify a single recording (replace the path with a real file).
audio_path = "path/to/audio.mp3"
classify_dialect(audio_path)  # output something like -> "egyptian"
- Downloads last month
- 7
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋
Ask for provider support