Spaces:

Amr-h
/

English_Dialect_Classifier

Sleeping

App Files Files Community

Amr-h committed on May 31

Commit

70f7db6

0 Parent(s):

ALL

Browse files

Files changed (9) hide show

.gitignore +108 -0
README.md +31 -0
analyze.txt +80 -0
app.py +519 -0
audio_extractor.py +193 -0
chunck_time.py +261 -0
dialect_predector.py +251 -0
pretrained_models/accent-id-commonaccent_ecapa/hyperparams.yaml +1 -0
requirements.txt +13 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,108 @@

+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+*.egg-info/
+.installed.cfg
+*.egg
+# PyInstaller
+#  Usually these files are written by a python script from a template
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+# Jupyter Notebook checkpoints
+.ipynb_checkpoints/
+# IPython history
+profile_default/
+ipython_config.py
+# PyTorch save files
+*.pt
+*.pth
+*.ckpt
+# Torchaudio temp files or cache (if any)
+*.wav
+# Hugging Face Hub cache and tokens
+cache/
+hf_home/
+*.json
+*.token
+# VS Code settings
+.vscode/
+# IDE settings
+.idea/
+*.iml
+# macOS files
+.DS_Store
+# Logs and temp files
+*.log
+*.tmp
+*.temp
+# Temp directories used in your code
+tmp/
+temp/
+temp_dir/
+tmp_dir/
+# yt-dlp downloads (temporary audio files)
+*.m4a
+*.mp3
+*.webm
+*.mkv
+# dotenv environment variables file
+.env
+# Python virtual environments
+venv/
+env/
+.venv/
+.env/
+# Misc
+*.bak
+*.swp

README.md ADDED Viewed

	@@ -0,0 +1,31 @@

+---
+title: AI Accent Analyzer
+emoji: 🎤
+colorFrom: blue
+colorTo: purple
+sdk: streamlit
+sdk_version: 1.28.1
+app_file: app.py
+pinned: false
+license: mit
+---
+# 🎤 AI Accent Analyzer
+Analyze accents from YouTube videos using advanced AI models with confidence-based filtering.
+## Features
+- 🎥 YouTube video support (including Shorts)
+- 🧠 SpeechBrain AI model for accent detection
+- 📊 Confidence-based filtering (configurable threshold)
+- ⚡ Early stopping mechanism
+- 📈 Interactive visualizations
+- 📥 Export results (CSV/JSON)
+## How to Use
+1. Paste a YouTube video URL
+2. Adjust confidence threshold if needed
+3. Click "Analyze Accent"
+4. View detailed results and visualizations
+Built with Streamlit, SpeechBrain, and Plotly.

analyze.txt ADDED Viewed

	@@ -0,0 +1,80 @@

+============================================================
+🎵 Extracting and preparing audio...
+[⏱️] Audio extraction took 11.33 seconds.
+✅ Audio prepared in 11.56s | Duration: 1.8 minutes
+🧠 Loading model...
+✅ Model loaded in 1.63s
+============================================================
+📊 CHUNK SIZE ANALYSIS RESULTS
+============================================================
+🧩 Testing 10-second chunks...
+CategoricalEncoder.expect_len was never called: assuming category count of 16 to be correct! Sanity check your encoder using `.expect_len`. Ensure that downstream code also uses the correct size. If you are sure this does not apply to you, use `.ignore_len`.
+  📦 Chunks created: 11
+  ⏱️  Chunking time: 0.001s
+  🧠 Prediction time: 23.477s
+  🔄 Total processing: 23.478s
+  ⚡ Processing rate: 0.5 chunks/sec
+  📈 Avg confidence: 0.666
+  🎯 Most common: indian (5 times)
+  📊 Confidence range: 0.592 - 0.797
+🧩 Testing 15-second chunks...
+  📦 Chunks created: 8
+  ⏱️  Chunking time: 0.000s
+  🧠 Prediction time: 25.102s
+  🔄 Total processing: 25.102s
+  ⚡ Processing rate: 0.3 chunks/sec
+  📈 Avg confidence: 0.681
+  🎯 Most common: england (4 times)
+  📊 Confidence range: 0.602 - 0.849
+🧩 Testing 20-second chunks...
+  📦 Chunks created: 6
+  ⏱️  Chunking time: 0.000s
+  🧠 Prediction time: 26.239s
+  🔄 Total processing: 26.239s
+  ⚡ Processing rate: 0.2 chunks/sec
+  📈 Avg confidence: 0.671
+  🎯 Most common: england (4 times)
+  📊 Confidence range: 0.603 - 0.733
+🧩 Testing 30-second chunks...
+  📦 Chunks created: 4
+  ⏱️  Chunking time: 0.000s
+  🧠 Prediction time: 28.015s
+  🔄 Total processing: 28.015s
+  ⚡ Processing rate: 0.1 chunks/sec
+  📈 Avg confidence: 0.659
+  🎯 Most common: england (2 times)
+  📊 Confidence range: 0.559 - 0.714
+🧩 Testing 60-second chunks...
+  📦 Chunks created: 2
+  ⏱️  Chunking time: 0.000s
+  🧠 Prediction time: 25.356s
+  🔄 Total processing: 25.356s
+  ⚡ Processing rate: 0.1 chunks/sec
+  📈 Avg confidence: 0.714
+  🎯 Most common: indian (2 times)
+  📊 Confidence range: 0.667 - 0.760
+================================================================================
+📈 PERFORMANCE COMPARISON SUMMARY
+================================================================================
+Size   Chunks   Total Time   Rate         Avg Conf   Consistency  Winner
+--------------------------------------------------------------------------------
+10     11       23.478       0.5          0.666      0.90         indian
+15     8        25.102       0.3          0.681      0.88         england
+20     6        26.239       0.2          0.671      0.93         england
+30     4        28.015       0.1          0.659      0.89         england
+60     2        25.356       0.1          0.714      0.91         indian
+============================================================
+🏆 RECOMMENDATIONS
+============================================================
+⚡ Fastest processing: 10s chunks (23.48s total)
+🎯 Highest accuracy: 60s chunks (0.714 avg confidence)
+📊 Most consistent: 20s chunks (0.926 consistency)
+⚖️  Best balance: 60s chunks (score: 42.8)

app.py ADDED Viewed

	@@ -0,0 +1,519 @@

+import streamlit as st
+import pandas as pd
+import plotly.express as px
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+import time
+import re
+from datetime import datetime
+import numpy as np
+from dialect_predector import analyze_video_accent
+# analyze_video_accent (imported above from dialect_predector) is the
+# accent-analysis entry point that main() calls.
+# Page configuration: browser-tab title and icon, full-width layout, and a
+# sidebar that starts expanded (it holds the analysis settings).
+st.set_page_config(
+    page_title="🎤 AI Accent Analyzer",
+    page_icon="🎤",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Custom CSS for the page. The class names defined here (main-header,
+# metric-card, analysis-section, chunk-result, sidebar-info, ...) are
+# referenced by the raw HTML snippets emitted elsewhere in this file, so the
+# stylesheet is injected as HTML via unsafe_allow_html=True.
+st.markdown("""
+<style>
+    .main-header {
+        background: linear-gradient(90deg, #667eea 0%, #764ba2 100%);
+        padding: 2rem;
+        border-radius: 10px;
+        color: white;
+        text-align: center;
+        margin-bottom: 2rem;
+        box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
+    }
+    .metric-card {
+        background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
+        padding: 1.5rem;
+        border-radius: 10px;
+        box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
+        margin: 0.5rem 0;
+        border-left: 4px solid #667eea;
+    }
+    .analysis-section {
+        background: white;
+        padding: 1.5rem;
+        border-radius: 10px;
+        box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
+        margin: 1rem 0;
+        border: 1px solid #e0e6ed;
+    }
+    .accent-tag {
+        display: inline-block;
+        padding: 0.3rem 0.8rem;
+        margin: 0.2rem;
+        border-radius: 20px;
+        font-weight: bold;
+        font-size: 0.9rem;
+    }
+    .accent-primary {
+        background: linear-gradient(45deg, #667eea, #764ba2);
+        color: white;
+    }
+    .accent-secondary {
+        background: linear-gradient(45deg, #ffecd2, #fcb69f);
+        color: #333;
+    }
+    .processing-animation {
+        display: flex;
+        justify-content: center;
+        align-items: center;
+        padding: 2rem;
+    }
+    .confidence-bar {
+        background: linear-gradient(90deg, #ff6b6b, #feca57, #48cae4, #06ffa5);
+        height: 20px;
+        border-radius: 10px;
+        margin: 0.5rem 0;
+    }
+    .chunk-result {
+        background: #f8f9fa;
+        border-left: 4px solid #28a745;
+        padding: 0.8rem;
+        margin: 0.3rem 0;
+        border-radius: 5px;
+    }
+    .chunk-result.low-confidence {
+        border-left-color: #ffc107;
+    }
+    .sidebar-info {
+        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+        color: white;
+        padding: 1rem;
+        border-radius: 10px;
+        margin-bottom: 1rem;
+    }
+</style>
+""", unsafe_allow_html=True)
+def validate_url(url):
+    """Validate if the URL is a valid YouTube URL"""
+    youtube_patterns = [
+        r'(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/',
+        r'(https?://)?(www\.)?youtube\.com/shorts/',
+        r'(https?://)?(www\.)?youtu\.be/'
+    ]
+    for pattern in youtube_patterns:
+        if re.match(pattern, url):
+            return True
+    return False
+def create_confidence_gauge(confidence):
+    """Create a beautiful confidence gauge"""
+    fig = go.Figure(go.Indicator(
+        mode = "gauge+number+delta",
+        value = confidence * 100,
+        domain = {'x': [0, 1], 'y': [0, 1]},
+        title = {'text': "Confidence Score"},
+        delta = {'reference': 70},
+        gauge = {
+            'axis': {'range': [None, 100]},
+            'bar': {'color': "darkblue"},
+            'steps': [
+                {'range': [0, 50], 'color': "lightgray"},
+                {'range': [50, 80], 'color': "yellow"},
+                {'range': [80, 100], 'color': "green"}
+            ],
+            'threshold': {
+                'line': {'color': "red", 'width': 4},
+                'thickness': 0.75,
+                'value': 90
+            }
+        }
+    ))
+    fig.update_layout(height=300, margin=dict(l=20, r=20, t=40, b=20))
+    return fig
+def create_accent_distribution_chart(accent_counts, title="Accent Distribution"):
+    """Create a beautiful pie chart for accent distribution"""
+    if not accent_counts:
+        return None
+    accents = list(accent_counts.keys())
+    counts = list(accent_counts.values())
+    fig = px.pie(
+        values=counts,
+        names=accents,
+        title=title,
+        color_discrete_sequence=px.colors.qualitative.Set3
+    )
+    fig.update_traces(
+        textposition='inside',
+        textinfo='percent+label',
+        hovertemplate='<b>%{label}</b><br>Count: %{value}<br>Percentage: %{percent}<extra></extra>'
+    )
+    fig.update_layout(
+        height=400,
+        margin=dict(l=20, r=20, t=40, b=20),
+        font=dict(size=12)
+    )
+    return fig
+def create_chunk_confidence_chart(chunk_results):
+    """Create a chart showing confidence over chunks"""
+    if not chunk_results:
+        return None
+    df = pd.DataFrame(chunk_results)
+    fig = px.line(
+        df,
+        x='chunk',
+        y='confidence',
+        title='Confidence Score Across Audio Chunks',
+        markers=True,
+        color='accent',
+        hover_data=['accent', 'is_confident']
+    )
+    fig.add_hline(y=0.6, line_dash="dash", line_color="red",
+                  annotation_text="Confidence Threshold (60%)")
+    fig.update_layout(
+        height=400,
+        xaxis_title="Chunk Number",
+        yaxis_title="Confidence Score",
+        margin=dict(l=20, r=20, t=40, b=20)
+    )
+    return fig
+def create_detailed_analysis(result):
+    """Render the "Detailed Analysis" section for a successful result.
+
+    Returns immediately (rendering nothing) when ``result`` is falsy or its
+    ``success`` entry is falsy. Otherwise draws four metric cards followed by
+    two columns of charts.
+
+    Keys read with ``[]`` (a missing key raises ``KeyError``):
+    ``predicted_accent``, ``confidence_percentage``,
+    ``processed_chunks_count``, ``available_chunks_count``,
+    ``processing_time``, ``confidence_score``.
+    Keys read with ``.get`` (optional): ``confident_chunks_count``,
+    ``duration_minutes``, ``early_stopped``, ``confidence_threshold``,
+    ``confident_accent_counts``, ``chunk_results``, ``all_accent_counts``.
+    """
+    if not result or not result.get("success"):
+        return
+    st.markdown('<div class="analysis-section">', unsafe_allow_html=True)
+    st.markdown("## 📊 Detailed Analysis")
+    # Key metrics
+    col1, col2, col3, col4 = st.columns(4)
+    with col1:
+        # Final accent, with the formatted confidence shown as the delta line
+        st.markdown('<div class="metric-card">', unsafe_allow_html=True)
+        st.metric(
+            "🎯 Final Accent",
+            result['predicted_accent'],
+            f"{result['confidence_percentage']}"
+        )
+        st.markdown('</div>', unsafe_allow_html=True)
+    with col2:
+        # Processed vs. available chunks; delta line shows the confident count
+        st.markdown('<div class="metric-card">', unsafe_allow_html=True)
+        st.metric(
+            "📦 Chunks Processed",
+            f"{result['processed_chunks_count']}/{result['available_chunks_count']}",
+            f"Confident: {result.get('confident_chunks_count', 0)}"
+        )
+        st.markdown('</div>', unsafe_allow_html=True)
+    with col3:
+        # Processing time; the audio duration is appended only when present and non-zero
+        st.markdown('<div class="metric-card">', unsafe_allow_html=True)
+        st.metric(
+            "⏱️ Processing Time",
+            f"{result['processing_time']:.1f}s",
+            f"Audio: {result.get('duration_minutes', 0):.1f}min" if result.get('duration_minutes') else ""
+        )
+        st.markdown('</div>', unsafe_allow_html=True)
+    with col4:
+        # Early-stopping flag; threshold falls back to 0.6 when the result omits it
+        st.markdown('<div class="metric-card">', unsafe_allow_html=True)
+        early_stopped_text = "Yes ⚡" if result.get('early_stopped') else "No 🔄"
+        st.metric(
+            "🛑 Early Stopped",
+            early_stopped_text,
+            f"Threshold: {result.get('confidence_threshold', 0.6)*100:.0f}%"
+        )
+        st.markdown('</div>', unsafe_allow_html=True)
+    st.markdown('</div>', unsafe_allow_html=True)
+    # Charts section
+    col1, col2 = st.columns(2)
+    with col1:
+        # Confidence gauge
+        gauge_fig = create_confidence_gauge(result['confidence_score'])
+        st.plotly_chart(gauge_fig, use_container_width=True)
+        # Accent distribution (confident predictions)
+        if result.get('confident_accent_counts'):
+            pie_fig = create_accent_distribution_chart(
+                result['confident_accent_counts'],
+                "Confident Predictions Distribution"
+            )
+            if pie_fig:
+                st.plotly_chart(pie_fig, use_container_width=True)
+    with col2:
+        # Chunk confidence over time
+        if result.get('chunk_results'):
+            confidence_fig = create_chunk_confidence_chart(result['chunk_results'])
+            if confidence_fig:
+                st.plotly_chart(confidence_fig, use_container_width=True)
+        # All predictions distribution (skipped when only one accent was predicted)
+        if result.get('all_accent_counts') and len(result['all_accent_counts']) > 1:
+            all_pie_fig = create_accent_distribution_chart(
+                result['all_accent_counts'],
+                "All Predictions Distribution"
+            )
+            if all_pie_fig:
+                st.plotly_chart(all_pie_fig, use_container_width=True)
+def display_chunk_details(chunk_results, confidence_threshold=0.6):
+    """Display detailed chunk-by-chunk results"""
+    if not chunk_results:
+        return
+    st.markdown("### 🔍 Chunk-by-Chunk Analysis")
+    # Summary statistics
+    confident_chunks = [r for r in chunk_results if r.get('is_confident', r['confidence'] > confidence_threshold)]
+    col1, col2, col3 = st.columns(3)
+    with col1:
+        st.info(f"**Total Chunks:** {len(chunk_results)}")
+    with col2:
+        st.success(f"**Confident Chunks:** {len(confident_chunks)}")
+    with col3:
+        confidence_rate = len(confident_chunks) / len(chunk_results) * 100 if chunk_results else 0
+        st.warning(f"**Confidence Rate:** {confidence_rate:.1f}%")
+    # Detailed results
+    with st.expander("📋 View Detailed Chunk Results", expanded=False):
+        for i, result in enumerate(chunk_results):
+            confidence = result['confidence']
+            is_confident = result.get('is_confident', confidence > confidence_threshold)
+            confidence_emoji = "✅" if is_confident else "⚠️"
+            confidence_class = "" if is_confident else "low-confidence"
+            st.markdown(f"""
+            <div class="chunk-result {confidence_class}">
+                <strong>Chunk {result['chunk']}</strong> {confidence_emoji}<br>
+                <strong>Accent:</strong> {result['accent']}<br>
+                <strong>Confidence:</strong> {confidence:.3f} ({confidence*100:.1f}%)<br>
+                <strong>Status:</strong> {'Confident' if is_confident else 'Low Confidence'}
+            </div>
+            """, unsafe_allow_html=True)
+def main():
+    # Header
+    st.markdown("""
+    <div class="main-header">
+        <h1>🎤 AI Accent Analyzer</h1>
+        <p>Analyze accents from YouTube videos using advanced AI models</p>
+    </div>
+    """, unsafe_allow_html=True)
+    # Sidebar
+    with st.sidebar:
+        st.markdown("""
+        <div class="sidebar-info">
+            <h3>🔧 Configuration</h3>
+            <p>Adjust analysis parameters</p>
+        </div>
+        """, unsafe_allow_html=True)
+        confidence_threshold = st.slider(
+            "🎯 Confidence Threshold",
+            min_value=0.1,
+            max_value=0.9,
+            value=0.6,
+            step=0.05,
+            help="Only predictions above this confidence level are considered reliable"
+        )
+        early_stopping_threshold = st.slider(
+            "⚡ Early Stopping Threshold",
+            min_value=2,
+            max_value=10,
+            value=3,
+            help="Stop processing after this many consecutive confident predictions"
+        )
+        st.markdown("---")
+        st.markdown("""
+        ### 📋 Supported Formats
+        - YouTube videos
+        - YouTube Shorts
+        - YouTube Music
+        - Youtu.be links
+        ### ⚙️ How it works
+        1. **Audio Extraction**: Extracts audio from video
+        2. **Chunking**: Splits audio into manageable segments
+        3. **AI Analysis**: Uses SpeechBrain model for accent detection
+        4. **Confidence Filtering**: Only considers high-confidence predictions
+        5. **Results**: Provides detailed analysis and visualization
+        """)
+    # Main interface
+    st.markdown("## 🔗 Enter Video URL")
+    # URL input with examples
+    col1, col2 = st.columns([3, 1])
+    with col1:
+        video_url = st.text_input(
+            "YouTube Video URL",
+            placeholder="https://www.youtube.com/watch?v=example or https://youtu.be/example",
+            help="Paste any YouTube video URL here"
+        )
+    with col2:
+        st.markdown("**Quick Examples:**")
+        example_urls = [
+            "https://www.youtube.com/shorts/mxMzNp3RfpA",
+            "https://youtu.be/dQw4w9WgXcQ",
+            "https://www.youtube.com/watch?v=example"
+        ]
+        for i, url in enumerate(example_urls):
+            if st.button(f"Example {i+1}", key=f"example_{i}"):
+                st.session_state.example_url = url
+                st.rerun()
+    # Use example URL if selected
+    if hasattr(st.session_state, 'example_url'):
+        video_url = st.session_state.example_url
+        delattr(st.session_state, 'example_url')
+    # URL validation
+    if video_url:
+        if validate_url(video_url):
+            st.success("✅ Valid YouTube URL detected!")
+        else:
+            st.error("❌ Please enter a valid YouTube URL")
+            st.stop()
+    # Analysis button
+    if st.button("🚀 Analyze Accent", type="primary", disabled=not video_url):
+        if not video_url:
+            st.warning("Please enter a video URL first!")
+            return
+        # Progress tracking
+        progress_bar = st.progress(0)
+        status_text = st.empty()
+        try:
+            # Simulate the analysis process with progress updates
+            status_text.text("🔄 Initializing analysis...")
+            progress_bar.progress(10)
+            time.sleep(1)
+            status_text.text("🎵 Extracting audio from video...")
+            progress_bar.progress(30)
+            time.sleep(1)
+            status_text.text("🧠 Loading AI model...")
+            progress_bar.progress(50)
+            time.sleep(1)
+            status_text.text("🔍 Analyzing accent patterns...")
+            progress_bar.progress(80)
+            # Here you would call your actual analysis function
+            # result = analyze_video_accent(video_url, confidence_threshold)
+            # For demo purposes, creating mock result
+            result = analyze_video_accent(video_url, confidence_threshold)
+            progress_bar.progress(100)
+            status_text.text("✅ Analysis complete!")
+            time.sleep(0.5)
+            # Clear progress indicators
+            progress_bar.empty()
+            status_text.empty()
+            # Display results
+            if result["success"]:
+                st.success("🎉 Analysis completed successfully!")
+                # Main result highlight
+                st.markdown(f"""
+                <div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
+                           color: white; padding: 2rem; border-radius: 15px; text-align: center; margin: 2rem 0;">
+                    <h2>🎤 Detected Accent: {result['predicted_accent']}</h2>
+                    <h3>📊 Confidence: {result['confidence_percentage']}</h3>
+                </div>
+                """, unsafe_allow_html=True)
+                # Detailed analysis
+                create_detailed_analysis(result)
+                # Chunk details
+                if result.get('chunk_results'):
+                    display_chunk_details(result['chunk_results'], confidence_threshold)
+                # Raw data download
+                with st.expander("📥 Download Results", expanded=False):
+                    # Convert results to DataFrame for download
+                    if result.get('chunk_results'):
+                        df = pd.DataFrame(result['chunk_results'])
+                        csv = df.to_csv(index=False)
+                        st.download_button(
+                            label="📊 Download Chunk Results (CSV)",
+                            data=csv,
+                            file_name=f"accent_analysis_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
+                            mime="text/csv"
+                        )
+                    # JSON download
+                    import json
+                    json_str = json.dumps(result, indent=2, default=str)
+                    st.download_button(
+                        label="📋 Download Full Results (JSON)",
+                        data=json_str,
+                        file_name=f"accent_analysis_full_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json",
+                        mime="application/json"
+                    )
+            else:
+                st.error(f"❌ Analysis failed: {result.get('error', 'Unknown error')}")
+        except Exception as e:
+            progress_bar.empty()
+            status_text.empty()
+            st.error(f"❌ An error occurred during analysis: {str(e)}")
+    # Footer
+    st.markdown("---")
+    st.markdown("""
+    <div style="text-align: center; color: #666; margin-top: 2rem;">
+        <p>🎤 AI Accent Analyzer | Built with Streamlit & SpeechBrain</p>
+        <p>Analyze accents from YouTube videos with confidence-based filtering</p>
+    </div>
+    """, unsafe_allow_html=True)
+# Build the page only when this file is run as the entry script, not on import.
+if __name__ == "__main__":
+    main()

audio_extractor.py ADDED Viewed

	@@ -0,0 +1,193 @@

+import os
+import sys
+import tempfile
+import warnings
+import time
+import shutil
+import random
+import torch
+import torchaudio
+import yt_dlp
+from contextlib import contextmanager
+# Silence every Python warning for the whole process.
+warnings.filterwarnings("ignore")
+# Presumably turns off the Hugging Face Hub symlink warning (going by the
+# variable name) — TODO confirm against the huggingface_hub docs.
+os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'
+@contextmanager
+def suppress_stdout_stderr():
+    with open(os.devnull, "w") as devnull:
+        old_stdout = sys.stdout
+        old_stderr = sys.stderr
+        sys.stdout = devnull
+        sys.stderr = devnull
+        try:
+            yield
+        finally:
+            sys.stdout = old_stdout
+            sys.stderr = old_stderr
+def extract_audio_from_video_url(video_url):
+    start_time = time.time()
+    temp_dir = tempfile.mkdtemp()
+    ydl_opts = {
+        'format': 'bestaudio[abr<=64]',
+        'postprocessors': [{
+            'key': 'FFmpegExtractAudio',
+            'preferredcodec': 'wav',
+            'preferredquality': '192',
+        }],
+        'outtmpl': os.path.join(temp_dir, 'audio.%(ext)s'),
+        'quiet': True,
+        'no_warnings': True,
+        'noplaylist': True,
+    }
+    with suppress_stdout_stderr():
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            ydl.download([video_url])
+    for file in os.listdir(temp_dir):
+        if file.endswith('.wav'):
+            end_time = time.time()
+            print(f"[⏱️] Audio extraction took {end_time - start_time:.2f} seconds.")
+            return os.path.join(temp_dir, file)
+    raise Exception("Failed to extract audio in WAV format")
+def smart_chunk_audio(waveform, sample_rate, duration_minutes):
+    """Smart chunking based on video duration"""
+    total_duration = waveform.size(1) / sample_rate
+    print(f"📏 Video duration: {total_duration/60:.1f} minutes")
+    if duration_minutes <= 1:
+        # Short videos: smaller chunks, process all
+        chunk_length_sec = 10
+        return chunk_audio_all(waveform, sample_rate, chunk_length_sec)
+    elif duration_minutes <= 5:
+        # Medium videos: normal chunks, skip some randomly
+        chunk_length_sec = 20
+        all_chunks = chunk_audio_all(waveform, sample_rate, chunk_length_sec)
+        # Keep 70% of chunks randomly
+        keep_ratio = 0.7
+        num_keep = max(1, int(len(all_chunks) * keep_ratio))
+        selected_chunks = random.sample(all_chunks, num_keep)
+        print(f"📦 Selected {len(selected_chunks)} out of {len(all_chunks)} chunks")
+        return selected_chunks
+    else:
+        # Long videos: strategic sampling from beginning, middle, end
+        chunk_length_sec = 25
+        return chunk_audio_strategic(waveform, sample_rate, chunk_length_sec)
+def chunk_audio_all(waveform, sample_rate, chunk_length_sec=20):
+    """Create all chunks from audio"""
+    chunk_samples = chunk_length_sec * sample_rate
+    total_samples = waveform.size(1)
+    chunks = []
+    for start in range(0, total_samples, chunk_samples):
+        end = min(start + chunk_samples, total_samples)
+        chunk = waveform[:, start:end]
+        if chunk.size(1) > sample_rate * 3:  # ignore very short chunks (3 sec minimum)
+            chunks.append(chunk)
+    return chunks
+def chunk_audio_strategic(waveform, sample_rate, chunk_length_sec=25):
+    """Strategic chunking for long videos - sample from beginning, middle, end"""
+    total_samples = waveform.size(1)
+    chunk_samples = chunk_length_sec * sample_rate
+    chunks = []
+    # Beginning: 2-3 chunks
+    beginning_chunks = min(3, total_samples // chunk_samples)
+    for i in range(beginning_chunks):
+        start = i * chunk_samples
+        end = min(start + chunk_samples, total_samples)
+        chunk = waveform[:, start:end]
+        if chunk.size(1) > sample_rate * 3:
+            chunks.append(chunk)
+    # Middle: 2-3 chunks
+    middle_start = total_samples // 2 - chunk_samples
+    middle_chunks = min(3, 2)
+    for i in range(middle_chunks):
+        start = middle_start + (i * chunk_samples)
+        end = min(start + chunk_samples, total_samples)
+        if start >= 0 and start < total_samples:
+            chunk = waveform[:, start:end]
+            if chunk.size(1) > sample_rate * 3:
+                chunks.append(chunk)
+    # End: 2-3 chunks
+    end_start = total_samples - (3 * chunk_samples)
+    end_chunks = min(3, 3)
+    for i in range(end_chunks):
+        start = max(0, end_start + (i * chunk_samples))
+        end = min(start + chunk_samples, total_samples)
+        if start < total_samples:
+            chunk = waveform[:, start:end]
+            if chunk.size(1) > sample_rate * 3:
+                chunks.append(chunk)
+    print(f"📦 Strategic sampling: {len(chunks)} chunks from long video")
+    return chunks
+def prepare_audio(video_url):
+    """Main function to extract and prepare audio chunks"""
+    try:
+        print(f"🎵 Extracting audio from video...")
+        audio_path = extract_audio_from_video_url(video_url)
+        print(f"✅ Audio extracted to: {audio_path}")
+        print(f"��� Loading and preparing audio...")
+        start = time.time()
+        waveform, sample_rate = torchaudio.load(audio_path)
+        # Resample to 16kHz if needed
+        if sample_rate != 16000:
+            waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
+            sample_rate = 16000
+        # Convert to mono if needed
+        if waveform.shape[0] > 1:
+            waveform = torch.mean(waveform, dim=0, keepdim=True)
+        end = time.time()
+        print(f"[⏱️] Audio preparation took {end - start:.2f} seconds.")
+        # # Apply simple VAD
+        # print(f"🎤 Applying Voice Activity Detection...")
+        # start = time.time()
+        # waveform = simple_vad(waveform, sample_rate)
+        # end = time.time()
+        # print(f"[⏱️] VAD took {end - start:.2f} seconds.")
+        # Calculate duration and apply smart chunking
+        duration_minutes = waveform.size(1) / sample_rate / 60
+        print(f"🧩 Smart chunking based on duration...")
+        start = time.time()
+        chunks = smart_chunk_audio(waveform, sample_rate, duration_minutes)
+        end = time.time()
+        print(f"[⏱️] Smart chunking took {end - start:.2f} seconds. Total chunks: {len(chunks)}")
+        return {
+            "success": True,
+            "chunks": chunks,
+            "audio_path": audio_path,
+            "duration_minutes": duration_minutes,
+            "total_chunks": len(chunks)
+        }
+    except Exception as e:
+        print(f"❌ Error in audio preparation: {str(e)}")
+        return {
+            "success": False,
+            "error": str(e),
+            "chunks": [],
+            "audio_path": None
+        }

chunck_time.py ADDED Viewed

	@@ -0,0 +1,261 @@

+import os
+import sys
+import warnings
+import time
+import statistics
+from collections import Counter
+import torch
+import torchaudio
+from speechbrain.inference.classifiers import EncoderClassifier
+from audio_extractor import extract_audio_from_video_url
+# Silence every Python warning for the whole process.
+warnings.filterwarnings("ignore")
+# Presumably turns off the Hugging Face Hub symlink warning (going by the
+# variable name) — TODO confirm against the huggingface_hub docs.
+os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'
+def create_chunks_by_size(waveform, sample_rate, chunk_length_sec):
+    """Create chunks of specific size"""
+    chunk_samples = chunk_length_sec * sample_rate
+    total_samples = waveform.size(1)
+    chunks = []
+    for start in range(0, total_samples, chunk_samples):
+        end = min(start + chunk_samples, total_samples)
+        chunk = waveform[:, start:end]
+        if chunk.size(1) > sample_rate * 2:  # minimum 2 seconds
+            chunks.append(chunk)
+    return chunks
+def predict_chunks_timing(chunks, classifier):
+    """Time the prediction process for chunks"""
+    if not chunks:
+        return [], 0.0
+    start_time = time.time()
+    # Pad to same length
+    max_len = max(chunk.size(1) for chunk in chunks)
+    padded_chunks = [torch.nn.functional.pad(chunk, (0, max_len - chunk.size(1))) for chunk in chunks]
+    batch = torch.cat(padded_chunks, dim=0).unsqueeze(1)
+    batch = batch.squeeze(1)
+    out_prob, score, index, text_lab = classifier.classify_batch(batch)
+    end_time = time.time()
+    prediction_time = end_time - start_time
+    results = []
+    for i in range(len(chunks)):
+        results.append({
+            "accent": text_lab[i],
+            "confidence": score[i].item(),
+        })
+    return results, prediction_time
def analyze_chunk_size_performance(video_url, chunk_sizes=(10, 15, 20, 30, 60)):
    """Benchmark accent prediction speed and confidence per chunk size.

    The audio for ``video_url`` is extracted once, resampled to 16 kHz if
    needed and averaged down to a single channel; the accent model is loaded
    once. Each chunk size is then chunked, classified and timed. Per-size
    statistics, a comparison table and recommendations are printed.

    Args:
        video_url: URL handed to ``extract_audio_from_video_url``.
        chunk_sizes: Iterable of chunk lengths in seconds. The default is a
            tuple so it cannot be mutated between calls.

    Returns:
        List of result dicts, one per chunk size that produced at least one
        chunk. Each dict also carries a ``balance_score`` key.
    """
    print("🔍 Starting Chunk Size Performance Analysis")
    print("=" * 60)

    # Extract and prepare audio once so every chunk size sees the same input.
    print("🎵 Extracting and preparing audio...")
    audio_start = time.perf_counter()
    audio_path = extract_audio_from_video_url(video_url)
    waveform, sample_rate = torchaudio.load(audio_path)
    if sample_rate != 16000:
        waveform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)(waveform)
        sample_rate = 16000
    if waveform.shape[0] > 1:
        # Average the channels down to mono, keeping the channel dimension.
        waveform = torch.mean(waveform, dim=0, keepdim=True)
    audio_prep_time = time.perf_counter() - audio_start
    duration_minutes = waveform.size(1) / sample_rate / 60
    print(f"✅ Audio prepared in {audio_prep_time:.2f}s | Duration: {duration_minutes:.1f} minutes")

    # Load the model once; loading is excluded from the per-size timings.
    print("🧠 Loading model...")
    model_start = time.perf_counter()
    classifier = EncoderClassifier.from_hparams(source="Jzuluaga/accent-id-commonaccent_ecapa")
    model_load_time = time.perf_counter() - model_start
    print(f"✅ Model loaded in {model_load_time:.2f}s")

    print("\n" + "=" * 60)
    print("📊 CHUNK SIZE ANALYSIS RESULTS")
    print("=" * 60)
    results = []
    for chunk_size in chunk_sizes:
        print(f"\n🧩 Testing {chunk_size}-second chunks...")
        chunk_start = time.perf_counter()
        chunks = create_chunks_by_size(waveform, sample_rate, chunk_size)
        chunking_time = time.perf_counter() - chunk_start
        if not chunks:
            print(f"❌ No valid chunks created for {chunk_size}s size")
            continue
        predictions, prediction_time = predict_chunks_timing(chunks, classifier)
        result = _summarize_chunk_run(
            chunk_size, len(chunks), chunking_time, prediction_time, predictions
        )
        results.append(result)
        print(f"  📦 Chunks created: {result['num_chunks']}")
        print(f"  ⏱️  Chunking time: {result['chunking_time']:.3f}s")
        print(f"  🧠 Prediction time: {result['prediction_time']:.3f}s")
        print(f"  🔄 Total processing: {result['total_time']:.3f}s")
        print(f"  ⚡ Processing rate: {result['chunks_per_second']:.1f} chunks/sec")
        print(f"  📈 Avg confidence: {result['avg_confidence']:.3f}")
        print(f"  🎯 Most common: {result['most_common_accent']} ({result['accent_occurrence']} times)")
        print(f"  📊 Confidence range: {result['min_confidence']:.3f} - {result['max_confidence']:.3f}")

    # Side-by-side comparison table.
    print("\n" + "=" * 80)
    print("📈 PERFORMANCE COMPARISON SUMMARY")
    print("=" * 80)
    if results:
        print(f"{'Size':<6} {'Chunks':<8} {'Total Time':<12} {'Rate':<12} {'Avg Conf':<10} {'Consistency':<12} {'Winner'}")
        print("-" * 80)
        for r in results:
            consistency = f"{r['confidence_consistency']:.2f}"
            print(f"{r['chunk_size']:<6} {r['num_chunks']:<8} {r['total_time']:<12.3f} {r['chunks_per_second']:<12.1f} {r['avg_confidence']:<10.3f} {consistency:<12} {r['most_common_accent']}")

    print("\n" + "=" * 60)
    print("🏆 RECOMMENDATIONS")
    print("=" * 60)
    if results:
        _print_chunk_recommendations(results)
    return results


def _summarize_chunk_run(chunk_size, num_chunks, chunking_time, prediction_time, predictions):
    """Build the statistics dict for one chunk size from its predictions."""
    confidences = [p["confidence"] for p in predictions]
    accent_counts = Counter(p["accent"] for p in predictions)
    avg_confidence = statistics.mean(confidences) if confidences else 0
    # stdev() needs at least two data points.
    std_confidence = statistics.stdev(confidences) if len(confidences) > 1 else 0
    top_accent, top_count = accent_counts.most_common(1)[0] if accent_counts else ("Unknown", 0)
    total_time = chunking_time + prediction_time
    return {
        "chunk_size": chunk_size,
        "num_chunks": num_chunks,
        "chunking_time": chunking_time,
        "prediction_time": prediction_time,
        "total_time": total_time,
        "avg_confidence": avg_confidence,
        "max_confidence": max(confidences) if confidences else 0,
        "min_confidence": min(confidences) if confidences else 0,
        "std_confidence": std_confidence,
        "most_common_accent": top_accent,
        "accent_occurrence": top_count,
        "chunks_per_second": num_chunks / total_time if total_time > 0 else 0,
        "seconds_per_chunk": total_time / num_chunks if num_chunks > 0 else 0,
        # One minus the coefficient of variation of the confidences.
        "confidence_consistency": 1 - (std_confidence / avg_confidence) if avg_confidence > 0 else 0,
    }


def _print_chunk_recommendations(results):
    """Print the best chunk size per criterion; adds ``balance_score`` to each result."""
    fastest = min(results, key=lambda x: x['total_time'])
    print(f"⚡ Fastest processing: {fastest['chunk_size']}s chunks ({fastest['total_time']:.2f}s total)")
    # Average confidence is used as the stand-in for accuracy here.
    most_accurate = max(results, key=lambda x: x['avg_confidence'])
    print(f"🎯 Highest accuracy: {most_accurate['chunk_size']}s chunks ({most_accurate['avg_confidence']:.3f} avg confidence)")
    most_consistent = max(results, key=lambda x: x['confidence_consistency'])
    print(f"📊 Most consistent: {most_consistent['chunk_size']}s chunks ({most_consistent['confidence_consistency']:.3f} consistency)")
    # Weighted blend: 40% throughput, 60% confidence (scaled to percent).
    for r in results:
        r['balance_score'] = (r['chunks_per_second'] * 0.4) + (r['avg_confidence'] * 100 * 0.6)
    best_balance = max(results, key=lambda x: x['balance_score'])
    print(f"⚖️  Best balance: {best_balance['chunk_size']}s chunks (score: {best_balance['balance_score']:.1f})")
def quick_test_multiple_videos(video_urls, chunk_sizes=(10, 15, 20, 30)):
    """Run the chunk-size benchmark on several videos and average the results.

    Args:
        video_urls: Sequence of video URLs; each is passed to
            ``analyze_chunk_size_performance``. A video that raises is
            reported and skipped.
        chunk_sizes: Iterable of chunk lengths in seconds. The default is a
            tuple so it cannot be mutated between calls.

    Returns:
        List of dicts, one per chunk size that has at least one successful
        run, holding the averaged time, rate, confidence and consistency plus
        ``sample_count``.
    """
    # Materialise once: the sizes are iterated several times below, which
    # would exhaust a generator.
    chunk_sizes = tuple(chunk_sizes)
    print("🔍 MULTI-VIDEO CHUNK SIZE ANALYSIS")
    print("=" * 60)
    all_results = {size: [] for size in chunk_sizes}
    for i, video_url in enumerate(video_urls, 1):
        print(f"\n📹 Testing Video {i}/{len(video_urls)}")
        try:
            video_results = analyze_chunk_size_performance(video_url, chunk_sizes)
        except Exception as e:
            # Best-effort batch run: report the failure and move on.
            print(f"❌ Error with video {i}: {str(e)}")
            continue
        for result in video_results:
            all_results[result['chunk_size']].append(result)

    print("\n" + "=" * 60)
    print("📊 AVERAGE PERFORMANCE ACROSS ALL VIDEOS")
    print("=" * 60)
    avg_results = []
    for chunk_size in chunk_sizes:
        runs = all_results[chunk_size]
        if not runs:
            continue
        avg_results.append({
            'chunk_size': chunk_size,
            'avg_total_time': statistics.mean(r['total_time'] for r in runs),
            'avg_chunks_per_sec': statistics.mean(r['chunks_per_second'] for r in runs),
            'avg_confidence': statistics.mean(r['avg_confidence'] for r in runs),
            'avg_consistency': statistics.mean(r['confidence_consistency'] for r in runs),
            'sample_count': len(runs),
        })
    if avg_results:
        print(f"{'Size':<6} {'Samples':<8} {'Avg Time':<10} {'Avg Rate':<10} {'Avg Conf':<10} {'Consistency'}")
        print("-" * 60)
        for r in avg_results:
            print(f"{r['chunk_size']:<6} {r['sample_count']:<8} {r['avg_total_time']:<10.2f} {r['avg_chunks_per_sec']:<10.1f} {r['avg_confidence']:<10.3f} {r['avg_consistency']:.3f}")
    return avg_results
if __name__ == "__main__":
    import sys

    # Benchmark video used when no URL is given on the command line.
    video_url = "https://www.youtube.com/watch?v=-JTq1BFBwmo&list=PLDN4rrl48XKpZkf03iYFl-O29szjTrs_O&index=2"
    # Usage: pass one URL to benchmark that video, or several URLs to get
    # results averaged across all of them.
    cli_urls = sys.argv[1:]
    if len(cli_urls) > 1:
        print("\n" + "="*60)
        print("🚀 Starting Multi-Video Analysis...")
        multi_results = quick_test_multiple_videos(cli_urls)
    else:
        if cli_urls:
            video_url = cli_urls[0]
        print("🚀 Starting Single Video Analysis...")
        results = analyze_chunk_size_performance(video_url)

dialect_predector.py ADDED Viewed

	@@ -0,0 +1,251 @@

+import os
+import sys
+import warnings
+import time
+from collections import Counter
+import torch
+from speechbrain.inference.classifiers import EncoderClassifier
+from audio_extractor import prepare_audio
+warnings.filterwarnings("ignore")
+os.environ['HF_HUB_DISABLE_SYMLINKS_WARNING'] = '1'
def predict_accent_from_chunks(chunks, classifier, early_stopping_threshold=3, confidence_threshold=0.6):
    """Classify chunks one at a time, stopping early on a confident streak.

    Args:
        chunks: Sequence of waveform tensors shaped ``[T]`` or ``[1, T]``.
            Chunks with any other shape are reported and skipped.
        classifier: Object exposing ``classify_batch(wavs)`` that returns
            ``(out_prob, score, index, text_lab)``.
        early_stopping_threshold: Number of consecutive confident chunks with
            the same accent that ends the loop.
        confidence_threshold: A prediction counts as confident when its score
            is strictly greater than this value.

    Returns:
        List of dicts, one per classified chunk, with keys
        ``chunk_index_original`` (1-based position in ``chunks``), ``accent``,
        ``confidence``, ``class_index``, ``is_confident`` and ``chunk``
        (1-based position among the classified chunks).
    """
    print(f"\n📦 Running prediction for up to {len(chunks)} chunks with early stopping (threshold={early_stopping_threshold}, confidence>{confidence_threshold*100}%)...")
    iterative_start_time = time.perf_counter()
    results = []
    consecutive_dialect_count = 0
    last_dialect = None
    for i, chunk_tensor in enumerate(chunks):
        if chunk_tensor.ndim == 1:
            chunk_tensor = chunk_tensor.unsqueeze(0)  # Shape: [1, T]
        elif not (chunk_tensor.ndim == 2 and chunk_tensor.shape[0] == 1):
            print(f"Warning: Chunk {i+1} has unexpected shape {chunk_tensor.shape}. Required [T] or [1,T]. Skipping.")
            continue
        # Classify this single chunk as a batch of one.
        out_prob, score, index, text_lab = classifier.classify_batch(chunk_tensor)
        accent = text_lab[0]
        confidence = score[0].item()
        is_confident = confidence > confidence_threshold
        confidence_indicator = "✓" if is_confident else "✗"
        print(f"Chunk {i+1}/{len(chunks)}: {accent} | Confidence: {confidence:.2f} {confidence_indicator}")
        results.append({
            "chunk_index_original": i + 1,
            "accent": accent,
            "confidence": confidence,
            "class_index": index[0].item(),
            "is_confident": is_confident,
            # Sequential number among classified chunks (skips not counted).
            "chunk": len(results) + 1,
        })
        if not is_confident:
            # A low-confidence prediction breaks the streak.
            consecutive_dialect_count = 0
            last_dialect = None
            continue
        if accent == last_dialect:
            consecutive_dialect_count += 1
        else:
            last_dialect = accent
            consecutive_dialect_count = 1
        if consecutive_dialect_count >= early_stopping_threshold:
            print(f"\n⚠️ Early stopping triggered after processing chunk {i+1}: "
                  f"{early_stopping_threshold} consecutive confident chunks predicted '{last_dialect}'.")
            break
    elapsed = time.perf_counter() - iterative_start_time
    num_actually_processed = len(results)
    confident_predictions = sum(1 for r in results if r["is_confident"])
    print(f"[⏱️] Prediction for {num_actually_processed} out of {len(chunks)} available chunks took {elapsed:.2f} seconds.")
    print(f"[📊] {confident_predictions}/{num_actually_processed} predictions were confident (>{confidence_threshold*100}%).")
    return results
def get_final_verdict(chunk_results, confidence_threshold=0.6):
    """Pick the overall accent from per-chunk predictions.

    Only predictions whose confidence is strictly above
    ``confidence_threshold`` are used; if none qualify, every prediction is
    used instead. The winner is the accent with the largest summed
    confidence, and its reported confidence is that sum divided by the
    accent's count.

    Returns:
        ``(accent, confidence, counts_used, counts_all)``. For empty input
        this is ``(None, 0.0, {}, {})``.
    """
    if not chunk_results:
        return None, 0.0, {}, {}

    trusted = [item for item in chunk_results if item["confidence"] > confidence_threshold]
    if len(trusted) == 0:
        print(f"\n⚠️ No confident predictions found (confidence > {confidence_threshold*100}%). Using all predictions as fallback.")
        trusted = chunk_results

    # Tallies: the predictions used for the verdict vs. every prediction.
    trusted_counts = Counter(item["accent"] for item in trusted)
    overall_counts = Counter(item["accent"] for item in chunk_results)

    confidence_totals = {}
    for item in trusted:
        label = item["accent"]
        confidence_totals[label] = confidence_totals.get(label, 0.0) + item["confidence"]

    winner = max(confidence_totals, key=confidence_totals.get)
    winner_confidence = confidence_totals[winner] / trusted_counts[winner]

    print(f"\n📊 Accent Analysis (based on {len(trusted)} confident predictions out of {len(chunk_results)} total):")
    print(f"    Confident predictions (confidence > {confidence_threshold*100}%):")
    for label, count in trusted_counts.items():
        total_conf = confidence_totals[label]
        avg_conf = total_conf / count
        print(f"      {label}: {count} chunks, total confidence: {total_conf:.2f}, avg confidence: {avg_conf:.2f}")
    print(f"    All predictions (including low confidence):")
    for label, count in overall_counts.items():
        print(f"      {label}: {count} chunks")

    return winner, winner_confidence, trusted_counts, overall_counts
def analyze_video_accent(video_url, confidence_threshold=0.6):
    """Run the accent pipeline for one video and return a result dict.

    Steps: prepare audio chunks, load the accent model, classify the chunks
    with early stopping, then derive a final verdict from the predictions
    above ``confidence_threshold``. Any exception is caught and reported in
    the returned dict (``"success": False``) rather than raised.
    """
    total_start = time.time()

    def failure(message, label="Error", elapsed=None, **details):
        # Shared shape of every unsuccessful result; ``details`` keys are
        # placed between the URL and the processing time.
        payload = {
            "success": False, "error": message, "predicted_accent": label,
            "confidence_score": 0.0, "confidence_percentage": "0.0%", "video_url": video_url,
        }
        payload.update(details)
        payload["processing_time"] = time.time() - total_start if elapsed is None else elapsed
        return payload

    try:
        audio_result = prepare_audio(video_url)
        if not audio_result["success"]:
            return failure(audio_result["error"])

        chunks = audio_result["chunks"]
        available_chunks_count = len(chunks)
        if not chunks:
            return failure(
                "No valid audio chunks found",
                available_chunks_count=0, processed_chunks_count=0,
            )

        print(f"🧠 Loading accent classification model...")
        load_model_start = time.time()
        classifier = EncoderClassifier.from_hparams(source="Jzuluaga/accent-id-commonaccent_ecapa")
        load_model_end = time.time()
        print(f"[⏱️] Model loading took {load_model_end - load_model_start:.2f} seconds.")

        chunk_results = predict_accent_from_chunks(chunks, classifier, confidence_threshold=confidence_threshold)
        processed_chunks_count = len(chunk_results)
        verdict = get_final_verdict(chunk_results, confidence_threshold)
        final_accent, final_confidence, confident_accent_counts, all_accent_counts = verdict
        if final_accent is None:
            return failure(
                "Could not determine accent (no chunks processed or no consensus)",
                label="Unknown",
                available_chunks_count=available_chunks_count,
                processed_chunks_count=processed_chunks_count,
                chunk_results=chunk_results,
            )

        # Confidence statistics over all processed vs. confident-only chunks.
        confident_chunks = [r for r in chunk_results if r["confidence"] > confidence_threshold]
        confident_chunks_count = len(confident_chunks)
        if processed_chunks_count > 0:
            avg_conf_processed_chunks = sum(r["confidence"] for r in chunk_results) / processed_chunks_count
        else:
            avg_conf_processed_chunks = 0.0
        if confident_chunks_count > 0:
            avg_conf_confident_chunks = sum(r["confidence"] for r in confident_chunks) / confident_chunks_count
        else:
            avg_conf_confident_chunks = 0.0

        total_processing_time = time.time() - total_start
        print(f"\n[⏱️] 🔁 Total pipeline time: {total_processing_time:.2f} seconds.")

        winning_chunks_for_final_accent = confident_accent_counts.get(final_accent, 0)
        # Fewer results than chunks means the loop ended (or skipped) early.
        early_stopped = processed_chunks_count < available_chunks_count
        print(f"\n✅ Final Verdict: {final_accent}")
        print(f"📈 Final Confidence (for '{final_accent}'): {final_confidence:.2f}")
        print(f"🎯 Based on {winning_chunks_for_final_accent} confident occurrences out of {confident_chunks_count} confident chunks.")
        print(f"   ({confident_chunks_count}/{processed_chunks_count} chunks were confident, threshold: {confidence_threshold*100}%)")
        if early_stopped:
            print(f"   (Early stopping occurred. {available_chunks_count} chunks were available in total).")
        print(f"📊 Average Confidence Across All Processed Chunks: {avg_conf_processed_chunks:.2f}")
        print(f"📊 Average Confidence Across Confident Chunks: {avg_conf_confident_chunks:.2f}")

        return {
            "success": True,
            "predicted_accent": final_accent,
            "confidence_score": final_confidence,
            "confidence_percentage": f"{final_confidence * 100:.1f}%",
            "confidence_threshold": confidence_threshold,
            "average_confidence_processed_chunks": avg_conf_processed_chunks,
            "average_confidence_confident_chunks": avg_conf_confident_chunks,
            "confident_accent_counts": dict(confident_accent_counts),
            "all_accent_counts": dict(all_accent_counts),
            "processed_chunks_count": processed_chunks_count,
            "confident_chunks_count": confident_chunks_count,
            "available_chunks_count": available_chunks_count,
            "winning_chunks_for_final_accent": winning_chunks_for_final_accent,
            "audio_file": audio_result.get("audio_path"),
            "video_url": video_url,
            "duration_minutes": audio_result.get("duration_minutes"),
            "chunk_results": chunk_results,
            "processing_time": total_processing_time,
            "early_stopped": early_stopped,
        }
    except Exception as e:
        # Pipeline boundary: report the failure to the caller as data.
        processing_time_before_error = time.time() - total_start
        print(f"❌ Error: {str(e)}")
        print(f"[⏱️] Total time before error: {processing_time_before_error:.2f} seconds.")
        return failure(str(e), elapsed=processing_time_before_error)
if __name__ == "__main__":
    # Manual smoke run against a sample YouTube short.
    video_url = "https://www.youtube.com/shorts/sWUvKMC2450"
    result = analyze_video_accent(video_url, confidence_threshold=0.6)
    if not result["success"]:
        print(f"❌ Error: {result['error']}")
        print(f"⏱️ Processing Time: {result.get('processing_time', 0):.2f} seconds")
    else:
        print(f"\n🎤 Final Predicted Accent: {result['predicted_accent']}")
        print(f"🔢 Confidence Score: {result['confidence_score']:.4f}")
        print(f"📊 Confidence Percentage: {result['confidence_percentage']}")
        print(f"🎯 Based on {result['confident_chunks_count']} confident chunks out of {result['processed_chunks_count']} total")

pretrained_models/accent-id-commonaccent_ecapa/hyperparams.yaml ADDED Viewed

	@@ -0,0 +1 @@


1	+ C:/Users/Amr/.cache/huggingface/hub/models--Jzuluaga--accent-id-commonaccent_ecapa/snapshots/14bebf44b7e7a34204d0acc2c897935945fb5c51/hyperparams.yaml

requirements.txt ADDED Viewed

	@@ -0,0 +1,13 @@

+yt_dlp==2025.5.22
+speechbrain==1.0.3
+torch==2.7.0+cpu
+torchaudio==2.7.0+cpu
+requests==2.32.3
+ipywidgets==8.1.5
+IPython==7.34.0
+ffmpeg-python==0.2.0
+validators==0.35.0
+streamlit==1.45.1
+plotly==6.1.2
+pandas==2.2.3
+numpy==2.2.6