"""Plot and summarise RAG benchmark scores for three retrieval/LLM setups.

The script reads one Excel score sheet, splits it into three per-configuration
frames (Milvus + LLaMA, Weaviate + Mistral, Milvus + Mistral), writes box,
line and violin plots as PNG files, and prints summary statistics.
"""
import os
from typing import Tuple

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Directory every figure is written to unless a caller overrides it.
OUTPUT_DIR = "report/visualizations"

# Column names assigned to the LLaMA block (sheet columns C to H).
_LLAMA_COLUMNS = [
    'RMSE_Context_Rel', 'RMSE_Context_Util', 'AUCROC',
    'Retrieval_Time', 'Context_Relevance', 'Context_Utilization',
]

# Column names assigned to both Mistral blocks (sheet columns J to P, R to X).
_MISTRAL_COLUMNS = [
    'Retrieval_Time', 'Context_Rel', 'Util', 'Adherence',
    'RMSE_Context_Rel', 'RMSE_Context_Util', 'AUCROC',
]


def _extract_block(df: pd.DataFrame, start: int, stop: int, columns) -> pd.DataFrame:
    """Return positional columns ``start:stop`` of *df* renamed and cast to float."""
    if df.shape[1] < stop:
        # Fail with a readable message instead of pandas' "Length mismatch".
        raise ValueError(
            f"Score sheet has {df.shape[1]} columns; "
            f"columns {start}..{stop - 1} are required"
        )
    block = df.iloc[:, start:stop].copy()
    block.columns = list(columns)
    # The sheet marks missing scores with the literal text 'na'.
    return block.replace('na', np.nan).astype(float)


def load_and_preprocess_data(
    file_path: str,
) -> Tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Load the score sheet and split it into one frame per configuration.

    Args:
        file_path: Path of the Excel workbook to read.

    Returns:
        ``(milvus_llama, weaviate_mistral, milvus_mistral)``, each a float
        DataFrame in which cells that held ``'na'`` are NaN.

    Raises:
        ValueError: If the sheet has fewer columns than the slices need, or a
            cell holds text other than ``'na'`` that cannot be cast to float.
    """
    # Read Excel file, skipping the first 2 rows
    df = pd.read_excel(file_path, skiprows=2)

    milvus_llama = _extract_block(df, 2, 8, _LLAMA_COLUMNS)          # Columns C to H
    weaviate_mistral = _extract_block(df, 9, 16, _MISTRAL_COLUMNS)   # Columns J to P
    milvus_mistral = _extract_block(df, 17, 24, _MISTRAL_COLUMNS)    # Columns R to X
    return milvus_llama, weaviate_mistral, milvus_mistral


def _labelled_frames(milvus_llama, weaviate_mistral, milvus_mistral):
    """Map each configuration's display label to its frame, in plot order."""
    return {
        'Milvus + LLaMA': milvus_llama,
        'Weaviate + Mistral': weaviate_mistral,
        'Milvus + Mistral': milvus_mistral,
    }


def _metric_frame(frames, metric: str) -> pd.DataFrame:
    """Build a wide frame with one column per configuration for *metric*."""
    return pd.DataFrame(
        {label: frame[metric].dropna() for label, frame in frames.items()}
    )


def _save_figure(fig, output_dir: str, filename: str) -> None:
    """Tighten the layout, write *fig* to ``output_dir/filename`` and close it."""
    if output_dir:
        # Lets the plot functions be called on their own, outside main().
        os.makedirs(output_dir, exist_ok=True)
    fig.tight_layout()
    fig.savefig(os.path.join(output_dir, filename), dpi=300, bbox_inches='tight')
    plt.close(fig)


def create_performance_comparison(
    milvus_llama: pd.DataFrame,
    weaviate_mistral: pd.DataFrame,
    milvus_mistral: pd.DataFrame,
    output_dir: str = OUTPUT_DIR,
) -> None:
    """Save a 2x2 grid of box plots comparing the three configurations.

    The panels show retrieval time, RMSE of context relevance, RMSE of context
    utilization and AUROC. The figure is written to
    ``performance_comparison.png`` inside *output_dir*.

    Args:
        milvus_llama: Scores for the Milvus + LLaMA configuration.
        weaviate_mistral: Scores for the Weaviate + Mistral configuration.
        milvus_mistral: Scores for the Milvus + Mistral configuration.
        output_dir: Directory the PNG is written to; created if missing.
    """
    plt.style.use('default')  # Using default style instead of seaborn
    frames = _labelled_frames(milvus_llama, weaviate_mistral, milvus_mistral)

    # (metric column, panel title, y-axis label or None to leave it unset)
    panels = [
        ('Retrieval_Time', 'VectorDB Retrieval Time Comparison', 'Time (seconds)'),
        ('RMSE_Context_Rel', 'RMSE Context Relevance', None),
        ('RMSE_Context_Util', 'RMSE Context Utilization', None),
        ('AUCROC', 'AUROC Scores', None),
    ]

    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    # axes.flat walks the grid row by row, matching the order of `panels`.
    for ax, (metric, title, ylabel) in zip(axes.flat, panels):
        sns.boxplot(data=_metric_frame(frames, metric), ax=ax)
        ax.set_title(title)
        if ylabel is not None:
            ax.set_ylabel(ylabel)
        ax.tick_params(axis='x', rotation=45)

    _save_figure(fig, output_dir, 'performance_comparison.png')


def _plot_score_vs_rmse(label, frame, rel_col, util_col, output_dir, filename):
    """Save a two-panel line plot of scores against their RMSE for one model.

    The top panel plots context relevance, the bottom panel context
    utilization; each panel overlays the score column and its RMSE column
    against the row position.
    """
    fig, (rel_ax, util_ax) = plt.subplots(2, 1, figsize=(15, 10))
    positions = range(len(frame))

    # (axes, RMSE column, score column, display name, RMSE colour, score colour)
    panels = [
        (rel_ax, 'RMSE_Context_Rel', rel_col, 'Context Relevance', 'red', 'darkred'),
        (util_ax, 'RMSE_Context_Util', util_col, 'Context Utilization', 'blue', 'darkblue'),
    ]
    for ax, rmse_col, score_col, name, rmse_color, score_color in panels:
        ax.plot(positions, frame[rmse_col], 'o--', color=rmse_color,
                label=f'RMSE {name}', linewidth=2, alpha=0.7)
        ax.plot(positions, frame[score_col], 'o-', color=score_color,
                label=name, linewidth=2, alpha=0.7)
        ax.set_title(f'{label}: {name} vs RMSE')
        ax.set_xlabel('Data Points')
        ax.set_ylabel('Score')
        ax.grid(True, linestyle='--', alpha=0.7)
        ax.legend()

    _save_figure(fig, output_dir, filename)


def create_correlation_plots(
    milvus_llama: pd.DataFrame,
    weaviate_mistral: pd.DataFrame,
    milvus_mistral: pd.DataFrame,
    output_dir: str = OUTPUT_DIR,
) -> None:
    """Save one score-vs-RMSE figure per configuration and print mean/std.

    Writes ``milvus_llama_plots.png``, ``weaviate_mistral_plots.png`` and
    ``milvus_mistral_plots.png`` into *output_dir*, then prints the mean and
    standard deviation of the relevance, utilization and RMSE columns.

    Args:
        milvus_llama: Scores for the Milvus + LLaMA configuration.
        weaviate_mistral: Scores for the Weaviate + Mistral configuration.
        milvus_mistral: Scores for the Milvus + Mistral configuration.
        output_dir: Directory the PNGs are written to; created if missing.
    """
    # The LLaMA frame names its score columns differently from the Mistral
    # frames, so each entry carries its own column names.
    # (label, frame, relevance column, utilization column, output file)
    models = [
        ('Milvus + LLaMA', milvus_llama,
         'Context_Relevance', 'Context_Utilization', 'milvus_llama_plots.png'),
        ('Weaviate + Mistral', weaviate_mistral,
         'Context_Rel', 'Util', 'weaviate_mistral_plots.png'),
        ('Milvus + Mistral', milvus_mistral,
         'Context_Rel', 'Util', 'milvus_mistral_plots.png'),
    ]

    # Create separate plots for each model
    for label, frame, rel_col, util_col, filename in models:
        _plot_score_vs_rmse(label, frame, rel_col, util_col, output_dir, filename)

    # Print statistical analysis for each model
    print("\nStatistical Analysis:")
    for label, frame, rel_col, util_col, _ in models:
        rel = frame[rel_col]
        util = frame[util_col]
        rmse_rel = frame['RMSE_Context_Rel']
        rmse_util = frame['RMSE_Context_Util']
        print(f"\n{label}:")
        print(f"Context Relevance - Mean: {rel.mean():.3f}, Std: {rel.std():.3f}")
        print(f"RMSE Context Rel - Mean: {rmse_rel.mean():.3f}, Std: {rmse_rel.std():.3f}")
        print(f"Context Utilization - Mean: {util.mean():.3f}, Std: {util.std():.3f}")
        print(f"RMSE Context Util - Mean: {rmse_util.mean():.3f}, Std: {rmse_util.std():.3f}")


def create_violin_plots(
    milvus_llama: pd.DataFrame,
    weaviate_mistral: pd.DataFrame,
    milvus_mistral: pd.DataFrame,
    output_dir: str = OUTPUT_DIR,
) -> None:
    """Save side-by-side violin plots of the RMSE and AUROC distributions.

    The figure is written to ``metric_distributions.png`` inside *output_dir*.

    Args:
        milvus_llama: Scores for the Milvus + LLaMA configuration.
        weaviate_mistral: Scores for the Weaviate + Mistral configuration.
        milvus_mistral: Scores for the Milvus + Mistral configuration.
        output_dir: Directory the PNG is written to; created if missing.
    """
    metrics = ['RMSE_Context_Rel', 'RMSE_Context_Util', 'AUCROC']
    frames = _labelled_frames(milvus_llama, weaviate_mistral, milvus_mistral)

    fig, axes = plt.subplots(1, 3, figsize=(15, 5))
    for ax, metric in zip(axes, metrics):
        sns.violinplot(data=_metric_frame(frames, metric), ax=ax)
        ax.set_title(f'{metric} Distribution')
        ax.tick_params(axis='x', rotation=45)

    _save_figure(fig, output_dir, 'metric_distributions.png')


def print_summary_statistics(
    milvus_llama: pd.DataFrame,
    weaviate_mistral: pd.DataFrame,
    milvus_mistral: pd.DataFrame,
) -> None:
    """Print ``describe()`` of each configuration's frame, rounded to 4 places."""
    print("\nSummary Statistics:")
    frames = _labelled_frames(milvus_llama, weaviate_mistral, milvus_mistral)
    for label, frame in frames.items():
        print(f"\n{label}:")
        print(frame.describe().round(4))


def main() -> None:
    """Load the score sheet, write every figure and print the statistics."""
    # Create visualizations directory
    os.makedirs("report/visualizations", exist_ok=True)

    # Load data
    milvus_llama, weaviate_mistral, milvus_mistral = load_and_preprocess_data(
        "report/Scores for RAGBenchCapstone.xlsx"
    )

    # Create visualizations
    create_performance_comparison(milvus_llama, weaviate_mistral, milvus_mistral)
    create_correlation_plots(milvus_llama, weaviate_mistral, milvus_mistral)
    create_violin_plots(milvus_llama, weaviate_mistral, milvus_mistral)

    # Print statistics
    print_summary_statistics(milvus_llama, weaviate_mistral, milvus_mistral)


if __name__ == "__main__":
    main()