# Spaces:
# Sleeping
# Sleeping
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
def _extract_configuration(df, start, stop, column_names):
    """Slice one configuration's columns out of *df* and convert them to float.

    Args:
        df: The raw sheet as read by ``pd.read_excel``.
        start: Zero-based position of the first column to take (inclusive).
        stop: Zero-based position one past the last column to take.
        column_names: Names to assign to the selected columns, in order.

    Returns:
        A new float DataFrame with the literal string ``'na'`` replaced by NaN.

    Raises:
        ValueError: If the sheet does not have a column for every name, or if
            a cell holds a non-numeric value other than ``'na'``.
    """
    block = df.iloc[:, start:stop].copy()
    # iloc silently truncates out-of-range slices, so check the width here
    # instead of letting the column assignment fail with a vaguer message.
    if block.shape[1] != len(column_names):
        raise ValueError(
            f"Expected {len(column_names)} columns at positions "
            f"{start}-{stop - 1}, found {block.shape[1]}; "
            "the sheet layout does not match."
        )
    block.columns = list(column_names)
    return block.replace('na', np.nan).astype(float)


def load_and_preprocess_data(file_path):
    """Load the score sheet and split it into one DataFrame per configuration.

    The Excel file is read with its first two rows skipped. Three column
    ranges are then selected by position, renamed, and converted to float
    with ``'na'`` cells turned into NaN.

    Args:
        file_path: Path of the Excel workbook to read.

    Returns:
        A tuple ``(milvus_llama, weaviate_mistral, milvus_mistral)`` of float
        DataFrames.

    Raises:
        ValueError: If the sheet has too few columns or contains non-numeric
            cells other than ``'na'``.
    """
    df = pd.read_excel(file_path, skiprows=2)

    # Both Mistral configurations share the same column layout.
    mistral_columns = (
        'Retrieval_Time', 'Context_Rel', 'Util',
        'Adherence', 'RMSE_Context_Rel', 'RMSE_Context_Util', 'AUCROC',
    )
    llama_columns = (
        'RMSE_Context_Rel', 'RMSE_Context_Util', 'AUCROC',
        'Retrieval_Time', 'Context_Relevance', 'Context_Utilization',
    )

    milvus_llama = _extract_configuration(df, 2, 8, llama_columns)  # Columns C to H
    weaviate_mistral = _extract_configuration(df, 9, 16, mistral_columns)  # Columns J to P
    milvus_mistral = _extract_configuration(df, 17, 24, mistral_columns)  # Columns R to X

    return milvus_llama, weaviate_mistral, milvus_mistral
def create_performance_comparison(milvus_llama, weaviate_mistral, milvus_mistral):
    """Save a 2x2 grid of box plots comparing the three configurations.

    The panels show, in row-major order, ``Retrieval_Time``,
    ``RMSE_Context_Rel``, ``RMSE_Context_Util`` and ``AUCROC``. NaN values
    are dropped per configuration before plotting. The figure is written to
    ``report/visualizations/performance_comparison.png``.

    Args:
        milvus_llama: DataFrame for the "Milvus + LLaMA" configuration.
        weaviate_mistral: DataFrame for the "Weaviate + Mistral" configuration.
        milvus_mistral: DataFrame for the "Milvus + Mistral" configuration.
    """
    output_path = 'report/visualizations/performance_comparison.png'
    frames = {
        'Milvus + LLaMA': milvus_llama,
        'Weaviate + Mistral': weaviate_mistral,
        'Milvus + Mistral': milvus_mistral,
    }
    # One entry per panel: (column, title, y-axis label or None).
    panels = [
        ('Retrieval_Time', 'VectorDB Retrieval Time Comparison', 'Time (seconds)'),
        ('RMSE_Context_Rel', 'RMSE Context Relevance', None),
        ('RMSE_Context_Util', 'RMSE Context Utilization', None),
        ('AUCROC', 'AUROC Scores', None),
    ]

    plt.style.use('default')  # Using default style instead of seaborn
    fig, axes = plt.subplots(2, 2, figsize=(15, 12))
    try:
        # axes.flat walks the grid row by row, matching the order of `panels`.
        for ax, (column, title, ylabel) in zip(axes.flat, panels):
            data = pd.DataFrame(
                {label: frame[column].dropna() for label, frame in frames.items()}
            )
            sns.boxplot(data=data, ax=ax)
            ax.set_title(title)
            if ylabel is not None:
                ax.set_ylabel(ylabel)
            ax.tick_params(axis='x', rotation=45)

        fig.tight_layout()
        # Create the target directory so the function also works when called
        # on its own, not only from main().
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)
def _plot_scores_vs_rmse(frame, label, relevance_col, utilization_col, output_path):
    """Save a two-panel line plot of scores against their RMSE columns.

    The top panel plots ``RMSE_Context_Rel`` and *relevance_col*; the bottom
    panel plots ``RMSE_Context_Util`` and *utilization_col*. The x-axis is
    the row position within *frame*.

    Args:
        frame: DataFrame holding the columns to plot.
        label: Configuration name used as the prefix of each panel title.
        relevance_col: Name of the context-relevance score column.
        utilization_col: Name of the context-utilization score column.
        output_path: File path the figure is written to.
    """
    # One entry per panel: (metric name, RMSE column, score column,
    # RMSE line colour, score line colour).
    panels = [
        ('Context Relevance', 'RMSE_Context_Rel', relevance_col, 'red', 'darkred'),
        ('Context Utilization', 'RMSE_Context_Util', utilization_col, 'blue', 'darkblue'),
    ]
    positions = range(len(frame))

    fig = plt.figure(figsize=(15, 10))
    try:
        for row, (metric, rmse_col, score_col, rmse_color, score_color) in enumerate(panels, 1):
            ax = fig.add_subplot(2, 1, row)
            ax.plot(positions, frame[rmse_col], 'o--',
                    color=rmse_color, label=f'RMSE {metric}', linewidth=2, alpha=0.7)
            ax.plot(positions, frame[score_col], 'o-',
                    color=score_color, label=metric, linewidth=2, alpha=0.7)
            ax.set_title(f'{label}: {metric} vs RMSE')
            ax.set_xlabel('Data Points')
            ax.set_ylabel('Score')
            ax.grid(True, linestyle='--', alpha=0.7)
            ax.legend()

        fig.tight_layout()
        # Create the target directory so saving does not depend on main().
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        fig.savefig(output_path, bbox_inches='tight', dpi=300)
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)


def create_correlation_plots(milvus_llama, weaviate_mistral, milvus_mistral):
    """Save one score-vs-RMSE figure per configuration and print mean/std.

    For each configuration a two-panel figure (relevance on top, utilization
    below) is written under ``report/visualizations/``. After all figures are
    saved, the mean and standard deviation of the four plotted columns are
    printed for each configuration.

    Args:
        milvus_llama: DataFrame with ``Context_Relevance`` and
            ``Context_Utilization`` score columns.
        weaviate_mistral: DataFrame with ``Context_Rel`` and ``Util`` score
            columns.
        milvus_mistral: DataFrame with ``Context_Rel`` and ``Util`` score
            columns.
    """
    # (label, frame, relevance column, utilization column, output file).
    # The LLaMA frame names its score columns differently from the Mistral ones.
    configurations = [
        ('Milvus + LLaMA', milvus_llama, 'Context_Relevance', 'Context_Utilization',
         'report/visualizations/milvus_llama_plots.png'),
        ('Weaviate + Mistral', weaviate_mistral, 'Context_Rel', 'Util',
         'report/visualizations/weaviate_mistral_plots.png'),
        ('Milvus + Mistral', milvus_mistral, 'Context_Rel', 'Util',
         'report/visualizations/milvus_mistral_plots.png'),
    ]

    # Create separate plots for each model
    for label, frame, relevance_col, utilization_col, output_path in configurations:
        _plot_scores_vs_rmse(frame, label, relevance_col, utilization_col, output_path)

    # Print statistical analysis for each model
    print("\nStatistical Analysis:")
    for label, frame, relevance_col, utilization_col, _ in configurations:
        rel = frame[relevance_col]
        rmse_rel = frame['RMSE_Context_Rel']
        util = frame[utilization_col]
        rmse_util = frame['RMSE_Context_Util']
        print(f"\n{label}:")
        print(f"Context Relevance - Mean: {rel.mean():.3f}, Std: {rel.std():.3f}")
        print(f"RMSE Context Rel - Mean: {rmse_rel.mean():.3f}, Std: {rmse_rel.std():.3f}")
        print(f"Context Utilization - Mean: {util.mean():.3f}, Std: {util.std():.3f}")
        print(f"RMSE Context Util - Mean: {rmse_util.mean():.3f}, Std: {rmse_util.std():.3f}")
def create_violin_plots(milvus_llama, weaviate_mistral, milvus_mistral):
    """Save side-by-side violin plots of three metrics for all configurations.

    One panel is drawn for each of ``RMSE_Context_Rel``,
    ``RMSE_Context_Util`` and ``AUCROC``; NaN values are dropped per
    configuration. The figure is written to
    ``report/visualizations/metric_distributions.png``.

    Args:
        milvus_llama: DataFrame for the "Milvus + LLaMA" configuration.
        weaviate_mistral: DataFrame for the "Weaviate + Mistral" configuration.
        milvus_mistral: DataFrame for the "Milvus + Mistral" configuration.
    """
    output_path = 'report/visualizations/metric_distributions.png'
    metrics = ['RMSE_Context_Rel', 'RMSE_Context_Util', 'AUCROC']
    frames = {
        'Milvus + LLaMA': milvus_llama,
        'Weaviate + Mistral': weaviate_mistral,
        'Milvus + Mistral': milvus_mistral,
    }

    fig, axes = plt.subplots(1, len(metrics), figsize=(15, 5))
    try:
        for ax, metric in zip(axes, metrics):
            data = pd.DataFrame(
                {label: frame[metric].dropna() for label, frame in frames.items()}
            )
            # Pass the axes explicitly rather than relying on pyplot's
            # current-axes state.
            sns.violinplot(data=data, ax=ax)
            ax.set_title(f'{metric} Distribution')
            ax.tick_params(axis='x', rotation=45)

        fig.tight_layout()
        # Create the target directory so saving does not depend on main().
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        fig.savefig(output_path, dpi=300, bbox_inches='tight')
    finally:
        # Release the figure even if plotting or saving fails.
        plt.close(fig)
def print_summary_statistics(milvus_llama, weaviate_mistral, milvus_mistral):
    """Print ``describe()`` output, rounded to 4 decimals, per configuration.

    Args:
        milvus_llama: DataFrame printed under the "Milvus + LLaMA" heading.
        weaviate_mistral: DataFrame printed under the "Weaviate + Mistral"
            heading.
        milvus_mistral: DataFrame printed under the "Milvus + Mistral" heading.
    """
    labelled_frames = (
        ("Milvus + LLaMA", milvus_llama),
        ("Weaviate + Mistral", weaviate_mistral),
        ("Milvus + Mistral", milvus_mistral),
    )

    print("\nSummary Statistics:")
    for label, frame in labelled_frames:
        print(f"\n{label}:")
        print(frame.describe().round(4))
def main():
    """Run the full report: load the score sheet, save plots, print statistics.

    Reads ``report/Scores for RAGBenchCapstone.xlsx`` and writes all figures
    under ``report/visualizations``, creating that directory if needed.
    """
    # Create visualizations directory
    os.makedirs("report/visualizations", exist_ok=True)

    # Load data
    frames = load_and_preprocess_data("report/Scores for RAGBenchCapstone.xlsx")

    # Create visualizations
    create_performance_comparison(*frames)
    create_correlation_plots(*frames)
    create_violin_plots(*frames)

    # Print statistics
    print_summary_statistics(*frames)
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()