# Spaces: Runtime error
import gradio as gr | |
import pandas as pd | |
import numpy as np | |
import pandas as pd | |
import scipy.stats as ss | |
import seaborn as sns | |
from scipy.stats import chi2_contingency | |
import numpy as np | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
import os | |
def cramers_V(var1, var2):
    """Return Cramér's V, a measure of association between two categorical variables.

    Args:
        var1: First categorical variable (e.g. a pandas Series).
        var2: Second categorical variable, aligned element-wise with ``var1``.

    Returns:
        ``sqrt(chi2 / (n * min(rows - 1, cols - 1)))`` computed from the
        contingency table of the two variables, or ``nan`` when the statistic
        is undefined (no observations, or either variable has fewer than two
        distinct levels).
    """
    # Cross table building
    crosstab = np.array(pd.crosstab(var1, var2, rownames=None, colnames=None))
    obs = np.sum(crosstab)  # Number of observations
    # Smaller of (rows - 1, cols - 1): the normalising degrees of freedom
    mini = min(crosstab.shape) - 1
    if obs == 0 or mini <= 0:
        # Degenerate table: the ratio below would be 0/0 (or chi2 would raise).
        return float("nan")
    # Yates' continuity correction (SciPy's default for 2x2 tables) shrinks
    # chi2, so a binary variable compared with itself would not reach 1.
    stat = chi2_contingency(crosstab, correction=False)[0]
    # The square root turns the normalised chi2 (V squared) into V itself.
    return np.sqrt(stat / (obs * mini))
def predict(file_obj):
    """Plot a heatmap of pairwise ``cramers_V`` scores for a CSV's text columns.

    Args:
        file_obj: The uploaded CSV, given either as a filesystem path
            (``str`` / ``os.PathLike``) or as an object whose ``name``
            attribute holds the path.

    Returns:
        The current matplotlib figure, containing the annotated heatmap.

    Raises:
        ValueError: If the CSV contains no ``object``-dtype columns.
    """
    # Accept a bare path as well as a file wrapper exposing ``.name``.
    if isinstance(file_obj, (str, os.PathLike)):
        path = file_obj
    else:
        path = file_obj.name
    df = pd.read_csv(path)

    cat_df = df.select_dtypes(include=['object'])
    if cat_df.shape[1] == 0:
        raise ValueError(
            "The uploaded file has no categorical (text) columns to compare."
        )

    # Score every ordered pair of columns, rounded to two decimals for display.
    scores = [
        [round(cramers_V(cat_df[var1], cat_df[var2]), 2) for var2 in cat_df]
        for var1 in cat_df
    ]
    df_final = pd.DataFrame(scores, columns=cat_df.columns, index=cat_df.columns)

    # Close the current figure so the heatmap is drawn on a fresh one.
    plt.close()
    sns.heatmap(data=df_final, annot=True)
    return plt.gcf()
# Gradio UI: one file upload in, one plot out, served by predict().
iface = gr.Interface(
    fn=predict,
    inputs="file",
    outputs="plot",
    examples=["StudentsPerformance.csv"],
    theme="dark-peach",
    title='Correlation Tool for Categorical features',
    description="This tool identifies and explains the correlation between categorical features.",
)

# Start the web server for the interface.
iface.launch(inline=False)