import pandas as pd
import plotly.express as px

# Load the paper dump (tab-separated)
paper_dump = pd.read_csv('data/dump.csv', sep="\t")

# Venue order used consistently across all plots
custom_order = ["MICCAI", "MIDL", "Nature", "arXiv"]

# Count the total number of papers per year and venue
total_titles_per_venue = paper_dump.groupby(['year', 'venue']).size().reset_index(name='total_titles')

# Count the papers that list a repository URL per year and venue
total_url_per_venue = paper_dump[paper_dump["url"] != ""].groupby(['year', 'venue']).size().reset_index(name='total_urls')

# Merge the two counts and compute the repository rate per year and venue
merged_df = pd.merge(total_titles_per_venue, total_url_per_venue, on=['year', 'venue'], how='left')
merged_df['total_urls'] = merged_df['total_urls'].fillna(0)
merged_df['repo_rate'] = merged_df['total_urls'] / merged_df['total_titles']

# Plot the number of papers per venue, with year on the x-axis and color by venue
fig = px.bar(
    merged_df,
    x='year',
    y='total_titles',
    color='venue',
    barmode='group',
    title='Number of papers per venue',
    labels={'total_titles': 'Number of papers', 'year': 'Year'},
    category_orders={'venue': custom_order}
)
fig.update_xaxes(range=[2018, 2024])
fig.show()
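
# An equivalent, more compact way to build the same per-venue table with a single
# groupby is sketched below. This is a sketch, not the app's original code; it
# assumes the same 'year', 'venue', and 'url' columns and treats missing URLs as
# empty strings, so venue/year combinations without any repository count as zero.
repo_stats = (
    paper_dump
    .assign(has_url=paper_dump['url'].fillna('').ne(''))
    .groupby(['year', 'venue'], as_index=False)
    .agg(total_titles=('has_url', 'size'), total_urls=('has_url', 'sum'))
)
repo_stats['repo_rate'] = repo_stats['total_urls'] / repo_stats['total_titles']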
# Plot the number of repositories per venue, with year on the x-axis and color by venue
fig = px.bar(
    merged_df,
    x='year',
    y='total_urls',
    color='venue',
    barmode='group',
    title='Number of repositories per venue',
    labels={'total_urls': 'Number of repositories', 'year': 'Year'},
    category_orders={'venue': custom_order}
)
fig.update_xaxes(range=[2018, 2024])
fig.show()
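
# Alternative to clipping with fig.update_xaxes(range=[2018, 2024]): restrict the
# years in the data itself so bars at the window edges are not cut off. A sketch
# assuming 'year' is stored as an integer column; pass `recent` to px.bar in
# place of merged_df.
recent = merged_df[merged_df['year'].between(2018, 2024)]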
# Plot the repository rate per venue, with year on the x-axis and color by venue
fig = px.bar(
    merged_df,
    x='year',
    y='repo_rate',
    color='venue',
    barmode='group',
    title='Repository rate per venue',
    labels={'repo_rate': 'Repository rate', 'year': 'Year'},
    category_orders={'venue': custom_order}
)
fig.update_xaxes(range=[2018, 2024])
fig.update_yaxes(range=[0, 1])
fig.show()
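
# To keep the charts rather than only displaying them, a plotly figure can also be
# written to a standalone HTML file. The filename below is only an example and is
# not referenced elsewhere in this repository.
fig.write_html('repo_rate_per_venue.html')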