reproduce / plotting /paper_plots.py
attilasimko's picture
What did I do before then?
77f290b
raw
history blame
2.88 kB
import numpy as np
import pandas as pd
import plotly.express as px
# Load the paper metadata dump (tab-separated). The code below relies on the
# columns 'year', 'venue' and 'url'.
paper_dump = pd.read_csv('data/dump.csv', sep="\t")

# Venue order used for the colour/legend ordering of the figures.
custom_order = ["MICCAI", "MIDL", "Nature", "arXiv"]

# Total number of papers per (year, venue).
total_titles_per_venue = (
    paper_dump.groupby(['year', 'venue']).size().reset_index(name='total_titles')
)

# Number of papers that actually carry a URL per (year, venue).
# read_csv parses empty fields as NaN, and `NaN != ""` evaluates to True, so a
# plain `!= ""` filter would also count papers without a URL. Require the
# value to be present as well as non-empty.
has_url = paper_dump["url"].notna() & (paper_dump["url"] != "")
total_url_per_venue = (
    paper_dump[has_url].groupby(['year', 'venue']).size().reset_index(name='total_urls')
)

# Combine both counts. The left merge keeps every (year, venue) group; groups
# without any URL come out as NaN and are really zero.
merged_df = pd.merge(total_titles_per_venue, total_url_per_venue, on=['year', 'venue'], how='left')
merged_df['total_urls'] = merged_df['total_urls'].fillna(0).astype(int)

# Fraction of papers in each group that have a URL.
merged_df['repo_rate'] = merged_df['total_urls'] / merged_df['total_titles']

# Grouped bar chart of the paper count, year on the x-axis, coloured by venue.
fig = px.bar(
    merged_df,
    x='year',
    y='total_titles',
    color='venue',
    barmode='group',
    title='Number of papers per venue',
    # The former 'error_rate' entry referred to a column that is not plotted.
    labels={'year': 'Year'},
    category_orders={'venue': custom_order},
)
fig.update_xaxes(range=[2018, 2024])
fig.show()
import plotly.express as px
import numpy as np
# Recompute the per-(year, venue) statistics used by the figures that follow.

# Total number of papers per (year, venue).
total_titles_per_venue = (
    paper_dump.groupby(['year', 'venue']).size().reset_index(name='total_titles')
)

# Number of papers that actually carry a URL per (year, venue).
# Missing values are NaN, and `NaN != ""` evaluates to True, so a plain
# `!= ""` filter would also count papers without a URL. Require the value to
# be present as well as non-empty.
has_url = paper_dump["url"].notna() & (paper_dump["url"] != "")
total_url_per_venue = (
    paper_dump[has_url].groupby(['year', 'venue']).size().reset_index(name='total_urls')
)

# Combine both counts. The left merge keeps every (year, venue) group; groups
# without any URL come out as NaN and are really zero.
merged_df = pd.merge(total_titles_per_venue, total_url_per_venue, on=['year', 'venue'], how='left')
merged_df['total_urls'] = merged_df['total_urls'].fillna(0).astype(int)

# Fraction of papers in each group that have a URL.
merged_df['repo_rate'] = merged_df['total_urls'] / merged_df['total_titles']

# Grouped bar chart of the paper count, year on the x-axis, coloured by venue.
fig = px.bar(
    merged_df,
    x='year',
    y='total_titles',
    color='venue',
    barmode='group',
    title='Number of papers per venue',
    # The former 'error_rate' entry referred to a column that is not plotted.
    labels={'year': 'Year'},
    category_orders={'venue': custom_order},
)
fig.update_xaxes(range=[2018, 2024])
fig.show()
# Grouped bar chart of the number of papers that have a URL ('total_urls'),
# year on the x-axis, coloured by venue.
fig = px.bar(
    merged_df,
    x='year',
    y='total_urls',
    color='venue',
    barmode='group',
    # The previous title was copied from the paper-count figure and did not
    # describe the plotted quantity.
    title='Number of papers with a URL per venue',
    # The former 'error_rate' entry referred to a column that is not plotted.
    labels={'year': 'Year'},
    category_orders={'venue': custom_order},
)
fig.update_xaxes(range=[2018, 2024])
fig.show()
# Grouped bar chart of 'repo_rate' (total_urls / total_titles), year on the
# x-axis, coloured by venue.
fig = px.bar(
    merged_df,
    x='year',
    y='repo_rate',
    color='venue',
    barmode='group',
    # The plotted value is a ratio, not a count, so the title says so.
    title='Fraction of papers with a repository per venue',
    # The former 'error_rate' entry referred to a column that is not plotted.
    labels={'year': 'Year'},
    category_orders={'venue': custom_order},
)
fig.update_xaxes(range=[2018, 2024])
# The rate is a fraction, so pin the y-axis to the full [0, 1] range.
fig.update_yaxes(range=[0, 1])
fig.show()