# Per-year, per-venue statistics over the paper dump: number of papers,
# number of papers that carry a URL, and the ratio between the two, each
# rendered as a grouped bar chart.

import numpy as np
import pandas as pd
import plotly.express as px

# Order in which venues appear in every chart (legend and bar grouping).
custom_order = ["MICCAI", "MIDL", "Nature", "arXiv"]

# Columns every aggregation below is grouped and merged on.
_GROUP_KEYS = ['year', 'venue']


def _venue_bar_chart(data, y, title, y_label):
    """Return a grouped bar chart of column *y* per year, colored by venue.

    Args:
        data: DataFrame with 'year', 'venue' and the *y* column.
        y: Name of the column plotted on the y-axis.
        title: Chart title.
        y_label: Axis label shown instead of the raw column name.

    Returns:
        The plotly figure, with the x-axis restricted to 2018-2024.
    """
    chart = px.bar(
        data,
        x='year',
        y=y,
        color='venue',
        barmode='group',
        title=title,
        labels={y: y_label, 'year': 'Year'},
        category_orders={'venue': custom_order},
    )
    chart.update_xaxes(range=[2018, 2024])
    return chart


paper_dump = pd.read_csv('data/dump.csv', sep="\t")

# Total number of papers (rows) per year and venue.
total_titles_per_venue = (
    paper_dump.groupby(_GROUP_KEYS).size().reset_index(name='total_titles')
)

# Number of papers with a URL per year and venue.
# read_csv parses blank fields as NaN, and NaN != "" evaluates to True, so
# a bare `!= ""` comparison would count rows without a URL as having one.
has_url = paper_dump["url"].notna() & (paper_dump["url"] != "")
total_url_per_venue = (
    paper_dump[has_url].groupby(_GROUP_KEYS).size().reset_index(name='total_urls')
)

# Left merge keeps year/venue groups that have no URL at all; those come
# back as NaN and must count as zero, not as missing data.
merged_df = pd.merge(
    total_titles_per_venue,
    total_url_per_venue,
    on=_GROUP_KEYS,
    how='left',
)
merged_df['total_urls'] = merged_df['total_urls'].fillna(0).astype(int)
# NOTE(review): the column name suggests "url" holds a code-repository
# link -- confirm against the data source.
merged_df['repo_rate'] = merged_df['total_urls'] / merged_df['total_titles']

# Papers per venue and year.
fig = _venue_bar_chart(
    merged_df,
    y='total_titles',
    title='Number of papers per venue',
    y_label='Papers',
)
fig.show()

# Papers with a URL per venue and year.
fig = _venue_bar_chart(
    merged_df,
    y='total_urls',
    title='Number of repositories per venue',
    y_label='Papers with a URL',
)
fig.show()

# Share of papers with a URL per venue and year; a ratio, so pin y to [0, 1].
fig = _venue_bar_chart(
    merged_df,
    y='repo_rate',
    title='Repository rate per venue',
    y_label='Share of papers with a URL',
)
fig.update_yaxes(range=[0, 1])
fig.show()