"""Plot per-venue paper counts and the share of papers with a GitHub URL.

Reads the tab-separated dump at ``data/urls.csv`` (relative to the current
working directory), picks the first GitHub URL found among each paper's
manual and automatic URLs, and shows two grouped bar charts with the year
on the x-axis and one colour per venue:

1. the number of papers per venue, and
2. the fraction of those papers that have a GitHub URL.
"""
import plotly.express as px
import os
import sys

# Add the parent of this file's directory to the import path before the
# project-local imports below (`config`, `core`) are attempted.
ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
sys.path.append(ROOT_DIR)

import numpy as np
import pandas as pd
import json

from config.constants import VENUE_ORDER
from core.paper import _parse_url_field


def _first_github_url(row):
    """Return the first URL containing "github.com", or None if there is none.

    Manually curated URLs are searched before automatically collected ones.
    """
    candidates = [*row['urls_manual'], *row['urls_auto']]
    return next((u for u in candidates if "github.com" in u), None)


# NOTE: despite the .csv extension the file is read as tab-separated.
paper_dump = pd.read_csv('data/urls.csv', sep="\t")

# Both URL columns hold JSON-encoded values; decode them and hand the result
# to the project parser (presumably yielding an iterable of URL strings, since
# the values are unpacked and substring-tested below -- TODO confirm).
for url_column in ('urls_manual', 'urls_auto'):
    paper_dump[url_column] = paper_dump[url_column].apply(
        lambda raw: _parse_url_field(json.loads(raw))
    )

paper_dump['url'] = paper_dump.apply(_first_github_url, axis=1)

# Total number of papers per year and venue.
total_titles_per_venue = (
    paper_dump
    .groupby(['year', 'venue'])
    .size()
    .reset_index(name='total_titles')
)

# Number of papers with a GitHub URL per year and venue.
has_url = paper_dump["url"].notna() & (paper_dump["url"] != "")
total_url_per_venue = (
    paper_dump[has_url]
    .groupby(['year', 'venue'])
    .size()
    .reset_index(name='total_urls')
)

# Left merge keeps every (year, venue) group, including those without any
# GitHub URL. Such groups come back with NaN counts, so treat them as zero;
# otherwise their rate would be NaN instead of 0.
merged_df = pd.merge(
    total_titles_per_venue,
    total_url_per_venue,
    on=['year', 'venue'],
    how='left',
)
merged_df['total_urls'] = merged_df['total_urls'].fillna(0).astype(int)
merged_df['repo_rate'] = merged_df['total_urls'] / merged_df['total_titles']

# Chart 1: number of papers, year on the x-axis, one colour per venue.
fig = px.bar(
    merged_df,
    x='year',
    y='total_titles',
    color='venue',
    barmode='group',
    title='Number of papers per venue',
    labels={'total_titles': 'Number of papers', 'year': 'Year'},
    category_orders={'venue': VENUE_ORDER},
)
fig.update_xaxes(range=[2018, 2025])
fig.show()

# Chart 2: fraction of papers with a GitHub URL, same layout as chart 1.
fig = px.bar(
    merged_df,
    x='year',
    y='repo_rate',
    color='venue',
    barmode='group',
    title='Share of papers with a GitHub repository per venue',
    labels={
        'repo_rate': 'Share of papers with a GitHub repository',
        'year': 'Year',
    },
    category_orders={'venue': VENUE_ORDER},
)
fig.update_xaxes(range=[2018, 2025])
fig.update_yaxes(range=[0, 1])  # the rate is a fraction in [0, 1]
fig.show()