import json
from concurrent.futures import ThreadPoolExecutor, as_completed

from gradio_client import Client


def fetch_category_ids(api_key):
    """Fetch the category/paper-ID payload from the ID-fetching Space.

    Args:
        api_key: Access key forwarded to the API as ``user_access_key``.

    Returns:
        The ``data`` field of the decoded JSON response when its ``status``
        is ``'success'``. ``None`` when the API reports a failure, or when
        the request or JSON decoding raises (a message is printed in both
        cases).

    Raises:
        ValueError: If ``api_key`` is empty or ``None``.

    Note:
        The client is constructed outside the ``try`` block, so errors
        raised while creating it propagate to the caller.
    """
    if not api_key:
        raise ValueError("API access key not found. Please check your environment variables.")
    cat_ids_api_client = Client("raannakasturi/ReXploreIDFetchingAPI")
    try:
        result = cat_ids_api_client.predict(
            user_access_key=api_key,
            api_name="/fetch_paper_ids"
        )
        cat_ids = json.loads(result)
        if cat_ids.get('status') == 'success':
            return cat_ids.get('data')
        print(f"Failed to fetch category IDs: {cat_ids.get('message', 'No message provided')}")
        return None
    except Exception as e:
        print(f"Exception while fetching category IDs: {e}")
        return None


def fetch_single_paper_data(paper_id):
    """Fetch the data for a single paper from the paper-data Space.

    This is the per-item worker used by ``fetch_paper_data_concurrently``.
    Failures are reported by printing a message and returning ``None`` as
    the data element instead of raising.

    Args:
        paper_id: Identifier forwarded to the API as ``id``.

    Returns:
        A ``(paper_id, data)`` tuple. ``data`` is the ``data`` field of the
        decoded JSON response when its ``status`` is ``'success'``, and
        ``None`` otherwise (API-reported failure or any exception).
    """
    try:
        # Client construction may itself raise (e.g. the Space cannot be
        # reached), so it is kept inside the ``try``: the worker then always
        # honours its ``(paper_id, None)`` failure contract.
        paper_data_api_client = Client("raannakasturi/ReXplorePaperDataFetcher")
        result = paper_data_api_client.predict(
            id=paper_id,
            api_name="/fetch_paper_data"
        )
        paper_data = json.loads(result)
        if paper_data.get('status') == 'success':
            return paper_id, paper_data.get('data')
        print(f"Failed to fetch data for paper ID {paper_id}: {paper_data.get('message', 'Unknown error')}")
        return paper_id, None
    except Exception as e:
        print(f"Exception while fetching data for paper ID {paper_id}: {e}")
        return paper_id, None


def fetch_paper_data_concurrently(paper_ids, max_threads=12):
    """Fetch data for several papers in parallel using a thread pool.

    Args:
        paper_ids: Iterable of paper identifiers; one task is submitted
            per identifier.
        max_threads: Maximum number of worker threads in the pool.

    Returns:
        A dict mapping each paper ID to its fetched data. Papers whose data
        is missing or falsy, or whose task raised, are left out.
    """
    paper_id_data = {}
    with ThreadPoolExecutor(max_workers=max_threads) as executor:
        future_to_paper = {
            executor.submit(fetch_single_paper_data, pid): pid
            for pid in paper_ids
        }
        # Collect results in completion order rather than submission order.
        for future in as_completed(future_to_paper):
            paper_id = future_to_paper[future]
            try:
                pid, data = future.result()
                if data:
                    paper_id_data[pid] = data
            except Exception as e:
                print(f"Error fetching data for paper ID {paper_id}: {e}")
    return paper_id_data


def fetch_paper_data_with_category(api_key):
    """Fetch paper data for every category and return it as a JSON string.

    The category payload comes from ``fetch_category_ids``; each category's
    ``'ids'`` list is then fetched with ``fetch_paper_data_concurrently``.

    Args:
        api_key: Access key passed to ``fetch_category_ids``.

    Returns:
        A JSON string (4-space indent, non-ASCII characters preserved) of
        the form ``{category: {paper_id: data}}``. Categories with no IDs or
        no fetched data are omitted, so the result is ``"{}"`` when nothing
        was fetched. ``None`` is returned if an unexpected exception
        escapes the per-category handling (including the ``ValueError``
        raised for a missing ``api_key``).
    """
    all_data = {}
    try:
        cat_ids = fetch_category_ids(api_key)
        if cat_ids:
            for category, info in cat_ids.items():
                print(f"Fetching data for category: {category}")
                # A failure in one category must not abort the others.
                try:
                    paper_ids = info.get('ids', [])
                    if not paper_ids:
                        print(f"No paper IDs found for category: {category}")
                        continue
                    paper_data = fetch_paper_data_concurrently(paper_ids)
                    if paper_data:
                        all_data[category] = paper_data
                except Exception as e:
                    print(f"Error fetching data for category {category}: {e}")
        return json.dumps(all_data, indent=4, ensure_ascii=False)
    except Exception as e:
        print(f"Exception while fetching paper data by category: {e}")
        return None