Spaces:
Running
Running
File size: 3,195 Bytes
3ec5aa6 f50b29d 3ec5aa6 f50b29d 3ec5aa6 f50b29d 3ec5aa6 f50b29d 3ec5aa6 f50b29d 3ec5aa6 f50b29d 3ec5aa6 f50b29d 3ec5aa6 f50b29d 3ec5aa6 f50b29d 3ec5aa6 f50b29d 3ec5aa6 f50b29d 3ec5aa6 f50b29d 3ec5aa6 f50b29d 3ec5aa6 f50b29d 3ec5aa6 c3ef24f f50b29d c3ef24f f50b29d c3ef24f f50b29d 3ec5aa6 f50b29d 3ec5aa6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
from gradio_client import Client
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
def fetch_category_ids(api_key):
    """Fetch the per-category paper IDs from the ReXplore ID-fetching Space.

    Calls the ``/fetch_paper_ids`` endpoint of the
    ``raannakasturi/ReXploreIDFetchingAPI`` Space and decodes the JSON reply.

    Args:
        api_key: Access key forwarded to the endpoint as ``user_access_key``.

    Returns:
        The ``data`` field of the reply when its ``status`` is ``'success'``.
        ``None`` when the reply reports any other status, or when creating
        the client, calling the endpoint, or decoding the reply raises; the
        reason is printed in both cases.

    Raises:
        ValueError: If ``api_key`` is empty or ``None``.
    """
    if not api_key:
        raise ValueError("API access key not found. Please check your environment variables.")
    try:
        # The client is created inside the try block on purpose: constructing
        # it can fail just like the request can (e.g. the Space is not
        # reachable), and that failure should be reported the same way as
        # any other instead of escaping as an unhandled exception.
        cat_ids_api_client = Client("raannakasturi/ReXploreIDFetchingAPI")
        result = cat_ids_api_client.predict(
            user_access_key=api_key,
            api_name="/fetch_paper_ids",
        )
        cat_ids = json.loads(result)
        if cat_ids.get('status') == 'success':
            return cat_ids.get('data')
        print(f"Failed to fetch category IDs: {cat_ids.get('message', 'No message provided')}")
        return None
    except Exception as e:
        # Best-effort boundary: any failure is logged and mapped to None.
        print(f"Exception while fetching category IDs: {e}")
        return None
def fetch_single_paper_data(paper_id):
    """Fetch the data for one paper from the ReXplore paper-data Space.

    Calls the ``/fetch_paper_data`` endpoint of the
    ``raannakasturi/ReXplorePaperDataFetcher`` Space and decodes the JSON
    reply.

    Args:
        paper_id: Identifier forwarded to the endpoint as ``id``.

    Returns:
        A ``(paper_id, data)`` tuple. ``data`` is the reply's ``data`` field
        when its ``status`` is ``'success'``, and ``None`` when the reply
        reports any other status or when creating the client, calling the
        endpoint, or decoding the reply raises (the reason is printed).
    """
    try:
        # Client construction lives inside the try block so that a failure to
        # create it yields the same (paper_id, None) result as any other
        # failure, keeping the return shape uniform for callers.
        paper_data_api_client = Client("raannakasturi/ReXplorePaperDataFetcher")
        result = paper_data_api_client.predict(
            id=paper_id,
            api_name="/fetch_paper_data",
        )
        paper_data = json.loads(result)
        if paper_data.get('status') == 'success':
            return paper_id, paper_data.get('data')
        print(f"Failed to fetch data for paper ID {paper_id}: {paper_data.get('message', 'Unknown error')}")
        return paper_id, None
    except Exception as e:
        # Best-effort boundary: one bad paper must not abort a batch.
        print(f"Exception while fetching data for paper ID {paper_id}: {e}")
        return paper_id, None
def fetch_paper_data_concurrently(paper_ids, max_threads=12):
    """Fetch data for several papers in parallel and keep the successes.

    Each ID is handed to ``fetch_single_paper_data`` on a thread pool;
    results are gathered in completion order.

    Args:
        paper_ids: Iterable of paper identifiers to fetch.
        max_threads: Worker count passed to ``ThreadPoolExecutor``.

    Returns:
        A dict mapping paper ID to its fetched data. IDs whose fetch raised
        or produced a falsy result are omitted.
    """
    collected = {}
    with ThreadPoolExecutor(max_workers=max_threads) as pool:
        # Remember which ID each future belongs to so that a failure can be
        # reported against the right paper.
        pending = {}
        for requested_id in paper_ids:
            task = pool.submit(fetch_single_paper_data, requested_id)
            pending[task] = requested_id

        for finished in as_completed(pending):
            requested_id = pending[finished]
            try:
                returned_id, payload = finished.result()
                if payload:
                    collected[returned_id] = payload
            except Exception as exc:
                print(f"Error fetching data for paper ID {requested_id}: {exc}")
    return collected
def fetch_paper_data_with_category(api_key):
    """Fetch paper data for every category and return it as a JSON string.

    Obtains the category listing via ``fetch_category_ids``, then fetches
    the papers listed under each category's ``'ids'`` entry with
    ``fetch_paper_data_concurrently``.

    Args:
        api_key: Access key passed through to ``fetch_category_ids``.

    Returns:
        A JSON string (4-space indent, non-ASCII kept as-is) mapping each
        category to its fetched paper data. Categories with no IDs, no
        fetched data, or an error are left out, so the result is ``"{}"``
        when nothing could be fetched. ``None`` if an exception escapes the
        per-category handling (for example a missing ``api_key``).
    """
    by_category = {}
    try:
        categories = fetch_category_ids(api_key)
        # A falsy listing (None or empty) simply yields no iterations.
        for category, info in (categories.items() if categories else ()):
            print(f"Fetching data for category: {category}")
            try:
                ids_for_category = info.get('ids', [])
                if not ids_for_category:
                    print(f"No paper IDs found for category: {category}")
                    continue
                fetched = fetch_paper_data_concurrently(ids_for_category)
                if fetched:
                    by_category[category] = fetched
            except Exception as exc:
                # One broken category must not stop the remaining ones.
                print(f"Error fetching data for category {category}: {exc}")
        return json.dumps(by_category, indent=4, ensure_ascii=False)
    except Exception as exc:
        print(f"Exception while fetching paper data by category: {exc}")
        return None
|