ScientryBackend / fetch_data.py
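"""Helpers for fetching paper IDs and paper data from the ReXplore Hugging Face Spaces APIs."""
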
from gradio_client import Client
import json
from concurrent.futures import ThreadPoolExecutor, as_completed


def fetch_category_ids(api_key):
    """Fetch the category-to-paper-ID mapping from the ReXploreIDFetchingAPI Space."""
    if not api_key:
        raise ValueError("API access key not found. Please check your environment variables.")
    cat_ids_api_client = Client("raannakasturi/ReXploreIDFetchingAPI")
    try:
        result = cat_ids_api_client.predict(
            user_access_key=api_key,
            api_name="/fetch_paper_ids"
        )
        cat_ids = json.loads(result)
        if cat_ids.get('status') == 'success':
            return cat_ids.get('data')
        else:
            print(f"Failed to fetch category IDs: {cat_ids.get('message', 'No message provided')}")
            return None
    except Exception as e:
        print(f"Exception while fetching category IDs: {e}")
        return None


def fetch_single_paper_data(paper_id):
    """Fetch metadata for a single paper from the ReXplorePaperDataFetcher Space."""
    paper_data_api_client = Client("raannakasturi/ReXplorePaperDataFetcher")
    try:
        result = paper_data_api_client.predict(
            id=paper_id,
            api_name="/fetch_paper_data"
        )
        paper_data = json.loads(result)
        if paper_data.get('status') == 'success':
            return paper_id, paper_data.get('data')
        else:
            print(f"Failed to fetch data for paper ID {paper_id}: {paper_data.get('message', 'Unknown error')}")
            return paper_id, None
    except Exception as e:
        print(f"Exception while fetching data for paper ID {paper_id}: {e}")
        return paper_id, None


def fetch_paper_data_concurrently(paper_ids, max_threads=12):
    """Fetch data for multiple paper IDs in parallel using a thread pool."""
    paper_id_data = {}
    with ThreadPoolExecutor(max_workers=max_threads) as executor:
        future_to_paper = {executor.submit(fetch_single_paper_data, pid): pid for pid in paper_ids}
        for future in as_completed(future_to_paper):
            paper_id = future_to_paper[future]
            try:
                pid, data = future.result()
                if data:
                    paper_id_data[pid] = data
            except Exception as e:
                print(f"Error fetching data for paper ID {paper_id}: {e}")
    return paper_id_data


def fetch_paper_data_with_category(api_key):
    """Fetch paper data for every category and return it as a pretty-printed JSON string."""
    all_data = {}
    try:
        cat_ids = fetch_category_ids(api_key)
        if cat_ids:
            for category, info in cat_ids.items():
                print(f"Fetching data for category: {category}")
                try:
                    paper_ids = info.get('ids', [])
                    if paper_ids:
                        paper_data = fetch_paper_data_concurrently(paper_ids)
                        if paper_data:
                            all_data[category] = paper_data
                    else:
                        print(f"No paper IDs found for category: {category}")
                except Exception as e:
                    print(f"Error fetching data for category {category}: {e}")
                    continue
        return json.dumps(all_data, indent=4, ensure_ascii=False)
    except Exception as e:
        print(f"Exception while fetching paper data by category: {e}")
        return None
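

if __name__ == "__main__":
    # Minimal usage sketch (an illustrative addition, not part of the original module).
    # The access key is assumed to come from an environment variable; the variable name
    # "REXPLORE_ACCESS_KEY" is a hypothetical placeholder, not confirmed by the source.
    import os

    access_key = os.getenv("REXPLORE_ACCESS_KEY")
    data_json = fetch_paper_data_with_category(access_key)
    if data_json:
        print(data_json)
    else:
        print("No paper data could be fetched.")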