File size: 3,195 Bytes
3ec5aa6
 
 
 
f50b29d
 
3ec5aa6
 
 
 
f50b29d
3ec5aa6
 
 
f50b29d
 
3ec5aa6
f50b29d
3ec5aa6
 
f50b29d
3ec5aa6
 
 
 
 
 
 
 
 
 
f50b29d
 
3ec5aa6
 
 
 
f50b29d
3ec5aa6
 
 
 
 
f50b29d
 
 
3ec5aa6
f50b29d
3ec5aa6
f50b29d
3ec5aa6
f50b29d
3ec5aa6
 
f50b29d
 
3ec5aa6
f50b29d
3ec5aa6
f50b29d
3ec5aa6
c3ef24f
f50b29d
 
 
 
 
 
 
c3ef24f
f50b29d
c3ef24f
f50b29d
3ec5aa6
f50b29d
3ec5aa6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
from gradio_client import Client
import json
from concurrent.futures import ThreadPoolExecutor, as_completed

def fetch_category_ids(api_key):
    """Fetch the category/paper-ID payload from the ID-fetching API.

    Calls the ``/fetch_paper_ids`` endpoint of the
    ``raannakasturi/ReXploreIDFetchingAPI`` client and decodes the JSON
    string it returns.

    Args:
        api_key: Access key forwarded to the endpoint as ``user_access_key``.

    Returns:
        The value stored under ``'data'`` in the decoded response when its
        ``'status'`` is ``'success'``; ``None`` when the status is anything
        else or when the request/decoding raises.

    Raises:
        ValueError: If ``api_key`` is falsy.
    """
    if not api_key:
        raise ValueError("API access key not found. Please check your environment variables.")

    # The client is built outside the try block, so an exception raised while
    # constructing it is not handled here and reaches the caller.
    id_client = Client("raannakasturi/ReXploreIDFetchingAPI")
    try:
        raw_response = id_client.predict(user_access_key=api_key, api_name="/fetch_paper_ids")
        payload = json.loads(raw_response)
        if payload.get('status') != 'success':
            reason = payload.get('message', 'No message provided')
            print(f"Failed to fetch category IDs: {reason}")
            return None
        return payload.get('data')
    except Exception as err:
        # Best-effort: report the problem and signal failure with None.
        print(f"Exception while fetching category IDs: {err}")
        return None

def fetch_single_paper_data(paper_id):
    """Fetch the data for one paper from the paper-data API.

    Calls the ``/fetch_paper_data`` endpoint of the
    ``raannakasturi/ReXplorePaperDataFetcher`` client and decodes the JSON
    string it returns.

    Args:
        paper_id: Identifier forwarded to the endpoint as ``id``.

    Returns:
        A ``(paper_id, data)`` tuple. ``data`` is the value stored under
        ``'data'`` in the decoded response when its ``'status'`` is
        ``'success'``; otherwise it is ``None`` (non-success status, or an
        exception during the request/decoding).
    """
    # A new client is created on every call, outside the try block, so an
    # exception raised while constructing it propagates to the caller.
    data_client = Client("raannakasturi/ReXplorePaperDataFetcher")

    fetched = None
    try:
        raw_response = data_client.predict(id=paper_id, api_name="/fetch_paper_data")
        response = json.loads(raw_response)
        if response.get('status') == 'success':
            fetched = response.get('data')
        else:
            print(f"Failed to fetch data for paper ID {paper_id}: {response.get('message', 'Unknown error')}")
    except Exception as err:
        # Best-effort: report the problem and fall through with data=None.
        print(f"Exception while fetching data for paper ID {paper_id}: {err}")
    return paper_id, fetched

def fetch_paper_data_concurrently(paper_ids, max_threads=12):
    """Fetch data for many papers in parallel using a thread pool.

    Submits one ``fetch_single_paper_data`` call per ID and gathers the
    results as the calls finish.

    Args:
        paper_ids: Iterable of paper identifiers to fetch.
        max_threads: Maximum number of worker threads in the pool.

    Returns:
        A dict mapping each returned paper ID to its data. Papers whose data
        is falsy, or whose fetch raised, are left out.
    """
    collected = {}
    with ThreadPoolExecutor(max_workers=max_threads) as pool:
        # Remember which ID each future belongs to so that a failure can be
        # reported even when the worker never returned its own ID.
        pending = {}
        for requested_id in paper_ids:
            pending[pool.submit(fetch_single_paper_data, requested_id)] = requested_id

        for finished in as_completed(pending):
            requested_id = pending[finished]
            try:
                returned_id, payload = finished.result()
                if not payload:
                    continue
                collected[returned_id] = payload
            except Exception as err:
                print(f"Error fetching data for paper ID {requested_id}: {err}")
    return collected

def fetch_paper_data_with_category(api_key):
    """Fetch paper data for every category and return it as a JSON string.

    Obtains the category payload via ``fetch_category_ids``, then for each
    category reads its ``'ids'`` list and fetches those papers with
    ``fetch_paper_data_concurrently``.

    Args:
        api_key: Access key passed through to ``fetch_category_ids``.

    Returns:
        A JSON string (4-space indent, non-ASCII characters kept as-is)
        mapping each category to its fetched paper data. Categories with no
        IDs, no fetched data, or a per-category error are omitted, so the
        result is ``"{}"`` when nothing could be fetched. Returns ``None`` if
        an exception escapes the per-category handling, including one raised
        by ``fetch_category_ids`` itself.
    """
    results_by_category = {}
    try:
        category_map = fetch_category_ids(api_key)
        # A falsy payload (e.g. None after a failed fetch) means no categories.
        entries = category_map.items() if category_map else ()
        for category, info in entries:
            print(f"Fetching data for category: {category}")
            try:
                ids_for_category = info.get('ids', [])
                if not ids_for_category:
                    print(f"No paper IDs found for category: {category}")
                    continue
                fetched = fetch_paper_data_concurrently(ids_for_category)
                if fetched:
                    results_by_category[category] = fetched
            except Exception as err:
                # One bad category must not abort the others.
                print(f"Error fetching data for category {category}: {err}")
        return json.dumps(results_by_category, indent=4, ensure_ascii=False)
    except Exception as err:
        print(f"Exception while fetching paper data by category: {err}")
        return None