File size: 3,013 Bytes
3ec5aa6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c3ef24f
 
 
 
 
 
 
3ec5aa6
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
from gradio_client import Client
import json
from concurrent.futures import ThreadPoolExecutor, as_completed

def fetch_category_ids(cat_ids_api_key):
    """Fetch category IDs using the category API.

    Calls the ``/fetch_paper_ids`` endpoint of the
    ``raannakasturi/ReXploreIDFetchingAPI`` client and decodes the JSON
    string it returns.

    Args:
        cat_ids_api_key: Access key forwarded to the API as
            ``user_access_key``.

    Returns:
        The ``data`` member of the decoded reply when its ``status`` is
        ``'success'``. ``None`` when the status is anything else, or when
        creating the client, calling the endpoint, or decoding the reply
        raises.

    Raises:
        ValueError: If ``cat_ids_api_key`` is empty or ``None``.
    """
    if not cat_ids_api_key:
        raise ValueError("API access key not found. Please check your environment variables.")
    try:
        # The client is created inside the try block so that a failure while
        # constructing it is reported and mapped to None, exactly like a
        # failure of the request itself.
        cat_ids_api_client = Client("raannakasturi/ReXploreIDFetchingAPI")
        result = cat_ids_api_client.predict(
            user_access_key=cat_ids_api_key,
            api_name="/fetch_paper_ids"
        )
        cat_ids = json.loads(result)
        if cat_ids['status'] == 'success':
            return cat_ids['data']
        # Report the rejection instead of failing silently, mirroring how
        # single-paper fetch failures are reported.
        print(f"Failed to fetch category IDs: {cat_ids.get('message', 'Unknown error')}")
        return None
    except Exception as e:
        print(f"Exception while fetching category IDs: {str(e)}")
        return None

def fetch_single_paper_data(paper_id):
    """Fetch the data record for one paper.

    Calls the ``/fetch_paper_data`` endpoint of the
    ``raannakasturi/ReXplorePaperDataFetcher`` client and decodes the JSON
    string it returns.

    Args:
        paper_id: Identifier forwarded to the API as ``id``.

    Returns:
        A ``(paper_id, data)`` tuple. ``data`` is the ``data`` member of the
        decoded reply when its ``status`` is ``'success'``, and ``None`` when
        the status is anything else or when creating the client, calling the
        endpoint, or decoding the reply raises. The function never raises an
        ``Exception`` subclass itself; failures are printed.
    """
    try:
        # Creating the client inside the try block keeps the "always return a
        # (paper_id, data) tuple" contract even when the client cannot be
        # constructed.
        paper_data_api_client = Client("raannakasturi/ReXplorePaperDataFetcher")
        result = paper_data_api_client.predict(
            id=paper_id,
            api_name="/fetch_paper_data"
        )
        paper_data = json.loads(result)
        if paper_data['status'] == 'success':
            return paper_id, paper_data['data']
        print(f"Failed to fetch data for paper ID {paper_id}: {paper_data.get('message', 'Unknown error')}")
        return paper_id, None
    except Exception as e:
        print(f"Exception while fetching data for paper ID {paper_id}: {str(e)}")
        return paper_id, None

def fetch_paper_data_concurrently(paper_ids, max_threads=12):
    """Fetch data for several papers in parallel using a thread pool.

    One ``fetch_single_paper_data`` task is submitted per entry of
    ``paper_ids``; results are gathered in completion order.

    Args:
        paper_ids: Iterable of paper identifiers to fetch.
        max_threads: Maximum number of worker threads for the pool.

    Returns:
        A dict mapping paper ID to its fetched data. Papers whose task
        produced falsy data or raised are left out; raised errors are
        printed.
    """
    collected = {}
    with ThreadPoolExecutor(max_workers=max_threads) as pool:
        # Remember which ID each future was submitted for, so a failure can
        # still be attributed to a paper.
        pending = {}
        for submitted_id in paper_ids:
            pending[pool.submit(fetch_single_paper_data, submitted_id)] = submitted_id

        for finished in as_completed(pending):
            current_id = pending[finished]
            try:
                current_id, payload = finished.result()
                if not payload:
                    continue
                collected[current_id] = payload
            except Exception as e:
                print(f"Error fetching data for paper ID {current_id}: {str(e)}")
    return collected

def fetch_paper_data_with_category(cat_ids_api_key):
    """Fetch paper data for every category and return it as a JSON string.

    Looks up the category-to-IDs mapping with ``fetch_category_ids`` and, for
    each category, fetches its papers with ``fetch_paper_data_concurrently``
    using the ``'ids'`` member of that category's entry.

    Args:
        cat_ids_api_key: Access key passed through to ``fetch_category_ids``.

    Returns:
        A JSON string (4-space indent, non-ASCII characters kept as-is)
        mapping each category to its paper data. Categories that yield no
        data or raise while being fetched are omitted, so the result is
        ``"{}"`` when no category IDs are available. ``None`` if the category
        lookup or the serialization raises.
    """
    collected = {}
    try:
        categories = fetch_category_ids(cat_ids_api_key)
        # A falsy lookup result (None or empty) means there is nothing to walk.
        category_entries = categories.items() if categories else ()
        for category, entry in category_entries:
            print(f"Fetching data for category: {category}")
            try:
                papers = fetch_paper_data_concurrently(entry['ids'])
                if papers:
                    collected[category] = papers
            except Exception as e:
                # One bad category must not abort the remaining ones.
                print(f"Error fetching data for category {category}: {str(e)}")
        return json.dumps(collected, indent=4, ensure_ascii=False)
    except Exception as e:
        print(f"Exception while fetching paper data by category: {str(e)}")
        return None