Spaces:
Running
Running
File size: 3,013 Bytes
3ec5aa6 c3ef24f 3ec5aa6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
from gradio_client import Client
import json
from concurrent.futures import ThreadPoolExecutor, as_completed
def fetch_category_ids(cat_ids_api_key):
    """Fetch category IDs using the category API.

    Calls the ``/fetch_paper_ids`` endpoint of the
    ``raannakasturi/ReXploreIDFetchingAPI`` Space and decodes its JSON reply.

    Args:
        cat_ids_api_key: Access key forwarded to the API as
            ``user_access_key``.

    Returns:
        The ``data`` member of the decoded response when its ``status`` is
        ``'success'``. ``None`` when the API reports any other status, or
        when the request or the JSON decoding raises.

    Raises:
        ValueError: If ``cat_ids_api_key`` is empty or ``None``.
    """
    if not cat_ids_api_key:
        raise ValueError("API access key not found. Please check your environment variables.")
    # Client construction stays outside the try block, so a failure to reach
    # the Space propagates to the caller exactly as before.
    cat_ids_api_client = Client("raannakasturi/ReXploreIDFetchingAPI")
    try:
        result = cat_ids_api_client.predict(
            user_access_key=cat_ids_api_key,
            api_name="/fetch_paper_ids",
        )
        cat_ids = json.loads(result)
        if cat_ids['status'] == 'success':
            return cat_ids['data']
        # Report the rejection instead of returning None silently, so a bad
        # key or API-side failure is visible in the logs.
        print(f"Failed to fetch category IDs: {cat_ids.get('message', 'Unknown error')}")
        return None
    except Exception as e:
        print(f"Exception while fetching category IDs: {str(e)}")
        return None
def fetch_single_paper_data(paper_id):
    """Fetch the data for one paper from the paper-data API.

    Calls the ``/fetch_paper_data`` endpoint of the
    ``raannakasturi/ReXplorePaperDataFetcher`` Space with ``id=paper_id`` and
    decodes the JSON reply.

    Args:
        paper_id: Identifier passed to the API as ``id``.

    Returns:
        A ``(paper_id, data)`` tuple. ``data`` is the ``data`` member of the
        response when its ``status`` is ``'success'``; otherwise it is
        ``None`` and a diagnostic line is printed.
    """
    client = Client("raannakasturi/ReXplorePaperDataFetcher")
    try:
        raw_reply = client.predict(id=paper_id, api_name="/fetch_paper_data")
        reply = json.loads(raw_reply)
        if reply['status'] != 'success':
            # The API answered but declined the request; surface its message.
            print(f"Failed to fetch data for paper ID {paper_id}: {reply.get('message', 'Unknown error')}")
            return paper_id, None
        return paper_id, reply['data']
    except Exception as exc:
        print(f"Exception while fetching data for paper ID {paper_id}: {str(exc)}")
        return paper_id, None
def fetch_paper_data_concurrently(paper_ids, max_threads=12):
    """Fetch data for several paper IDs in parallel on a thread pool.

    Each ID is handed to ``fetch_single_paper_data`` on its own worker task;
    results are collected as the tasks finish.

    Args:
        paper_ids: Iterable of paper identifiers to fetch.
        max_threads: Maximum number of worker threads in the pool.

    Returns:
        A dict mapping paper ID to its fetched data. IDs whose fetch raised
        or produced falsy data are left out.
    """
    collected = {}
    with ThreadPoolExecutor(max_workers=max_threads) as pool:
        # Remember which ID each future belongs to so errors can name it.
        id_by_future = {}
        for pid in paper_ids:
            id_by_future[pool.submit(fetch_single_paper_data, pid)] = pid
        for finished in as_completed(id_by_future):
            pid = id_by_future[finished]
            try:
                pid, payload = finished.result()
                if payload:
                    collected[pid] = payload
            except Exception as exc:
                print(f"Error fetching data for paper ID {pid}: {str(exc)}")
    return collected
def fetch_paper_data_with_category(cat_ids_api_key):
    """Fetch paper data for every category and return it as a JSON string.

    Looks up the categories with ``fetch_category_ids``, then fetches the
    papers listed under each category's ``'ids'`` entry with
    ``fetch_paper_data_concurrently``.

    Args:
        cat_ids_api_key: Access key passed through to ``fetch_category_ids``.

    Returns:
        A JSON string (4-space indent, non-ASCII characters kept as-is) of
        the form ``{category: paper_data}``. Categories that fail or yield no
        data are omitted, so the result is ``"{}"`` when nothing was fetched.
        ``None`` if an exception escapes the category lookup or the JSON
        serialisation.
    """
    by_category = {}
    try:
        categories = fetch_category_ids(cat_ids_api_key)
        # A falsy lookup result simply means there is nothing to iterate.
        for category, entry in (categories or {}).items():
            print(f"Fetching data for category: {category}")
            try:
                papers = fetch_paper_data_concurrently(entry['ids'])
                if papers:
                    by_category[category] = papers
            except Exception as exc:
                # One bad category must not abort the remaining ones.
                print(f"Error fetching data for category {category}: {str(exc)}")
        return json.dumps(by_category, indent=4, ensure_ascii=False)
    except Exception as exc:
        print(f"Exception while fetching paper data by category: {str(exc)}")
        return None
|