# ReXploreIDFetchingAPI / fetch_data.py
# raannakasturi's picture
# Refactor data extraction functions: rename extract_pmc_data to extract_sd_data, add extract_phys_data, and update file handling for new data sources
# a4b6d0b
import hmac
import json
import os
from concurrent.futures import ThreadPoolExecutor

import dotenv

from arvix import extract_arxiv_data
from phys import extract_phys_data
from pmc import extract_sd_data
# Presumably pulls variables from a local .env file into the process
# environment (python-dotenv); it must run before the lookup below.
dotenv.load_dotenv()
# Key that fetch_data compares against the caller-supplied access key.
# os.getenv returns None when the ACCESS_KEY variable is not set.
ACCESS_KEY = os.getenv("ACCESS_KEY")
def fetch_arxiv_data():
    """Decode the JSON text returned by ``extract_arxiv_data``.

    Returns:
        The decoded JSON value (fetch_data treats it as a mapping of
        topic -> topic data).
    """
    raw_payload = extract_arxiv_data()
    decoded = json.loads(raw_payload)
    return decoded
def fetch_sd_data():
    """Decode the JSON text returned by ``extract_sd_data``.

    Returns:
        The decoded JSON value (fetch_data treats it as a mapping of
        topic -> topic data).
    """
    raw_payload = extract_sd_data()
    decoded = json.loads(raw_payload)
    return decoded
def fetch_phys_data():
    """Decode the JSON text returned by ``extract_phys_data``.

    Returns:
        The decoded JSON value (fetch_data treats it as a mapping of
        topic -> topic data).
    """
    raw_payload = extract_phys_data()
    decoded = json.loads(raw_payload)
    return decoded
def _is_valid_access_key(candidate):
    """Return True only when *candidate* matches the configured ACCESS_KEY."""
    # Fail closed: with no (or an empty) configured key, nothing may match.
    # A plain ``!=`` would accept ``None`` whenever ACCESS_KEY is unset.
    if not isinstance(ACCESS_KEY, str) or not ACCESS_KEY:
        return False
    if not isinstance(candidate, str):
        return False
    # compare_digest avoids leaking, via response timing, how many leading
    # characters matched. It only accepts ASCII str, so compare bytes;
    # "surrogatepass" keeps malformed input from raising instead of failing.
    return hmac.compare_digest(
        candidate.encode("utf-8", "surrogatepass"),
        ACCESS_KEY.encode("utf-8", "surrogatepass"),
    )


def _merge_topic_ids(merged, source_data):
    """Fold one source's non-empty topics into *merged* without dropping IDs."""
    for topic, topic_data in source_data.items():
        if topic_data['count'] == 0:
            continue
        ids = topic_data['ids']
        entry = merged.get(topic)
        if entry is None:
            merged[topic] = {'ids': ids}
            continue
        # The topic was already reported by an earlier source. Overwriting
        # the entry would silently discard that source's IDs, so append only
        # the ones not seen yet (assumes 'ids' is a list -- TODO confirm).
        known_ids = entry['ids']
        for paper_id in ids:
            if paper_id not in known_ids:
                known_ids.append(paper_id)


def fetch_data(user_access_key):
    """Collect paper IDs from every source and return them as a JSON string.

    Args:
        user_access_key: Key supplied by the caller. It must be a string
            equal to the module-level ``ACCESS_KEY``. When ``ACCESS_KEY``
            is unset or empty, every caller is rejected.

    Returns:
        A JSON document (indent=4, non-ASCII characters kept as-is) shaped as
        one of:

        * ``{"status": "Invalid access key"}`` -- key rejected;
        * ``{"status": "success", "data": {topic: {"ids": [...]}}}`` --
          topics whose ``count`` is 0 are omitted, and a topic present in
          several sources carries the IDs of all of them;
        * ``{"status": "error", "data": {...}}`` -- fetching or merging
          raised; ``data`` holds whatever was merged before the failure.
    """
    if not _is_valid_access_key(user_access_key):
        papers_data = {"status": "Invalid access key"}
    else:
        papers_data = {'status': 'success', 'data': {}}
        try:
            # Submit all fetchers before waiting on any result so the three
            # sources are queried concurrently.
            with ThreadPoolExecutor() as executor:
                futures = [
                    executor.submit(fetcher)
                    for fetcher in (fetch_sd_data, fetch_arxiv_data, fetch_phys_data)
                ]
                source_results = [future.result() for future in futures]
            # Merge in submission order (sd, arxiv, phys) so topic ordering
            # in the output stays the same as before.
            for source_data in source_results:
                _merge_topic_ids(papers_data['data'], source_data)
        except Exception as e:
            # Boundary handler: report the failure through the status field
            # rather than propagating it to the caller.
            print(str(e))
            papers_data['status'] = 'error'
    return json.dumps(papers_data, indent=4, ensure_ascii=False)
if __name__ == '__main__':
    # Script mode: fetch with the configured key and dump the JSON to disk.
    data = fetch_data(ACCESS_KEY)
    # fetch_data serialises with ensure_ascii=False, so the text may contain
    # non-ASCII characters. Pin UTF-8 rather than relying on the locale
    # default, which can fail or produce non-UTF-8 JSON on some platforms.
    with open('data.json', 'w', encoding='utf-8') as f:
        f.write(data)