Refactor data extraction functions: rename extract_pmc_data to extract_sd_data, add extract_phys_data, and update file handling for new data sources
a4b6d0b
from arvix import extract_arxiv_data | |
from pmc import extract_sd_data | |
from phys import extract_phys_data | |
import json | |
import dotenv | |
import os | |
from concurrent.futures import ThreadPoolExecutor | |
# Let python-dotenv populate the process environment first, so the lookup
# below can see values supplied through a local .env file.
dotenv.load_dotenv()

# Shared secret that fetch_data() compares against the caller-supplied key.
# Stays None when the ACCESS_KEY environment variable is not set.
ACCESS_KEY = os.environ.get("ACCESS_KEY")
def fetch_arxiv_data():
    """Return the output of ``extract_arxiv_data`` decoded from JSON text."""
    raw_payload = extract_arxiv_data()
    return json.loads(raw_payload)
def fetch_sd_data():
    """Return the output of ``extract_sd_data`` decoded from JSON text."""
    raw_payload = extract_sd_data()
    return json.loads(raw_payload)
def fetch_phys_data():
    """Return the output of ``extract_phys_data`` decoded from JSON text."""
    raw_payload = extract_phys_data()
    return json.loads(raw_payload)
def _access_key_matches(supplied_key, expected_key):
    """Return True when *supplied_key* equals *expected_key*.

    Two ``str`` values are compared with ``hmac.compare_digest`` so that the
    time taken does not depend on how much of the key matched. Any other
    combination (for example ``None`` on either side) falls back to plain
    ``==``, which keeps the outcome identical to an ordinary equality check.
    """
    # Imported locally so this block does not depend on the file's import list.
    import hmac

    if isinstance(supplied_key, str) and isinstance(expected_key, str):
        # compare_digest accepts only ASCII str or bytes, so compare the
        # encoded bytes. "surrogatepass" keeps lone surrogates encodable
        # instead of raising UnicodeEncodeError.
        return hmac.compare_digest(
            supplied_key.encode("utf-8", "surrogatepass"),
            expected_key.encode("utf-8", "surrogatepass"),
        )
    return supplied_key == expected_key


def fetch_data(user_access_key):
    """Gather paper IDs from all sources and return them as a JSON string.

    Args:
        user_access_key: Key supplied by the caller; it must equal the
            module-level ``ACCESS_KEY`` for any data to be fetched.

    Returns:
        A JSON document (4-space indent, non-ASCII characters kept as-is)
        with a ``"status"`` field:

        * ``"Invalid access key"`` - the key did not match; no other fields.
        * ``"success"`` - ``"data"`` maps each topic whose ``count`` is not
          zero to ``{"ids": ...}``.
        * ``"error"`` - an exception was raised while fetching or merging;
          its message is printed and ``"data"`` holds whatever had been
          merged up to that point.
    """
    if not _access_key_matches(user_access_key, ACCESS_KEY):
        papers_data = {"status": "Invalid access key"}
    else:
        papers_data = {"status": "success", "data": {}}
        try:
            # Start all three fetchers before waiting on any of them so they
            # run concurrently. The tuple order is also the merge order.
            fetchers = (fetch_sd_data, fetch_arxiv_data, fetch_phys_data)
            with ThreadPoolExecutor() as executor:
                futures = [executor.submit(fetcher) for fetcher in fetchers]
                source_payloads = [future.result() for future in futures]

            merged = papers_data["data"]
            for payload in source_payloads:
                for topic, topic_data in payload.items():
                    if topic_data['count'] == 0:
                        continue
                    # A topic reported by more than one source keeps only the
                    # IDs of the source processed last.
                    # NOTE(review): confirm overwriting (rather than
                    # combining the ID lists) is intended.
                    merged[topic] = {'ids': topic_data['ids']}
        except Exception as e:
            # Best-effort boundary: report the failure in the response
            # instead of propagating it to the caller.
            print(str(e))
            papers_data['status'] = 'error'
    return json.dumps(papers_data, indent=4, ensure_ascii=False)
if __name__ == '__main__':
    # Script entry point: fetch everything with the configured key and save
    # the JSON response next to the script.
    data = fetch_data(ACCESS_KEY)
    # The payload is produced with ensure_ascii=False, so it may contain
    # non-ASCII characters. Write it as UTF-8 explicitly rather than relying
    # on the platform's default encoding, which can fail or produce
    # non-UTF-8 JSON on some systems.
    with open('data.json', 'w', encoding='utf-8') as f:
        f.write(data)