Commit
·
5a2457c
1
Parent(s):
77e6cdd
Add a "count" field (the number of IDs) to each category entry in the extract_arxiv_data output and to each topic entry in the extract_pmc_data output
Browse files
arvix.py
CHANGED
@@ -92,7 +92,7 @@ def extract_arxiv_data():
|
|
92 |
break
|
93 |
while len(category_ids) < 4:
|
94 |
category_ids.add(random.choice(list(used_ids)))
|
95 |
-
data[category] = {"ids": list(category_ids)}
|
96 |
if not utils.upload_datafile('arxiv.txt'):
|
97 |
raise Exception("Failed to upload datafile")
|
98 |
return json.dumps(data, indent=4, ensure_ascii=False)
|
|
|
92 |
break
|
93 |
while len(category_ids) < 4:
|
94 |
category_ids.add(random.choice(list(used_ids)))
|
95 |
+
data[category] = {"ids": list(category_ids), "count": len(category_ids)}
|
96 |
if not utils.upload_datafile('arxiv.txt'):
|
97 |
raise Exception("Failed to upload datafile")
|
98 |
return json.dumps(data, indent=4, ensure_ascii=False)
|
pmc.py
CHANGED
@@ -79,7 +79,7 @@ def extract_pmc_data():
|
|
79 |
if not utils.check_data_in_file(pmcid, 'pmc.txt'):
|
80 |
utils.write_data_to_file(pmcid, 'pmc.txt')
|
81 |
pmc_ids.append(pmcid)
|
82 |
-
pmc_data[topic] = {"ids": pmc_ids}
|
83 |
if not utils.upload_datafile('pmc.txt'):
|
84 |
raise Exception("Failed to upload datafile")
|
85 |
return json.dumps(pmc_data, indent=4, ensure_ascii=False)
|
|
|
79 |
if not utils.check_data_in_file(pmcid, 'pmc.txt'):
|
80 |
utils.write_data_to_file(pmcid, 'pmc.txt')
|
81 |
pmc_ids.append(pmcid)
|
82 |
+
pmc_data[topic] = {"ids": pmc_ids, "count": len(pmc_ids)}
|
83 |
if not utils.upload_datafile('pmc.txt'):
|
84 |
raise Exception("Failed to upload datafile")
|
85 |
return json.dumps(pmc_data, indent=4, ensure_ascii=False)
|