Commit
·
77e6cdd
1
Parent(s):
2ce7bcb
Add error handling for datafile download and upload in the extract_arxiv_data function
Browse files
arvix.py
CHANGED
@@ -55,6 +55,8 @@ def extract_data(category):
|
|
55 |
return list(all_ids)
|
56 |
|
57 |
def extract_arxiv_data():
|
|
|
|
|
58 |
categories = {
|
59 |
"Astrophysics": ["astro-ph"],
|
60 |
"Condensed Matter": ["cond-mat"],
|
@@ -76,7 +78,6 @@ def extract_arxiv_data():
|
|
76 |
}
|
77 |
data = {}
|
78 |
used_ids = set()
|
79 |
-
|
80 |
for category, subcategories in categories.items():
|
81 |
category_ids = set()
|
82 |
for subcategory in subcategories:
|
@@ -89,13 +90,11 @@ def extract_arxiv_data():
|
|
89 |
break
|
90 |
if len(category_ids) == 4:
|
91 |
break
|
92 |
-
|
93 |
-
# Ensure exactly 4 IDs for each category
|
94 |
while len(category_ids) < 4:
|
95 |
category_ids.add(random.choice(list(used_ids)))
|
96 |
-
|
97 |
data[category] = {"ids": list(category_ids)}
|
98 |
-
|
|
|
99 |
return json.dumps(data, indent=4, ensure_ascii=False)
|
100 |
|
101 |
if __name__ == '__main__':
|
|
|
55 |
return list(all_ids)
|
56 |
|
57 |
def extract_arxiv_data():
|
58 |
+
if not utils.download_datafile('arxiv.txt'):
|
59 |
+
raise Exception("Failed to download datafile")
|
60 |
categories = {
|
61 |
"Astrophysics": ["astro-ph"],
|
62 |
"Condensed Matter": ["cond-mat"],
|
|
|
78 |
}
|
79 |
data = {}
|
80 |
used_ids = set()
|
|
|
81 |
for category, subcategories in categories.items():
|
82 |
category_ids = set()
|
83 |
for subcategory in subcategories:
|
|
|
90 |
break
|
91 |
if len(category_ids) == 4:
|
92 |
break
|
|
|
|
|
93 |
while len(category_ids) < 4:
|
94 |
category_ids.add(random.choice(list(used_ids)))
|
|
|
95 |
data[category] = {"ids": list(category_ids)}
|
96 |
+
if not utils.upload_datafile('arxiv.txt'):
|
97 |
+
raise Exception("Failed to upload datafile")
|
98 |
return json.dumps(data, indent=4, ensure_ascii=False)
|
99 |
|
100 |
if __name__ == '__main__':
|