raannakasturi committed on
Commit
77e6cdd
·
1 Parent(s): 2ce7bcb

Add error handling for datafile download and upload in extract_arxiv_data function

Browse files
Files changed (1) hide show
  1. arvix.py +4 -5
arvix.py CHANGED
@@ -55,6 +55,8 @@ def extract_data(category):
55
  return list(all_ids)
56
 
57
  def extract_arxiv_data():
 
 
58
  categories = {
59
  "Astrophysics": ["astro-ph"],
60
  "Condensed Matter": ["cond-mat"],
@@ -76,7 +78,6 @@ def extract_arxiv_data():
76
  }
77
  data = {}
78
  used_ids = set()
79
-
80
  for category, subcategories in categories.items():
81
  category_ids = set()
82
  for subcategory in subcategories:
@@ -89,13 +90,11 @@ def extract_arxiv_data():
89
  break
90
  if len(category_ids) == 4:
91
  break
92
-
93
- # Ensure exactly 4 IDs for each category
94
  while len(category_ids) < 4:
95
  category_ids.add(random.choice(list(used_ids)))
96
-
97
  data[category] = {"ids": list(category_ids)}
98
-
 
99
  return json.dumps(data, indent=4, ensure_ascii=False)
100
 
101
  if __name__ == '__main__':
 
55
  return list(all_ids)
56
 
57
  def extract_arxiv_data():
58
+ if not utils.download_datafile('arxiv.txt'):
59
+ raise Exception("Failed to download datafile")
60
  categories = {
61
  "Astrophysics": ["astro-ph"],
62
  "Condensed Matter": ["cond-mat"],
 
78
  }
79
  data = {}
80
  used_ids = set()
 
81
  for category, subcategories in categories.items():
82
  category_ids = set()
83
  for subcategory in subcategories:
 
90
  break
91
  if len(category_ids) == 4:
92
  break
 
 
93
  while len(category_ids) < 4:
94
  category_ids.add(random.choice(list(used_ids)))
 
95
  data[category] = {"ids": list(category_ids)}
96
+ if not utils.upload_datafile('arxiv.txt'):
97
+ raise Exception("Failed to upload datafile")
98
  return json.dumps(data, indent=4, ensure_ascii=False)
99
 
100
  if __name__ == '__main__':