Spaces:
Runtime error
Runtime error
gamingflexer
committed on
Commit
·
d012361
1
Parent(s):
8b67ee1
Add utility functions for generating UUID and checking ID existence in JSON file
Browse files- src/utils.py +32 -1
src/utils.py
CHANGED
@@ -1,4 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
def compare_paper_ids(data, paper_ids):
    """Return the entries of *paper_ids* that have no matching record in *data*.

    Args:
        data: Iterable of mappings; each one must provide a ``'doi_no'`` key
            (a missing key raises ``KeyError``).
        paper_ids: Iterable of identifiers to look up.

    Returns:
        A list of the identifiers from *paper_ids* that do not appear among
        the ``'doi_no'`` values, in their original order. Duplicates in
        *paper_ids* are kept.
    """
    # Build the lookup set once so each membership test is O(1).
    existing_dois = {item['doi_no'] for item in data}
    missing_paper_ids = [
        paper_id for paper_id in paper_ids if paper_id not in existing_dois
    ]
    return missing_paper_ids
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import json
|
2 |
+
import uuid
|
3 |
+
|
4 |
+
def generate_uuid():
    """Return a freshly generated random (version 4) UUID as a string.

    Returns:
        The canonical hyphenated text form of the UUID, e.g.
        ``'3f2b8c1e-9d4a-4e6b-8a57-0c1d2e3f4a5b'``.
    """
    return str(uuid.uuid4())
|
6 |
+
|
7 |
+
def check_id_extis_in_json(file_id):
    """Report whether *file_id* is present in ``file_ids.json``.

    The file is read from the current working directory on every call.

    Args:
        file_id: The identifier to look for.

    Returns:
        ``True`` if *file_id* is contained in the decoded JSON document
        (an element of a JSON array, or a key of a JSON object), otherwise
        ``False``. A missing ``file_ids.json`` is treated as "no IDs
        recorded yet" and also yields ``False``.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    try:
        with open('file_ids.json', 'r', encoding='utf-8') as f:
            file_ids = json.load(f)
    except FileNotFoundError:
        # No registry file means nothing has been recorded.
        return False
    return file_id in file_ids
|
14 |
+
|
15 |
def compare_paper_ids(data, paper_ids):
    """Return the entries of *paper_ids* that have no matching record in *data*.

    Args:
        data: Iterable of mappings; each one must provide a ``'doi_no'`` key
            (a missing key raises ``KeyError``).
        paper_ids: Iterable of identifiers to look up.

    Returns:
        A list of the identifiers from *paper_ids* that do not appear among
        the ``'doi_no'`` values, in their original order. Duplicates in
        *paper_ids* are kept.
    """
    # Build the lookup set once so each membership test is O(1).
    existing_dois = {item['doi_no'] for item in data}
    missing_paper_ids = [
        paper_id for paper_id in paper_ids if paper_id not in existing_dois
    ]
    return missing_paper_ids
|
19 |
+
|
20 |
+
def extract_json_from_text(text):
    """Extract the ``data`` entry from a JSON object embedded in *text*.

    The candidate JSON is the span from the first ``{`` to the last ``}`` of
    ``str(text)``. That span is lowercased and then parsed.

    Args:
        text: Any object; it is converted with ``str()`` before searching.

    Returns:
        The value stored under the ``'data'`` key of the parsed object, or
        ``[]`` when that key is absent. If anything goes wrong while
        locating or parsing the JSON, the stringified input is returned
        unchanged (best-effort fallback) after printing the error in red.
    """
    text = str(text)
    try:
        # Find the JSON part within the text: first '{' through last '}'.
        start_index = text.find('{')
        end_index = text.rfind('}') + 1
        # With no usable brace pair the slice is empty or malformed, so
        # json.loads raises and the fallback below takes over.
        json_part = text[start_index:end_index]
        # NOTE(review): lowercasing the whole span also lowercases keys and
        # string values; presumably done to tolerate "Data"/"True"-style
        # output. Confirm callers do not need case-sensitive values.
        json_part = json.loads(json_part.lower())
        print("json", type(json_part))
        print(json_part)
        return json_part.get('data', [])

    except Exception as e:
        # Deliberately broad: any failure falls back to returning the text.
        # The trailing \033[0m resets the red colour started by \033[31m.
        print(f"\033[31m Exception occurred while loading JSON: {str(e)} \033[0m")
        return text
|