Refactor imports and consolidate utility functions into utils.py; remove tools.py
2ce7bcb
import requests | |
import re | |
import os | |
import dotenv | |
from huggingface_hub import HfApi | |
# Load variables from a .env file (if one is found) into the process
# environment before the credentials below are looked up.
dotenv.load_dotenv()

# Both values are None when the corresponding variable is not set.
hf_token = os.environ.get("HF_API_TOKEN")
access_key = os.environ.get("ACCESS_KEY")

# Shared Hugging Face Hub client used by the download/upload helpers.
api = HfApi(token=hf_token)
def fetch_page(url, timeout=30):
    """Fetch *url* over HTTP and return the raw response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server (connect and read) before
            giving up. Without a timeout ``requests`` can block forever on
            an unresponsive host, so a finite default is used.

    Returns:
        The response body as ``bytes``. The HTTP status code is not
        checked, so the body of an error page is returned as-is.

    Raises:
        requests.exceptions.RequestException: On connection problems or
            when the timeout expires.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/131.0.6778.135 Safari/537.36'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.content
def check_data_in_file(data, file):
    """Tell whether *data* equals one whole line of the text file *file*.

    The file is read in full and split into lines; a partial match inside
    a line does not count.

    Returns:
        True if some line equals *data*, otherwise False.
    """
    with open(file, 'r') as handle:
        lines = handle.read().splitlines()
    return data in lines
def write_data_to_file(data, file):
    """Append *data* followed by a newline to the text file *file*.

    The file is opened in append mode, so it is created when missing and
    existing content is kept.

    Returns:
        Always True.
    """
    with open(file, mode='a') as sink:
        record = data + '\n'
        sink.write(record)
    return True
# Accepts only ASCII letters, digits, whitespace and the punctuation listed
# in the character class; compiled once at import time.
_SIMPLE_TITLE_RE = re.compile(r'^[a-zA-Z0-9\s\.\-\+\*/=\(\)\[\]\{\},:;"\'?\>\<\@\#\%\^\*\|\_\~\`]+$')


def verify_simple_title(title):
    """Return True when *title* consists solely of the allowed characters.

    An empty title, or one containing any character outside the allowed
    set (for example accented letters, ``!`` or ``&``), yields False.
    """
    return bool(_SIMPLE_TITLE_RE.match(title))
def download_datafile(filename):
    """Download *filename* from the ReXploreData dataset repo into the current directory.

    Any exception raised by the Hub client is printed and swallowed.

    Returns:
        True when the download call completed, False when it raised.
    """
    request = dict(
        repo_id="raannakasturi/ReXploreData",
        filename=filename,
        repo_type="dataset",
        local_dir='.',
        cache_dir='.',
        force_download=True,
    )
    try:
        api.hf_hub_download(**request)
    except Exception as err:
        print(str(err))
        return False
    return True
def upload_datafile(filename):
    """Upload the local file *filename* to the ReXploreData dataset repo, then delete it locally.

    The file is stored in the repo under the same path. Any exception from
    the upload or from removing the local copy is printed and swallowed.

    Returns:
        True when both the upload and the local removal succeeded,
        False when either raised.
    """
    upload_args = dict(
        path_or_fileobj=filename,
        path_in_repo=filename,
        repo_id="raannakasturi/ReXploreData",
        repo_type="dataset",
    )
    try:
        api.upload_file(**upload_args)
        os.remove(filename)
    except Exception as err:
        print(str(err))
        return False
    return True
def reset_datafiles(user_access_key):
    """Empty the tracked data files in the dataset repo, guarded by an access key.

    For each of ``arxiv.txt`` and ``pmc.txt`` the file is downloaded,
    truncated locally and uploaded again.

    Args:
        user_access_key: Key supplied by the caller; it must equal the
            module-level ``access_key``.

    Returns:
        The string ``"Invalid access key"`` when the key is rejected;
        otherwise True if every file was reset and uploaded, or False if
        at least one file failed.
    """
    import hmac  # function-scope import keeps this block self-contained

    # Fail closed: if no key is configured (access_key is None or empty),
    # nobody is authorised. A plain `!=` comparison would let a caller
    # passing None through when the environment variable is unset.
    if (
        not isinstance(access_key, str)
        or not access_key
        or not isinstance(user_access_key, str)
    ):
        return "Invalid access key"
    # Constant-time comparison; bytes are used because compare_digest
    # rejects non-ASCII str arguments.
    if not hmac.compare_digest(
        user_access_key.encode("utf-8"), access_key.encode("utf-8")
    ):
        return "Invalid access key"

    all_reset = True
    for filename in ('arxiv.txt', 'pmc.txt'):
        try:
            # The download result is deliberately not checked: the file is
            # overwritten with empty content whether or not it arrived.
            download_datafile(filename)
            with open(filename, 'w') as f:
                f.write('')
            # upload_datafile reports failure via its return value rather
            # than raising, so it has to be inspected explicitly.
            if not upload_datafile(filename):
                all_reset = False
        except Exception as e:
            print(str(e))
            all_reset = False
    return all_reset