Spaces:

raannakasturi
/

ReXploreIDFetchingAPI

Running

App Files Files Community

ReXploreIDFetchingAPI / utils.py

raannakasturi

Refactor imports and consolidate utility functions into utils.py; remove tools.py

2ce7bcb about 1 month ago

raw

history blame contribute delete

2.32 kB

	import requests
	import re
	import os
	import dotenv
	from huggingface_hub import HfApi

	# Module-level setup: runs once at import time.
	# Load variables from a .env file (when one is found) into the environment
	# before the os.getenv() lookups below.
	dotenv.load_dotenv()
	# Hugging Face token passed to HfApi below; None when HF_API_TOKEN is unset.
	hf_token = os.getenv("HF_API_TOKEN")
	# Secret that reset_datafiles() compares against the caller-supplied key;
	# None when ACCESS_KEY is unset.
	access_key = os.getenv("ACCESS_KEY")
	# Shared Hub client used by download_datafile() and upload_datafile().
	api = HfApi(token=hf_token)

	def fetch_page(url, timeout=30):
	    """Fetch ``url`` with a browser-like User-Agent and return the body.

	    Args:
	        url: Address of the page to download.
	        timeout: Seconds to wait for the server (connect and read) before
	            giving up. Passed straight to ``requests.get``; the default
	            keeps a stalled server from blocking the caller forever.

	    Returns:
	        The response body as returned by ``Response.content``.

	    Raises:
	        requests.exceptions.RequestException: On connection errors or when
	            the timeout elapses.
	    """
	    headers = {
	        'User-Agent': 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/131.0.6778.135 Safari/537.36'
	    }
	    # The HTTP status is deliberately not checked: the body of an error
	    # response is returned as-is, exactly as before.
	    response = requests.get(url, headers=headers, timeout=timeout)
	    return response.content

	def check_data_in_file(data, file):
	    """Tell whether ``data`` equals one complete line of the text file ``file``.

	    The whole file is read and split with ``str.splitlines``; the match is
	    an exact comparison against each resulting line (no substring matching).

	    Returns:
	        True when a line equal to ``data`` exists, otherwise False.
	    """
	    with open(file, 'r') as handle:
	        known_lines = handle.read().splitlines()
	    return data in known_lines

	def write_data_to_file(data, file):
	    """Append ``data`` followed by a newline to the text file ``file``.

	    The file is opened in append mode, so it is created when missing and
	    existing content is kept.

	    Returns:
	        Always True once the write has completed.
	    """
	    with open(file, mode='a') as sink:
	        sink.write(data + '\n')
	    return True

	def verify_simple_title(title):
	    """Report whether ``title`` is made up solely of allowed characters.

	    Allowed are ASCII letters, digits, whitespace and the punctuation
	    listed in the character class below. An empty string is rejected, and
	    so is any title containing a character outside the class (for example
	    parentheses, ``!``, ``&``, ``*`` or accented letters).

	    Returns:
	        True when the entire title matches, otherwise False.
	    """
	    allowed = r'^[a-zA-Z0-9\s\.\-\+\/=\[\]\{\},:;"\'?\>\<\@\#\%\^\\\|\_\~\`]+$'
	    return re.match(allowed, title) is not None

	def download_datafile(filename):
	    """Download ``filename`` from the ReXploreData dataset repo into the CWD.

	    The download is forced (``force_download=True``) and uses the current
	    directory as both ``local_dir`` and ``cache_dir``.

	    Returns:
	        True when the download call completes; False when it raises, in
	        which case the error text is printed.
	    """
	    request = dict(
	        repo_id="raannakasturi/ReXploreData",
	        filename=filename,
	        repo_type="dataset",
	        local_dir='.',
	        cache_dir='.',
	        force_download=True,
	    )
	    try:
	        api.hf_hub_download(**request)
	    except Exception as err:
	        print(str(err))
	        return False
	    return True

	def upload_datafile(filename):
	    """Upload the local file ``filename`` to the ReXploreData dataset repo.

	    The file is stored in the repo under the same path and the local copy
	    is deleted once the upload call has returned.

	    Returns:
	        True when both the upload and the local delete succeed; False when
	        either raises, in which case the error text is printed.
	    """
	    try:
	        api.upload_file(
	            path_or_fileobj=filename,
	            path_in_repo=filename,
	            repo_id="raannakasturi/ReXploreData",
	            repo_type="dataset",
	        )
	        # Only reached when the upload did not raise.
	        os.remove(filename)
	    except Exception as err:
	        print(str(err))
	        return False
	    return True

	def reset_datafiles(user_access_key):
	    """Empty the ``arxiv.txt`` and ``pmc.txt`` data files in the dataset repo.

	    Args:
	        user_access_key: Key supplied by the caller; it must equal the
	            module-level ``access_key``.

	    Returns:
	        The string ``"Invalid access key"`` when the key is rejected.
	        Otherwise True when every file was emptied and uploaded, or False
	        when at least one file could not be reset.
	    """
	    # Local import keeps this block self-contained (hmac is stdlib).
	    import hmac

	    # Fail closed when no key is configured (access_key is None or empty),
	    # and compare in constant time so the check does not leak how much of
	    # the key matched.
	    authorized = (
	        bool(access_key)
	        and isinstance(user_access_key, str)
	        and hmac.compare_digest(
	            user_access_key.encode("utf-8"), access_key.encode("utf-8")
	        )
	    )
	    if not authorized:
	        return "Invalid access key"

	    all_reset = True
	    for filename in ('arxiv.txt', 'pmc.txt'):
	        try:
	            # Kept from the original flow. The local copy is overwritten
	            # right after, so a failed download does not block the reset.
	            download_datafile(filename)
	            # Truncate (or create) the local file.
	            with open(filename, 'w'):
	                pass
	            # upload_datafile reports failure via its return value rather
	            # than by raising, so the result must be checked explicitly.
	            uploaded = upload_datafile(filename)
	        except Exception as e:
	            print(str(e))
	            uploaded = False
	        if not uploaded:
	            # Keep going so the remaining files are still attempted.
	            all_reset = False
	    return all_reset