Spaces:
Running
Running
File size: 1,540 Bytes
25f01d1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import fitz
import os
import logging
import random
from models import Paper, PaperProcessor
def extract_text_from_pdf(filename):
    """Return the text of every page of a PDF, restricted to Latin-1.

    Args:
        filename: Location of the PDF; passed unchanged to ``fitz.open``.

    Returns:
        The ``get_text()`` output of each page, concatenated in iteration
        order with no separator. Characters that cannot be encoded as
        Latin-1 are replaced by ``?``.
    """
    with fitz.open(filename) as pdf_document:
        # One join instead of repeated ``+=`` keeps text assembly linear in
        # the size of the document.
        text = "".join(page.get_text() for page in pdf_document)
    # Lossy round-trip: anything outside Latin-1 becomes "?".
    # NOTE(review): presumably required by a Latin-1-only consumer
    # downstream -- confirm before relaxing this.
    return text.encode('latin-1', 'replace').decode('latin-1')
def process_paper(pdf_file, paper_dir, prompt_dir, api_keys):
    """Extract a paper's text and collect reviews from two random models.

    Args:
        pdf_file: Either a ``str`` path to an existing PDF, or an object
            exposing both ``name`` and ``read()`` (an uploaded file).
            Uploaded content is saved into ``paper_dir`` first.
        paper_dir: Directory for saved uploads; created when missing.
        prompt_dir: Passed as the first argument to ``PaperProcessor``.
        api_keys: Mapping expanded as keyword arguments to
            ``PaperProcessor``.

    Returns:
        A ``(reviews, selected_models)`` tuple: the value returned by
        ``PaperProcessor.process_paper`` for each chosen model, and the two
        model names in the same order. ``([], [])`` is returned when
        ``pdf_file`` is neither a path nor a file-like object.
    """
    logging.info("Processing file type in process_paper: %s", type(pdf_file))
    logging.debug("Starting to process paper: %s", pdf_file)
    os.makedirs(paper_dir, exist_ok=True)

    if isinstance(pdf_file, str):
        pdf_path = pdf_file
    elif hasattr(pdf_file, 'name') and hasattr(pdf_file, 'read'):
        # Keep only the final path component: ``os.path.join`` throws away
        # ``paper_dir`` when given an absolute name (common for temp-file
        # uploads), and a name containing "../" could escape the directory.
        pdf_path = os.path.join(paper_dir, os.path.basename(pdf_file.name))
        # Read before opening the destination: opening with "wb" truncates,
        # which would wipe the upload if both refer to the same file.
        payload = pdf_file.read()
        with open(pdf_path, "wb") as f:
            f.write(payload)
    else:
        logging.error(
            "Received object is neither a path nor a file-like object.")
        return [], []

    extracted_text = extract_text_from_pdf(pdf_path)
    # The name handed to Paper is intentionally unchanged: the raw ``name``
    # attribute for file-like input, the basename for a string path.
    paper_name = (pdf_file.name if hasattr(pdf_file, 'name')
                  else os.path.basename(pdf_path))
    paper = Paper(paper_name, extracted_text)

    models = ['gpt', 'claude', 'gemini', 'commandr']
    # Two distinct models are drawn for every paper.
    selected_models = random.sample(models, 2)
    reviews = [
        PaperProcessor(prompt_dir, model, **api_keys).process_paper(paper)
        for model in selected_models
    ]
    logging.debug("Reviews generated: %s", reviews)
    return reviews, selected_models
|