Spaces:

janar
/

toypdf

Runtime error

File size: 2,521 Bytes

92ef095
 
 
51a7f02
 
 
 
 
 
 
 
 
 
f2932e2
51a7f02
 
6de2046
51a7f02
 
6de2046
f2932e2
cc3240a
 
 
 
 
f2932e2
92ef095
f2932e2
92ef095
f2932e2
6de2046
92ef095
51a7f02
 
92ef095
51a7f02
cc3240a
 
 
 
 
 
f2932e2
51a7f02
f2932e2
a0a3c87
c7e10e4
 
51a7f02
6de2046
51a7f02
 
 
6de2046
51a7f02

from typing import Annotated

from fastapi import APIRouter, UploadFile, File, Body
from fastapi.responses import JSONResponse
import openai
import io
import os
from pypdf import PdfReader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Qdrant
from langchain.schema import Document
from langchain.chains.question_answering import load_qa_chain
from langchain.llms import OpenAI
from db.vector_store import ToyVectorStore

# Router that the document endpoints in this module are registered on.
router = APIRouter()
# Question-answering chain built once at import time and shared by every
# request: a "stuff" chain over a zero-temperature OpenAI LLM.
_chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff", verbose=True)

@router.post("/v1/docs")
async def create_or_update(name: Annotated[str, Body()], file_name: Annotated[str, Body()], file: UploadFile = File(...)):
    """Add the pages of an uploaded file to an existing collection.

    `name` of the collection.
    `file_name` name of the file, recorded in each document's metadata.
    `file` to upload.

    Responds with 404 and an empty body when the collection lookup returns
    nothing; otherwise responds with 200 and the collection name.
    """

    _db = ToyVectorStore.get_instance().get_collection(name)
    if not _db:
        #todo. fix this to create a collection, may be.
        return JSONResponse(status_code=404, content={})

    # Gather every page first so the store is written to once per upload
    # rather than once per page.
    documents = []
    async for doc in generate_documents(file, file_name):
        print(doc)
        documents.append(doc)

    # The generator may yield nothing (e.g. a non-PDF upload); skip the
    # write entirely instead of handing the store an empty batch.
    if documents:
        _db.add_documents(documents)
    #todo return something sensible
    return JSONResponse(status_code=200, content={"name": name})

@router.get("/v1/doc/{name}/answer")
async def answer(name: str, query: str):
    """Answer a question from the collection.

    `name` of the collection.
    `query` to be answered.

    Responds with 404 and an empty body when the collection lookup returns
    nothing. Otherwise responds with 200 and a JSON body holding the chain's
    `answer` plus `file_score`, a list of ["<file> : <page>", score] pairs
    for the documents returned by the similarity search.
    """
    _db = ToyVectorStore.get_instance().get_collection(name)
    if not _db:
        # Mirror the upload endpoint: an unknown collection is a 404, not an
        # AttributeError surfacing as a 500.
        return JSONResponse(status_code=404, content={})

    print(query)
    docs = _db.similarity_search_with_score(query=query)
    print(docs)

    # Each search hit is a (document, score) pair; the chain only needs the
    # documents, the response also reports the scores.
    reply = _chain.run(input_documents=[doc for doc, _ in docs], question=query)
    file_score = [
        [f"{doc.metadata['file']} : {doc.metadata['page']}", score]
        for doc, score in docs
    ]
    return JSONResponse(status_code=200, content={"answer": reply, "file_score": file_score})

async def generate_documents(file: UploadFile, file_name: str):
    """Yield one `Document` per text chunk produced from the uploaded file.

    `file` the upload to convert.
    `file_name` stored under the "file" key of each document's metadata.

    The "page" metadata value is a 1-based counter over the yielded chunks.
    """
    page_number = 0
    async for page_text in convert_documents(file):
        page_number += 1
        yield Document(
            page_content=page_text,
            metadata={"file": file_name, "page": page_number},
        )
 
async def convert_documents(file: UploadFile):
    """Yield the extracted text of each page of an uploaded PDF, in order.

    Uploads whose content type is not 'application/pdf' yield nothing.
    If an exception is raised while pages are being read, it is printed and
    the generator ends without yielding the remaining pages.
    """
    # Only PDF uploads are parsed; anything else produces an empty stream.
    if file.content_type != 'application/pdf':
        return

    raw_bytes = await file.read()
    reader = PdfReader(io.BytesIO(raw_bytes))
    try:
        for pdf_page in reader.pages:
            yield pdf_page.extract_text()
    except Exception as e:
        print(f"Exception {e}")