|
from typing import Annotated |
|
|
|
from fastapi import APIRouter, UploadFile, File, Body |
|
from fastapi.responses import JSONResponse |
|
import openai |
|
import io |
|
import os |
|
from pypdf import PdfReader |
|
from langchain.embeddings.openai import OpenAIEmbeddings |
|
from langchain.vectorstores import Qdrant |
|
from langchain.schema import Document |
|
from langchain.chains.question_answering import load_qa_chain |
|
from langchain.llms import OpenAI |
|
from db.vector_store import ToyVectorStore |
|
|
|
# Router on which this module's endpoints are registered.
router = APIRouter()

# Question-answering chain, built once at import time and shared by all
# requests: an OpenAI LLM at temperature 0 with the "stuff" chain type.
_chain = load_qa_chain(
    OpenAI(temperature=0),
    chain_type="stuff",
    verbose=True,
)
|
|
|
@router.post("/v1/docs")
async def create_or_update(name: Annotated[str, Body()], file_name: Annotated[str, Body()], file: UploadFile = File(...)):
    """Add the information from an uploaded file to an existing collection.

    `name` of the collection.

    `file` to upload.

    `file_name` name of the file; recorded in the metadata of every
    document generated from the upload.

    Responds with 404 and an empty body when no collection is found for
    `name`; otherwise responds with 200 and the collection name.
    """
    _db = ToyVectorStore.get_instance().get_collection(name)
    if not _db:
        return JSONResponse(status_code=404, content={})

    # Collect every generated document first so the store is written to in a
    # single call instead of once per page.
    documents = []
    async for doc in generate_documents(file, file_name):
        print(doc)
        documents.append(doc)

    # The upload may produce no documents at all (e.g. a content type the
    # converter does not handle); in that case the store is left untouched.
    if documents:
        _db.add_documents(documents)

    return JSONResponse(status_code=200, content={"name": name})
|
|
|
@router.get("/v1/doc/{name}/answer")
async def answer(name: str, query: str):
    """Answer a question from the collection.

    `name` of the collection.

    `query` to be answered.

    Responds with 404 and an empty body when no collection is found for
    `name`. Otherwise responds with 200, the generated `answer`, and a
    `file_score` list that pairs a "<file> : <page>" label with the score of
    each matched document.
    """
    _db = ToyVectorStore.get_instance().get_collection(name)
    # Same missing-collection handling as the upload endpoint; a falsy
    # collection would otherwise fail on the search call below.
    if not _db:
        return JSONResponse(status_code=404, content={})

    print(query)
    matches = _db.similarity_search_with_score(query=query)
    print(matches)

    # Each match is indexed as (document, score).
    answer_text = _chain.run(
        input_documents=[match[0] for match in matches],
        question=query,
    )
    file_score = [
        [f"{match[0].metadata['file']} : {match[0].metadata['page']}", match[1]]
        for match in matches
    ]
    return JSONResponse(
        status_code=200,
        content={"answer": answer_text, "file_score": file_score},
    )
|
|
|
async def generate_documents(file: UploadFile, file_name: str):
    """Yield one `Document` for each text produced by `convert_documents`.

    Every document stores `file_name` under the "file" metadata key and a
    running counter, starting at 1, under the "page" metadata key.
    """
    page_number = 0
    async for text in convert_documents(file):
        page_number = page_number + 1
        page_metadata = {"file": file_name, "page": page_number}
        yield Document(page_content=text, metadata=page_metadata)
|
|
|
async def convert_documents(file: UploadFile):
    """Yield the extracted text of each page of an uploaded PDF.

    Uploads whose content type is not 'application/pdf' yield nothing.
    An exception raised while iterating over the pages or extracting their
    text is printed and ends the generator; pages yielded before the failure
    have already been delivered to the caller.
    """
    # Only PDF uploads are converted; anything else produces no output.
    if file.content_type != 'application/pdf':
        return

    raw_bytes = await file.read()
    reader = PdfReader(io.BytesIO(raw_bytes))
    try:
        for pdf_page in reader.pages:
            page_text = pdf_page.extract_text()
            yield page_text
    except Exception as e:
        print(f"Exception {e}")