File size: 2,501 Bytes
6c57304
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import langchain.document_loaders

from langchain.document_loaders import DirectoryLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.chroma import Chroma
import os
import shutil

from langchain.vectorstores.chroma import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.prompts import ChatPromptTemplate


def get_chunks(file_path):
    """Load a PDF file and split its contents into overlapping chunks.

    Args:
        file_path: Path of the PDF handed to ``PyPDFLoader``.

    Returns:
        The documents produced by
        ``RecursiveCharacterTextSplitter.split_documents`` for the loaded PDF.
    """
    pages = PyPDFLoader(file_path).load()

    # Sizes are measured with ``len``: chunks of at most 300 with an overlap
    # of 100 between neighbours. ``add_start_index=True`` asks the splitter
    # to record where each chunk starts.
    splitter_options = {
        "chunk_size": 300,
        "chunk_overlap": 100,
        "length_function": len,
        "add_start_index": True,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)

    return splitter.split_documents(pages)

def get_vectordb(chunks):
    """Build a Chroma vector store from document chunks.

    Args:
        chunks: Documents to embed and index, e.g. the output of
            ``get_chunks``.

    Returns:
        A ``Chroma`` store containing ``chunks`` embedded with
        ``OpenAIEmbeddings``.
    """
    # ``from_documents`` takes the embedding model as its second parameter
    # (``embedding``). Passing it as ``embedding_function=`` forwards the
    # keyword to the ``Chroma`` constructor a second time and raises
    # ``TypeError``, so pass it positionally.
    # No ``persist_directory`` is given: the store is rebuilt on every call.
    db = Chroma.from_documents(chunks, OpenAIEmbeddings())

    return db


def gen_sample(text, decision, db):
    """Generate the next passage of a choose-your-own-adventure story.

    The instruction prompt (including ``text`` and ``decision``) is used as
    the similarity query against ``db``; the five best-matching chunks are
    supplied to the chat model as context.

    Args:
        text: Story passage the decision refers to.
        decision: The choice taken with respect to ``text``.
        db: Vector store exposing ``similarity_search_with_relevance_scores``.

    Returns:
        The model's reply. When the reply is a valid Python literal (for
        example a quoted string) the parsed value is returned; otherwise the
        raw reply text is returned unchanged.
    """
    # Function-scope import keeps this block self-contained.
    import ast

    PROMPT_TEMPLATE = """
    Answer the question based only on the following context:

    {context}

    ---

    Answer the question based on the above context: {question}
    """

    query_text = f"""

    Act as the author of a Choose Your Own Adventure Book. This book is special as it is based on existing material.
    Now, as with any choose your own adventure book, there are inifinite paths based on the choices a user makes.
    Given some relevant text and the decision taken with respect to the relevant text, generate the next part of the story.
    It should be within 6-8 sentences and be coherent as it were actually part of the story.

    Relevant: {text}

    Decision: {decision}

    """

    results = db.similarity_search_with_relevance_scores(query_text, k=5)

    # Scores are not used; only the chunk texts go into the prompt.
    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)
    prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = prompt_template.format(context=context_text, question=query_text)

    model = ChatOpenAI()
    response_text = model.predict(prompt)

    # NOTE(security): this used to be ``eval(response_text)``. The reply is
    # untrusted model output (steerable by the indexed PDF content), so
    # eval() allowed arbitrary code execution, and it raised SyntaxError on
    # the plain prose the prompt actually asks for. ``literal_eval`` keeps
    # the old behaviour for literal replies without executing code; anything
    # else is returned verbatim.
    try:
        return ast.literal_eval(response_text)
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        return response_text