styles and installs
- .gitignore +3 -1
- app.py +35 -3
- modules/langchain_init.py +25 -0
- modules/soup_extractor.py +11 -0
- requirements.txt +5 -1
- static/index.html +1 -1
- static/style.css +22 -0
.gitignore
CHANGED
@@ -33,4 +33,6 @@ instance/
 dmypy.json
 
 # Pyre type checker
-.pyre/
+.pyre/
+
+*.ipynb
app.py
CHANGED
@@ -3,11 +3,39 @@ from os import getenv
 from langchain_huggingface import HuggingFaceEmbeddings
 from fastapi.responses import HTMLResponse, FileResponse
 from fastapi.staticfiles import StaticFiles
+from modules.langchain_init import get_llm
+from modules.soup_extractor import bs4_extractor
+from langchain_community.document_loaders import WebBaseLoader, RecursiveUrlLoader
+from langchain_core.vectorstores import InMemoryVectorStore
+from langchain import hub
 
 app = FastAPI()
 MY_KEY = getenv("MY_KEY")
 
 embeddings = HuggingFaceEmbeddings(model_name="jinaai/jina-embeddings-v2-small-en")
+llm = get_llm()
+
+def create_loader(url:str):
+    return RecursiveUrlLoader(
+        # "https://book.cairo-lang.org/",
+        url,
+        extractor=bs4_extractor,
+        max_depth=2,
+    )
+
+loader = {}
+
+docs = []
+
+my_vector_store = {}
+
+prompt = hub.pull("rlm/rag-prompt")
+
+def simple_rag(question, prompt):
+    retrieved_docs = my_vector_store.similarity_search(question)
+    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)
+    prompt = prompt.invoke({"question": question, "context": docs_content})
+    return llm.invoke(prompt)
 
 app.mount("/static", StaticFiles(directory="static", html=True), name="static")
 
@@ -23,14 +51,18 @@ async def chat(request: Request):
     data = await request.json()
     message = data.get("message")
     # Process the message and generate a reply
-
+    response = simple_rag(message, prompt)
+    reply = response.content
     return {"reply": reply}
 
 @app.get("/embeddings")
 def get_embeddings(input: str):
-
+    loader = create_loader(input)
+    docs = loader.load()
+    my_vector_store = InMemoryVectorStore.from_documents(docs, embeddings)
+
     return {
-        "embeddings":
+        "embeddings": [],
         "test": "testtext"
     }
 
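One caveat with the committed version: get_embeddings binds my_vector_store as a local name, so the module-level my_vector_store = {} that simple_rag queries is never replaced, and a plain dict has no similarity_search. A minimal sketch of one way to publish the store (reusing the names defined in app.py above; rebinding via global is only one option, keeping it on app.state would work just as well):

# Sketch only, not part of the commit; assumes the imports and objects from app.py above.
my_vector_store = None  # filled in once /embeddings has been called

@app.get("/embeddings")
def get_embeddings(input: str):
    global my_vector_store  # rebind the module-level name instead of shadowing it
    loader = create_loader(input)
    docs = loader.load()
    my_vector_store = InMemoryVectorStore.from_documents(docs, embeddings)
    return {"embeddings": [], "test": "testtext"}

def simple_rag(question, prompt):
    if my_vector_store is None:
        return llm.invoke(question)  # nothing indexed yet; fall back to the bare model
    retrieved_docs = my_vector_store.similarity_search(question)
    docs_content = "\n\n".join(doc.page_content for doc in retrieved_docs)
    return llm.invoke(prompt.invoke({"question": question, "context": docs_content}))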
modules/langchain_init.py
ADDED
@@ -0,0 +1,25 @@
+from langchain_openai import ChatOpenAI
+from dotenv import load_dotenv
+import os
+# from langchain_cohere import CohereEmbeddings
+
+
+load_dotenv()
+
+openrouter_api_key=os.environ["OPENROUTER_API_KEY"]
+# cohere_api_key=os.environ["COHERE_API_KEY"]
+
+
+def get_llm(model_name: str = "openai/gpt-4o-mini"):
+    return ChatOpenAI(
+        model=model_name,
+        temperature=0.6,
+        openai_api_key=openrouter_api_key,
+        openai_api_base="https://openrouter.ai/api/v1"
+    )
+
+# def get_embeddings(model_name: str = "embed-multilingual-light-v3.0"):
+#     return CohereEmbeddings(
+#         model=model_name,
+#         cohere_api_key=cohere_api_key,
+#     )
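For reference, a minimal usage sketch for get_llm (not part of the commit; assumes OPENROUTER_API_KEY is available in the environment or a local .env):

from modules.langchain_init import get_llm

llm = get_llm()  # defaults to "openai/gpt-4o-mini" routed through OpenRouter
reply = llm.invoke("Reply with one short sentence.")
print(reply.content)  # ChatOpenAI returns an AIMessage; the text lives in .content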
modules/soup_extractor.py
ADDED
@@ -0,0 +1,11 @@
+import re
+from bs4 import BeautifulSoup, SoupStrainer
+
+def_strainer = SoupStrainer(class_ = 'content')
+
+def bs4_extractor(html: str, strainer: SoupStrainer = def_strainer) -> str:
+    '''
+    Extract text from html using BeautifulSoup
+    '''
+    soup = BeautifulSoup(html, "lxml", parse_only=strainer)
+    return re.sub(r"\n\n+", "\n\n", soup.text).strip()
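A small self-contained check of the extractor (not part of the commit; the HTML below is a made-up example). Because the default strainer only keeps elements with class="content", everything outside them is dropped:

from modules.soup_extractor import bs4_extractor

html = """
<html><body>
  <nav>Site navigation that should be ignored</nav>
  <div class="content">
    <h1>Title</h1>

    <p>Body text.</p>
  </div>
</body></html>
"""

# Navigation text is dropped and runs of blank lines are collapsed.
print(bs4_extractor(html))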
requirements.txt
CHANGED
@@ -1,4 +1,8 @@
 fastapi
 uvicorn[standard]
+langchain
 langchain-huggingface
-
+bs4
+langchain_openai
+langchain_community
+lxml
static/index.html
CHANGED
@@ -35,7 +35,7 @@
     <h2>Chat with the Model</h2>
     <div id="chat-box"></div>
     <form id="chat-form">
-      <
+      <textarea id="chat-input" placeholder="Type your message here..."></textarea>
       <button type="submit">Send</button>
     </form>
 </section>
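The new textarea is what the existing chat form submits. A quick way to exercise the two endpoints directly (not part of the commit; assumes the chat handler shown above is mounted as a POST route at /chat, its decorator being outside this diff, and that the app is running locally via uvicorn on port 8000; the URL and question are illustrative):

import json
import urllib.parse
import urllib.request

BASE = "http://localhost:8000"

# Build the in-memory vector store from a documentation site first.
params = urllib.parse.urlencode({"input": "https://book.cairo-lang.org/"})
with urllib.request.urlopen(f"{BASE}/embeddings?{params}") as resp:
    print(json.load(resp))

# Then ask a question against the indexed pages via the chat endpoint.
payload = json.dumps({"message": "What is Cairo?"}).encode()
req = urllib.request.Request(
    f"{BASE}/chat",
    data=payload,
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.load(resp)["reply"])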
static/style.css
CHANGED
@@ -28,18 +28,40 @@ a {
 form {
     width: 30rem;
     margin: 0 auto;
+    border-radius: 3px;
 }
 
 input {
     width: 100%;
+    border-radius: 3px;
 }
 
 button {
     cursor: pointer;
+    border-radius: 3px;
 }
 
 .text-gen-output {
     min-height: 1.2rem;
     margin: 1rem;
     border: 0.5px solid grey;
+    border-radius: 3px;
+}
+
+#chat-box {
+    width: 60%;
+    height: 40vh;
+    overflow-y: auto;
+    border: 1px solid #ccc;
+    padding: 1rem;
+    border-radius: 3px;
+    resize: none;
+}
+
+#chat-input {
+    width: 60%;
+    height: 3rem;
+    overflow-y: auto;
+    resize: none;
+    border-radius: 3px;
 }
|