chahah committed on
Commit
9db9263
1 Parent(s): 7ca03db

Update app.py

Browse files

Replaced ArxivRetriever with ArxivLoader

Files changed (1) hide show
  1. app.py +13 -14
app.py CHANGED
@@ -11,26 +11,29 @@ from langchain_mistralai import ChatMistralAI
11
  from langchain_community.document_loaders import PyPDFLoader
12
  import requests
13
  from pathlib import Path
14
- from langchain_community.document_loaders import WebBaseLoader
15
- from langchain_community.retrievers import ArxivRetriever
16
  import bs4
17
  from langchain_core.rate_limiters import InMemoryRateLimiter
18
  from urllib.parse import urljoin
19
 
20
 
21
  def initialize(arxivcode):
 
 
 
 
 
 
 
 
 
 
 
22
  rate_limiter = InMemoryRateLimiter(
23
  requests_per_second=0.1, # <-- MistralAI free. We can only make a request once every second
24
  check_every_n_seconds=0.01, # Wake up every 100 ms to check whether allowed to make a request,
25
  max_bucket_size=10, # Controls the maximum burst size.
26
- )
27
-
28
- retriever = ArxivRetriever(
29
- load_max_docs=2,
30
- get_ful_documents=True,
31
- )
32
-
33
- # LLM model
34
  llm = ChatMistralAI(model="mistral-large-latest", rate_limiter=rate_limiter)
35
 
36
  # Embeddings
@@ -38,10 +41,6 @@ def initialize(arxivcode):
38
  # embed_model = "nvidia/NV-Embed-v2"
39
  embeddings = HuggingFaceInstructEmbeddings(model_name=embed_model)
40
  # embeddings = MistralAIEmbeddings()
41
-
42
- docs = retriever.invoke(str(arxivcode))
43
- for i in range(len(docs)):
44
- docs[i].metadata['Published'] = str(docs[i].metadata['Published'])
45
 
46
  def format_docs(docs):
47
  return "\n\n".join(doc.page_content for doc in docs)
 
11
  from langchain_community.document_loaders import PyPDFLoader
12
  import requests
13
  from pathlib import Path
14
+ from langchain_community.document_loaders import WebBaseLoader, ArxivLoader
 
15
  import bs4
16
  from langchain_core.rate_limiters import InMemoryRateLimiter
17
  from urllib.parse import urljoin
18
 
19
 
20
  def initialize(arxivcode):
21
+ loader = ArxivLoader(query=arxivcode,)
22
+ docs = loader.load()
23
+ #retriever = ArxivRetriever(
24
+ # load_max_docs=2,
25
+ # get_full_documents=True,
26
+ #)
27
+ #docs = retriever.invoke(str(arxivcode))
28
+ #for i in range(len(docs)):
29
+ # docs[i].metadata['Published'] = str(docs[i].metadata['Published'])
30
+
31
+ # LLM model
32
  rate_limiter = InMemoryRateLimiter(
33
  requests_per_second=0.1, # <-- MistralAI free. We can only make a request once every second
34
  check_every_n_seconds=0.01, # Wake up every 100 ms to check whether allowed to make a request,
35
  max_bucket_size=10, # Controls the maximum burst size.
36
+ )
 
 
 
 
 
 
 
37
  llm = ChatMistralAI(model="mistral-large-latest", rate_limiter=rate_limiter)
38
 
39
  # Embeddings
 
41
  # embed_model = "nvidia/NV-Embed-v2"
42
  embeddings = HuggingFaceInstructEmbeddings(model_name=embed_model)
43
  # embeddings = MistralAIEmbeddings()
 
 
 
 
44
 
45
  def format_docs(docs):
46
  return "\n\n".join(doc.page_content for doc in docs)