Artin2009 committed
Commit f716596
1 Parent(s): 2c1b638

Update chain_app.py

Files changed (1)
  1. chain_app.py +15 -15
chain_app.py CHANGED
@@ -16,8 +16,6 @@ openai_api_key = os.environ.get('OPENAI_API_KEY')
 groq_api_key = os.environ.get('GROQ_API_KEY')
 cohere_api_key = os.environ.get('COHERE_API_KEY')
 fireworks_api_key = os.environ.get('FIREWORKS_API_KEY')
-LLAMA_403_ONLY = os.environ.get('LLAMA_403_ONLY')
-
 hf_text_client = Client("Artin2009/text-generation", hf_token=hf_token)
 # hf_image_client = Client('Artin2009/image-generation')
 openai_client = OpenAI(api_key=openai_api_key)
@@ -26,6 +24,7 @@ groq_client = Groq(api_key=groq_api_key)
 co = cohere.Client(
     api_key=cohere_api_key,
 )
+NVIDIA_API_KEY = os.environ.get('NVIDIA_API_KEY')
 
 # API_URL = "https://api-inference.huggingface.co/models/PartAI/TookaBERT-Large"
 # headers = {"Authorization": f"Bearer {hf_token}"}
@@ -2329,23 +2328,24 @@ async def main(message: cl.Message):
         # await cl.Message(
         #     content=response.choices[0].message.content,
         # ).send()
-
-        API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3.1-405B"
+        client = OpenAI(
+            base_url = "https://integrate.api.nvidia.com/v1",
+            api_key = nvidia_api
+        )
 
-        headers = {"Authorization": "Bearer LLAMA_403_ONLY"}
-        def query(payload):
-            response = requests.post(API_URL, headers=headers, json=payload)
-            return response.json()
+        completion = client.chat.completions.create(
+            model="meta/llama-3.1-405b-instruct",
+            messages=[{"role":"user","content": message.content}],
+            temperature=0.2,
+            top_p=0.7,
+            max_tokens=1024,
+            stream=True
+        )
+        for chunk in completion:
+            await msg.stream_token(chunk.choices[0].delta.content)
 
-        output = query({
-            "inputs": message.content,
-        })
 
-        await cl.Message(
-            content=output
-        ).send()
 
-
     elif chat_profile == 'Llama-3.1-70B':
         completion = groq_client.chat.completions.create(
             model="llama-3.1-70b-versatile",