Artin2009 committed
Commit f716596
1 Parent(s): 2c1b638

Update chain_app.py

Files changed (1)
  1. chain_app.py +15 -15
chain_app.py CHANGED
@@ -16,8 +16,6 @@ openai_api_key = os.environ.get('OPENAI_API_KEY')
 groq_api_key = os.environ.get('GROQ_API_KEY')
 cohere_api_key = os.environ.get('COHERE_API_KEY')
 fireworks_api_key = os.environ.get('FIREWORKS_API_KEY')
-LLAMA_403_ONLY = os.environ.get('LLAMA_403_ONLY')
-
 hf_text_client = Client("Artin2009/text-generation", hf_token=hf_token)
 # hf_image_client = Client('Artin2009/image-generation')
 openai_client = OpenAI(api_key=openai_api_key)
@@ -26,6 +24,7 @@ groq_client = Groq(api_key=groq_api_key)
 co = cohere.Client(
     api_key=cohere_api_key,
 )
+NVIDIA_API_KEY = os.environ.get('NVIDIA_API_KEY')
 
 # API_URL = "https://api-inference.huggingface.co/models/PartAI/TookaBERT-Large"
 # headers = {"Authorization": f"Bearer {hf_token}"}
@@ -2329,23 +2328,24 @@ async def main(message: cl.Message):
         # await cl.Message(
         #     content=response.choices[0].message.content,
         # ).send()
-
-        API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3.1-405B"
+        client = OpenAI(
+            base_url = "https://integrate.api.nvidia.com/v1",
+            api_key = nvidia_api
+        )
 
-        headers = {"Authorization": "Bearer LLAMA_403_ONLY"}
-        def query(payload):
-            response = requests.post(API_URL, headers=headers, json=payload)
-            return response.json()
+        completion = client.chat.completions.create(
+            model="meta/llama-3.1-405b-instruct",
+            messages=[{"role":"user","content": message.content}],
+            temperature=0.2,
+            top_p=0.7,
+            max_tokens=1024,
+            stream=True
+        )
+        for chunk in completion:
+            await msg.stream_token(chunk.choices[0].delta.content)
 
-        output = query({
-            "inputs": message.content,
-        })
 
-        await cl.Message(
-            content=output
-        ).send()
 
-
     elif chat_profile == 'Llama-3.1-70B':
         completion = groq_client.chat.completions.create(
             model="llama-3.1-70b-versatile",