"""Gradio chat UI backed by the Hugging Face Inference API (Meta-Llama-3-8B-Instruct)."""

import os

import gradio as gr
import requests
import spaces

# Hosted serverless inference endpoint for the instruct-tuned Llama 3 8B model.
API_URL = "https://api-inference.huggingface.co/models/meta-llama/Meta-Llama-3-8B-Instruct"
api_token = os.environ.get("TOKEN")  # HF API token; may be None if unset — requests will then 401
headers = {"Authorization": f"Bearer {api_token}"}


@spaces.GPU  # NOTE(review): this handler only does an HTTP call; the GPU decorator is
# presumably kept for HF Spaces hardware allocation — confirm before removing.
def query(payload):
    """POST *payload* to the Inference API and return the decoded JSON.

    Returns an ``{"error": ...}`` dict instead of raising, so a slow or
    unreachable API degrades to the fallback message rather than crashing
    the Gradio callback.
    """
    try:
        # FIX: the original call had no timeout, so a stalled request would
        # hang the worker thread indefinitely.
        response = requests.post(API_URL, headers=headers, json=payload, timeout=120)
        # FIX: surface HTTP 4xx/5xx explicitly instead of trying to parse an
        # error page as a generation result.
        response.raise_for_status()
        return response.json()
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a non-JSON body from response.json().
        return {"error": str(exc)}


def generate_response(prompt):
    """Run text generation for *prompt*; return a fallback string on failure."""
    payload = {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": 2000,
            "temperature": 0.7,
            "top_p": 0.95,
            "do_sample": True,
        },
    }
    response = query(payload)
    # The Inference API returns either a list of generation dicts or a single dict.
    if isinstance(response, list) and len(response) > 0:
        return response[0].get('generated_text', '')
    elif isinstance(response, dict) and 'generated_text' in response:
        return response['generated_text']
    # Covers error dicts from query() and any unexpected shape.
    return "Désolé, je n'ai pas pu générer de réponse."


def chatbot(message, history):
    """Gradio ChatInterface callback; *history* is unused (stateless prompting)."""
    response = generate_response(message)
    return response


iface = gr.ChatInterface(
    fn=chatbot,
    title="Chatbot Meta-Llama-3-8B-Instruct",
    description="Interagissez avec le modèle Meta-Llama-3-8B-Instruct.",
)

# FIX: guard the launch so importing this module (tests, tooling) does not
# start a web server as a side effect. HF Spaces executes the file as
# __main__, so deployment behavior is unchanged.
if __name__ == "__main__":
    iface.launch()