import os
import time
import json
import random

import requests
from flask import Flask, request, Response, jsonify

from helper import create_jwt, github_username_zed_userid_list

app = Flask(__name__)


@app.route('/hf/v1/chat/completions', methods=['POST'])
def chat():
    # Get the JSON payload from the incoming request
    payload = request.json

    # Get the model from the payload, defaulting to "claude-3-5-sonnet-20240620"
    model = payload.get('model', 'claude-3-5-sonnet-20240620')

    # Prepare the request for the upstream LLM API
    url = "https://llm.zed.dev/completion?"
    llm_payload = {
        "provider": "anthropic",
        "model": model,
        "provider_request": {
            "model": model,
            "max_tokens": payload.get('max_tokens', 8192),
            "temperature": payload.get('temperature', 0),
            "top_p": payload.get('top_p', 0.7),
            "messages": payload['messages'],
            "stream": payload.get('stream', False),
            "system": payload.get('system', "")
        }
    }

    # Pick a random (GitHub username, Zed user id) pair and mint a JWT for it
    github_username, zed_user_id = random.choice(github_username_zed_userid_list)
    jwt = create_jwt(github_username, zed_user_id)

    headers = {
        'Host': 'llm.zed.dev',
        'accept': '*/*',
        'content-type': 'application/json',
        'authorization': f'Bearer {jwt}',
        'user-agent': 'Zed/0.149.3 (macos; aarch64)'
    }

    # Route traffic through an HTTP proxy if one is configured in the environment
    proxy = os.environ.get('HTTP_PROXY', None)
    proxies = {'http': proxy, 'https': proxy} if proxy else None

    def generate():
        with requests.post(url, headers=headers, json=llm_payload,
                           stream=True, proxies=proxies) as response:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    # Parse the chunk and re-emit it in OpenAI's SSE streaming
                    # format. Each chunk is assumed to hold one complete JSON
                    # object; fragments that fail to parse are skipped.
                    try:
                        data = json.loads(chunk.decode('utf-8'))
                        content = data.get('completion', '')
                        yield f"data: {json.dumps({'choices': [{'delta': {'content': content}}]})}\n\n"
                    except json.JSONDecodeError:
                        continue
        yield "data: [DONE]\n\n"

    if payload.get('stream', False):
        return Response(generate(), content_type='text/event-stream')
    else:
        with requests.post(url, headers=headers, json=llm_payload,
                           proxies=proxies) as response:
            data = response.json()
            return jsonify({
                "id": "chatcmpl-" + os.urandom(12).hex(),
                "object": "chat.completion",
                "created": int(time.time()),
                "model": model,
                "choices": [{
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": data.get('completion', '')
                    },
                    "finish_reason": "stop"
                }],
                "usage": {
                    "prompt_tokens": -1,      # Not reported by the upstream API
                    "completion_tokens": -1,  # Not reported by the upstream API
                    "total_tokens": -1        # Not reported by the upstream API
                }
            })


if __name__ == '__main__':
    app.run(debug=True)
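
# --- Usage sketch (illustrative only, not part of the app) ---
# A minimal client call against this proxy, assuming it runs on Flask's
# default address (127.0.0.1:5000). The payload fields mirror what the
# `chat` route above reads; the model name is just the route's default
# repeated here.
#
#   import requests
#
#   resp = requests.post(
#       "http://127.0.0.1:5000/hf/v1/chat/completions",
#       json={
#           "model": "claude-3-5-sonnet-20240620",
#           "messages": [{"role": "user", "content": "Hello"}],
#           "stream": False,
#       },
#   )
#   print(resp.json()["choices"][0]["message"]["content"])
#
#   # Streaming variant: set "stream": True and read the SSE lines the
#   # generate() helper emits, stopping at the "[DONE]" sentinel.
#   with requests.post(
#       "http://127.0.0.1:5000/hf/v1/chat/completions",
#       json={
#           "model": "claude-3-5-sonnet-20240620",
#           "messages": [{"role": "user", "content": "Hello"}],
#           "stream": True,
#       },
#       stream=True,
#   ) as resp:
#       for line in resp.iter_lines():
#           if line and line != b"data: [DONE]":
#               print(line.decode())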