Spaces:
Running
Running
File size: 2,585 Bytes
8df0f23 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
from openai import OpenAI
import logging
from typing import List
import os
# Endpoint handed to the OpenAI client as its ``base_url``.
BASE_URL = "https://api.together.xyz/v1"

# Fallback API key, read once at import time; None when the
# TOGETHER_API_KEY environment variable is not set.
DEFAULT_API_KEY = os.environ.get("TOGETHER_API_KEY")
# Short model names accepted by ``model_name_mapping``, keyed to the longer
# ``org/model`` identifier each one stands for.
_MODEL_ID_BY_SHORT_NAME = {
    "Llama-3-8B": "meta-llama/Llama-3-8b-hf",
    "Llama-3-70B": "meta-llama/Llama-3-70b-hf",
    "Llama-2-7B": "meta-llama/Llama-2-7b-hf",
    "Llama-2-70B": "meta-llama/Llama-2-70b-hf",
    "Mistral-7B-v0.1": "mistralai/Mistral-7B-v0.1",
    "Mixtral-8x22B": "mistralai/Mixtral-8x22B",
    "Qwen1.5-72B": "Qwen/Qwen1.5-72B",
    "Yi-34B": "zero-one-ai/Yi-34B",
    "Yi-6B": "zero-one-ai/Yi-6B",
    "OLMO": "allenai/OLMo-7B",
}


def model_name_mapping(model_name):
    """Translate a short model name into its full ``org/model`` identifier.

    Args:
        model_name: One of the short names listed in
            ``_MODEL_ID_BY_SHORT_NAME`` (matching is case-sensitive).

    Returns:
        The identifier string associated with ``model_name``.

    Raises:
        ValueError: If ``model_name`` is not one of the known short names.
    """
    # Guard on str first so unhashable arguments (lists, dicts, ...) end up
    # in the same ValueError path instead of failing inside the dict lookup.
    full_id = None
    if isinstance(model_name, str):
        full_id = _MODEL_ID_BY_SHORT_NAME.get(model_name)

    if full_id is None:
        raise ValueError("Invalid model name")
    return full_id
def urial_template(urial_prompt, history, message):
    """Assemble the full prompt text for one conversation turn.

    The result is ``urial_prompt`` followed by a newline, then one
    ``# Query`` / ``# Answer`` pair per past exchange (each body fenced by
    triple double-quotes), and finally the new ``message`` as a query with
    an answer fence left open for the model to complete.

    Args:
        urial_prompt: Leading prompt text placed before the conversation.
        history: Iterable of ``(user_msg, ai_msg)`` pairs for past turns.
        message: The new user message to be answered.

    Returns:
        The concatenated prompt string.
    """
    segments = [urial_prompt + "\n"]

    # Completed turns: both the query and its answer are closed off.
    segments.extend(
        f'# Query:\n"""\n{past_query}\n"""\n\n# Answer:\n"""\n{past_answer}\n"""\n\n'
        for past_query, past_answer in history
    )

    # Current turn: the answer fence is opened but deliberately not closed.
    segments.append(f'# Query:\n"""\n{message}\n"""\n\n# Answer:\n"""\n')
    return "".join(segments)
def openai_base_request(
    model: str = None,
    temperature: float = 0,
    max_tokens: int = 512,
    top_p: float = 1.0,
    prompt: str = None,
    n: int = 1,
    repetition_penalty: float = 1.0,
    stop: List[str] = None,
    api_key: str = None,
):
    """Start a streaming text-completion request against ``BASE_URL``.

    Args:
        model: Model identifier forwarded to the completions endpoint.
        temperature: Sampling temperature; coerced with ``float()``.
        max_tokens: Completion length limit; coerced with ``int()``.
        top_p: Nucleus-sampling value; coerced with ``float()``.
        prompt: Prompt text to complete.
        n: Number of completions requested.
        repetition_penalty: Coerced with ``float()`` and sent through
            ``extra_body`` rather than as a named argument.
        stop: Optional list of stop sequences.
        api_key: API key for this call; ``DEFAULT_API_KEY`` is used when
            this is ``None``.

    Returns:
        Whatever ``client.completions.create`` returns when called with
        ``stream=True``.
    """
    resolved_key = DEFAULT_API_KEY if api_key is None else api_key
    client = OpenAI(api_key=resolved_key, base_url=BASE_URL)

    # Record every request setting at INFO level before the call is made.
    for log_line in (
        f"Requesting chat completion from OpenAI API with model {model}",
        f"Prompt: {prompt}",
        f"Temperature: {temperature}",
        f"Max tokens: {max_tokens}",
        f"Top-p: {top_p}",
        f"Repetition penalty: {repetition_penalty}",
        f"Stop: {stop}",
    ):
        logging.info(log_line)

    # Numeric settings are coerced explicitly so values of other types
    # (e.g. strings or ints) are normalised before being sent.
    return client.completions.create(
        model=model,
        prompt=prompt,
        temperature=float(temperature),
        max_tokens=int(max_tokens),
        top_p=float(top_p),
        n=n,
        extra_body={'repetition_penalty': float(repetition_penalty)},
        stop=stop,
        stream=True,
    )
|