---
# Model card metadata (YAML front matter).
library_name: ctranslate2
license: mit
# Base model that this repository is a quantized version of.
base_model:
- microsoft/phi-4
base_model_relation: quantized
tags:
- ctranslate2
- phi-4
- chat
---
Sample Script:
import ctranslate2
from transformers import AutoTokenizer
def generate_response(prompt, system_message, model_path):
    """Generate a single chat reply from a CTranslate2 model.

    The generator and tokenizer are both loaded from ``model_path`` on every
    call, the system and user messages are wrapped in the
    ``<|im_start|>``/``<|im_sep|>``/``<|im_end|>`` chat markers, and the
    generated token ids are decoded back to text.

    Args:
        prompt: The user message to answer.
        system_message: The system instruction placed before the user turn.
        model_path: Directory passed to both ``ctranslate2.Generator`` and
            ``AutoTokenizer.from_pretrained``.

    Returns:
        The decoded text of the first generated sequence. The prompt itself
        is excluded from the result.
    """
    # Initialize the model and tokenizer.
    generator = ctranslate2.Generator(
        model_path,
        device="cuda",
        compute_type="int8",
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Format the prompt; the string ends with the opening of the assistant
    # turn so that generation continues from there.
    formatted_prompt = (
        f"<|im_start|>system<|im_sep|>{system_message}<|im_end|>\n"
        f"<|im_start|>user<|im_sep|>{prompt}<|im_end|>\n"
        "<|im_start|>assistant<|im_sep|>"
    )

    # generate_batch takes token strings rather than ids, so the encoded ids
    # are converted back to tokens before being batched.
    tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(formatted_prompt))

    # NOTE(review): CTranslate2's default sampling_topk appears to be 1
    # (greedy), in which case sampling_temperature has no effect unless
    # sampling_topk is also raised -- confirm the intended decoding strategy.
    results = generator.generate_batch(
        [tokens],
        max_batch_size=8192,
        max_length=1024,
        sampling_temperature=0.7,
        # Without this the returned ids start with the prompt tokens, so the
        # decoded "response" would repeat the system and user messages.
        include_prompt_in_result=False,
    )

    # Decode and return the first hypothesis of the single batch entry.
    response = tokenizer.decode(results[0].sequences_ids[0])
    return response
if __name__ == "__main__":
    # Example inputs; point model_path at the converted model directory
    # before running.
    model_path = "path/to/your/phi-4-ct2-model"
    system_message = "You are a helpful AI assistant."
    user_prompt = "Write a short poem about a cat."

    # Run one generation with the example inputs.
    response = generate_response(
        prompt=user_prompt,
        system_message=system_message,
        model_path=model_path,
    )

    # Print a header line (preceded by a blank line) and then the reply.
    print("\nGenerated response:")
    print(response)