ctranslate2-4you commited on
Commit
975dbc9
·
verified ·
1 Parent(s): fe0e75d

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +51 -12
README.md CHANGED
@@ -10,45 +10,84 @@ tags:
10
  - chat
11
  ---
12
 
13
- Sample Script:
 
 
 
 
14
 
15
  ```python
16
  import ctranslate2
17
  from transformers import AutoTokenizer
18
 
19
def generate_response(prompt, system_message, model_path):
    """Generate a single chat response from a CTranslate2-converted Phi-4 model.

    Parameters:
        prompt (str): The user's input prompt.
        system_message (str): The system-level instruction.
        model_path (str): Path to the CTranslate2 model directory.

    Returns:
        str: The decoded assistant response, without special tokens.
    """
    # Load the converted model on GPU with int8 compute type.
    generator = ctranslate2.Generator(
        model_path,
        device="cuda",
        compute_type="int8"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Phi-4 chat template: <|im_start|>role<|im_sep|>content<|im_end|>
    formatted_prompt = f"""<|im_start|>system<|im_sep|>{system_message}<|im_end|>
<|im_start|>user<|im_sep|>{prompt}<|im_end|>
<|im_start|>assistant<|im_sep|>"""

    tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode(formatted_prompt))
    # Fix: dropped max_batch_size=8192 — it limits *examples* per batch,
    # not tokens, and is meaningless when submitting a single prompt.
    results = generator.generate_batch(
        [tokens],
        max_length=1024,
        sampling_temperature=0.7
    )

    # Fix: skip_special_tokens=True so chat-template markers such as
    # <|im_end|> do not leak into the returned text.
    response = tokenizer.decode(results[0].sequences_ids[0], skip_special_tokens=True)
    return response
45
 
46
if __name__ == "__main__":
    # Demo driver — point model_path at your converted CT2 directory.
    ct2_dir = "path/to/your/phi-4-ct2-model"
    sys_msg = "You are a helpful AI assistant."
    question = "Write a short poem about a cat."

    answer = generate_response(question, sys_msg, ct2_dir)
    print("\nGenerated response:")
    print(answer)
54
- ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  - chat
11
  ---
12
 
13
+ CTranslate2 conversion of Phi-4
14
+
15
+ # Example Usage
16
+
17
+ <details><summary>Non-Streaming Example:</summary>
18
 
19
  ```python
20
  import ctranslate2
21
  from transformers import AutoTokenizer
22
 
23
def generate_response(prompt, system_message, model_path):
    """Run one non-streaming chat completion against a CT2 Phi-4 model.

    Parameters:
        prompt (str): User message.
        system_message (str): System instruction.
        model_path (str): CTranslate2 model directory.

    Returns:
        str: Decoded assistant reply with special tokens stripped.
    """
    generator = ctranslate2.Generator(
        model_path,
        device="cuda",
        compute_type="int8"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Wrap the conversation in the Phi-4 chat-template markers.
    formatted_prompt = f"""<|im_start|>system<|im_sep|>{system_message}<|im_end|>
<|im_start|>user<|im_sep|>{prompt}<|im_end|>
<|im_start|>assistant<|im_sep|>"""

    encoded_ids = tokenizer.encode(formatted_prompt)
    tokens = tokenizer.convert_ids_to_tokens(encoded_ids)
    batch_results = generator.generate_batch(
        [tokens],
        max_length=1024,
        sampling_temperature=0.7
    )
    response = tokenizer.decode(
        batch_results[0].sequences_ids[0],
        skip_special_tokens=True
    )
    return response
41
 
42
if __name__ == "__main__":
    # Example invocation of the non-streaming helper above.
    ct2_model_dir = "path/to/your/phi-4-ct2-model"
    system_instruction = "You are a helpful AI assistant."
    user_question = "Write a short poem about a cat."

    reply = generate_response(user_question, system_instruction, ct2_model_dir)
    print("\nGenerated response:")
    print(reply)
49
+
50
+ ```
51
+ </details>
52
+
53
+ <details><summary>Streaming Example:</summary>
54
+
55
+ ```python
56
+ import ctranslate2
57
+ from transformers import AutoTokenizer
58
+ import sys
59
+
60
def generate_response(prompt, system_message, model_path):
    """
    Generates and streams a response from an AI assistant.

    Initializes the CTranslate2 generator and tokenizer, formats the input prompt,
    tokenizes it, and streams the generated tokens by printing them as they are produced.

    Parameters:
        prompt (str): The user's input prompt.
        system_message (str): The system-level instruction.
        model_path (str): Path to the CTranslate2 model directory.
    """
    generator = ctranslate2.Generator(model_path, device="cuda", compute_type="int8")
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    formatted_prompt = f"""<|im_start|>system<|im_sep|>{system_message}<|im_end|>
<|im_start|>user<|im_sep|>{prompt}<|im_end|>
<|im_start|>assistant<|im_sep|>"""
    tokens = tokenizer.tokenize(formatted_prompt)
    for step in generator.generate_tokens([tokens], max_length=1024, sampling_temperature=0.7):
        token = step.tokens[0]
        # Fix 1: the original tested `token in tokenizer.eos_token`, a
        # substring check on a string — any token that happens to be a
        # substring of the EOS marker (even "<") would stop generation.
        # Equality is the correct test; all_special_tokens is a list, so
        # `in` is a proper membership test there.
        # Fix 2: check BEFORE printing, so the EOS/special token is not
        # echoed to the user.
        if token == tokenizer.eos_token or token in tokenizer.all_special_tokens:
            break
        decoded_token = tokenizer.decode([step.token_ids[0]])
        print(decoded_token, end="", flush=True)
84
+
85
if __name__ == "__main__":
    # Streaming demo — tokens are printed as they are generated.
    ct2_dir = "path/to/your/phi-4-ct2-model"
    sys_msg = "You are a helpful AI assistant."
    question = "Write a short poem about a cat."
    print("\nGenerating response:")
    generate_response(question, sys_msg, ct2_dir)
91
+
92
+ ```
93
+ </details>