macadeliccc
committed on
Commit • 1ed3a5f
1 Parent(s): 8242848
Update README.md
README.md CHANGED
@@ -46,29 +46,54 @@ Please give ideas and a detailed plan about how to assemble and train an army of
 Switch the commented model definition to use in 4-bit. Should work with 9GB and still exceed the single 7B model by 5-6 points roughly
 
 ```python
+# Import necessary libraries
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
 # Load tokenizer and model
 tokenizer = AutoTokenizer.from_pretrained("macadeliccc/laser-dolphin-mixtral-2x7b-dpo")
 model = AutoModelForCausalLM.from_pretrained("macadeliccc/laser-dolphin-mixtral-2x7b-dpo")
-
-# Define chat messages
+
+# Define a function to generate responses with adjustable hyperparameters
+def generate_response(messages, max_length=50, num_return_sequences=1, temperature=1.0, top_k=50, top_p=1.0):
+    """
+    Generate a response from the model based on the input chat messages and hyperparameters.
+
+    Args:
+        messages (list): List of message dictionaries with 'role' and 'content'.
+        max_length (int): Maximum length of the model's response.
+        num_return_sequences (int): Number of response sequences to generate.
+        temperature (float): Sampling temperature for model generation.
+        top_k (int): The number of highest probability vocabulary tokens to keep for top-k filtering.
+        top_p (float): If set to float < 1, only the most probable tokens with probabilities that add up to top_p or higher are kept for generation.
+
+    Returns:
+        str: The generated response from the model.
+    """
+    # Apply chat template to input messages
+    gen_input = tokenizer.apply_chat_template(messages, return_tensors="pt")
+
+    # Generate a response (apply_chat_template returns a tensor, so pass it
+    # positionally; do_sample=True makes temperature/top_k/top_p take effect)
+    output = model.generate(gen_input,
+                            do_sample=True,
+                            max_length=max_length,
+                            num_return_sequences=num_return_sequences,
+                            temperature=temperature,
+                            top_k=top_k,
+                            top_p=top_p)
+
+    # Decode the generated tokens to a string
+    response = tokenizer.decode(output[0], skip_special_tokens=True)
+
+    return response
+
+# Example chat messages
 messages = [
-    {"role": "system", "content": "You are Dolphin, an AI assistant"},
-    {"role": "user", "content": "
+    {"role": "system", "content": "You are Dolphin, an AI assistant."},
+    {"role": "user", "content": "Write a quicksort algorithm in python"}
 ]
 
-# Apply chat template
-gen_input = tokenizer.apply_chat_template(messages, return_tensors="pt")
-
-# Generate a response
-output = model.generate(**gen_input)
-
-# Decode the generated tokens to a string
-response = tokenizer.decode(output[0], skip_special_tokens=True)
-
-# Print the response
-print("Response:", response)
+# Generate and print the response
+response = generate_response(messages, max_length=100, temperature=0.8)
+print("Response:\n", response)
 ```
 
 [colab](https://colab.research.google.com/drive/1cmRhAkDWItV7utHNqNANVZnqDqQNsTUr?usp=sharing) with usage example
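The note above refers to a commented-out 4-bit model definition that isn't visible in this diff. As a rough sketch of what 4-bit loading looks like with transformers' `BitsAndBytesConfig` (requires the `bitsandbytes` package; the quantization settings below are illustrative assumptions, not the README's exact definition):

```python
# Hedged sketch: 4-bit loading via BitsAndBytesConfig. The README's commented
# definition is not shown in this diff, so these settings are assumptions.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Quantize weights to 4-bit NF4 and compute in float16 (needs bitsandbytes + a GPU)
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained("macadeliccc/laser-dolphin-mixtral-2x7b-dpo")
model = AutoModelForCausalLM.from_pretrained(
    "macadeliccc/laser-dolphin-mixtral-2x7b-dpo",
    quantization_config=bnb_config,
    device_map="auto",  # place layers on available devices automatically
)
```

Storing the 2x7B weights in 4-bit while computing in half precision is what makes the roughly 9GB footprint mentioned above plausible.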