In [1]:
# Install the libraries imported below; -q keeps pip's output quiet.
!pip install torch transformers -q

In [2]:
import torch
from transformers import pipeline
from IPython.display import clear_output
from google.colab import output

In [3]:
class ChatBot:
    """Conversation wrapper around a ``transformers`` text-generation pipeline.

    The slug and pipeline of the most recently loaded model are remembered at
    class level, so every instance created afterwards for the same model (or
    with no model at all) reuses that pipeline instead of ending up without one.

    Attributes:
        pipeline: Generation pipeline used by ``get_response``; ``None`` when
            no model has been loaded yet.
        messages: Conversation history, a list of ``{"role", "content"}`` dicts.
        max_tokens: Passed to the pipeline as ``max_new_tokens``.
        temperature: Sampling temperature passed to the pipeline.
        top_k: ``top_k`` value passed to the pipeline.
        top_p: ``top_p`` value passed to the pipeline.
    """

    _instance = None         # instance handed out by get_instance()
    _current_model = None    # slug of the model most recently loaded by __init__
    _shared_pipeline = None  # pipeline that was built for _current_model

    def __init__(self, model_slug=None):
        """Create a chatbot, loading ``model_slug`` only if it is not the current model.

        Args:
            model_slug: Model identifier handed to ``load_model``. When it is
                falsy or equal to the model that is already loaded, the cached
                pipeline is reused and nothing is loaded.
        """
        if model_slug and model_slug != ChatBot._current_model:
            self.load_model(model_slug)
            # Only record the model once loading has succeeded.
            ChatBot._current_model = model_slug
            ChatBot._shared_pipeline = self.pipeline
        else:
            # No load happened for this instance: attach the cached pipeline
            # so the instance is usable (None if nothing was ever loaded).
            self.pipeline = ChatBot._shared_pipeline

        self.messages = []
        self.max_tokens = 2048
        self.temperature = 0.01
        self.top_k = 100
        self.top_p = 0.95

    @classmethod
    def get_instance(cls, model_slug=None):
        """Return the shared instance, rebuilding it when a different model is requested.

        Args:
            model_slug: Optional model identifier. A new instance is created
                when none exists yet, or when this differs from the model
                that is currently loaded.

        Returns:
            The shared ``ChatBot`` instance.
        """
        needs_new = not cls._instance or (
            model_slug and model_slug != cls._current_model
        )
        if needs_new:
            cls._instance = cls(model_slug)
        return cls._instance

    def load_model(self, model_slug):
        """Build the text-generation pipeline for ``model_slug`` and store it on ``self.pipeline``.

        Args:
            model_slug: Model identifier passed as ``model=`` to ``pipeline``.
        """
        print(f"Loading model {model_slug}...")
        self.pipeline = pipeline(
            "text-generation",
            model=model_slug,
            device_map="auto",
        )
        # Wipe the loading output so only the success line remains visible.
        clear_output()
        print("Model loaded successfully!")

    def reset_conversation(self, system_message):
        """Discard the history and start over with ``system_message`` as the only entry."""
        self.messages = [{"role": "system", "content": system_message}]

    def get_response(self, user_input):
        """Send ``user_input`` to the model and return the reply text.

        The user turn and the assistant reply are both appended to
        ``self.messages``. If generation fails, the history is restored to
        what it was before the call so a retry does not see a dangling user
        turn.

        Args:
            user_input: Text of the user's message.

        Returns:
            The ``content`` of the last generated message, or
            ``'No content available'`` when that message has no such key.

        Raises:
            RuntimeError: If no model has been loaded.
        """
        if getattr(self, "pipeline", None) is None:
            raise RuntimeError(
                "No model is loaded; create the ChatBot with a model_slug first."
            )

        history_len = len(self.messages)
        self.messages.append({"role": "user", "content": user_input})
        try:
            outputs = self.pipeline(
                self.messages,
                max_new_tokens=self.max_tokens,
                do_sample=True,
                temperature=self.temperature,
                top_k=self.top_k,
                top_p=self.top_p,
            )
            # Assumes the pipeline returns the whole message list with the
            # newly generated message last.
            response = outputs[0]["generated_text"][-1]
            content = response.get('content', 'No content available')
        except BaseException:
            # Roll back (also on interrupts) and re-raise unchanged.
            del self.messages[history_len:]
            raise

        self.messages.append({"role": "assistant", "content": content})
        return content

    def update_params(self, max_tokens=None, temperature=None, top_k=None, top_p=None):
        """Update generation parameters; arguments left as ``None`` keep their current value."""
        if max_tokens is not None:
            self.max_tokens = max_tokens
        if temperature is not None:
            self.temperature = temperature
        if top_k is not None:
            self.top_k = top_k
        if top_p is not None:
            self.top_p = top_p

In [4]:
def run_chatbot(
    model=None,
    system_message="You are Orca Mini, You are expert in following given instructions, Think step by step before coming up with final answer",
    max_tokens=None,
    temperature=None,
    top_k=None,
    top_p=None,
):
    """Run an interactive chat loop on standard input until the user types 'quit'.

    Args:
        model: Model identifier forwarded to ``ChatBot.get_instance``; ``None``
            asks for the existing instance.
        system_message: System prompt the conversation is reset with.
        max_tokens, temperature, top_k, top_p: Optional generation settings
            forwarded to ``ChatBot.update_params``; ``None`` leaves the
            chatbot's current value in place.

    Blank input lines are ignored. End-of-input or an interrupt at the prompt
    ends the session like 'quit'. Errors are printed rather than raised.
    """
    try:
        # Get or create the chatbot instance, then apply this session's settings.
        chatbot = ChatBot.get_instance(model)
        chatbot.update_params(max_tokens, temperature, top_k, top_p)
        chatbot.reset_conversation(system_message)

        print("Chatbot: Hi! Type 'quit' to exit.")

        while True:
            try:
                user_input = input("You: ").strip()
            except (EOFError, KeyboardInterrupt):
                # Closed input or an interrupt at the prompt: leave quietly.
                break

            if not user_input:
                # Nothing typed; don't spend a generation on an empty turn.
                continue
            if user_input.lower() == 'quit':
                break

            try:
                response = chatbot.get_response(user_input)
                print("Chatbot:", response)
            except Exception as e:
                # Keep the session alive so the user can retry.
                print(f"Chatbot: An error occurred: {str(e)}")
                print("Please try again.")

    except Exception as e:
        print(f"Error in chatbot: {str(e)}")

In [None]:
# Start an interactive chat session with this model; type 'quit' at the prompt to stop.
run_chatbot(model="pankajmathur/orca_mini_v9_6_3B-Instruct")

Model loaded successfully!
Chatbot: Hi! Type 'quit' to exit.
You: Write a casual summary of the U.S. maternity leave policy with two sections (Section 1 and Section 2) and at least 25 sentences.
Chatbot: Section 1: Introduction to U.S. Maternity Leave Policy

The United States has a complex and evolving maternity leave policy. The policy is primarily governed by the Family and Medical Leave Act (FMLA), which was enacted in 1993. This law provides eligible employees with up to 12 weeks of unpaid leave for certain family and medical reasons, including childbirth and the care of a newborn or adopted child. The policy is not uniform across all industries and employers, with some companies offering more generous leave benefits than others.

Section 2: Key Aspects of U.S. Maternity Leave Policy

1. Eligibility: To be eligible for FMLA, an employee must have worked for their employer for at least 12 months and completed at least 1,250 hours of service in the 12 months preceding the start of t

In [None]:
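# # Example (uncomment to run): restart the chat with a different system message and sampling settings; no model argument is given, so the already-loaded model is reused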
# run_chatbot(
# system_message="You are Orca Mini, You are expert in logic, Think step by step before coming up with final answer",
# max_tokens=1024,
# temperature=0.3
# )