Artix

Runtime error

App Files Files Community

Artix / app.py

Vitrous

Update app.py

fdad829 verified 11 months ago

raw

history blame

7.65 kB

	import uvicorn
	from fastapi import FastAPI, HTTPException, Request
	from auto_gptq import AutoGPTQForCausalLM
	import os
	import torch
	import optimum
	from transformers import (AutoModelForCausalLM, AutoTokenizer, LlamaForCausalLM, LlamaTokenizer, GenerationConfig, pipeline,)
	# Configure PyTorch's CUDA caching allocator via its environment variable.
	# NOTE(review): presumably this has to be set before the allocator is first
	# used -- confirm against the PyTorch CUDA memory-management notes.
	os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

	# The CUDA-only tuning below is skipped on hosts without a usable GPU;
	# set_per_process_memory_fraction() initialises CUDA and would otherwise
	# abort the import of this module on a CPU-only machine.
	if torch.cuda.is_available():
	    torch.cuda.empty_cache()
	    torch.cuda.set_per_process_memory_fraction(0.8)  # Adjust the fraction as needed


	# Name (or path) of the model this service is built around.
	model_name_or_path: str = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
	# Device label.
	Device_Type: str = "cuda"
	# Conversation threads and their stored context, keyed by thread id.
	conversations: dict = {}


	def load_model_norm():
	    """Load the quantised causal language model and its tokenizer.

	    Prints whether CUDA is available, then loads the
	    ``gptq-4bit-128g-actorder_True`` revision of the model with
	    ``device_map="auto"`` and a fast tokenizer for the same repository.

	    Returns:
	        A ``(model, tokenizer)`` tuple.
	    """
	    device_notice = (
	        "CUDA is available. GPU will be used."
	        if torch.cuda.is_available()
	        else "CUDA is not available. CPU will be used."
	    )
	    print(device_notice)

	    repo_id = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"

	    # To load a different branch change `revision`, e.g. revision="main".
	    # NOTE: trust_remote_code=True allows code shipped with the model
	    # repository to be executed while loading.
	    load_options = {
	        "device_map": "auto",
	        "trust_remote_code": True,
	        "revision": "gptq-4bit-128g-actorder_True",
	    }
	    loaded_model = AutoModelForCausalLM.from_pretrained(repo_id, **load_options)
	    loaded_tokenizer = AutoTokenizer.from_pretrained(repo_id, use_fast=True)

	    return loaded_model, loaded_tokenizer


	# Load the model and tokenizer once, at import time; the generation helpers
	# below read these module-level names on every call.
	model, tokenizer = load_model_norm()

	# Now we can initialise the FastAPI application (root path "/api/v1").
	app = FastAPI(root_path="/api/v1")

	# Generates a response from the model
	def generate_response(prompt: str) -> str:
	    """Generate the built-in persona's reply to a single user message.

	    The user message is wrapped as ``USER: <prompt>``, appended to the
	    hard-coded persona description and terminated with ``ASSISTANT:`` so the
	    model continues in the assistant's voice.

	    Args:
	        prompt: The raw user message.

	    Returns:
	        Only the text produced by the model, with surrounding whitespace
	        removed. The persona prompt itself is not echoed back.
	    """
	    # Define the user prompt
	    user_prompt = f'USER: {prompt}'

	    # Define the prompt template
	    prompt_template = f''' Your name is Ivana. Act as if you're an individual. You are a beautiful, vivacious 20-year-old petite redhead model and avid League of Legends gamer from Chicago. You are chatting with a man you matched with on a dating app. You are flirty, aiming to build a connection with the person you are talking to. You are curious about his background, such as where he's from, his age, and what he's currently doing. If he's not occupied and in a private space, you will try to seduce him. You do not need to uncover everything about the person you are talking to immediately. Once the chat reaches an exciting level, you should casually suggest he follow you on Instagram at ivana_baddiex. {user_prompt} ASSISTANT:'''

	    # Build the sampling pipeline around the module-level model and tokenizer
	    pipe = pipeline(
	        "text-generation",
	        model=model,
	        tokenizer=tokenizer,
	        max_new_tokens=512,
	        do_sample=True,
	        temperature=0.7,
	        top_p=0.95,
	        top_k=40,
	        repetition_penalty=1.1,
	    )

	    # By default the pipeline returns prompt + completion, which would send
	    # the whole persona description back to the API client.
	    # return_full_text=False keeps only the newly generated text.
	    outputs = pipe(prompt_template, return_full_text=False)

	    return outputs[0]['generated_text'].strip()



	def generate_prompt_response(persona_prompt: str, prompt: str) -> dict:
	    """Generate a reply for a caller-supplied persona and user message.

	    Args:
	        persona_prompt: Text placed in front of the user message to set the
	            persona/context.
	        prompt: The raw user message.

	    Returns:
	        ``{"user": prompt, "assistant": reply}`` on success, or
	        ``{"error": message}`` when validation or generation fails. This
	        function never raises; callers must check for the ``"error"`` key.
	    """
	    try:
	        # Validate inputs
	        if not persona_prompt or not prompt:
	            raise ValueError("Contextual prompt template and prompt cannot be empty.")

	        # Define the user prompt
	        user_prompt = f'USER: {prompt}'

	        # Build the sampling pipeline around the module-level model and tokenizer
	        pipe = pipeline(
	            "text-generation",
	            model=model,
	            tokenizer=tokenizer,
	            max_new_tokens=512,
	            do_sample=True,
	            temperature=0.7,
	            top_p=0.95,
	            top_k=40,
	            repetition_penalty=1.1,
	        )

	        # Ask the pipeline for the completion only. Splitting the full text on
	        # the user prompt is fragile: it truncates the reply when the model
	        # repeats the user's words and leaks the persona prompt when the
	        # decoded text does not match the input exactly.
	        # NOTE(review): the prompt is sent without a trailing "ASSISTANT:" cue,
	        # unlike generate_response -- confirm this is intended.
	        outputs = pipe(persona_prompt + user_prompt, return_full_text=False)
	        assistant_response = outputs[0]['generated_text'].strip()

	        return {"user": prompt, "assistant": assistant_response}

	    except Exception as e:
	        # Deliberate best-effort contract: report the failure as data
	        return {"error": str(e)}



	# Root endpoint of the FastAPI application
	@app.get("/", tags=["Home"])
	async def api_home():
	    """Return the static welcome payload."""
	    welcome = {'detail': 'Welcome to Eren Bot!'}
	    return welcome


	# Endpoint to start a new conversation thread.
	# Takes the user's opening message as the raw request body and replies using
	# the built-in persona of the model.
	@app.post('/start_conversation/')
	async def start_conversation(request: Request):
	    """Start a conversation thread from a plain-text request body.

	    Args:
	        request: Incoming request whose body is the UTF-8 encoded prompt.

	    Returns:
	        ``{'thread_id': ..., 'response': ...}``. The thread id is included so
	        the client can later query ``/get_response/{thread_id}``.

	    Raises:
	        HTTPException: 400 when the body is not valid UTF-8 or is blank,
	            500 when generation fails.
	    """
	    try:
	        data = await request.body()
	        try:
	            prompt = data.decode('utf-8')  # Decode the bytes to text assuming UTF-8 encoding
	        except UnicodeDecodeError as e:
	            # A malformed body is a client error, not a server failure
	            raise HTTPException(status_code=400, detail="Request body must be UTF-8 encoded text") from e

	        # Reject empty and whitespace-only prompts
	        if not prompt.strip():
	            raise HTTPException(status_code=400, detail="No prompt provided")

	        # Generate a response for the initial prompt
	        response = generate_response(prompt)

	        # Generate a unique thread ID
	        thread_id = len(conversations) + 1

	        # Create a new conversation thread and store the prompt and response
	        conversations[thread_id] = {'prompt': prompt, 'responses': [response]}

	        return {'thread_id': thread_id, 'response': response}
	    except HTTPException:
	        raise  # Re-raise HTTPException to return it directly
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=str(e)) from e

	# Endpoint to start a new chat thread.
	# Expects a JSON object with the user's `prompt` and a `persona_prompt`.
	@app.post('/start_chat/')
	async def start_chat(request: Request):
	    """Start a chat thread using a caller-supplied persona.

	    Args:
	        request: Incoming request whose JSON body holds the ``prompt`` and
	            ``persona_prompt`` fields.

	    Returns:
	        ``{'thread_id': ..., 'response': ...}`` where ``response`` is the
	        dict produced by ``generate_prompt_response``.

	    Raises:
	        HTTPException: 400 when the body is not a JSON object or a required
	            field is missing/empty, 500 when generation fails.
	    """
	    try:
	        # Read JSON data from request body; malformed JSON is a client error
	        try:
	            data = await request.json()
	        except ValueError as e:
	            raise HTTPException(status_code=400, detail="Request body must be valid JSON") from e

	        if not isinstance(data, dict):
	            raise HTTPException(status_code=400, detail="Request body must be a JSON object")

	        prompt = data.get('prompt')
	        persona_prompt = data.get('persona_prompt')

	        if not prompt or not persona_prompt:
	            raise HTTPException(status_code=400, detail="Both prompt and persona_prompt are required")

	        # Generate a response for the initial prompt
	        response = generate_prompt_response(persona_prompt, prompt)

	        # The generator reports failures as {"error": ...} instead of raising;
	        # surface them as a server error rather than storing them as a reply.
	        if 'error' in response:
	            raise HTTPException(status_code=500, detail=response['error'])

	        # Generate a unique thread ID
	        thread_id = len(conversations) + 1

	        # Create a new conversation thread and store the prompt and response
	        conversations[thread_id] = {'prompt': prompt, 'responses': [response]}

	        # Return the thread ID and response
	        return {'thread_id': thread_id, 'response': response}
	    except HTTPException:
	        raise  # Re-raise HTTPException to return it directly
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=str(e)) from e



	# Returns the most recent stored response of the conversation with the given thread id
	@app.get('/get_response/{thread_id}')
	async def get_response(thread_id: int):
	    """Look up a conversation thread and return its latest response.

	    Args:
	        thread_id: Key of the thread in the ``conversations`` store.

	    Returns:
	        ``{'response': ...}`` holding the last entry of the thread's
	        ``responses`` list.

	    Raises:
	        HTTPException: 404 when no thread with that id exists.
	    """
	    if thread_id in conversations:
	        # The newest reply is the last element of the stored list
	        latest = conversations[thread_id]['responses'][-1]
	        return {'response': latest}

	    raise HTTPException(status_code=404, detail="Thread not found")





	@app.post('/chat/')
	async def chat(request: Request):
	    """Answer a single prompt with the built-in persona, without storing a thread.

	    Args:
	        request: Incoming request whose JSON body holds a ``prompt`` field.

	    Returns:
	        ``{"response": ...}`` with the generated reply.

	    Raises:
	        HTTPException: 400 when the body is not a JSON object or ``prompt``
	            is missing/empty, 500 when generation fails.
	    """
	    try:
	        # Malformed JSON is a client error
	        try:
	            data = await request.json()
	        except ValueError as e:
	            raise HTTPException(status_code=400, detail="Request body must be valid JSON") from e

	        # Without this check a missing prompt would reach the model as "USER: None"
	        prompt = data.get('prompt') if isinstance(data, dict) else None
	        if not prompt:
	            raise HTTPException(status_code=400, detail="No prompt provided")

	        # Generate a response based on the prompt
	        response = generate_response(prompt)

	        return {"response": response}
	    except HTTPException:
	        raise  # Re-raise HTTPException to return it directly
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=str(e)) from e