##### `πŸ¦™_Alpaca.py`
##### Alpaca Model
##### https://github.com/seemanne/llamacpypy
##### https://github.com/shaunabanana/llama.py
##### Please reach out to ben@benbox.org for any questions
#### Loading needed Python libraries
import streamlit as st
#from llamacpypy import Llama
import llamacpp
from llama_cpp import Llama
import os
import subprocess
#### Streamlit initial setup
st.set_page_config(
page_title = "πŸ¦™ Alpaca",
page_icon = "images/Logo.png",
layout = "centered",
initial_sidebar_state = "expanded"
)
#### Functions of the Python Wrapper
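# llama_stream() shells out to the compiled llama.cpp `main` binary, maps its keyword
# arguments to the corresponding command-line flags and yields the generated text
# piece by piece as it is read from the subprocess' stdout.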
def llama_stream(
prompt = '',
skip_prompt = True,
trim_prompt = 0,
executable = 'pages/llama.cpp/main',
model = 'models/7B/ggml-model-q4_0.bin',
threads = 4,
temperature = 0.7,
top_k = 40,
top_p = 0.5,
repeat_last_n = 256,
repeat_penalty = 1.17647,
n = 4096,
interactive = False,
reverse_prompt = "User:"
):
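    # Build the llama.cpp command line from the given model path and sampling parameters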
command = [
executable,
'-m', model,
'-t', str(threads),
'--temp', str(temperature),
'--top_k', str(top_k),
'--top_p', str(top_p),
'--repeat_last_n', str(repeat_last_n),
'--repeat_penalty', str(repeat_penalty),
'-n', str(n),
'-p', prompt
]
if interactive:
command += ['-i', '-r', reverse_prompt]
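    # Launch llama.cpp as a subprocess so its output can be streamed while it is still running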
process = subprocess.Popen(
command,
stdin = subprocess.PIPE,
stdout = subprocess.PIPE,
stderr = subprocess.PIPE,
)
token = b''
generated = ''
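    # Read stdout one byte at a time; bytes are buffered in `token` until they form a
    # complete UTF-8 sequence (multi-byte characters raise UnicodeDecodeError until all
    # of their bytes have arrived, in which case reading simply continues)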
while True:
token += process.stdout.read(1)
if token: # neither empty string nor None
try:
decoded = token.decode('utf-8')
trimmed_prompt = prompt
if trim_prompt > 0:
trimmed_prompt = prompt[:-trim_prompt]
prompt_finished = generated.startswith(trimmed_prompt)
reverse_prompt_encountered = generated.endswith(reverse_prompt)
if not skip_prompt or prompt_finished:
yield decoded
if interactive and prompt_finished and reverse_prompt_encountered:
user_input = input()
process.stdin.write(user_input.encode('utf-8') + b'\n')
process.stdin.flush()
generated += decoded
token = b''
except UnicodeDecodeError:
continue
elif process.poll() is not None:
return
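# llama() is a convenience wrapper around llama_stream(): with stream = True it returns
# the generator itself, otherwise it collects all yielded tokens into a single string.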
def llama(
prompt = '',
stream = False,
skip_prompt = False,
trim_prompt = 0,
executable = 'pages/llama.cpp/main',
model = 'models/7B/ggml-model-q4_0.bin',
threads = 4,
temperature = 0.7,
top_k = 40,
top_p = 0.5,
repeat_last_n = 256,
repeat_penalty = 1.17647,
n = 4096,
interactive = False,
reverse_prompt = "User:"
):
streamer = llama_stream(
prompt = prompt,
skip_prompt = skip_prompt,
trim_prompt = trim_prompt,
executable = executable,
model = model,
threads = threads,
temperature = temperature,
top_k = top_k,
top_p = top_p,
repeat_last_n = repeat_last_n,
repeat_penalty = repeat_penalty,
n = n,
interactive = interactive,
reverse_prompt = reverse_prompt
)
if stream:
return streamer
else:
return ''.join(list(streamer))
### Python Wrapper (functions above)
#text = []
#for token in llama(prompt = 'What is your purpose?', repeat_penalty = 1.05, skip_prompt = False, interactive = False):
# print(token, end = '', flush = True)
# text.append(token)
#st.subheader('Debug')
#st.experimental_show(text[0])
#st.experimental_show(text[1])
#st.subheader('Answer')
#st.write(''.join(text))
### llamacpypy
#llama = Llama(model_name = 'models/7B/ggml-model-q4_0.bin', warm_start = True)
#llama.load_model()
#var = llama.generate("This is the weather report, we are reporting a clown fiesta happening at backer street. The clowns ")
#st.write(var)
### llamacpp
#model_path = "./models/7B/ggml-model-q4_0.bin"
#params = llamacpp.gpt_params(model_path, 4096, 40, 0.1, 0.7, 2.0)
#model = llamacpp.PyLLAMA(model_path, params)
#text = model.predict("Hello, I'm a llama.", 10)
#st.write(text)
### Llama cpp
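# Active example using the llama-cpp-python bindings: load the quantised GGML model and
# run a single completion, echoing the prompt in the returned result.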
llm = Llama(model_path = "models/7B/ggml-model-q4_0.bin")
output = llm("Q: Name the planets in the solar system? A: ", max_tokens = 32, stop = ["Q:", "\n"], echo = True)
st.write(output)
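### Llama cpp (streaming sketch)
# A minimal sketch (left commented out) of how the same llama-cpp-python call could stream
# tokens into the Streamlit page instead of blocking until the full answer is ready. It
# assumes the stream = True flag of the Llama call, which yields completion chunks whose
# partial text sits under chunk["choices"][0]["text"]; the st.empty() placeholder is just
# one way to update the page incrementally.
#placeholder = st.empty()
#streamed_text = ''
#for chunk in llm("Q: Name the planets in the solar system? A: ", max_tokens = 32, stop = ["Q:", "\n"], stream = True):
#    streamed_text += chunk["choices"][0]["text"]
#    placeholder.write(streamed_text)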