Spaces:
Runtime error
Runtime error
File size: 1,980 Bytes
762c182 86acf2f 762c182 354bd03 a6dfd28 354bd03 710ee23 354bd03 82fe858 354bd03 86acf2f 354bd03 86acf2f 354bd03 82fe858 354bd03 86acf2f 354bd03 86acf2f 354bd03 710ee23 354bd03 86acf2f 354bd03 ed610fa 354bd03 19fcd2b 354bd03 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
import gradio as gr
import json
from huggingface_hub import snapshot_download
from llama_cpp import Llama
# Hub repository to pull from, and the one weights file wanted out of it.
repo_name = "PY007/TinyLlama-1.1B-Chat-v0.2-GGUF"
model_name = "ggml-model-q4_0.gguf"

# Single-turn chat prompt: the user's text replaces "{}" and the model
# continues writing after the assistant marker.
template = "<|im_start|>user\n{}<|im_end|>\n<|im_start|>assistant\n"

# Fetch only the file matching `model_name` into the current directory, so
# the relative `model_path` passed to Llama below resolves.
snapshot_download(
    repo_id=repo_name,
    allow_patterns=model_name,
    local_dir=".",
)

# NOTE(review): `n_parts` looks like a legacy llama.cpp loader option; confirm
# the installed llama-cpp-python still honours it before relying on it.
model = Llama(model_path=model_name, n_ctx=1024, n_parts=1)
def generate(
    input=None,
    temperature=0.1,
    top_p=0.75,
    top_k=40,
    max_tokens=512,
):
    """Stream the model's reply to a single user message.

    This is a generator: after every chunk received from the model it yields
    the reply accumulated so far, so a consumer can simply display the latest
    value to show a growing answer.

    Args:
        input: The user's message. ``None`` or a blank string short-circuits
            to a single empty result without running the model. (The name
            shadows the builtin but is kept so keyword callers keep working.)
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling threshold forwarded to the model.
        top_k: Top-k sampling cutoff; coerced to ``int``.
        max_tokens: Upper bound on generated tokens; coerced to ``int``.

    Yields:
        The cumulative reply text after each streamed chunk.

    Returns:
        The complete reply, as the generator's return value.
    """
    # Without this guard the default ``None`` would be formatted into the
    # prompt as the literal text "None", and an empty box would still cost a
    # full inference run.
    if input is None or not str(input).strip():
        yield ""
        return ""

    prompt = template.format(input)
    output = ""
    stream = model.create_completion(
        prompt,
        temperature=temperature,
        # UI sliders / API clients may deliver these as floats (e.g. 50.0);
        # the sampler expects integers.
        top_k=int(top_k),
        top_p=top_p,
        max_tokens=int(max_tokens),
        # Stop at the end-of-turn marker used by `template`.
        stop=["<|im_end|>"],
        echo=False,
        stream=True,
    )
    for chunk in stream:
        output += chunk["choices"][0]["text"]
        yield output
    return output
# Web UI: one prompt box plus the sampling controls, passed positionally to
# `generate` in the order listed under `inputs`.
# NOTE(review): the description links to kirp/TinyLlama-1.1B-Chat-v0.2-gguf
# while the weights are downloaded from the repository named in `repo_name`;
# confirm which one is intended.
g = gr.Interface(
    fn=generate,
    inputs=[
        gr.components.Textbox(
            lines=2, label="Prompt", value="What is Huggingface?"
        ),
        gr.components.Slider(minimum=0, maximum=1, value=0.1, label="Temperature"),
        gr.components.Slider(minimum=0, maximum=1, value=1, label="Top p"),
        gr.components.Slider(minimum=0, maximum=100, step=1, value=50, label="Top k"),
        gr.components.Slider(minimum=1, maximum=1024, step=1, value=256, label="Max tokens"),
    ],
    outputs=[
        gr.Textbox(
            lines=10,
            label="Output",
        )
    ],
    title="TinyLlama 1.1B Chat GGUF",
    description="""
original model: [PY007/TinyLlama-1.1B-Chat-v0.2](https://huggingface.co/PY007/TinyLlama-1.1B-Chat-v0.2)
quantized_model: [kirp/TinyLlama-1.1B-Chat-v0.2-gguf](https://huggingface.co/kirp/TinyLlama-1.1B-Chat-v0.2-gguf)
""",
)

# Enable the queue so the generator's partial outputs are streamed to the UI.
# `concurrency_count` was removed in Gradio 4 and passing it raises at startup;
# the default already processes one request at a time on both Gradio 3.x
# (concurrency_count=1) and 4.x (default concurrency limit of 1), which is
# what the single shared model instance needs.
g.queue()
g.launch()
|