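# Gradio chat demo: streams responses from a quantized Nous-Hermes-13B
# GGML model served locally with llama-cpp-python.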
import os
import random

import wget
import gradio as gr
from llama_cpp import Llama

# Download the quantized Nous-Hermes-13B GGML weights once; skip the
# download if the file is already present from a previous run.
url = 'https://huggingface.co/TheBloke/Nous-Hermes-13B-GGML/resolve/main/nous-hermes-13b.ggmlv3.q2_K.bin'
filename = os.path.basename(url)
if not os.path.exists(filename):
    filename = wget.download(url)

# Randomize the sampling seed so each restart produces different outputs.
llm = Llama(model_path=filename, seed=random.randint(1, 2**31))
with gr.Blocks() as demo:
    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])
    # instruction = gr.Textbox(label="Instruction")

    def user(user_message, history):
        # Clear the textbox and lock it while the bot streams its reply;
        # it is re-enabled once generation finishes (see below).
        return gr.update(value="", interactive=False), history + [[user_message, None]]
    def bot(history):
        user_message = history[-1][0]
        # Build the Alpaca-style prompt and tokenize it in a single call,
        # so the BOS token is added only once (tokenizing each fragment
        # separately would prepend a BOS to every piece).
        # An "### Instruction: " section could be prepended here as well.
        prompt = b"### Input: " + user_message.encode() + b"### Response:"
        tokens = llm.tokenize(prompt)

        history[-1][1] = ""
        count = 0
        for token in llm.generate(tokens, top_k=50, top_p=0.73, temp=0.72, repeat_penalty=1.1):
            count += 1
            # Stop at end-of-sequence or after 500 generated tokens.
            if token == llm.token_eos() or count >= 500:
                break
            # errors="ignore" avoids a crash when a multi-byte character
            # is split across two tokens.
            text = llm.detokenize([token]).decode(errors="ignore")
            history[-1][1] += text
            yield history
    response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    # Unlock the textbox once the streamed response is complete.
    response.then(lambda: gr.update(interactive=True), None, [msg], queue=False)

demo.queue()
demo.launch(debug=True)
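# To run locally (assuming the usual PyPI package names):
#   pip install gradio llama-cpp-python wget
#   python app.py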