desert committed
Commit 01945bd
Parent: f61a70f

init inference

Files changed (2)
  1. app.py +53 -51
  2. requirements.txt +1 -2
app.py CHANGED
@@ -1,53 +1,55 @@
  import gradio as gr
- from transformers import AutoModelForCausalLM, AutoTokenizer
-
- # Load your model and tokenizer
- model_name = "Mat17892/llama_lora_G14"  # Replace with your Hugging Face model name
- tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForCausalLM.from_pretrained(model_name)
-
- def respond(
-     message,
-     history: list[tuple[str, str]],
-     system_message,
-     max_tokens,
-     temperature,
-     top_p,
- ):
-     messages = [{"role": "system", "content": system_message}]
-
-     for val in history:
-         if val[0]:
-             messages.append({"role": "user", "content": val[0]})
-         if val[1]:
-             messages.append({"role": "assistant", "content": val[1]})
-
-     messages.append({"role": "user", "content": message})
-
-     # Prepare input for the model
-     input_text = message
-     inputs = tokenizer(input_text, return_tensors="pt")
-
-     # Generate response
-     outputs = model.generate(
-         **inputs,
-         max_new_tokens=max_tokens,
-         temperature=temperature,
-         top_p=top_p
      )
-     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-     return response
-
- # Create the Gradio interface
- demo = gr.ChatInterface(
-     respond,
-     additional_inputs=[
-         gr.Textbox(value="You are a friendly chatbot.", label="System message"),
-         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-         gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
-     ],
- )
-
- if __name__ == "__main__":
-     demo.launch()

  import gradio as gr
+ from llama_cpp import Llama
+
+ # Path to the GGUF model file
+ model_path = "Mat17892/lora_llama_gguf_g14/model.gguf"  # Update this path to your model
+
+ # Load the GGUF model using llama-cpp-python
+ print("Loading model...")
+ llm = Llama(model_path=model_path, n_ctx=2048, n_threads=8)  # Adjust threads as needed
+ print("Model loaded!")
+
+ # Chat function
+ def chat_with_model(user_input, chat_history):
+     """
+     Process user input and generate a response from the model.
+     :param user_input: User's input string
+     :param chat_history: Conversation history
+     :return: Updated chat history
+     """
+     # Format chat history for the Llama model
+     prompt = ""
+     for turn in chat_history:
+         prompt += f"User: {turn['user']}\nAI: {turn['ai']}\n"
+     prompt += f"User: {user_input}\nAI:"
+
+     # Generate response from the model
+     response = llm(prompt)["choices"][0]["text"].strip()
+
+     # Update chat history
+     chat_history.append({"user": user_input, "ai": response})
+     return chat_history, chat_history
+
+ # Gradio UI
+ with gr.Blocks() as demo:
+     gr.Markdown("# 🦙 LLaMA GGUF Chatbot")
+     chat_box = gr.Chatbot(label="Chat with the GGUF Model")
+
+     with gr.Row():
+         with gr.Column(scale=4):
+             user_input = gr.Textbox(label="Your Message", placeholder="Type a message...")
+         with gr.Column(scale=1):
+             submit_btn = gr.Button("Send")
+
+     chat_history = gr.State([])
+
+     # Link components
+     submit_btn.click(
+         chat_with_model,
+         inputs=[user_input, chat_history],
+         outputs=[chat_box, chat_history],
+         show_progress=True,
      )
+
+ # Launch the app
+ demo.launch()
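
Note on the new script: `Llama(model_path=...)` expects a local file, while the hard-coded "Mat17892/lora_llama_gguf_g14/model.gguf" looks like a Hub repo path, and `gr.Chatbot` renders a list of (user, assistant) pairs rather than the list of dicts that chat_with_model returns. A minimal sketch of both adjustments, assuming the repo actually ships a file named model.gguf and reusing huggingface_hub, which is already in requirements.txt:

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Fetch the GGUF file from the Hub so Llama() gets a real local path.
# The filename "model.gguf" is taken from the hard-coded path above and
# is an assumption about the repo's contents.
local_path = hf_hub_download(
    repo_id="Mat17892/lora_llama_gguf_g14",
    filename="model.gguf",
)
llm = Llama(model_path=local_path, n_ctx=2048, n_threads=8)

def chat_with_model(user_input, chat_history):
    # Rebuild the running transcript in the same "User:/AI:" format as above.
    prompt = ""
    for turn in chat_history:
        prompt += f"User: {turn['user']}\nAI: {turn['ai']}\n"
    prompt += f"User: {user_input}\nAI:"

    # Cap the reply and stop before the model invents the next "User:" turn;
    # llama-cpp-python's default max_tokens is otherwise very small.
    response = llm(prompt, max_tokens=256, stop=["User:"])["choices"][0]["text"].strip()

    chat_history.append({"user": user_input, "ai": response})
    # gr.Chatbot expects (user, assistant) pairs, so convert before returning.
    pairs = [(turn["user"], turn["ai"]) for turn in chat_history]
    return pairs, chat_history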
requirements.txt CHANGED
@@ -1,4 +1,3 @@
  huggingface_hub==0.25.2
  gradio
- transformers
- torch

  huggingface_hub==0.25.2
  gradio
+ llama-cpp-python
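
For reproducible Space builds, the new dependency can be pinned the same way huggingface_hub already is. A sketch; the version bound is an assumption, not part of the commit:

huggingface_hub==0.25.2
gradio
llama-cpp-python>=0.2.0  # assumed lower bound; pin whichever version builds on your hardware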