sapthesh committed
Commit f3c977f · verified · 1 Parent(s): 98b740d

Update app.py

Files changed (1)
  1. app.py +19 -26
app.py CHANGED
@@ -1,38 +1,31 @@
 import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
-import torch
 
-# Model and Tokenizer from Hugging Face Hub
-model_name = "deepseek-ai/DeepSeek-V3"
-
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModelForCausalLM.from_pretrained(
-    model_name,
-    torch_dtype=torch.bfloat16,  # Use bfloat16 for faster and less memory-intensive inference if possible
-    trust_remote_code=True,  # Important for models with custom code
-    device_map="auto"  # Automatically use available GPU if possible
-)
+model_id = "deepseek-ai/DeepSeek-V3"
+tokenizer = AutoTokenizer.from_pretrained(model_id)
+model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", trust_remote_code=True)  # device_map="auto" uses a GPU if available; trust_remote_code because the model ships custom code
 
-def generate_response(prompt, history=[]):
-    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    outputs = model.generate(**inputs, max_new_tokens=500)  # Adjust max_new_tokens as needed
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+def predict(message, history):
+    # Rebuild the conversation in the message format the chat template expects
+    conversation = []
+    for user_msg, bot_response in history:
+        conversation.append({"role": "user", "content": user_msg})
+        if bot_response:  # only add the assistant turn if it exists
+            conversation.append({"role": "assistant", "content": bot_response})
+    conversation.append({"role": "user", "content": message})
 
-    # Basic chat history handling (optional, can be improved)
-    history.append((prompt, response))  # Append user prompt and model response to history
+    # Render and tokenize the conversation, then generate on the model's device
+    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt").to(model.device)
+    outputs = model.generate(input_ids, max_new_tokens=512)  # adjust max_new_tokens as needed
+    response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)  # decode only the new tokens, not the echoed prompt
     return response
 
 iface = gr.ChatInterface(
-    fn=generate_response,
-    inputs=gr.Chatbox(lines=7, placeholder="Type your message here..."),
-    outputs="text",
+    fn=predict,
+    textbox=gr.Textbox(placeholder="Type a message..."),  # ChatInterface takes a textbox; it has no inputs/outputs parameters
     title="DeepSeek-V3 Chatbot",
-    description="Chat with the DeepSeek-V3 model. Please be patient, initial loading might take a few minutes. For better performance, use a Space with a GPU.",
-    examples=[
-        "Hello, how are you?",
-        "What is the capital of France?",
-        "Tell me a joke."
-    ]
+    description="Chat with the DeepSeek-V3 model.",
 )
-
-iface.launch(share=False)
+iface.launch()
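
The core of this change is the history-to-messages loop plus the tokenizer's chat template. A minimal sketch (not part of the commit) of what that pair produces, runnable without downloading the model weights; the sample turns are made up, and tokenize=False makes apply_chat_template return the rendered prompt string instead of token ids:

# Sketch: preview the exact string predict() would ask the model to continue.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/DeepSeek-V3", trust_remote_code=True)

# Hypothetical sample turn (prompts borrowed from the examples list the commit removed)
history = [("Hello, how are you?", "Doing well, thanks!")]
message = "What is the capital of France?"

conversation = []
for user_msg, bot_response in history:
    conversation.append({"role": "user", "content": user_msg})
    if bot_response:
        conversation.append({"role": "assistant", "content": bot_response})
conversation.append({"role": "user", "content": message})

# tokenize=False returns the templated prompt string, so no model weights are needed
prompt = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, tokenize=False)
print(prompt)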
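
Since DeepSeek-V3 itself is far too large to load casually, the Gradio wiring can be smoke-tested separately with a stub callback of the same (message, history) shape that gr.ChatInterface invokes; echo below is a hypothetical stand-in, not part of the commit:

import gradio as gr

# Stub with the signature ChatInterface expects; history arrives as
# (user, bot) pairs, the same shape predict() iterates over.
def echo(message, history):
    return f"echo: {message!r} after {len(history)} earlier turns"

demo = gr.ChatInterface(fn=echo, title="Wiring check")
demo.launch()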