Akjava committed on
Commit
04e5516
1 Parent(s): 716a41a
Files changed (2)
  1. app.py +98 -0
  2. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,98 @@
+ import spaces  # Hugging Face Spaces helper; imported for the Spaces runtime
+ import os
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+ import gradio as gr
+
+ huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
+ if not huggingface_token:
+     print("HUGGINGFACE_TOKEN is not set; add it as a Space secret if the model needs authentication")
+     # raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
+
+ model_id = "openbmb/MiniCPM-2B-dpo-bf16"
+
+ device = "auto"  # torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ dtype = torch.bfloat16
+
+ tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
+
+ print(model_id, device, dtype)
+
+ # Conversation history shared across calls; grows by one user/assistant pair per turn.
+ histories = []
+
+
+ def call_generate_text(prompt, system_message="You are a helpful assistant."):
+     if prompt == "":
+         print("empty prompt, returning")
+         return ""
+
+     global histories
+
+     # Build the chat: system message, then accumulated history, then the new prompt.
+     messages = [
+         {"role": "system", "content": system_message},
+     ]
+     messages += histories
+
+     user_message = {"role": "user", "content": prompt}
+     messages += [user_message]
+
+     try:
+         text = generate_text(messages)
+         histories += [user_message, {"role": "assistant", "content": text}]
+         return text
+     except RuntimeError as e:
+         print(f"An unexpected error occurred: {e}")
+         return ""
+
+
+ iface = gr.Interface(
+     fn=call_generate_text,
+     inputs=[
+         gr.Textbox(lines=3, label="Input Prompt"),
+         gr.Textbox(lines=2, label="System Message",
+                    value="You are a helpful assistant and always reply in Japanese."),
+     ],
+     outputs=gr.Textbox(label="Generated Text"),
+     title=f"{model_id}",
+     description=f"{model_id} CPU",
+ )
+ print("Initialized")
+
+ # Load the model once at module scope; generate_text reuses it on every call.
+ model = AutoModelForCausalLM.from_pretrained(
+     model_id, token=huggingface_token, torch_dtype=dtype, device_map=device
+ )
+
+
+ def generate_text(messages):
+     # The pipeline manages device placement itself, so no explicit .to(device).
+     text_generator = pipeline(
+         "text-generation", model=model, tokenizer=tokenizer,
+         torch_dtype=dtype, device_map=device,
+     )
+     result = text_generator(messages, max_new_tokens=256, do_sample=True, temperature=0.7)
+
+     # With chat-style input the pipeline returns the whole message list;
+     # take the content of the last assistant turn.
+     generated_output = result[0]["generated_text"]
+     if isinstance(generated_output, list):
+         for message in reversed(generated_output):
+             if message.get("role") == "assistant":
+                 return message.get("content", "No content found.")
+         return "No assistant response found."
+     else:
+         return "Unexpected output format."
+
+
+ if __name__ == "__main__":
+     print("Main")
+     iface.launch()
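
The history handling above can be smoke-tested without the Gradio UI. A minimal sketch, assuming app.py is importable from the working directory and the bf16 weights fit in memory (importing the module loads the model but does not launch the interface, since the launch is guarded by __name__):

    from app import call_generate_text, histories

    # First turn: histories is empty, so the model sees only the
    # system message plus this prompt.
    print(call_generate_text("Hello, who are you?"))

    # Second turn: the previous user/assistant pair is prepended
    # as context inside call_generate_text.
    print(call_generate_text("Summarize your last answer in one sentence."))

    # Two turns -> four entries (user, assistant, user, assistant).
    print(len(histories))

Note that histories is never truncated, so a long session will eventually exceed the model's context window.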
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ numpy
+ torch
+ spaces
+ accelerate
+ transformers
+ bitsandbytes
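
requirements.txt pulls in bitsandbytes even though app.py loads the model in bf16 without quantization. If memory is tight, a 4-bit load is one way to use that dependency; this is a sketch of the standard transformers BitsAndBytesConfig path, not something this commit does, and it requires a CUDA GPU:

    import torch
    from transformers import AutoModelForCausalLM, BitsAndBytesConfig

    # Hypothetical alternative to the bf16 load in app.py, using the
    # bitsandbytes dependency that requirements.txt already lists.
    quant_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    model = AutoModelForCausalLM.from_pretrained(
        "openbmb/MiniCPM-2B-dpo-bf16",
        quantization_config=quant_config,
        device_map="auto",
    )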