govisi committed
Commit
35cb1af
1 Parent(s): 4f5bd17
Files changed (2)
  1. app.py +7 -3
  2. chatbot.py +15 -15
app.py CHANGED
@@ -13,19 +13,23 @@ bot = commands.Bot("", intents=discord.Intents.all())
 
 # model_pipe = pickle.load(open('pipe.pkl', 'rb'))
 # this is the dtype used for trivial operations, such as vector->vector operations and is the dtype that will determine the accuracy of the model
-runtimedtype = torch.float32 # torch.float64, torch.bfloat16
+runtimedtype = torch.float16 # torch.float64, torch.bfloat16
 
 # this is the dtype used for matrix-vector operations, and is the dtype that will determine the performance and memory usage of the model
 dtype = torch.bfloat16 # torch.float32, torch.float64, torch.bfloat16
 
-useGPU = False # False
+useGPU = torch.cuda.is_available() # False
 
 
 @bot.event
 async def on_ready():
     print(f'We have logged in as {bot.user}')
     global model
-    model = RWKV("RWKV-4-Pile-3B-Instruct-test2-20230209.pth", mode=TORCH, useGPU=useGPU, runtimedtype=runtimedtype, dtype=dtype)
+    model = RWKV("https://huggingface.co/BlinkDL/rwkv-4-pile-3b/blob/main/RWKV-4-Pile-3B-Instruct-test2-20230209.pth",
+                 mode=TORCH,
+                 useGPU=useGPU,
+                 runtimedtype=runtimedtype,
+                 dtype=dtype)
 
 
 @bot.listen('on_message')
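
For reference, below is a minimal sketch (not part of the commit) of the runtime configuration app.py ends up with after this change: runtimedtype is the dtype for the vector->vector math that sets accuracy, dtype is the dtype for the matrix-vector math that sets speed and memory, and the GPU is now autodetected with torch.cuda.is_available() instead of being hard-coded to False.

# Illustrative sketch only; mirrors the settings added to app.py in this commit.
import torch

# dtype for trivial vector->vector ops (controls accuracy)
runtimedtype = torch.float16

# dtype for matrix-vector ops (controls performance and memory usage)
dtype = torch.bfloat16

# use the GPU automatically when CUDA is present, fall back to CPU otherwise
useGPU = torch.cuda.is_available()
print(f"useGPU={useGPU}, runtimedtype={runtimedtype}, dtype={dtype}")
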
chatbot.py CHANGED
@@ -1,21 +1,21 @@
-from rwkvstic.load import RWKV
-from rwkvstic.agnostic.backends import TORCH
-import torch
-import os
+# from rwkvstic.load import RWKV
+# from rwkvstic.agnostic.backends import TORCH
+# import torch
+# import os
 
-os.environ["RWKV_JIT_ON"] = '1'
-os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
-# this is the dtype used for trivial operations, such as vector->vector operations and is the dtype that will determine the accuracy of the model
-runtimedtype = torch.float32 # torch.float64, torch.bfloat16
+# os.environ["RWKV_JIT_ON"] = '1'
+# os.environ["RWKV_CUDA_ON"] = '1' # if '1' then use CUDA kernel for seq mode (much faster)
+# # this is the dtype used for trivial operations, such as vector->vector operations and is the dtype that will determine the accuracy of the model
+# runtimedtype = torch.float32 # torch.float64, torch.bfloat16
 
-# this is the dtype used for matrix-vector operations, and is the dtype that will determine the performance and memory usage of the model
-dtype = torch.bfloat16 # torch.float32, torch.float64, torch.bfloat16
+# # this is the dtype used for matrix-vector operations, and is the dtype that will determine the performance and memory usage of the model
+# dtype = torch.bfloat16 # torch.float32, torch.float64, torch.bfloat16
 
-useGPU = False # False
+# useGPU = False # False
 
-model = RWKV("RWKV-4-Pile-3B-Instruct-test2-20230209.pth", mode=TORCH, useGPU=useGPU, runtimedtype=runtimedtype, dtype=dtype)
+# model = RWKV("RWKV-4-Pile-3B-Instruct-test2-20230209.pth", mode=TORCH, useGPU=useGPU, runtimedtype=runtimedtype, dtype=dtype)
 
-model.loadContext(newctx=f"Q: How many hours are there in a day?\n\nA:")
-output = model.forward(number=100)["output"]
+# model.loadContext(newctx=f"Q: How many hours are there in a day?\n\nA:")
+# output = model.forward(number=100)["output"]
 
-print(output)
+# print(output)
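
Putting the two files together, the snippet below is a rough sketch of how the rwkvstic calls in this repo fit together after this commit: the RWKV(...) load call now added to app.py, followed by the loadContext/forward usage that was commented out of chatbot.py. It assumes rwkvstic and torch are installed; it is illustrative, not code from the commit.

# Rough sketch assembled from this commit's code; assumes rwkvstic and torch are installed.
from rwkvstic.load import RWKV
from rwkvstic.agnostic.backends import TORCH
import torch

# same configuration app.py uses after this commit
model = RWKV(
    "https://huggingface.co/BlinkDL/rwkv-4-pile-3b/blob/main/RWKV-4-Pile-3B-Instruct-test2-20230209.pth",
    mode=TORCH,
    useGPU=torch.cuda.is_available(),
    runtimedtype=torch.float16,
    dtype=torch.bfloat16,
)

# prompt/inference pattern from the (now commented-out) chatbot.py
model.loadContext(newctx="Q: How many hours are there in a day?\n\nA:")
output = model.forward(number=100)["output"]  # generate 100 tokens
print(output)
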