File size: 2,562 Bytes
9a5ae39
 
 
 
f80d9e5
9a5ae39
804e6d8
5bd1756
804e6d8
f7082fd
f80d9e5
a509010
f7082fd
5bd1756
 
857624c
5bd1756
 
 
 
 
9a5ae39
804e6d8
deff2db
804e6d8
 
deff2db
804e6d8
35cb1af
804e6d8
9a5ae39
 
 
 
4f5bd17
5bd1756
 
4f5bd17
9a5ae39
d5a5e2a
 
d5ed24c
 
9a5ae39
804e6d8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4f5bd17
 
 
 
d5a5e2a
9a5ae39
80c55a7
d5ed24c
80c55a7
7df36c9
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import os, asyncio
# import pickle
import discord
from discord.ext import commands
from dotenv import load_dotenv
from threading import Thread
from rwkvstic.load import RWKV
from rwkvstic.agnostic.backends import TORCH, TORCH_QUANT
import torch

# Load environment settings (the Discord token is read from the environment
# at the bottom of this file).
load_dotenv()

# Bot with an empty command prefix and every gateway intent requested.
bot = commands.Bot("", intents=discord.Intents.all())

# model_pipe = pickle.load(open('pipe.pkl', 'rb'))

# dtype used for trivial operations such as vector->vector operations; it
# determines the accuracy of the model.
# Alternatives: torch.float64, torch.bfloat16
runtimedtype = torch.float32

# dtype used for matrix-vector operations; it determines the performance and
# memory usage of the model.
# Alternatives: torch.float64, torch.bfloat16
dtype = torch.float32

# Use the GPU whenever CUDA is available (set to False to force CPU).
useGPU = torch.cuda.is_available()

# Keyword arguments handed to the RWKV loader for the quantized backend.
quantized = {
    "mode": TORCH_QUANT,
    "runtimedtype": runtimedtype,
    "useGPU": useGPU,
    # larger = more accurate, but more memory
    "chunksize": 32,
    # your gpu max size, excess vram offloaded to cpu
    # NOTE(review): unit is not visible here (presumably GB) - confirm.
    "target": 100,
}


@bot.event
async def on_ready():
    """Announce the login and load the RWKV model exactly once.

    discord.py may dispatch ``on_ready`` several times over the life of the
    process (for example after a reconnect), so the load is skipped when the
    global ``model`` already exists or another load is still running.

    The model is constructed in a worker thread: building it inline would
    block the event loop (and with it the gateway heartbeat) for as long as
    the download and load take.

    Side effects:
        Sets the module-level global ``model`` once loading succeeds.
    """
    global model
    print(f'We have logged in as {bot.user}')

    # Already loaded, or a previous on_ready is still loading: nothing to do.
    if "model" in globals() or getattr(bot, "_model_loading", False):
        return

    bot._model_loading = True  # type: ignore
    try:
        loop = asyncio.get_running_loop()
        model = await loop.run_in_executor(
            None,
            lambda: RWKV(
                "https://huggingface.co/Hazzzardous/RWKV-8Bit/resolve/main/RWKV-4-Pile-7B-Instruct.pqth",
                **quantized),
        )
    finally:
        # Clear the flag even on failure so a later on_ready can retry.
        bot._model_loading = False  # type: ignore


@bot.listen('on_message')
async def on_message_event(message):
    """Answer ``!rwkv <question>`` messages with text generated by the model.

    A "Loading..." placeholder is posted first and then edited in place as
    the model produces output, up to 10 rounds of ``forward(number=10)``.
    Generation stops early when the ``<|endoftext|>`` marker appears; the
    marker and anything after it are cut from the reply.

    Args:
        message: The incoming Discord message.

    Notes:
        * Messages from the bot itself and messages without the ``!rwkv ``
          prefix are ignored.
        * Model calls run in a worker thread so the event loop keeps
          servicing the gateway while text is generated.
        * Requests are serialized with a lock because they all share one
          model state (``resetState`` / ``loadContext`` / ``forward``).
    """
    prefix = "!rwkv "
    stop_token = "<|endoftext|>"
    max_chars = 2000  # Discord's limit for message content

    if message.author == bot.user:
        return
    if not message.content.startswith(prefix):
        return

    # The model is created in on_ready and may not exist yet.
    rwkv = globals().get("model")
    if rwkv is None:
        await message.channel.send(
            "The model is still loading, please try again shortly.")
        return

    # Create the lock lazily, inside the running loop, and keep it on the bot.
    lock = getattr(bot, "_rwkv_lock", None)
    if lock is None:
        lock = bot._rwkv_lock = asyncio.Lock()  # type: ignore

    loop = asyncio.get_running_loop()
    mess = await message.channel.send("Loading...")
    prompt = (f"\n\nQuestion: {message.content[len(prefix):]}"
              f"\n\nExpert Long Detailed Response: ")

    def prime():
        # Start from a clean state and feed the prompt to the model.
        rwkv.resetState()
        rwkv.loadContext(newctx=prompt)

    def step():
        return rwkv.forward(number=10)["output"]

    tex = ""
    async with lock:
        await loop.run_in_executor(None, prime)
        for i in range(10):
            print(i)
            curr = await loop.run_in_executor(None, step)
            print(curr)
            tex = tex + curr

            # Search the accumulated text so a marker split across two
            # chunks is still detected; drop it and everything after it.
            if stop_token in tex:
                tex = tex.split(stop_token, 1)[0]
                break

            # Discord rejects empty content, so skip the edit until there
            # is something visible to show.
            if tex.strip():
                await mess.edit(content=tex[:max_chars])

    await asyncio.sleep(1)
    final = tex[:max_chars] if tex.strip() else "(no output)"
    await mess.edit(content=final)

    # if message.content:
    #     print(message.content)
    #     # await message.channel.send(model_pipe.predict([message.content]))
    #     await message.channel.send('Hello from hugging face')
    # await bot.process_commands(message)

# Ad-hoc attributes stored on the bot instance; the ignores silence the type
# checker for these assignments.
bot.name = "testAniPin"  # type: ignore
bot.env = "prod"  # type: ignore

# Start the bot with the token taken from the `discord_token` environment
# variable. Launching it through a daemon Thread
# (Thread(target=bot.run, daemon=True, args=(token,)) followed by t.start())
# was tried earlier and is left disabled.
bot.run(os.environ.get("discord_token"))