# NOTE: the following header is residue from the Hugging Face Spaces file
# viewer (status banner, per-line blame hashes, line-number gutter); it is
# not part of the program and is kept only as a comment.
# Spaces: Runtime error
# File size: 2,562 Bytes
# Blame hashes: 9a5ae39 f80d9e5 9a5ae39 804e6d8 5bd1756 804e6d8 f7082fd f80d9e5 a509010 f7082fd 5bd1756 857624c 5bd1756 9a5ae39 804e6d8 deff2db 804e6d8 deff2db 804e6d8 35cb1af 804e6d8 9a5ae39 4f5bd17 5bd1756 4f5bd17 9a5ae39 d5a5e2a d5ed24c 9a5ae39 804e6d8 4f5bd17 d5a5e2a 9a5ae39 80c55a7 d5ed24c 80c55a7 7df36c9
# Line-number gutter: 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77
import os, asyncio
# import pickle
import discord
from discord.ext import commands
from dotenv import load_dotenv
from threading import Thread
from rwkvstic.load import RWKV
from rwkvstic.agnostic.backends import TORCH, TORCH_QUANT
import torch
# Read environment variables from a .env file (if present) before anything
# below looks them up.
load_dotenv()

# Bot with an empty command prefix and every gateway intent requested.
bot = commands.Bot("", intents=discord.Intents.all())

# dtype used for trivial (vector -> vector) operations; it determines the
# accuracy of the model.  Alternatives: torch.float64, torch.bfloat16.
runtimedtype = torch.float32

# dtype used for matrix-vector operations; it determines the performance and
# memory usage of the model.  Alternatives: torch.float64, torch.bfloat16.
dtype = torch.float32

# Run on the GPU whenever CUDA is available (set to False to force CPU).
useGPU = torch.cuda.is_available()

# Keyword arguments forwarded to ``RWKV(...)`` for the quantized backend.
quantized = dict(
    mode=TORCH_QUANT,
    runtimedtype=runtimedtype,
    useGPU=useGPU,
    chunksize=32,  # larger = more accurate, but more memory
    target=100,  # your gpu max size, excess vram offloaded to cpu
)
@bot.event
async def on_ready():
    """Load the RWKV model the first time the bot reports ready.

    discord.py does not guarantee that ``on_ready`` fires only once (it is
    dispatched again after some reconnects), so the model load is started at
    most once and later invocations return immediately.  The load itself is
    run in the default executor so that downloading and initialising the
    checkpoint does not block the event loop (and with it the gateway
    heartbeat).

    Side effects:
        Sets the module-level ``model`` once loading has finished.

    Raises:
        Whatever ``RWKV(...)`` raises; in that case the "load started" flag
        is cleared so that a later ``on_ready`` can retry.
    """
    global model, _model_load_started
    print(f'We have logged in as {bot.user}')

    # ``globals().get`` because neither name exists before the first call.
    if globals().get("_model_load_started"):
        return
    _model_load_started = True

    loop = asyncio.get_running_loop()
    try:
        model = await loop.run_in_executor(
            None,
            lambda: RWKV(
                "https://huggingface.co/Hazzzardous/RWKV-8Bit/resolve/main/RWKV-4-Pile-7B-Instruct.pqth",
                **quantized,
            ),
        )
    except Exception:
        # Allow the next on_ready (e.g. after a reconnect) to try again.
        _model_load_started = False
        raise
@bot.listen('on_message')
async def on_message_event(message):
    """Answer ``!rwkv <question>`` messages with text generated by the model.

    A placeholder reply is sent first and then edited in place as the model
    produces more text (up to 10 rounds of 10 tokens each).  Generation stops
    early when the model emits its ``<|endoftext|>`` marker; the marker and
    anything after it are not shown to the user.

    Args:
        message: The incoming Discord message.

    Notes:
        * Messages written by the bot itself and messages without the
          ``!rwkv `` prefix are ignored.
        * If the module-level ``model`` has not been set yet, the user is
          told to retry instead of the handler failing with ``NameError``.
        * Model calls run in the default executor so the event loop stays
          responsive, and a lock serialises requests because every request
          resets and mutates the same shared model state.
    """
    if message.author == bot.user:
        return

    command_prefix = "!rwkv "
    if not message.content.startswith(command_prefix):
        return

    rwkv = globals().get("model")
    if rwkv is None:
        await message.channel.send(
            "The model is still loading, please try again in a moment.")
        return

    end_marker = "<|endoftext|>"
    max_message_len = 2000  # Discord's limit for message content
    loop = asyncio.get_running_loop()
    # Created lazily inside the running loop; shared by all invocations.
    generation_lock = globals().setdefault(
        "_rwkv_generation_lock", asyncio.Lock())

    question = message.content[len(command_prefix):]
    prompt = f"\n\nQuestion: {question}\n\nExpert Long Detailed Response: "

    mess = await message.channel.send("Loading...")
    tex = ""
    async with generation_lock:
        rwkv.resetState()
        await loop.run_in_executor(
            None, lambda: rwkv.loadContext(newctx=prompt))

        for _ in range(10):
            chunk = await loop.run_in_executor(
                None, lambda: rwkv.forward(number=10)["output"])
            print(chunk)
            tex += chunk

            if end_marker in tex:
                # Drop the end-of-text marker and whatever follows it.
                tex = tex.split(end_marker, 1)[0]
                break

            # Discord rejects empty/whitespace-only content, so only push an
            # update once there is something to show.  ``mess`` is not
            # rebound to the return value of ``edit`` because older
            # discord.py versions return ``None`` there.
            if tex.strip():
                await mess.edit(content=tex[:max_message_len])
            # Pause between edits (kept from the original implementation;
            # presumably to stay clear of edit rate limits).
            await asyncio.sleep(1)

    final_text = tex.strip()[:max_message_len]
    await mess.edit(content=final_text or "(no response)")
# Free-form attributes attached to the bot instance (not read anywhere in the
# visible code; presumably consumed by other tooling).
bot.env = "prod"  # type: ignore
bot.name = "testAniPin"  # type: ignore

# Fail fast with an explicit message when the token is missing instead of
# handing ``None`` to ``bot.run``.
_discord_token = os.getenv("discord_token")
if not _discord_token:
    raise RuntimeError(
        "The 'discord_token' environment variable is not set; "
        "cannot start the Discord bot.")

# Blocks until the bot shuts down.
bot.run(_discord_token)
|