"""Discord bot that answers ``!rwkv <question>`` messages with an RWKV model.

The model is loaded once after the bot connects.  Each request resets the
model state, feeds the question as a prompt, and streams the generated text
into a single Discord message that is edited as new chunks arrive.
"""

import os
import asyncio
# import pickle
import discord
from discord.ext import commands
from dotenv import load_dotenv
from threading import Thread
from rwkvstic.load import RWKV
from rwkvstic.agnostic.backends import TORCH, TORCH_QUANT
import torch

load_dotenv()

bot = commands.Bot("", intents=discord.Intents.all())

# This is the dtype used for trivial operations, such as vector->vector
# operations, and is the dtype that will determine the accuracy of the model.
runtimedtype = torch.float32  # torch.float64, torch.bfloat16
# This is the dtype used for matrix-vector operations, and is the dtype that
# will determine the performance and memory usage of the model.
dtype = torch.float32  # torch.float32, torch.float64, torch.bfloat16
useGPU = torch.cuda.is_available()  # False

# Keyword arguments forwarded to ``RWKV(...)``; built from the settings above
# so there is a single source of truth for each value.
quantized = {
    "mode": TORCH,
    "runtimedtype": runtimedtype,
    "useGPU": useGPU,
    # "chunksize": 32,  # larger = more accurate, but more memory
    # "target": 100,  # your gpu max size, excess vram offloaded to cpu
    "dtype": dtype,
}

# model_pipe = pickle.load(open('pipe.pkl', 'rb'))

MODEL_PATH = "RWKV-4-Pile-3B-Instruct-test2-20230209.pth"
COMMAND_PREFIX = "!rwkv "
END_OF_TEXT = "<|endoftext|>"
GENERATION_STEPS = 10      # number of forward() calls per request
TOKENS_PER_STEP = 10       # ``number`` argument passed to each forward() call
DISCORD_MESSAGE_LIMIT = 2000  # Discord rejects message content longer than this

# Loaded in ``on_ready``; stays ``None`` until the load has finished.
model = None

# Created lazily from inside a coroutine so it is bound to the running loop.
_model_lock = None


def _get_model_lock():
    """Return the lock that serialises every use of the shared model."""
    global _model_lock
    if _model_lock is None:
        _model_lock = asyncio.Lock()
    return _model_lock


def _load_model():
    """Blocking: build the RWKV model from ``MODEL_PATH`` and ``quantized``."""
    return RWKV(path=MODEL_PATH, **quantized)


def _prime_model(question):
    """Blocking: reset the model state and load the prompt for *question*."""
    # Grad mode is thread-local in PyTorch, so disable it explicitly here
    # because this runs in an executor thread rather than the main thread.
    with torch.no_grad():
        model.resetState()
        model.loadContext(
            newctx=f"\n\nQuestion: {question}\n\nExpert Long Detailed Response: ")


def _generate_chunk():
    """Blocking: run one generation step and return its ``"output"`` text."""
    with torch.no_grad():
        return model.forward(number=TOKENS_PER_STEP)["output"]


@bot.event
async def on_ready():
    """Log the login and load the model the first time the bot becomes ready.

    ``on_ready`` can be dispatched more than once (e.g. after a reconnect),
    so the load is guarded to avoid re-reading the checkpoint every time.
    """
    global model
    print(f'We have logged in as {bot.user}')
    async with _get_model_lock():
        if model is None:
            # Run the load in a worker thread so the event loop (and the
            # gateway heartbeat) is not blocked while the checkpoint is read.
            loop = asyncio.get_running_loop()
            model = await loop.run_in_executor(None, _load_model)


@bot.listen('on_message')
async def on_message_event(message):
    """Answer messages that start with ``!rwkv `` using the RWKV model.

    Sends a "Loading..." placeholder, then edits it roughly once per second
    with the text generated so far.  Generation stops after
    ``GENERATION_STEPS`` steps or as soon as the end-of-text marker appears.
    """
    if message.author == bot.user:
        return
    if not message.content.startswith(COMMAND_PREFIX):
        return

    if model is None:
        # The model is only available after on_ready has finished loading it.
        await message.channel.send(
            "The model is still loading, please try again shortly.")
        return

    question = message.content[len(COMMAND_PREFIX):]
    reply = await message.channel.send("Loading...")
    loop = asyncio.get_running_loop()
    text = ""

    # The model keeps a single internal state, so only one request may
    # generate at a time; otherwise a second request's resetState() would
    # corrupt the one in progress while it is awaiting.
    async with _get_model_lock():
        await loop.run_in_executor(None, _prime_model, question)
        for step in range(GENERATION_STEPS):
            print(step)
            chunk = await loop.run_in_executor(None, _generate_chunk)
            print(chunk)
            finished = END_OF_TEXT in chunk
            if finished:
                # Keep only the text before the marker; never show the marker.
                chunk = chunk.split(END_OF_TEXT, 1)[0]
            text += chunk
            if finished:
                break
            if text.strip():
                # Discord rejects empty content, so skip the edit until there
                # is something to show.
                await reply.edit(content=text[:DISCORD_MESSAGE_LIMIT])
            await asyncio.sleep(1)

    final_text = text[:DISCORD_MESSAGE_LIMIT]
    if not final_text.strip():
        final_text = "(no response)"
    await reply.edit(content=final_text)

    # if message.content:
    #     print(message.content)
    #     # await message.channel.send(model_pipe.predict([message.content]))
    #     await message.channel.send('Hello from hugging face')
    # await bot.process_commands(message)


bot.env = "prod"  # type: ignore
bot.name = "testAniPin"  # type: ignore

_token = os.getenv("discord_token")
if not _token:
    raise RuntimeError(
        "The 'discord_token' environment variable is not set; "
        "define it in the environment or in a .env file.")

# t = Thread(target=bot.run, daemon=True, args=(os.getenv("discord_token"), ))
bot.run(_token)
# t.start()