import os, asyncio
# import pickle
import discord
from discord.ext import commands
from dotenv import load_dotenv
from threading import Thread
from rwkvstic.load import RWKV
from rwkvstic.agnostic.backends import TORCH, TORCH_QUANT
import torch

# Read settings (such as the bot token) from a local .env file, if present.
load_dotenv()

# The bot is created with an empty command prefix and every gateway intent.
bot = commands.Bot(command_prefix="", intents=discord.Intents.all())

# Dtype used for trivial operations (such as vector->vector operations); it
# determines the accuracy of the model.
runtimedtype = torch.float32  # torch.float64, torch.bfloat16

# Dtype used for matrix-vector operations; it determines the performance and
# memory usage of the model.
# NOTE(review): not referenced by the quantized configuration below -- confirm
# whether the TORCH_QUANT mode accepts a "dtype" argument before wiring it in.
dtype = torch.float32  # torch.float32, torch.float64, torch.bfloat16

useGPU = torch.cuda.is_available()  # False

# Keyword arguments passed to RWKV(...) when the model is loaded. The values
# are taken from the knobs above so that editing a knob actually changes the
# configuration that is used.
quantized = {
    "mode": TORCH_QUANT,
    "runtimedtype": runtimedtype,
    "useGPU": useGPU,
    "chunksize": 32,  # larger = more accurate, but more memory
    "target": 100,  # your gpu max size, excess vram offloaded to cpu
}

# model_pipe = pickle.load(open('pipe.pkl', 'rb'))


# True once a model load has been started, so that repeated on_ready events
# (for example after a reconnect) do not load the model again.
_model_load_started = False


@bot.event
async def on_ready():
    """Announce the login and load the RWKV model into the global ``model``.

    The load runs in the default executor so the event loop stays responsive
    while the weights are fetched and initialised. Only the first call starts
    a load; if that load fails, the guard is cleared so a later ready event
    can retry, and the error is re-raised.
    """
    global model, _model_load_started
    print(f'We have logged in as {bot.user}')

    if _model_load_started:
        return
    _model_load_started = True

    loop = asyncio.get_running_loop()
    try:
        model = await loop.run_in_executor(
            None,
            lambda: RWKV(
                "https://huggingface.co/Hazzzardous/RWKV-8Bit/resolve/main/RWKV-4-Pile-7B-Instruct.pqth",
                **quantized),
        )
    except Exception:
        # Allow a retry on the next ready event instead of staying stuck.
        _model_load_started = False
        raise


# Marker the model emits when it considers the answer finished.
_END_OF_TEXT = "<|endoftext|>"
# Discord rejects message content longer than this many characters.
_DISCORD_MESSAGE_LIMIT = 2000
# Created lazily (inside the running event loop) by _get_generation_lock().
_generation_lock = None


def _get_generation_lock():
    """Return the lock that serialises access to the shared model state."""
    global _generation_lock
    if _generation_lock is None:
        _generation_lock = asyncio.Lock()
    return _generation_lock


def _displayable(text):
    """Cut ``text`` at the end-of-text marker and clamp it to Discord's limit."""
    return text.split(_END_OF_TEXT, 1)[0][:_DISCORD_MESSAGE_LIMIT]


@bot.listen('on_message')
async def on_message_event(message):
    """Answer ``!rwkv <question>`` messages with text generated by the model.

    A "Loading..." reply is posted first and then edited as text is produced:
    up to 10 rounds of ``model.forward(number=10)``, stopping early once the
    end-of-text marker appears. Messages from the bot itself and messages
    without the ``!rwkv `` prefix are ignored.

    Args:
        message: The incoming Discord message.
    """
    if message.author == bot.user:
        return

    prefix = "!rwkv "
    if not message.content.startswith(prefix):
        return

    # The model is assigned by on_ready; a request can arrive before then.
    rwkv = globals().get("model")
    if rwkv is None:
        await message.channel.send("The model is still loading, please try again in a moment.")
        return

    reply = await message.channel.send("Loading...")
    question = message.content[len(prefix):]
    loop = asyncio.get_running_loop()

    def prime():
        # Start from a clean state and feed the prompt to the model.
        rwkv.resetState()
        rwkv.loadContext(
            newctx=f"\n\nQuestion: {question}\n\nExpert Long Detailed Response: ")

    generated = ""
    # The model keeps a single conversation state, so only one request may
    # use it at a time; otherwise another handler could reset it at an await.
    async with _get_generation_lock():
        # Model calls are blocking, so they run in the executor to keep the
        # event loop free.
        await loop.run_in_executor(None, prime)
        for step in range(10):
            print(step)
            result = await loop.run_in_executor(
                None, lambda: rwkv.forward(number=10))
            chunk = result["output"]
            generated += chunk
            print(chunk)

            # Checked on the accumulated text so a marker split across two
            # chunks is still detected.
            if _END_OF_TEXT in generated:
                break

            partial = _displayable(generated)
            # Editing to empty/whitespace-only content is rejected by Discord.
            # The result of edit() is deliberately not reassigned.
            if partial.strip():
                await reply.edit(content=partial)

    await asyncio.sleep(1)
    final = _displayable(generated)
    await reply.edit(content=final if final.strip() else "(empty response)")

    # if message.content:
    #     print(message.content)
    #     # await message.channel.send(model_pipe.predict([message.content]))
    #     await message.channel.send('Hello from hugging face')
    # await bot.process_commands(message)

# Ad-hoc attributes attached to the bot instance.
bot.env = "prod"  # type: ignore
bot.name = "testAniPin"  # type: ignore

# Fail fast with a clear message instead of handing None to bot.run() when
# the token is missing from the environment / .env file.
discord_token = os.getenv("discord_token")
if not discord_token:
    raise RuntimeError("The 'discord_token' environment variable is not set")

# t = Thread(target=bot.run, daemon=True, args=(os.getenv("discord_token"), ))
bot.run(discord_token)
# t.start()