Error while trying to implement using streamlit.
#14
by
Eemansleepdeprived
- opened
The following is my initial demo.py code:
# %%
from llama_cpp import Llama
# %%
# Lazily-initialized model singleton: loading a multi-GB GGUF file on every
# call (e.g. every Streamlit rerun / button click) is prohibitively slow.
_llm = None


def _get_llm():
    """Load the GGUF model on first use and reuse it on all later calls.

    NOTE(review): the reported error "unknown model architecture: 'phi2'"
    comes from an llama-cpp-python build that predates phi-2 support --
    upgrading the package is required regardless of this code change.
    """
    global _llm
    if _llm is None:
        # Set n_gpu_layers to 0 if no GPU acceleration is available on your system.
        _llm = Llama(
            model_path="./new_update/phi-2.Q4_K_S.gguf",  # download the model file first
            n_ctx=2048,       # max sequence length; longer lengths need much more memory
            n_threads=8,      # CPU threads to use; tune to your system
            n_gpu_layers=35,  # layers to offload to GPU, if acceleration is available
        )
    return _llm


def generate_fun(name):
    """Return the model's explanation of the code snippet ``name``.

    Parameters
    ----------
    name : str
        Source code to explain; interpolated verbatim into the prompt.

    Returns
    -------
    str
        The generated explanation text.
    """
    llm = _get_llm()
    output = llm(
        f"Instruct: your job is to teach the user how the following code works. \n{name}. Output: The code works \n",  # Prompt
        max_tokens=512,  # Generate up to 512 tokens
        stop=["</s>"],   # Example stop token - not necessarily correct for this specific model! Please check before using.
        echo=False,      # Whether to echo the prompt
        temperature=0.7,
        # top_p=0.9,
        # top_k=40
    )
    return output['choices'][0]['text']
The following is my app.py:
import pandas as pd
import streamlit as st
from demo import generate_fun

st.markdown("<h1 style='text-align: center;'>LeapCode 踊</h1>", unsafe_allow_html=True)

# Problem database. NOTE(review): the lookup below treats 'Answer' as the
# serial number and 'Question' as the code -- the columns appear to be
# loaded swapped ("made the database ulta"); confirm against the CSV.
df = pd.read_csv('./Database/database.csv')

k = st.text_input("Enter the serial number of the problem:")
if k:
    # Validate the input instead of letting int() / .index() raise an
    # uncaught exception and crash the Streamlit script run.
    try:
        serial = int(k)
    except ValueError:
        st.error("Please enter a numeric serial number.")
    else:
        serials = list(df['Answer'])
        if serial not in serials:
            st.error("No problem found with that serial number.")
        else:
            ans = df['Question'][serials.index(serial)]
            st.code(ans, language='c++')
            st.balloons()
            butt = st.button("Explain Code?")
            if butt:
                st.write("Explanation:")
                with st.spinner('Wait for it...'):
                    prompt_output = generate_fun(ans)
                st.write(prompt_output)
                st.balloons()
This is the error:
llama_model_loader: loaded meta data with 20 key-value pairs and 325 tensors from ./new_update/phi-2.Q4_K_S.gguf (version unknown)
llama_model_loader: - tensor 0: token_embd.weight q4_K [ 2560, 51200, 1, 1 ]
llama_model_loader: - tensor 1: blk.0.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 2: blk.0.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 3: blk.0.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 4: blk.0.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 5: blk.0.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 6: blk.0.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 7: blk.0.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 8: blk.0.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 9: blk.0.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 10: blk.0.ffn_down.weight q5_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 11: blk.1.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 12: blk.1.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 13: blk.1.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 14: blk.1.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 15: blk.1.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 16: blk.1.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 17: blk.1.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 18: blk.1.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 19: blk.1.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 20: blk.1.ffn_down.weight q5_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 21: blk.10.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 22: blk.10.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 23: blk.10.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 24: blk.10.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 25: blk.10.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 26: blk.10.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 27: blk.10.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 28: blk.10.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 29: blk.10.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 30: blk.10.ffn_down.weight q5_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 31: blk.11.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 32: blk.11.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 33: blk.11.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 34: blk.11.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 35: blk.11.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 36: blk.11.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 37: blk.11.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 38: blk.11.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 39: blk.11.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 40: blk.11.ffn_down.weight q5_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 41: blk.12.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 42: blk.12.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 43: blk.12.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 44: blk.12.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 45: blk.12.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 46: blk.12.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 47: blk.12.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 48: blk.12.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 49: blk.12.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 50: blk.12.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 51: blk.13.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 52: blk.13.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 53: blk.13.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 54: blk.13.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 55: blk.13.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 56: blk.13.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 57: blk.13.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 58: blk.13.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 59: blk.13.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 60: blk.13.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 61: blk.14.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 62: blk.14.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 63: blk.14.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 64: blk.14.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 65: blk.14.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 66: blk.14.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 67: blk.14.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 68: blk.14.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 69: blk.14.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 70: blk.14.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 71: blk.15.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 72: blk.15.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 73: blk.15.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 74: blk.15.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 75: blk.15.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 76: blk.15.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 77: blk.15.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 78: blk.15.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 79: blk.15.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 80: blk.15.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 81: blk.16.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 82: blk.16.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 83: blk.16.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 84: blk.16.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 85: blk.16.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 86: blk.16.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 87: blk.16.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 88: blk.16.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 89: blk.16.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 90: blk.16.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 91: blk.17.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 92: blk.17.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 93: blk.17.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 94: blk.17.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 95: blk.17.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 96: blk.17.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 97: blk.17.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 98: blk.17.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 99: blk.17.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 100: blk.17.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 101: blk.18.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 102: blk.18.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 103: blk.18.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 104: blk.18.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 105: blk.18.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 106: blk.18.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 107: blk.18.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 108: blk.18.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 109: blk.18.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 110: blk.18.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 111: blk.19.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 112: blk.19.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 113: blk.19.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 114: blk.19.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 115: blk.19.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 116: blk.19.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 117: blk.19.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 118: blk.19.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 119: blk.19.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 120: blk.19.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 121: blk.2.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 122: blk.2.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 123: blk.2.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 124: blk.2.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 125: blk.2.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 126: blk.2.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 127: blk.2.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 128: blk.2.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 129: blk.2.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 130: blk.2.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 131: blk.20.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 132: blk.20.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 133: blk.20.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 134: blk.20.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 135: blk.20.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 136: blk.20.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 137: blk.20.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 138: blk.20.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 139: blk.20.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 140: blk.20.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 141: blk.21.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 142: blk.21.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 143: blk.21.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 144: blk.21.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 145: blk.21.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 146: blk.21.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 147: blk.21.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 148: blk.21.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 149: blk.21.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 150: blk.21.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 151: blk.22.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 152: blk.22.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 153: blk.22.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 154: blk.22.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 155: blk.22.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 156: blk.22.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 157: blk.22.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 158: blk.22.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 159: blk.22.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 160: blk.22.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 161: blk.23.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 162: blk.23.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 163: blk.23.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 164: blk.23.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 165: blk.23.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 166: blk.23.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 167: blk.23.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 168: blk.23.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 169: blk.23.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 170: blk.23.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 171: blk.24.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 172: blk.24.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 173: blk.24.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 174: blk.24.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 175: blk.24.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 176: blk.24.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 177: blk.24.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 178: blk.24.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 179: blk.24.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 180: blk.24.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 181: blk.25.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 182: blk.25.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 183: blk.25.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 184: blk.25.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 185: blk.25.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 186: blk.25.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 187: blk.25.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 188: blk.25.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 189: blk.25.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 190: blk.25.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 191: blk.26.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 192: blk.26.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 193: blk.26.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 194: blk.26.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 195: blk.26.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 196: blk.26.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 197: blk.26.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 198: blk.26.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 199: blk.26.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 200: blk.26.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 201: blk.27.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 202: blk.27.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 203: blk.27.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 204: blk.27.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 205: blk.27.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 206: blk.27.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 207: blk.27.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 208: blk.27.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 209: blk.27.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 210: blk.27.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 211: blk.28.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 212: blk.28.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 213: blk.28.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 214: blk.28.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 215: blk.28.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 216: blk.28.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 217: blk.28.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 218: blk.28.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 219: blk.28.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 220: blk.28.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 221: blk.29.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 222: blk.29.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 223: blk.29.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 224: blk.29.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 225: blk.29.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 226: blk.29.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 227: blk.29.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 228: blk.29.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 229: blk.29.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 230: blk.29.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 231: blk.3.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 232: blk.3.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 233: blk.3.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 234: blk.3.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 235: blk.3.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 236: blk.3.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 237: blk.3.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 238: blk.3.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 239: blk.3.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 240: blk.3.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 241: blk.30.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 242: blk.30.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 243: blk.4.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 244: blk.4.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 245: blk.4.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 246: blk.4.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 247: blk.4.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 248: blk.4.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 249: blk.4.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 250: blk.4.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 251: blk.4.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 252: blk.4.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 253: blk.5.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 254: blk.5.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 255: blk.5.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 256: blk.5.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 257: blk.5.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 258: blk.5.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 259: blk.5.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 260: blk.5.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 261: blk.5.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 262: blk.5.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 263: blk.6.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 264: blk.6.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 265: blk.6.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 266: blk.6.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 267: blk.6.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 268: blk.6.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 269: blk.6.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 270: blk.6.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 271: blk.6.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 272: blk.6.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 273: blk.7.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 274: blk.7.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 275: blk.7.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 276: blk.7.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 277: blk.7.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 278: blk.7.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 279: blk.7.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 280: blk.7.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 281: blk.7.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 282: blk.7.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 283: blk.8.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 284: blk.8.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 285: blk.8.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 286: blk.8.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 287: blk.8.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 288: blk.8.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 289: blk.8.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 290: blk.8.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 291: blk.8.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 292: blk.8.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 293: blk.9.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 294: blk.9.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 295: blk.9.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 296: blk.9.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 297: blk.9.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 298: blk.9.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 299: blk.9.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 300: blk.9.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 301: blk.9.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 302: blk.9.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 303: output.bias f32 [ 51200, 1, 1, 1 ]
llama_model_loader: - tensor 304: output.weight q6_K [ 2560, 51200, 1, 1 ]
llama_model_loader: - tensor 305: output_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 306: output_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 307: blk.30.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 308: blk.30.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 309: blk.30.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 310: blk.30.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 311: blk.30.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 312: blk.30.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 313: blk.30.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 314: blk.30.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - tensor 315: blk.31.attn_norm.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 316: blk.31.attn_norm.weight f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 317: blk.31.attn_qkv.bias f32 [ 7680, 1, 1, 1 ]
llama_model_loader: - tensor 318: blk.31.attn_qkv.weight q4_K [ 2560, 7680, 1, 1 ]
llama_model_loader: - tensor 319: blk.31.attn_output.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 320: blk.31.attn_output.weight q4_K [ 2560, 2560, 1, 1 ]
llama_model_loader: - tensor 321: blk.31.ffn_up.bias f32 [ 10240, 1, 1, 1 ]
llama_model_loader: - tensor 322: blk.31.ffn_up.weight q4_K [ 2560, 10240, 1, 1 ]
llama_model_loader: - tensor 323: blk.31.ffn_down.bias f32 [ 2560, 1, 1, 1 ]
llama_model_loader: - tensor 324: blk.31.ffn_down.weight q4_K [ 10240, 2560, 1, 1 ]
llama_model_loader: - kv 0: general.architecture str
llama_model_loader: - kv 1: general.name str
llama_model_loader: - kv 2: phi2.context_length u32
llama_model_loader: - kv 3: phi2.embedding_length u32
llama_model_loader: - kv 4: phi2.feed_forward_length u32
llama_model_loader: - kv 5: phi2.block_count u32
llama_model_loader: - kv 6: phi2.attention.head_count u32
llama_model_loader: - kv 7: phi2.attention.head_count_kv u32
llama_model_loader: - kv 8: phi2.attention.layer_norm_epsilon f32
llama_model_loader: - kv 9: phi2.rope.dimension_count u32
llama_model_loader: - kv 10: general.file_type u32
llama_model_loader: - kv 11: tokenizer.ggml.add_bos_token bool
llama_model_loader: - kv 12: tokenizer.ggml.model str
llama_model_loader: - kv 13: tokenizer.ggml.tokens arr
llama_model_loader: - kv 14: tokenizer.ggml.token_type arr
llama_model_loader: - kv 15: tokenizer.ggml.merges arr
llama_model_loader: - kv 16: tokenizer.ggml.bos_token_id u32
llama_model_loader: - kv 17: tokenizer.ggml.eos_token_id u32
llama_model_loader: - kv 18: tokenizer.ggml.unknown_token_id u32
llama_model_loader: - kv 19: general.quantization_version u32
llama_model_loader: - type f32: 195 tensors
llama_model_loader: - type q4_K: 125 tensors
llama_model_loader: - type q5_K: 4 tensors
llama_model_loader: - type q6_K: 1 tensors
error loading model: unknown model architecture: 'phi2'
llama_load_model_from_file: failed to load model
2024-04-11 23:48:37.589 Uncaught app exception
Traceback (most recent call last):
File "/opt/homebrew/lib/python3.11/site-packages/streamlit/runtime/scriptrunner/script_runner.py", line 542, in _run_script
exec(code, module.__dict__)
File "/Users/eemanmajumder/code_shit/test/LeapCode/with_ai_app.py", line 22, in <module>
prompt_output = generate_fun(ans)
^^^^^^^^^^^^^^^^^
File "/Users/eemanmajumder/code_shit/test/LeapCode/demo.py", line 7, in generate_fun
llm = Llama(
^^^^^^
File "/opt/homebrew/lib/python3.11/site-packages/llama_cpp/llama.py", line 323, in __init__
assert self.model is not None
^^^^^^^^^^^^^^^^^^^^^^
AssertionError