Spaces:
Runtime error
Runtime error
BitsAndBytesConfig removed
Browse files
app.py
CHANGED
|
@@ -8,7 +8,7 @@ from langchain.memory import ConversationBufferMemory
|
|
| 8 |
from langchain.chains import ConversationalRetrievalChain
|
| 9 |
from transformers import pipeline
|
| 10 |
from langchain import HuggingFacePipeline
|
| 11 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
|
| 12 |
import torch
|
| 13 |
|
| 14 |
|
|
@@ -25,15 +25,10 @@ def init():
|
|
| 25 |
def init_llm_pipeline():
|
| 26 |
if "llm" not in st.session_state:
|
| 27 |
model_id = "bigcode/starcoder2-15b"
|
| 28 |
-
quantization_config = BitsAndBytesConfig(
|
| 29 |
-
load_in_4bit=True,
|
| 30 |
-
bnb_4bit_compute_dtype=torch.float16
|
| 31 |
-
)
|
| 32 |
-
|
| 33 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 34 |
model = AutoModelForCausalLM.from_pretrained(
|
| 35 |
model_id,
|
| 36 |
-
quantization_config=quantization_config,
|
| 37 |
device_map="auto",
|
| 38 |
)
|
| 39 |
tokenizer.add_eos_token = True
|
|
@@ -44,7 +39,7 @@ def init_llm_pipeline():
|
|
| 44 |
model=model,
|
| 45 |
tokenizer=tokenizer,
|
| 46 |
task="text-generation",
|
| 47 |
-
temperature=0.
|
| 48 |
repetition_penalty=1.1,
|
| 49 |
return_full_text=True,
|
| 50 |
max_new_tokens=300,
|
|
|
|
| 8 |
from langchain.chains import ConversationalRetrievalChain
|
| 9 |
from transformers import pipeline
|
| 10 |
from langchain import HuggingFacePipeline
|
| 11 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
|
| 12 |
import torch
|
| 13 |
|
| 14 |
|
|
|
|
| 25 |
def init_llm_pipeline():
|
| 26 |
if "llm" not in st.session_state:
|
| 27 |
model_id = "bigcode/starcoder2-15b"
|
| 28 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 30 |
model = AutoModelForCausalLM.from_pretrained(
|
| 31 |
model_id,
|
|
|
|
| 32 |
device_map="auto",
|
| 33 |
)
|
| 34 |
tokenizer.add_eos_token = True
|
|
|
|
| 39 |
model=model,
|
| 40 |
tokenizer=tokenizer,
|
| 41 |
task="text-generation",
|
| 42 |
+
temperature=0.2,
|
| 43 |
repetition_penalty=1.1,
|
| 44 |
return_full_text=True,
|
| 45 |
max_new_tokens=300,
|