import os

import gradio as gr
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import StreamingStdOutCallbackHandler


# Handlers attached to the LLM; this one echoes generated tokens to stdout.
callbacks = [StreamingStdOutCallbackHandler()]

# Location of the GGUF weights. The LLAMA_MODEL_PATH environment variable
# overrides the default so the script can run without editing the source.
MODEL_PATH = os.environ.get(
    "LLAMA_MODEL_PATH",
    "cerebras_Llama3-DocChat-1.0-8B_Base_adapt_basic_model_16bit.gguf",
)

print("creating ll started")
llm = LlamaCpp(
    model_path=MODEL_PATH,
    temperature=0.75,
    max_tokens=30,
    # top_p is a nucleus-sampling probability mass, so values above 1 are not
    # meaningful (the original 4 looks like a typo). 1.0 is the largest valid
    # setting and leaves nucleus filtering effectively off.
    top_p=1.0,
    # A plain list of handlers belongs in `callbacks`; `callback_manager` is
    # the older, deprecated spelling of this argument.
    callbacks=callbacks,
    verbose=True,
)
print("creating ll ended")

# Instruction-style prompt with a single placeholder, {question}, which is
# filled with the user's text. The "Input" section is intentionally empty.
# NOTE(review): "Finiantial" is misspelled ("Financial"). It is kept verbatim
# because the fine-tuned model may have been trained with this exact wording;
# confirm before correcting it.
template = """You are the Finiantial expert:
### Instruction:
{question}
### Input:
### Response:
"""

prompt = PromptTemplate(template=template, input_variables=["question"])

# Chain that renders `prompt` with the question and sends it to `llm`.
llm_chain_model = LLMChain(prompt=prompt, llm=llm)
print("creating model created")


def greet(question: str) -> str:
    """Answer *question* using the module-level LLM chain.

    Logs the incoming question and the generated answer to stdout.

    Args:
        question: Free-form text entered by the user.

    Returns:
        The text produced by ``llm_chain_model`` for the question, or a short
        hint when the question is empty or only whitespace.
    """
    print(f"question is {question}")

    # Skip inference entirely for blank input; there is nothing to answer.
    if not question or not question.strip():
        return "Please enter a question."

    out_gen = llm_chain_model.run(question)
    print(f"out is {out_gen}")
    return out_gen


# Minimal text-in / text-out web UI around `greet`.
demo = gr.Interface(fn=greet, inputs="text", outputs="text")

# Only start the server when executed as a script, so importing this module
# (e.g. to reuse `demo` or `greet`) does not block on a running server.
if __name__ == "__main__":
    # NOTE: share=True requests a publicly reachable link for the app.
    demo.launch(debug=True, share=True)