Finance

Running on Zero

File size: 6,495 Bytes

5416372
a2e6c05
 
d81ed7c
a2e6c05
d81ed7c
a2e6c05
 
 
33b9eeb
20636b9
d777e50
 
a2e6c05
 
 
 
 
 
 
5fe2c9a
 
3b39700
a2e6c05
5fe2c9a
63ba25e
a2e6c05
e976361
63ba25e
a2e6c05
d81ed7c
 
90e8d67
 
 
8b77502
90e8d67
1240624
2a18dea
36f3f97
2b8851d
1240624
90e8d67
 
583461f
c2e1d70
d81ed7c
 
 
 
 
 
e976361
 
a2e6c05
d81ed7c
a2e6c05
 
 
 
 
 
0a910e6
 
 
a2e6c05
 
e976361
 
a2e6c05
e976361
4993069
a2e6c05
 
d81ed7c
a2e6c05
 
db00aa2
a2e6c05
 
 
 
 
e976361
 
 
a2e6c05
e976361
a2e6c05
d81ed7c
a2e6c05
d81ed7c
a2e6c05
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d81ed7c
42692c3
 
 
 
 
 
 
 
 
 
 
d81ed7c
 
 
e976361
d81ed7c
 
 
 
 
 
e976361
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d81ed7c
33b9eeb
d81ed7c
b23a519
 
 
88a7fc3
18e5a55
5619915
18e5a55
3b39700
014d21e
 
a2e6c05
42692c3
 
a2e6c05
 
 
 
 
09dc1cd
08a4d08
d81ed7c
 
2c8259d

import spaces
import json
import subprocess
import gradio as gr
from huggingface_hub import hf_hub_download

# Runtime dependency bootstrap.
# llama-cpp-python comes from the project's "cu124" wheel index and
# llama-cpp-agent from the default index. Both are imported lazily inside
# respond(), so they have to be installed before the first chat request.
# Arguments are passed as a list (no shell involved) and check=True makes a
# failed install abort startup with pip's exit status instead of surfacing
# later as an ImportError on the first request.
subprocess.run(
    [
        "pip", "install", "llama-cpp-python==0.2.75",
        "--extra-index-url", "https://abetlen.github.io/llama-cpp-python/whl/cu124",
    ],
    check=True,
)
subprocess.run(["pip", "install", "llama-cpp-agent==0.2.10"], check=True)

# Model weights are fetched into ./models, which is where respond() looks them
# up by file name. Only the 9b model is downloaded; the other candidates are
# kept below for reference.
#hf_hub_download(repo_id="baconnier/Finance_dolphin-2.9.1-yi-1.5-34b_GGUF", filename="Finance_dolphin-2.9.1-yi-1.5-34b-Q8_0.gguf",  local_dir = "./models")
hf_hub_download(
    repo_id="baconnier/Finance_dolphin-2.9.1-yi-1.5-9b_GGUF",
    filename="Finance_dolphin-2.9.1-yi-1.5-9b_Q8_0.gguf",
    local_dir="./models",
)
#hf_hub_download(repo_id="baconnier/finance_dolphin_orpo_llama3_8B_r64_51K_GGUF", filename="finance_dolphin_orpo_llama3_8B_r64_51K_GGUF-unsloth.Q8_0.gguf",  local_dir = "./models")
#hf_hub_download(repo_id="crusoeai/dolphin-2.9.1-llama-3-8b-GGUF", filename="dolphin-2.9.1-llama-3-8b.Q6_K.gguf",  local_dir = "./models")

# Custom stylesheet handed to gr.ChatInterface through its `css` argument.
# It spaces out message rows, rounds the message bubble border, and overrides
# border/background colours for the `.dark` variants of the bubble, user and
# assistant classes.
css = """
.message-row {
    justify-content: space-evenly !important;
}
.message-bubble-border {
    border-radius: 6px !important;
}
.dark.message-bubble-border {
    border-color: #21293b !important;
}
.dark.user {
    background: #0a1120 !important;
}
.dark.assistant {
    background: transparent !important;
}
"""

# HTML card passed to gr.Chatbot as its `placeholder`: two images served from
# the Space repository plus a short description of the assistant.
# NOTE(review): the <h2> is intentionally left as in the original (a single
# space); confirm whether a title was meant to go there.
PLACEHOLDER = """
<div class="message-bubble-border" style="display:flex; max-width: 600px; border-radius: 8px; box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); backdrop-filter: blur(10px);">
    <figure style="margin: 0;">
        <img src="https://huggingface.co/spaces/baconnier/Finance/resolve/main/banker.jpg" style="width: 100%; height: 100%; border-radius: 8px;">
    </figure>
    <div style="padding: .5rem 1.5rem;">
        <img src="https://huggingface.co/spaces/baconnier/Finance/resolve/main/banker_plus.jpg" style="width: 100%; height: 10%; border-radius: 8px;">    
        <h2 style="text-align: left; font-size: 1.5rem; font-weight: 700; margin-bottom: 0.5rem;"> </h2>
        <p style="text-align: left; font-size: 16px; line-height: 1.5; margin-bottom: 15px;">Banker++ is trained to act like a Senior Banker. Use this template for learning purposes only. Also a Real time version exist</p>
    </div>    
</div>
"""

@spaces.GPU(duration=120)
def respond(
    message,
    history: list[tuple[str, str]],
    max_tokens,
    temperature,
    top_p,
    top_k,
    repeat_penalty,
    model,
):
    """Stream a chat completion for ``message`` from a local GGUF model.

    The model file ``models/<model>`` is loaded with llama-cpp-python, wrapped
    in a llama-cpp-agent using the ChatML message format, primed with the
    previous turns in ``history`` and then asked to answer ``message``.

    Args:
        message: The new user message.
        history: Previous turns as ``(user_text, assistant_text)`` pairs.
        max_tokens: Value assigned to the sampling settings' ``max_tokens``.
        temperature: Sampling temperature.
        top_p: Nucleus sampling threshold.
        top_k: Top-k sampling cutoff.
        repeat_penalty: Repetition penalty.
        model: File name of the GGUF model inside the ``models`` directory.

    Yields:
        The response text accumulated so far, once per streamed chunk, so the
        caller can progressively replace the displayed answer.

    Raises:
        gr.Error: If the selected model file does not exist under ``models``.
    """
    # Imported lazily: llama_cpp and llama_cpp_agent are pip-installed when
    # this module is first executed, so they are resolved per request rather
    # than at the top of the file.
    import os

    from llama_cpp import Llama
    from llama_cpp_agent import LlamaCppAgent
    from llama_cpp_agent import MessagesFormatterType
    from llama_cpp_agent.providers import LlamaCppPythonProvider
    from llama_cpp_agent.chat_history import BasicChatHistory
    from llama_cpp_agent.chat_history.messages import Roles
    print(message)
    print(history)

    # The model picker can offer files that were never downloaded; fail with a
    # readable UI error instead of an opaque loader exception.
    model_path = f"models/{model}"
    if not os.path.isfile(model_path):
        raise gr.Error(
            f"Model file '{model}' is not available on this Space. "
            "Please select another model."
        )

    llm = Llama(
        model_path=model_path,
        flash_attn=True,
        n_threads=40,
        n_gpu_layers=81,
        n_batch=1024,
        n_ctx=8192,
    )
    provider = LlamaCppPythonProvider(llm)

    agent = LlamaCppAgent(
        provider,
        system_prompt="You are Alan, a financial analyst.",
        predefined_messages_formatter_type=MessagesFormatterType.CHATML,
        debug_output=True
    )

    # Start from the provider defaults and override with the UI values.
    settings = provider.get_provider_default_settings()
    settings.temperature = temperature
    settings.top_k = top_k
    settings.top_p = top_p
    settings.max_tokens = max_tokens
    settings.repeat_penalty = repeat_penalty
    settings.stream = True

    # Replay earlier turns so the model sees the whole conversation.
    messages = BasicChatHistory()
    for turn in history:
        user_text, assistant_text = turn[0], turn[1]
        # A turn may carry None for a missing side (presumably possible with
        # tuple-style chat history - confirm against the Gradio version in
        # use); such entries have no content to replay and are skipped.
        if user_text is not None:
            messages.add_message({'role': Roles.user, 'content': user_text})
        if assistant_text is not None:
            messages.add_message({'role': Roles.assistant, 'content': assistant_text})

    stream = agent.get_chat_response(
        message,
        llm_sampling_settings=settings,
        chat_history=messages,
        returns_streaming_generator=True,
        print_output=False,
    )

    # Yield the running text, not the individual chunk, on every step.
    outputs = ""
    for output in stream:
        outputs += output
        yield outputs


# Starter prompts offered in the chat UI. They are kept as plain strings here
# and wrapped below so that every example is a one-element list.
_EXAMPLE_PROMPTS = (
    "What is the difference between a CDS and a CDO, which one is better if inflation raise.",
    "According to the latest news, is an asset swap better than the long underlying ?",
    "Give me the latest ESG activity of  banks in 2023",
    "Summarize the latest federal reserve's beige book",
    "Based on the recent market updates and economic trends, give me some investment advice and insights. Justify each advice.",
    "Based only on the last two weeks news, tell me what are the most important economics and financial news in developed markets (European and US market)",
)

examples = [[prompt] for prompt in _EXAMPLE_PROMPTS]

# Extra inputs shown alongside the chat box. Their order matches the
# parameters respond() takes after (message, history): max_tokens,
# temperature, top_p, top_k, repeat_penalty, model.
generation_controls = [
    gr.Slider(minimum=1, maximum=8192, value=8192, step=1, label="Max tokens"),
    gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
    gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k"),
    gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty"),
    gr.Dropdown(
        ["Finance_dolphin-2.9.1-yi-1.5-9b_Q8_0.gguf", 'Finance_dolphin-2.9.1-yi-1.5-34b-Q8_0.gguf'],
        value="Finance_dolphin-2.9.1-yi-1.5-9b_Q8_0.gguf",
        label="Model",
    ),
]

# Soft base theme with the "Exo" Google font and dark-mode colour overrides.
banker_theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="gray",
    font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"],
).set(
    body_background_fill_dark="#0f172a",
    block_background_fill_dark="#0f172a",
    block_border_width="1px",
    block_title_background_fill_dark="#070d1b",
    #input_background_fill_dark="#0c1425",
    button_secondary_background_fill_dark="#070d1b",
    border_color_primary_dark="#21293b",
    background_fill_secondary_dark="#0f172a",
    color_accent_soft_dark="transparent",
)

# Chat window that displays the PLACEHOLDER card while the conversation is empty.
chat_window = gr.Chatbot(scale=1, placeholder=PLACEHOLDER)

# Chat UI: respond() produces the streamed answers for every submitted message.
demo = gr.ChatInterface(
    respond,
    additional_inputs=generation_controls,
    theme=banker_theme,
    examples=examples,
    examples_per_page=3,
    css=css,
    retry_btn="Retry",
    undo_btn="Undo",
    clear_btn="Clear",
    submit_btn="Send",
    description="BANKER++ is fine-tuned on Cognitive Computation: Chat Dolphin 🐬 2.9.1-yi-1.5-34b",
    chatbot=chat_window,
)

# Start the web server only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()