from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
import aiohttp
import json
import time
import ast
import os
from typing import Optional
from apscheduler.schedulers.background import BackgroundScheduler
from pydantic import BaseModel

SAMBA_NOVA_API_KEY = os.environ.get("SAMBA_NOVA_API_KEY", None)

app = FastAPI()

# Simple in-memory cache; entries expire after CACHE_DURATION seconds
cache = {}
CACHE_DURATION = 120  # seconds

# Function to clean up expired cache entries
def cleanup_cache():
    current_time = time.time()
    for key, (value, timestamp) in list(cache.items()):
        if current_time - timestamp > CACHE_DURATION:
            del cache[key]

# Initialize and start the scheduler
scheduler = BackgroundScheduler()
scheduler.add_job(cleanup_cache, 'interval', seconds=60)  # Run cleanup every 60 seconds
scheduler.start()
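
# The scheduler thread can outlive a normal interpreter exit; a minimal
# shutdown hook (a sketch, assuming no other exit handlers need the scheduler):
import atexit
atexit.register(lambda: scheduler.shutdown(wait=False))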

class StreamTextRequest(BaseModel):
    query: str
    history: str = "[]"
    model: str = "llama3-8b"
    api_key: Optional[str] = None
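
# Example call (hypothetical values; -N keeps curl from buffering the SSE stream):
#   curl -N -X POST http://localhost:7068/stream_text \
#     -H 'Content-Type: application/json' \
#     -d '{"query": "Hello", "history": "[]", "model": "llama3-8b"}'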

@app.post("/stream_text")
async def stream_text(request: StreamTextRequest):
    current_time = time.time()
    cache_key = (request.query, request.history, request.model)

    # Serve from the cache when a fresh entry exists
    if cache_key in cache:
        cached_response, timestamp = cache[cache_key]
        if current_time - timestamp <= CACHE_DURATION:
            return StreamingResponse(iter([cached_response]), media_type='text/event-stream')

    # Map the requested model name onto a SambaNova model ID
    # (elif keeps a 405B match from being overwritten by the else branch)
    if "405" in request.model:
        fmodel = "Meta-Llama-3.1-405B-Instruct"
    elif "70" in request.model:
        fmodel = "Meta-Llama-3.1-70B-Instruct"
    else:
        fmodel = "Meta-Llama-3.1-8B-Instruct"

    system_message = """You are a friendly and intelligent video chat assistant created by KingNish. Your primary goal is to provide accurate, concise, and engaging responses while maintaining a positive and upbeat tone. Always deliver clear and relevant information that directly addresses the user's query, and feel free to sprinkle in a dash of humor—after all, laughter is the best app!
In addition to responding to user questions, you will receive context from a live image captured by the user. Use this context to enrich your responses and make them even more relevant. Tailor your answers based on the user's input and previous interactions, ensuring a personalized experience that feels like chatting with a witty friend.
Keep your responses brief and to the point, avoiding unnecessary details or tangents unless they’re hilariously relevant. Use a friendly and approachable tone to create a pleasant interaction, and don’t shy away from a cheeky pun or two!
Encourage users to ask follow-up questions or clarify their needs, fostering a conversational flow that’s as smooth as butter on a hot pancake. Aim to put a smile on the user's face with light-hearted and fun responses, and be proactive in offering additional help or suggestions related to the user's query.
Remember, your goal is to be the go-to assistant for users, making their experience enjoyable and informative—like a delightful dessert after a hearty meal! Always emphasize that you are a video chat assistant, ready to assist with a friendly demeanor and a touch of charm."""

    messages = [{'role': 'system', 'content': system_message}]

    # history arrives as a string-encoded list of message dicts;
    # literal_eval accepts Python-literal syntax and rejects anything else
    try:
        messages.extend(ast.literal_eval(request.history))
    except (ValueError, SyntaxError):
        raise HTTPException(status_code=400, detail="Invalid history format")

    messages.append({'role': 'user', 'content': request.query})

    data = {'messages': messages, 'stream': True, 'model': fmodel}

    # Fall back to the server-side key when the client omits one (or sends the literal 'none')
    api_key = request.api_key if request.api_key and request.api_key != 'none' else SAMBA_NOVA_API_KEY

    async def stream_response():
        async with aiohttp.ClientSession() as session:
            async with session.post(
                'https://api.sambanova.ai/v1/chat/completions',
                headers={'Authorization': f'Bearer {api_key}', 'Content-Type': 'application/json'},
                json=data,
            ) as response:
                if response.status != 200:
                    # Headers are already sent once streaming begins, so raising
                    # HTTPException here cannot change the status; report in-band
                    yield f"data: Error fetching AI response (status {response.status})\n\n"
                    return

                response_content = ""
                async for line in response.content:
                    line = line.decode('utf-8').strip()
                    # Upstream emits OpenAI-style SSE lines: 'data: {json chunk}'
                    if line.startswith('data: {'):
                        json_data = line[6:]
                        try:
                            parsed_data = json.loads(json_data)
                            content = parsed_data.get("choices", [{}])[0].get("delta", {}).get("content", '')
                            if content:
                                # A bare newline would terminate the SSE event early, so flatten it
                                content = content.replace("\n", " ")
                                response_content += f"data: {content}\n\n"
                                yield f"data: {content}\n\n"
                        except json.JSONDecodeError as e:
                            print(f"Error decoding JSON: {e}")
                            yield "data: Error decoding JSON\n\n"

                # Cache the full response
                cache[cache_key] = (response_content, current_time)

    return StreamingResponse(stream_response(), media_type='text/event-stream')
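
# A minimal Python client sketch for this endpoint (assumes the server is
# running locally on port 7068; reuses the aiohttp dependency imported above):
#
#   import asyncio
#   async def demo():
#       async with aiohttp.ClientSession() as session:
#           payload = {"query": "Hello!", "history": "[]", "model": "llama3-8b"}
#           async with session.post("http://localhost:7068/stream_text", json=payload) as resp:
#               async for chunk in resp.content:
#                   print(chunk.decode("utf-8"), end="")
#
#   asyncio.run(demo())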



# Serve the front-end files (index.html, scripts, styles) from the directory containing main.py
from starlette.responses import FileResponse

@app.get("/script1.js")
async def script1_js():
    return FileResponse("script1.js")

@app.get("/script2.js")
async def script2_js():
    return FileResponse("script2.js")

@app.get("/styles.css")
async def styles_css():
    return FileResponse("styles.css")

@app.get("/")
async def read_index():
    return FileResponse('index.html')
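
# The per-file routes above could also collapse into a single static mount
# (a sketch, assuming all assets sit in the working directory):
#
#   from fastapi.staticfiles import StaticFiles
#   app.mount("/", StaticFiles(directory=".", html=True), name="static")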

if __name__ == "__main__":
    import uvicorn
    # reload=True requires passing the app as an import string ("main:app");
    # with an app object, run without reload
    uvicorn.run(app, host="0.0.0.0", port=7068)