Spaces:
Paused
Paused
File size: 4,683 Bytes
7d51224 1044c29 7d51224 e3277b6 7d51224 c3fd9b2 1044c29 7d51224 acc58cf 7d51224 86f94f0 b7ec1ef e3277b6 b7ec1ef 86f94f0 6218ec6 e3277b6 b7ec1ef e3277b6 3151c18 7d973d2 6218ec6 7d973d2 6218ec6 7d973d2 6218ec6 e3277b6 b7ec1ef 86f94f0 e3277b6 86f94f0 e3277b6 86f94f0 e3277b6 86f94f0 b7ec1ef 86f94f0 7d51224 0d521c3 1044c29 86f94f0 7d51224 0d521c3 7d51224 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import fastapi
import markdown
import uvicorn
from ctransformers import AutoModelForCausalLM
from fastapi.responses import HTMLResponse
from fastapi.middleware.cors import CORSMiddleware
from sse_starlette.sse import EventSourceResponse
from pydantic import BaseModel
# Load the StarChat Alpha GGML weights once at import time; the request
# handlers in this file all use this single module-level model object.
llm = AutoModelForCausalLM.from_pretrained(
    "NeoDim/starchat-alpha-GGML",
    model_type="starcoder",
    model_file="starchat-alpha-ggml-q4_0.bin",
)

# ASGI application object that the route decorators below register against.
app = fastapi.FastAPI(title="Starchat Alpha")

# Fully permissive CORS: any origin, method and header is accepted, and
# credentialed requests are allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is
# broader than most deployments need -- confirm this is intentional.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_credentials=True,
    allow_origins=["*"],
)
@app.get("/")
async def index():
    """Serve README.md, rendered from Markdown to HTML, as the landing page.

    The file is opened by the relative path ``README.md`` and re-read on
    every request; the rendered HTML is returned with status 200.
    """
    with open("README.md", "r", encoding="utf-8") as readme_file:
        rendered = markdown.markdown(readme_file.read())
    return HTMLResponse(content=rendered, status_code=200)
# JSON request body accepted by the POST /v1/chat/completions handler.
class ChatCompletionRequest(BaseModel):
    # Prompt text; the handler passes it to the model's tokenizer unchanged.
    prompt: str
@app.get("/demo")
async def demo():
    """Return a static HTML page that displays the streamed completion.

    The page opens an ``EventSource`` and appends the data of every received
    message to the ``#content`` element. Before appending, the script:

    * drops any message containing a triple-backtick fence and remembers
      that one was seen;
    * prefixes the next fence-free message with a fence and appends
      ``<br /><code>`` to it;
    * adds a line break after a lone ``:``, before ``<|assistant|>``, and in
      place of ``<|end|>``.
    """
    # The stream URL is hard-coded to the hosted Space, so the page always
    # talks to that deployment rather than to the server that served it.
    # NOTE(review): streamed text is appended via innerHTML without escaping;
    # confirm that is acceptable for the prompts this demo is used with.
    #
    # Fixes relative to the previous markup:
    #   * the heading rule used the descendant selector "h1 h2 h3 h4 h5 h6",
    #     which matches only an h6 nested inside h5 ... inside h1; it is now
    #     the comma-separated selector list covering each heading level;
    #   * "backticks" was assigned without a declaration (implicit global);
    #     it is now a block-scoped const.
    html_content = """
<!DOCTYPE html>
<html>
<head>
<script src="https://cdnjs.cloudflare.com/ajax/libs/showdown/1.9.1/showdown.min.js"></script>
</head>
<body>
<style>
body {
font-family: -apple-system,BlinkMacSystemFont,"Segoe UI",Helvetica,Arial,sans-serif,"Apple Color Emoji","Segoe UI Emoji","Segoe UI Symbol";
}
code {
font-family: "SFMono-Regular",Consolas,"Liberation Mono",Menlo,Courier,monospace !important;
display: inline-block;
background-color: lightgray;
}
h1, h2, h3, h4, h5, h6 {
font-family: Roboto,-apple-system,BlinkMacSystemFont,"Helvetica Neue","Segoe UI","Oxygen","Ubuntu","Cantarell","Open Sans",sans-serif;
}
#content {
box-sizing: border-box;
min-width: 200px;
max-width: 980px;
margin: 0 auto;
padding: 45px;
font-size: 16px;
}
@media (max-width: 767px) {
#content {
padding: 15px;
}
}
</style>
<script type="module" src="https://cdn.skypack.dev/@vanillawc/wc-markdown"></script>
<wc-markdown id="content" highlight><h1>starchat-alpha-q4.0</h1></wc-markdown>
<script>
var converter = new showdown.Converter();
var source = new EventSource("https://matthoffner-starchat-alpha.hf.space/stream");
let eventCache;
source.onmessage = function(event) {
let eventData = event.data;
console.log(eventData);
if (eventData.includes("```")) {
eventCache = true;
return;
}
if (eventCache && !eventData.includes("```")) {
const backticks = "```";
eventData = `${backticks}${eventData}<br /><code>`;
eventCache = false;
}
if (eventData === ":") {
eventData = `${eventData}<br />`;
}
if (eventData === "<|assistant|>") {
eventData = `<br />${eventData}`;
}
if (eventData === "<|end|>") {
eventData = "<br />";
}
document.getElementById("content").innerHTML = document.getElementById("content").innerHTML + eventData;
};
</script>
</body>
</html>
"""
    return HTMLResponse(content=html_content, status_code=200)
@app.get("/stream")
async def chat(prompt="Write a simple express server in rust"):
    """Stream a completion for ``prompt`` as server-sent events.

    The first event echoes the prompt, each following event carries the
    detokenized text of one generated chunk, and a final empty event is
    sent once generation has finished.
    """
    # NOTE(review): the POST handler later in this file is also named `chat`
    # and rebinds the module-level name; both routes are still registered
    # because each decorator runs at definition time.
    # NOTE(review): llm.generate is iterated synchronously inside an async
    # generator -- presumably this holds the event loop while each chunk is
    # produced; confirm whether concurrent requests matter here.
    async def emit_events(prompt_tokens, model):
        yield prompt
        for piece in model.generate(prompt_tokens):
            yield model.detokenize(piece)
        yield ""

    return EventSourceResponse(emit_events(llm.tokenize(prompt), llm))
@app.post("/v1/chat/completions")
async def chat(request: ChatCompletionRequest, response_mode=None):
    """Stream a completion for ``request.prompt`` as server-sent events.

    Each event carries the detokenized text of one generated chunk; a final
    empty event is sent once generation has finished.
    """
    # NOTE(review): response_mode is accepted but never read in this handler.
    # NOTE(review): this definition reuses the name `chat` from the
    # GET /stream handler earlier in the file and rebinds it at module level.
    async def emit_events(prompt_tokens, model):
        for piece in model.generate(prompt_tokens):
            yield model.detokenize(piece)
        yield ""

    return EventSourceResponse(emit_events(llm.tokenize(request.prompt), llm))
# Script entry point: when the file is executed directly, serve the app with
# uvicorn on port 8000, bound to all interfaces.
if __name__ == "__main__":
    uvicorn.run(app, port=8000, host="0.0.0.0")
|