Spaces:
Paused
Paused
import fastapi | |
import json | |
import markdown | |
import uvicorn | |
from ctransformers import AutoModelForCausalLM | |
from fastapi.responses import HTMLResponse | |
from fastapi.middleware.cors import CORSMiddleware | |
from sse_starlette.sse import EventSourceResponse | |
from ctransformers.langchain import CTransformers | |
from pydantic import BaseModel | |
from typing import List, Any | |
from typing_extensions import TypedDict, Literal | |
llm = AutoModelForCausalLM.from_pretrained("NeoDim/starchat-alpha-GGML", | |
model_file="starchat-alpha-ggml-q4_0.bin", | |
model_type="starcoder") | |
app = fastapi.FastAPI(title="Starchat Alpha") | |
app.add_middleware( | |
CORSMiddleware, | |
allow_origins=["*"], | |
allow_credentials=True, | |
allow_methods=["*"], | |
allow_headers=["*"], | |
) | |
async def index(): | |
with open("README.md", "r", encoding="utf-8") as readme_file: | |
md_template_string = readme_file.read() | |
html_content = markdown.markdown(md_template_string) | |
return HTMLResponse(content=html_content, status_code=200) | |
class ChatCompletionRequestMessage(BaseModel): | |
role: Literal["system", "user", "assistant"] = Field( | |
default="user", description="The role of the message." | |
) | |
content: str = Field(default="", description="The content of the message.") | |
class ChatCompletionRequest(BaseModel): | |
messages: List[ChatCompletionRequestMessage] = Field( | |
default=[], description="A list of messages to generate completions for." | |
) | |
async def chat(request: ChatCompletionRequest, response_mode=None): | |
tokens = llm.tokenize(request.messages) | |
async def server_sent_events(chat_chunks): | |
for token in llm.generate(chat_chunks): | |
yield llm.detokenize(token) | |
return EventSourceResponse(server_sent_events(tokens)) | |
if __name__ == "__main__": | |
uvicorn.run(app, host="0.0.0.0", port=8000) | |