Spaces:

Empereur-Pirate
/

Empereur

Sleeping

App Files Files Community

Empereur-Pirate committed on Feb 13

Commit

79e06e3

•

1 Parent(s): 44a140e

Update main.py

Browse files

Files changed (1) hide show

main.py +39 -67

main.py CHANGED Viewed

@@ -7,89 +7,61 @@ from typing import Optional, Any
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
-# Helper function to read raw request bodies
-async def parse_raw(request: Request):
-    return await request.body()
 app = FastAPI()
-# Serve the static files
-app.mount("/static", StaticFiles(directory="static"), name="static")
-pipe_flan = pipeline("text2text-generation", model="google/flan-t5-small")
-def t5(input: str) -> dict[str, str]:
-    output = pipe_flan(input)
-    return {"output": output[0].get("generated_text", "")}
-class ParseRaw(BaseModel):
-    raw: bytes
-@app .post("/infer_t5")
-async def infer_endpoint(data: ParseRaw = Depends(parse_raw)):
-    """Receive input and generate text."""
     try:
-        input_text = data.raw.decode("utf-8")
-        # Validate that the input is a string
-        assert isinstance(input_text, str), "Input must be a string."
-        if input_text is None:
-            return {"error": "No input text detected."}
-        result = t5(input_text)
-        return result
-    except AssertionError as e:
-        return JSONResponse({"error": f"Invalid Input Format: {e}"}, status_code=400)
-@app .get("/infer_t5")
-def get_default_inference_endpoint():
-    return {"message": "Use POST method to submit input data"}
-# Initialize device
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-try:
-    # Initiate the model and tokenizer with the corrected pre-trained weights
-    model_config = AutoConfig.from_pretrained("152334H/miqu-1-70b-sf", trust_remote_code=True)
-    model = AutoModelForCausalLM.from_pretrained("152334H/miqu-1-70b-sf", config=model_config).to(device)
-    tokenizer = AutoTokenizer.from_pretrained("152334H/miqu-1-70b-sf")
-except Exception as e:
-    print("[WARNING]: Failed to load model and tokenizer conventionally.")
-    print(f"Exception: {e}")
-    # Configure a fallback mechanism similar to the original implementation
-    model_config = AutoConfig.from_pretrained("152334H/miqu-1-70b-sf", trust_remote_code=True)
-    # Load the model using the fallback configuration
-    model = AutoModelForCausalLM.from_pretrained("152334H/miqu-1-70b-sf", config=model_config).to(device)
-    tokenizer = AutoTokenizer.from_pretrained("152334H/miqu-1-70b-sf")
-def miuk_answer(query: str) -> str:
     query_tokens = tokenizer.encode(query, return_tensors="pt")
     query_tokens = query_tokens.to(device)
     answer = model.generate(query_tokens, max_length=128, temperature=1, pad_token_id=tokenizer.pad_token_id)
-    return tokenizer.decode(answer[:, 0]).replace(" ", "")
-@app .post("/infer_miku")
-async def infer_endpoint(data: ParseRaw = Depends(parse_raw)):
-    """Receive input and generate text."""
-    try:
-        input_text = data.raw.decode("utf-8")
-        # Validate that the input is a string
-        assert isinstance(input_text, str), "Input must be a string."
-        if input_text is None:
-            return {"error": "No input text detected."}
-        result = {"output": miuk_answer(input_text)}
-        return result
-    except AssertionError as e:
-        return JSONResponse({"error": f"Invalid Input Format: {e}"}, status_code=400)
-@app .get("/infer_miku")
-def get_default_inference_endpoint():
-    return {"message": "Use POST method to submit input data"}

 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 app = FastAPI()
+# Initialize device
+def initialize_device():
+    global device
+    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+initialize_device()
+# Helper function to read raw request bodies
+async def parse_raw(request: Request):
+    return await request.body()
+# Initialize the model and tokenizer with the corrected pre-trained weights
+def init_corrected_model():
+    global model_config, model, tokenizer
     try:
+        model_config = AutoConfig.from_pretrained("152334H/miqu-1-70b-sf", trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained("152334H/miqu-1-70b-sf", config=model_config).to(device)
+        tokenizer = AutoTokenizer.from_pretrained("152334H/miqu-1-70b-sf")
+    except Exception as e:
+        print("[WARNING]: Failed to load model and tokenizer conventionally.")
+        print(f"Exception: {e}")
+        model_config = AutoConfig.from_pretrained("152334H/miqu-1-70b-sf", trust_remote_code=True)
+        model = AutoModelForCausalLM.from_pretrained("152334H/miqu-1-70b-sf", config=model_config).to(device)
+        tokenizer = AutoTokenizer.from_pretrained("152334H/miqu-1-70b-sf")
+init_corrected_model()
+# Utility function to generate answers from the model
+def miuk_answer(query: str) -> dict[str, str]:
     query_tokens = tokenizer.encode(query, return_tensors="pt")
     query_tokens = query_tokens.to(device)
     answer = model.generate(query_tokens, max_length=128, temperature=1, pad_token_id=tokenizer.pad_token_id)
+    return {"output": tokenizer.decode(answer[:, 0])}
+# Endpoint handler to receive incoming queries and pass them to the utility function for processing
+@app.post("/infer_miku")
+async def infer_endpoint(data: BaseModel = Depends(parse_raw)):
+    input_text = data.raw.decode("utf-8")
+    if input_text is None or len(input_text) == 0:
+        return JSONResponse({"error": "Empty input received."}, status_code=400)
+    result = miuk_answer(input_text)
+    return result
+@app.get("/infer_miku")
+def get_default_inference_endpoint():
+    return {"message": "Use POST method to submit input data"}
+# Mount static files
+app.mount("/static", StaticFiles(directory="static"), name="static")
+# Initialization done
+print("Initialization Complete.")