from fastapi import FastAPI
from fastapi.encoders import jsonable_encoder
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import JSONResponse
from transformers import pipeline

# Text2text-generation pipeline that serves all requests.
generator = pipeline(model="mrm8488/t5-base-finetuned-question-generation-ap")

app = FastAPI()

# Globals reserved for the (currently disabled) StableLM startup hook below.
MODEL = None
TOKENIZER = None

# Example query string: ?input=%22Name%203%20shows%22
origins = ['https://aiforall.netlify.app']

app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/")
def llama(input: str):
    # Earlier StableLM chat-template path, kept for reference:
    # prompt = [{'role': 'user', 'content': "" + input}]
    # inputs = TOKENIZER.apply_chat_template(prompt, add_generation_prompt=True, return_tensors='pt')
    # tokens = MODEL.generate(inputs.to(MODEL.device), max_new_tokens=1024, temperature=0.3, do_sample=True)
    # tresponse = TOKENIZER.decode(tokens[0], skip_special_tokens=False)
    # print(tresponse)
    output = generator(input, max_length=100)
    response_message = {"message": output[0]["generated_text"]}
    json_response = jsonable_encoder(response_message)
    return JSONResponse(content=json_response)


# Disabled startup hook that loaded StableLM Zephyr 3B into the globals above.
# Re-enabling it would also require importing AutoTokenizer and AutoModelForCausalLM
# from transformers.
# @app.on_event("startup")
# def init_model():
#     global MODEL
#     global TOKENIZER
#     if not MODEL:
#         print("loading model")
#         TOKENIZER = AutoTokenizer.from_pretrained('stabilityai/stablelm-zephyr-3b')
#         MODEL = AutoModelForCausalLM.from_pretrained('stabilityai/stablelm-zephyr-3b', device_map="auto")
#         print("loaded model")
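
# A minimal sketch of how this service might be run and queried locally.
# The filename main.py and the use of uvicorn are assumptions, not part of
# the original code:
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#   curl 'http://localhost:8000/?input=%22Name%203%20shows%22'
#
# Given the handler above, the response is JSON of the form
# {"message": "<generated text>"}.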