moriire committed on
Commit
542af36
·
verified ·
1 Parent(s): 96cc7ba

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -25
app.py CHANGED
@@ -36,31 +36,6 @@ llm_generate = llama_cpp.Llama.from_pretrained(
36
  logging.basicConfig(level=logging.INFO)
37
  logger = logging.getLogger(__name__)
38
 
39
- # Initialize Llama model
40
- """
41
- try:
42
- llm = Llama.from_pretrained(
43
- repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
44
- filename="*q4_0.gguf",
45
- verbose=False,
46
- n_ctx=4096,
47
- n_threads=4,
48
- n_gpu_layers=0,
49
- )
50
-
51
- llm = Llama(
52
- model_path=MODEL_PATH,
53
- chat_format="llama-2",
54
- n_ctx=4096,
55
- n_threads=8,
56
- n_gpu_layers=0,
57
- )
58
-
59
- except Exception as e:
60
- logger.error(f"Failed to load model: {e}")
61
- raise
62
- """
63
-
64
  app = fastapi.FastAPI(
65
  title="OpenGenAI",
66
  description="Your Excellect AI Physician")
@@ -105,6 +80,7 @@ async def chat(gen:GenModel):
105
  et = time()
106
  output["time"] = et - st
107
  messages.append({'role': "assistant", "content": output['choices'][0]['message']})
 
108
  return output
109
  except Exception as e:
110
  logger.error(f"Error in /complete endpoint: {e}")
 
36
  logging.basicConfig(level=logging.INFO)
37
  logger = logging.getLogger(__name__)
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
  app = fastapi.FastAPI(
40
  title="OpenGenAI",
41
  description="Your Excellect AI Physician")
 
80
  et = time()
81
  output["time"] = et - st
82
  messages.append({'role': "assistant", "content": output['choices'][0]['message']})
83
+ print(messages)
84
  return output
85
  except Exception as e:
86
  logger.error(f"Error in /complete endpoint: {e}")