thomasht86 committed on
Commit
76d828c
•
1 Parent(s): dd64fb8

deploy at 2024-08-23 11:29:10.212130

Browse files
Files changed (6) hide show
  1. Dockerfile +2 -2
  2. README.md +3 -3
  3. deploy_app.ipynb +1 -1
  4. main.py +123 -23
  5. requirements.in +6 -0
  6. requirements.txt +164 -5
Dockerfile CHANGED
@@ -1,8 +1,8 @@
1
  FROM python:3.10
2
  WORKDIR /code
3
  COPY --link --chown=1000 . .
4
- RUN mkdir -p /tmp/cache/ session/ db/
5
- RUN chmod a+rwx -R /tmp/cache/ session/ db/ assets/
6
  ENV HF_HUB_CACHE=HF_HOME
7
  RUN pip install --no-cache-dir -r requirements.txt
8
 
 
1
  FROM python:3.10
2
  WORKDIR /code
3
  COPY --link --chown=1000 . .
4
+ RUN mkdir -p /tmp/cache/ session/ db/
5
+ RUN chmod a+rwx -R /tmp/cache/ session/ db/
6
  ENV HF_HUB_CACHE=HF_HOME
7
  RUN pip install --no-cache-dir -r requirements.txt
8
 
README.md CHANGED
@@ -35,7 +35,7 @@ Search page | Login page | Query logs
35
 
36
  ### Why?
37
 
38
- We have recognized the need, both for ourselves and others, to be able to set up a simple frontend for Vespa, without having to navigate the frontend framework jungle.
39
 
40
  This sample-app can serve as an example of how you can build and deploy a simple frontend for Vespa, using FastHTML.
41
 
@@ -43,10 +43,10 @@ This sample-app can serve as an example of how you can build and deploy a simple
43
 
44
  #### 1. Clone this folder to your local machine 📂
45
 
46
- The command below will clone the repository and only fetch the `fasthtml-frontend` folder.
47
 
48
  ```bash
49
- git clone --depth 1 --filter=blob:none --sparse https://github.com/vespa-engine/sample-apps.git temp-sample-apps && cd temp-sample-apps && git sparse-checkout set fasthtml-frontend && mkdir -p ../fasthtml-frontend && mv fasthtml-frontend/* ../fasthtml-frontend/ && cd .. && rm -rf temp-sample-apps
50
  ```
51
 
52
  #### 2. Install dependencies 🔧
 
35
 
36
  ### Why?
37
 
38
+ We have recognized the need, both for ourselves and others, to be able to set up a simple frontend for Vespa, without having to navigate the frontend framework jungle. Demo frontend apps often end up with a bunch of dependencies and angry GitHub Renovate bots. :robot: :sad:
39
 
40
  This sample-app can serve as an example of how you can build and deploy a simple frontend for Vespa, using FastHTML.
41
 
 
43
 
44
  #### 1. Clone this folder to your local machine 📂
45
 
46
+ The command below will clone the repository and only fetch the `fasthtml-demo` directory.
47
 
48
  ```bash
49
+ git clone --depth 1 --filter=blob:none --sparse https://github.com/vespa-engine/sample-apps.git temp-sample-apps && cd temp-sample-apps && git sparse-checkout set fasthtml-demo && mkdir -p ../fasthtml-demo && mv fasthtml-demo/* ../fasthtml-demo/ && cd .. && rm -rf temp-sample-apps
50
  ```
51
 
52
  #### 2. Install dependencies 🔧
deploy_app.ipynb CHANGED
@@ -211,7 +211,7 @@
211
  " ),\n",
212
  " AuthClient(\n",
213
  " id=\"token\",\n",
214
- " permissions=[\"read\", \"write\"],\n",
215
  " parameters=[Parameter(\"token\", {\"id\": token_id})],\n",
216
  " ),\n",
217
  " ],\n",
 
211
  " ),\n",
212
  " AuthClient(\n",
213
  " id=\"token\",\n",
214
+ " permissions=[\"read\"], # Token client only needs read permission\n",
215
  " parameters=[Parameter(\"token\", {\"id\": token_id})],\n",
216
  " ),\n",
217
  " ],\n",
main.py CHANGED
@@ -37,9 +37,11 @@ from fasthtml.common import (
37
  Beforeware,
38
  Hidden,
39
  Request,
 
 
40
  )
41
  from fasthtml.components import Nav, Article, Header, Mark
42
- from fasthtml.pico import Search
43
  from starlette.middleware import Middleware
44
  from starlette.middleware.base import BaseHTTPMiddleware
45
  from starlette.middleware.sessions import SessionMiddleware
@@ -52,11 +54,15 @@ from hmac import compare_digest
52
  from io import StringIO
53
  import csv
54
  import tempfile
 
 
 
55
 
56
- DEV_MODE = True
57
 
58
  if DEV_MODE:
59
  print("Running in DEV_MODE - Hot reload enabled")
 
60
  from dotenv import load_dotenv
61
 
62
  load_dotenv()
@@ -84,7 +90,9 @@ db = database(DB_FILE)
84
  queries = db.t.queries
85
  if queries not in db.t:
86
  # You can pass a dict, or kwargs, to most MiniDataAPI methods.
87
- queries.create(dict(qid=int, query=str, sess_id=str, timestamp=int), pk="qid")
 
 
88
  # Add autoincrement to the qid column
89
  db.query("ALTER TABLE queries ADD COLUMN qid INTEGER PRIMARY KEY AUTOINCREMENT")
90
  Query = queries.dataclass()
@@ -111,7 +119,24 @@ def user_auth_before(req, sess):
111
  return login_redir
112
 
113
 
114
- headers = (picolink, MarkdownJS(), HighlightJS(langs=["json", "python"]), favicon, fa)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
116
  # Sesskey
117
  sess_key_path = "session/.sesskey"
@@ -167,6 +192,13 @@ sesskey = get_key(fname=sess_key_path)
167
  print(f"Session key: {sesskey}")
168
 
169
 
 
 
 
 
 
 
 
170
  def get_navbar(admin: bool):
171
  print(f"In get_navbar: {admin}")
172
  bar = Nav(
@@ -230,10 +262,32 @@ def get_navbar(admin: bool):
230
  return bar
231
 
232
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  @app.route("/")
234
  def get(sess):
235
  # Can not get auth directly, as it is skipped in beforeware
236
  auth = sess.get("auth", False)
 
 
 
 
 
 
 
 
237
  return (
238
  Title("Vespa demo"),
239
  get_navbar(auth),
@@ -249,12 +303,48 @@ def get(sess):
249
  Button(
250
  "Search",
251
  hx_get="/search",
252
- hx_include="#userquery",
 
253
  hx_target="#results",
254
  hx_indicator="#spinner",
255
  ),
256
  style="margin: 10% 10px 0 0;",
257
  ),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
258
  # Section(
259
  # Input(
260
  # id="suggestion-input",
@@ -276,16 +366,9 @@ def get(sess):
276
  # ),
277
  # id="suggestions",
278
  # ),
 
279
  Section(
280
- Div(
281
- A(
282
- id="spinner",
283
- aria_busy="true",
284
- cls="htmx-indicator",
285
- style="font-size: 2em;",
286
- ),
287
- style="text-align: center; margin-top: 40px;",
288
- ),
289
  id="results",
290
  hx_swap="innerHTML",
291
  style="margin: 20px;",
@@ -356,9 +439,9 @@ def replace_hi_with_strong(text):
356
  return elements
357
 
358
 
359
- def log_query_to_db(query, sess):
360
  return queries.insert(
361
- Query(query=query, sess_id=sesskey, timestamp=int(time.time()))
362
  )
363
 
364
 
@@ -432,22 +515,38 @@ async def expand(request: Request, docid: str, expand: bool):
432
  )
433
 
434
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
435
  @app.get("/search")
436
- async def search(userquery: str, sess):
437
  print(sess)
438
  if "queries" not in sess:
439
  sess["queries"] = []
440
- else:
441
- sess["queries"].append(userquery)
442
  print(f"Searching for: {userquery}")
443
- log_query_to_db(userquery, sess)
 
 
444
  async with vespa_app.asyncio() as session:
445
  resp = await session.query(
446
- yql="select * from sources * where userQuery() or ({targetHits:1000}nearestNeighbor(embedding,q)) limit 10;",
447
  query=userquery,
448
  hits=10,
449
- ranking="fusion",
450
- body={"input.query(q)": f"embed({userquery})"},
451
  )
452
  records = []
453
  fields = ["id", "title", "body"]
@@ -459,6 +558,7 @@ async def search(userquery: str, sess):
459
  results = parse_results(records)
460
  json_dump = json.dumps(resp.get_json(), indent=4)
461
  return Div(
 
462
  # Accordion (with Details)
463
  Details(
464
  Summary("Full JSON response"),
 
37
  Beforeware,
38
  Hidden,
39
  Request,
40
+ H3,
41
+ Style,
42
  )
43
  from fasthtml.components import Nav, Article, Header, Mark
44
+ from fasthtml.pico import Search, Grid, Fieldset, Label
45
  from starlette.middleware import Middleware
46
  from starlette.middleware.base import BaseHTTPMiddleware
47
  from starlette.middleware.sessions import SessionMiddleware
 
54
  from io import StringIO
55
  import csv
56
  import tempfile
57
+ from enum import Enum
58
+ from typing import Tuple as T
59
+ from urllib.parse import quote
60
 
61
+ DEV_MODE = False
62
 
63
  if DEV_MODE:
64
  print("Running in DEV_MODE - Hot reload enabled")
65
+ print("Loading environment variables from .env")
66
  from dotenv import load_dotenv
67
 
68
  load_dotenv()
 
90
  queries = db.t.queries
91
  if queries not in db.t:
92
  # You can pass a dict, or kwargs, to most MiniDataAPI methods.
93
+ queries.create(
94
+ dict(qid=int, query=str, ranking=str, sess_id=str, timestamp=int), pk="qid"
95
+ )
96
  # Add autoincrement to the qid column
97
  db.query("ALTER TABLE queries ADD COLUMN qid INTEGER PRIMARY KEY AUTOINCREMENT")
98
  Query = queries.dataclass()
 
119
  return login_redir
120
 
121
 
122
+ spinner_css = Style("""
123
+ .htmx-indicator {
124
+ display: none; /* Hide spinner by default */
125
+ }
126
+
127
+ .htmx-indicator.htmx-request {
128
+ display: block;
129
+ }
130
+ """)
131
+
132
+ headers = (
133
+ picolink,
134
+ MarkdownJS(),
135
+ HighlightJS(langs=["json", "python"]),
136
+ favicon,
137
+ fa,
138
+ spinner_css,
139
+ )
140
 
141
  # Sesskey
142
  sess_key_path = "session/.sesskey"
 
192
  print(f"Session key: {sesskey}")
193
 
194
 
195
# Enum of the rank profiles offered by the ranking radio buttons on the search page.
class RankProfile(str, Enum):
    """Rank profile names that a search request can ask for.

    Members subclass ``str`` so they compare equal to the raw query-string
    values (``"bm25"``, ``"semantic"``, ``"fusion"``) sent by the form.
    """

    bm25 = "bm25"
    semantic = "semantic"
    fusion = "fusion"

    def __str__(self) -> str:
        """Return the bare profile name rather than ``RankProfile.<name>``."""
        # The default Enum.__str__ yields "RankProfile.bm25"; the search
        # handler passes str(ranking) on as the profile name, so the plain
        # value is what str() needs to produce.
        return self.value
200
+
201
+
202
  def get_navbar(admin: bool):
203
  print(f"In get_navbar: {admin}")
204
  bar = Nav(
 
262
  return bar
263
 
264
 
265
def spinner_div(hidden: bool = False):
    """Build the wrapper ``Div`` holding the ``#spinner`` loading indicator.

    Args:
        hidden: When true the wrapper is styled ``display: none;``; otherwise
            it is centred with a top margin.

    Returns:
        A ``Div`` containing a single ``A`` element with ``id="spinner"`` and
        the ``htmx-indicator`` class.
    """
    if hidden:
        wrapper_style = "display: none;"
    else:
        wrapper_style = "text-align: center; margin-top: 40px;"

    # The anchor is the element targeted by hx_indicator="#spinner".
    indicator = A(
        id="spinner",
        aria_busy="true",
        cls="htmx-indicator",
        style="font-size: 2em;",
    )
    return Div(indicator, style=wrapper_style)
277
+
278
+
279
  @app.route("/")
280
  def get(sess):
281
  # Can not get auth directly, as it is skipped in beforeware
282
  auth = sess.get("auth", False)
283
+ queries = [
284
+ "Breast Cancer Cells Feed on Cholesterol",
285
+ "Treating Asthma With Plants vs. Pills",
286
+ "Alkylphenol Endocrine Disruptors",
287
+ "Testing Turmeric on Smokers",
288
+ "The Role of Pesticides in Parkinson's Disease",
289
+ "Vitamin D for sleep quality in older adults",
290
+ ]
291
  return (
292
  Title("Vespa demo"),
293
  get_navbar(auth),
 
303
  Button(
304
  "Search",
305
  hx_get="/search",
306
+ # include userquery and id of selected ranking radio button
307
+ hx_include="#userquery, input[name=ranking]:checked",
308
  hx_target="#results",
309
  hx_indicator="#spinner",
310
  ),
311
  style="margin: 10% 10px 0 0;",
312
  ),
313
+ Fieldset(
314
+ Input(type="radio", id="bm25", name="ranking", value="bm25"),
315
+ Label("BM25", htmlfor="bm25"),
316
+ Input(type="radio", id="semantic", name="ranking", value="semantic"),
317
+ Label("Semantic", htmlfor="semantic"),
318
+ Input(
319
+ type="radio",
320
+ id="fusion",
321
+ name="ranking",
322
+ value="fusion",
323
+ checked="",
324
+ ),
325
+ Label("Reciprocal Rank fusion", htmlfor="fusion"),
326
+ style="margin: 10px; text-align: center;",
327
+ id="ranking",
328
+ ),
329
+ H3("Example queries"),
330
+ # Buttons with predefined search queries
331
+ Grid(
332
+ *[
333
+ Button(
334
+ query,
335
+ hx_get="/search?userquery=" + query,
336
+ hx_include="input[name=ranking]:checked",
337
+ hx_target="#results",
338
+ hx_indicator="#spinner",
339
+ style="margin: 10px; padding: 5px;",
340
+ cls="secondary outline",
341
+ id=f"example-{qid}",
342
+ )
343
+ for qid, query in enumerate(queries)
344
+ ],
345
+ # Make the grid buttons have same height and distribute evenly and center align
346
+ style="grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));",
347
+ ),
348
  # Section(
349
  # Input(
350
  # id="suggestion-input",
 
366
  # ),
367
  # id="suggestions",
368
  # ),
369
+ # Display the spinner div only if #spinner does not already exist
370
  Section(
371
+ spinner_div(),
 
 
 
 
 
 
 
 
372
  id="results",
373
  hx_swap="innerHTML",
374
  style="margin: 20px;",
 
439
  return elements
440
 
441
 
442
def log_query_to_db(query, ranking, sess):
    """Insert one row into the ``queries`` table for a search request.

    Args:
        query: The user's query text.
        ranking: Name of the rank profile used for the search.
        sess: The request session (currently not read here).

    Returns:
        Whatever ``queries.insert`` returns for the new row.
    """
    # NOTE(review): `sess` is unused and sess_id is filled from the
    # module-level `sesskey`, so every row carries the same value -- confirm
    # whether a per-session identifier was intended.
    record = Query(
        query=query,
        ranking=ranking,
        sess_id=sesskey,
        timestamp=int(time.time()),  # whole seconds since the epoch
    )
    return queries.insert(record)
446
 
447
 
 
515
  )
516
 
517
 
518
# Returns tuple of (yql, body(dict)) based on the ranking profile
def get_yql(ranking: RankProfile, userquery: str) -> T[str, dict]:
    """Choose the YQL statement and extra request body for a rank profile.

    Args:
        ranking: A ``RankProfile`` member or its string value
            (``"bm25"``, ``"semantic"`` or ``"fusion"``).
        userquery: The raw query text; for the profiles that use
            ``nearestNeighbor`` it is wrapped in ``embed(...)`` as the
            ``input.query(q)`` value.

    Returns:
        ``(yql, body)`` where ``body`` is an empty dict for ``bm25``.

    Raises:
        ValueError: If ``ranking`` is not a known rank profile. Previously an
            unknown value fell through every branch and failed with an
            ``UnboundLocalError`` on the return statement.
    """
    try:
        profile = RankProfile(ranking)
    except ValueError:
        allowed = ", ".join(member.value for member in RankProfile)
        raise ValueError(
            f"Unknown ranking profile {ranking!r}; expected one of: {allowed}"
        ) from None

    if profile is RankProfile.bm25:
        # Pure text matching: no query embedding is needed.
        return "select * from sources * where userQuery() limit 10", {}

    embed_body = {"input.query(q)": f"embed({userquery})"}
    if profile is RankProfile.semantic:
        return (
            "select * from sources * where ({targetHits:10}nearestNeighbor(embedding,q)) limit 10",
            embed_body,
        )

    # Remaining profile is fusion: nearest-neighbour retrieval ranked
    # together with the text query.
    return (
        "select * from sources * where rank({targetHits:1000}nearestNeighbor(embedding,q), userQuery()) limit 10",
        embed_body,
    )
530
+
531
+
532
  @app.get("/search")
533
+ async def search(userquery: str, ranking: str, sess):
534
  print(sess)
535
  if "queries" not in sess:
536
  sess["queries"] = []
537
+ quoted = quote(userquery) + "&ranking=" + ranking
538
+ sess["queries"].append(quoted)
539
  print(f"Searching for: {userquery}")
540
+ print(f"Ranking: {ranking}")
541
+ log_query_to_db(userquery, ranking, sess)
542
+ yql, body = get_yql(ranking, userquery)
543
  async with vespa_app.asyncio() as session:
544
  resp = await session.query(
545
+ yql=yql,
546
  query=userquery,
547
  hits=10,
548
+ ranking=str(ranking),
549
+ body=body,
550
  )
551
  records = []
552
  fields = ["id", "title", "body"]
 
558
  results = parse_results(records)
559
  json_dump = json.dumps(resp.get_json(), indent=4)
560
  return Div(
561
+ spinner_div(),
562
  # Accordion (with Details)
563
  Details(
564
  Summary("Full JSON response"),
requirements.in ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ python-fasthtml==0.4.4
2
+ huggingface-hub
3
+ git+https://github.com/AnswerDotAI/fasthtml-hf@a7ae831a1bd01105a9f771fb3a4e4c454ddc3176 # latest released version did not work
4
+ pyvespa
5
+ vespacli
6
+ python-dotenv
requirements.txt CHANGED
@@ -1,5 +1,164 @@
1
- python-fasthtml
2
- git+https://github.com/AnswerDotAI/fasthtml-hf@a7ae831a1bd01105a9f771fb3a4e4c454ddc3176 # latest released version did not work
3
- pyvespa
4
- vespacli
5
- python-dotenv
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file was autogenerated by uv via the following command:
2
+ # uv pip compile requirements.in --output-file requirements.txt
3
+ aiohappyeyeballs==2.4.0
4
+ # via aiohttp
5
+ aiohttp==3.10.5
6
+ # via pyvespa
7
+ aiosignal==1.3.1
8
+ # via aiohttp
9
+ anyio==4.4.0
10
+ # via
11
+ # httpx
12
+ # starlette
13
+ # watchfiles
14
+ async-timeout==4.0.3
15
+ # via aiohttp
16
+ attrs==24.2.0
17
+ # via aiohttp
18
+ beautifulsoup4==4.12.3
19
+ # via python-fasthtml
20
+ certifi==2024.7.4
21
+ # via
22
+ # httpcore
23
+ # httpx
24
+ # requests
25
+ cffi==1.17.0
26
+ # via cryptography
27
+ charset-normalizer==3.3.2
28
+ # via requests
29
+ click==8.1.7
30
+ # via uvicorn
31
+ cryptography==43.0.0
32
+ # via pyvespa
33
+ docker==7.1.0
34
+ # via pyvespa
35
+ exceptiongroup==1.2.2
36
+ # via anyio
37
+ fastcore==1.7.1
38
+ # via
39
+ # fasthtml-hf
40
+ # fastlite
41
+ # python-fasthtml
42
+ # sqlite-minutils
43
+ fasthtml-hf @ git+https://github.com/AnswerDotAI/fasthtml-hf@a7ae831a1bd01105a9f771fb3a4e4c454ddc3176
44
+ # via -r requirements.in
45
+ fastlite==0.0.9
46
+ # via python-fasthtml
47
+ filelock==3.15.4
48
+ # via huggingface-hub
49
+ frozenlist==1.4.1
50
+ # via
51
+ # aiohttp
52
+ # aiosignal
53
+ fsspec==2024.6.1
54
+ # via huggingface-hub
55
+ h11==0.14.0
56
+ # via
57
+ # httpcore
58
+ # uvicorn
59
+ h2==4.1.0
60
+ # via httpx
61
+ hpack==4.0.0
62
+ # via h2
63
+ httpcore==1.0.5
64
+ # via httpx
65
+ httptools==0.6.1
66
+ # via uvicorn
67
+ httpx==0.27.0
68
+ # via
69
+ # python-fasthtml
70
+ # pyvespa
71
+ huggingface-hub==0.24.6
72
+ # via
73
+ # -r requirements.in
74
+ # fasthtml-hf
75
+ hyperframe==6.0.1
76
+ # via h2
77
+ idna==3.7
78
+ # via
79
+ # anyio
80
+ # httpx
81
+ # requests
82
+ # yarl
83
+ itsdangerous==2.2.0
84
+ # via python-fasthtml
85
+ jinja2==3.1.4
86
+ # via pyvespa
87
+ markupsafe==2.1.5
88
+ # via jinja2
89
+ multidict==6.0.5
90
+ # via
91
+ # aiohttp
92
+ # yarl
93
+ oauthlib==3.2.2
94
+ # via python-fasthtml
95
+ packaging==24.1
96
+ # via
97
+ # fastcore
98
+ # huggingface-hub
99
+ pycparser==2.22
100
+ # via cffi
101
+ python-dateutil==2.9.0.post0
102
+ # via
103
+ # python-fasthtml
104
+ # pyvespa
105
+ python-dotenv==1.0.1
106
+ # via
107
+ # -r requirements.in
108
+ # uvicorn
109
+ python-fasthtml==0.4.4
110
+ # via -r requirements.in
111
+ python-multipart==0.0.9
112
+ # via python-fasthtml
113
+ pyvespa==0.46.0
114
+ # via -r requirements.in
115
+ pyyaml==6.0.2
116
+ # via
117
+ # huggingface-hub
118
+ # uvicorn
119
+ requests==2.31.0
120
+ # via
121
+ # docker
122
+ # huggingface-hub
123
+ # pyvespa
124
+ # requests-toolbelt
125
+ requests-toolbelt==1.0.0
126
+ # via pyvespa
127
+ six==1.16.0
128
+ # via python-dateutil
129
+ sniffio==1.3.1
130
+ # via
131
+ # anyio
132
+ # httpx
133
+ soupsieve==2.6
134
+ # via beautifulsoup4
135
+ sqlite-minutils==3.37.0.post1
136
+ # via fastlite
137
+ starlette==0.38.2
138
+ # via python-fasthtml
139
+ tenacity==9.0.0
140
+ # via pyvespa
141
+ tqdm==4.66.5
142
+ # via huggingface-hub
143
+ typing-extensions==4.12.2
144
+ # via
145
+ # anyio
146
+ # huggingface-hub
147
+ # pyvespa
148
+ # uvicorn
149
+ urllib3==2.2.2
150
+ # via
151
+ # docker
152
+ # requests
153
+ uvicorn==0.30.6
154
+ # via python-fasthtml
155
+ uvloop==0.20.0
156
+ # via uvicorn
157
+ vespacli==8.391.23
158
+ # via -r requirements.in
159
+ watchfiles==0.23.0
160
+ # via uvicorn
161
+ websockets==13.0
162
+ # via uvicorn
163
+ yarl==1.9.4
164
+ # via aiohttp