Spaces:
Running
Running
thomasht86
commited on
Commit
•
557ba5e
1
Parent(s):
7105d61
deploy at 2024-08-22 13:23:49.666417
Browse files- .env.example +8 -0
- .gitignore +15 -0
- Dockerfile +10 -0
- Dockerfile.nonhf +16 -0
- LICENSE +201 -0
- README.md +127 -5
- assets/admin-login.png +0 -0
- assets/query-log.png +0 -0
- assets/search-page.png +0 -0
- config.ini +5 -0
- deploy_app.ipynb +621 -0
- main.py +713 -0
- requirements.txt +5 -0
.env.example
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# The URL of your Vespa Cloud application (you get it when running `deploy_app.ipynb`)
|
2 |
+
VESPA_APP_URL=https://my-vespa-endpoint.z.vespa-app.cloud/
|
3 |
+
# The secret token for your Vespa Cloud application (Created in Vespa Cloud Console)
|
4 |
+
VESPA_CLOUD_SECRET_TOKEN=vespa_cloud_mysecret_tokenabcdeabcdeabcdedcba
|
5 |
+
# Username of admin user (has access to query logs in SQLite DB)
|
6 |
+
ADMIN_NAME=admin
|
7 |
+
# Password of admin user
|
8 |
+
ADMIN_PWD=admin
|
.gitignore
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ft/
|
2 |
+
db/
|
3 |
+
session/
|
4 |
+
todo/
|
5 |
+
fasthtml/
|
6 |
+
.venv/
|
7 |
+
__pycache__/
|
8 |
+
.python-version
|
9 |
+
# Python stuff
|
10 |
+
*.pyc
|
11 |
+
*.pyo
|
12 |
+
|
13 |
+
|
14 |
+
.env
|
15 |
+
.sesskey
|
Dockerfile
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.10
|
2 |
+
WORKDIR /code
|
3 |
+
COPY --link --chown=1000 . .
|
4 |
+
RUN mkdir -p /tmp/cache/ session/ db/
|
5 |
+
RUN chmod a+rwx -R /tmp/cache/ session/ db/
|
6 |
+
ENV HF_HUB_CACHE=HF_HOME
|
7 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
8 |
+
|
9 |
+
ENV PYTHONUNBUFFERED=1 PORT=7860
|
10 |
+
CMD ["python", "main.py"]
|
Dockerfile.nonhf
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
FROM python:3.10
|
2 |
+
# Set working directory
|
3 |
+
WORKDIR /code
|
4 |
+
# Copy only the requirements file to leverage Docker cache
|
5 |
+
COPY --chown=1000 requirements.txt .
|
6 |
+
# Install dependencies
|
7 |
+
RUN pip install --no-cache-dir -r requirements.txt
|
8 |
+
# Copy the rest of the application code
|
9 |
+
COPY --chown=1000 . .
|
10 |
+
# Create necessary directories with appropriate permissions
|
11 |
+
RUN mkdir -p /tmp/cache/ session/ \
|
12 |
+
&& chmod a+rwx -R /tmp/cache/ session/
|
13 |
+
# Set environment variable
|
14 |
+
ENV PYTHONUNBUFFERED=1
|
15 |
+
# Define the command to run the application
|
16 |
+
CMD ["python", "main.py"]
|
LICENSE
ADDED
@@ -0,0 +1,201 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
Apache License
|
2 |
+
Version 2.0, January 2004
|
3 |
+
http://www.apache.org/licenses/
|
4 |
+
|
5 |
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
6 |
+
|
7 |
+
1. Definitions.
|
8 |
+
|
9 |
+
"License" shall mean the terms and conditions for use, reproduction,
|
10 |
+
and distribution as defined by Sections 1 through 9 of this document.
|
11 |
+
|
12 |
+
"Licensor" shall mean the copyright owner or entity authorized by
|
13 |
+
the copyright owner that is granting the License.
|
14 |
+
|
15 |
+
"Legal Entity" shall mean the union of the acting entity and all
|
16 |
+
other entities that control, are controlled by, or are under common
|
17 |
+
control with that entity. For the purposes of this definition,
|
18 |
+
"control" means (i) the power, direct or indirect, to cause the
|
19 |
+
direction or management of such entity, whether by contract or
|
20 |
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
21 |
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
22 |
+
|
23 |
+
"You" (or "Your") shall mean an individual or Legal Entity
|
24 |
+
exercising permissions granted by this License.
|
25 |
+
|
26 |
+
"Source" form shall mean the preferred form for making modifications,
|
27 |
+
including but not limited to software source code, documentation
|
28 |
+
source, and configuration files.
|
29 |
+
|
30 |
+
"Object" form shall mean any form resulting from mechanical
|
31 |
+
transformation or translation of a Source form, including but
|
32 |
+
not limited to compiled object code, generated documentation,
|
33 |
+
and conversions to other media types.
|
34 |
+
|
35 |
+
"Work" shall mean the work of authorship, whether in Source or
|
36 |
+
Object form, made available under the License, as indicated by a
|
37 |
+
copyright notice that is included in or attached to the work
|
38 |
+
(an example is provided in the Appendix below).
|
39 |
+
|
40 |
+
"Derivative Works" shall mean any work, whether in Source or Object
|
41 |
+
form, that is based on (or derived from) the Work and for which the
|
42 |
+
editorial revisions, annotations, elaborations, or other modifications
|
43 |
+
represent, as a whole, an original work of authorship. For the purposes
|
44 |
+
of this License, Derivative Works shall not include works that remain
|
45 |
+
separable from, or merely link (or bind by name) to the interfaces of,
|
46 |
+
the Work and Derivative Works thereof.
|
47 |
+
|
48 |
+
"Contribution" shall mean any work of authorship, including
|
49 |
+
the original version of the Work and any modifications or additions
|
50 |
+
to that Work or Derivative Works thereof, that is intentionally
|
51 |
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
52 |
+
or by an individual or Legal Entity authorized to submit on behalf of
|
53 |
+
the copyright owner. For the purposes of this definition, "submitted"
|
54 |
+
means any form of electronic, verbal, or written communication sent
|
55 |
+
to the Licensor or its representatives, including but not limited to
|
56 |
+
communication on electronic mailing lists, source code control systems,
|
57 |
+
and issue tracking systems that are managed by, or on behalf of, the
|
58 |
+
Licensor for the purpose of discussing and improving the Work, but
|
59 |
+
excluding communication that is conspicuously marked or otherwise
|
60 |
+
designated in writing by the copyright owner as "Not a Contribution."
|
61 |
+
|
62 |
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
63 |
+
on behalf of whom a Contribution has been received by Licensor and
|
64 |
+
subsequently incorporated within the Work.
|
65 |
+
|
66 |
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
67 |
+
this License, each Contributor hereby grants to You a perpetual,
|
68 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
69 |
+
copyright license to reproduce, prepare Derivative Works of,
|
70 |
+
publicly display, publicly perform, sublicense, and distribute the
|
71 |
+
Work and such Derivative Works in Source or Object form.
|
72 |
+
|
73 |
+
3. Grant of Patent License. Subject to the terms and conditions of
|
74 |
+
this License, each Contributor hereby grants to You a perpetual,
|
75 |
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
76 |
+
(except as stated in this section) patent license to make, have made,
|
77 |
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
78 |
+
where such license applies only to those patent claims licensable
|
79 |
+
by such Contributor that are necessarily infringed by their
|
80 |
+
Contribution(s) alone or by combination of their Contribution(s)
|
81 |
+
with the Work to which such Contribution(s) was submitted. If You
|
82 |
+
institute patent litigation against any entity (including a
|
83 |
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
84 |
+
or a Contribution incorporated within the Work constitutes direct
|
85 |
+
or contributory patent infringement, then any patent licenses
|
86 |
+
granted to You under this License for that Work shall terminate
|
87 |
+
as of the date such litigation is filed.
|
88 |
+
|
89 |
+
4. Redistribution. You may reproduce and distribute copies of the
|
90 |
+
Work or Derivative Works thereof in any medium, with or without
|
91 |
+
modifications, and in Source or Object form, provided that You
|
92 |
+
meet the following conditions:
|
93 |
+
|
94 |
+
(a) You must give any other recipients of the Work or
|
95 |
+
Derivative Works a copy of this License; and
|
96 |
+
|
97 |
+
(b) You must cause any modified files to carry prominent notices
|
98 |
+
stating that You changed the files; and
|
99 |
+
|
100 |
+
(c) You must retain, in the Source form of any Derivative Works
|
101 |
+
that You distribute, all copyright, patent, trademark, and
|
102 |
+
attribution notices from the Source form of the Work,
|
103 |
+
excluding those notices that do not pertain to any part of
|
104 |
+
the Derivative Works; and
|
105 |
+
|
106 |
+
(d) If the Work includes a "NOTICE" text file as part of its
|
107 |
+
distribution, then any Derivative Works that You distribute must
|
108 |
+
include a readable copy of the attribution notices contained
|
109 |
+
within such NOTICE file, excluding those notices that do not
|
110 |
+
pertain to any part of the Derivative Works, in at least one
|
111 |
+
of the following places: within a NOTICE text file distributed
|
112 |
+
as part of the Derivative Works; within the Source form or
|
113 |
+
documentation, if provided along with the Derivative Works; or,
|
114 |
+
within a display generated by the Derivative Works, if and
|
115 |
+
wherever such third-party notices normally appear. The contents
|
116 |
+
of the NOTICE file are for informational purposes only and
|
117 |
+
do not modify the License. You may add Your own attribution
|
118 |
+
notices within Derivative Works that You distribute, alongside
|
119 |
+
or as an addendum to the NOTICE text from the Work, provided
|
120 |
+
that such additional attribution notices cannot be construed
|
121 |
+
as modifying the License.
|
122 |
+
|
123 |
+
You may add Your own copyright statement to Your modifications and
|
124 |
+
may provide additional or different license terms and conditions
|
125 |
+
for use, reproduction, or distribution of Your modifications, or
|
126 |
+
for any such Derivative Works as a whole, provided Your use,
|
127 |
+
reproduction, and distribution of the Work otherwise complies with
|
128 |
+
the conditions stated in this License.
|
129 |
+
|
130 |
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
131 |
+
any Contribution intentionally submitted for inclusion in the Work
|
132 |
+
by You to the Licensor shall be under the terms and conditions of
|
133 |
+
this License, without any additional terms or conditions.
|
134 |
+
Notwithstanding the above, nothing herein shall supersede or modify
|
135 |
+
the terms of any separate license agreement you may have executed
|
136 |
+
with Licensor regarding such Contributions.
|
137 |
+
|
138 |
+
6. Trademarks. This License does not grant permission to use the trade
|
139 |
+
names, trademarks, service marks, or product names of the Licensor,
|
140 |
+
except as required for reasonable and customary use in describing the
|
141 |
+
origin of the Work and reproducing the content of the NOTICE file.
|
142 |
+
|
143 |
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
144 |
+
agreed to in writing, Licensor provides the Work (and each
|
145 |
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
146 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
147 |
+
implied, including, without limitation, any warranties or conditions
|
148 |
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
149 |
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
150 |
+
appropriateness of using or redistributing the Work and assume any
|
151 |
+
risks associated with Your exercise of permissions under this License.
|
152 |
+
|
153 |
+
8. Limitation of Liability. In no event and under no legal theory,
|
154 |
+
whether in tort (including negligence), contract, or otherwise,
|
155 |
+
unless required by applicable law (such as deliberate and grossly
|
156 |
+
negligent acts) or agreed to in writing, shall any Contributor be
|
157 |
+
liable to You for damages, including any direct, indirect, special,
|
158 |
+
incidental, or consequential damages of any character arising as a
|
159 |
+
result of this License or out of the use or inability to use the
|
160 |
+
Work (including but not limited to damages for loss of goodwill,
|
161 |
+
work stoppage, computer failure or malfunction, or any and all
|
162 |
+
other commercial damages or losses), even if such Contributor
|
163 |
+
has been advised of the possibility of such damages.
|
164 |
+
|
165 |
+
9. Accepting Warranty or Additional Liability. While redistributing
|
166 |
+
the Work or Derivative Works thereof, You may choose to offer,
|
167 |
+
and charge a fee for, acceptance of support, warranty, indemnity,
|
168 |
+
or other liability obligations and/or rights consistent with this
|
169 |
+
License. However, in accepting such obligations, You may act only
|
170 |
+
on Your own behalf and on Your sole responsibility, not on behalf
|
171 |
+
of any other Contributor, and only if You agree to indemnify,
|
172 |
+
defend, and hold each Contributor harmless for any liability
|
173 |
+
incurred by, or claims asserted against, such Contributor by reason
|
174 |
+
of your accepting any such warranty or additional liability.
|
175 |
+
|
176 |
+
END OF TERMS AND CONDITIONS
|
177 |
+
|
178 |
+
APPENDIX: How to apply the Apache License to your work.
|
179 |
+
|
180 |
+
To apply the Apache License to your work, attach the following
|
181 |
+
boilerplate notice, with the fields enclosed by brackets "[]"
|
182 |
+
replaced with your own identifying information. (Don't include
|
183 |
+
the brackets!) The text should be enclosed in the appropriate
|
184 |
+
comment syntax for the file format. We also recommend that a
|
185 |
+
file or class name and description of purpose be included on the
|
186 |
+
same "printed page" as the copyright notice for easier
|
187 |
+
identification within third-party archives.
|
188 |
+
|
189 |
+
Copyright [yyyy] [name of copyright owner]
|
190 |
+
|
191 |
+
Licensed under the Apache License, Version 2.0 (the "License");
|
192 |
+
you may not use this file except in compliance with the License.
|
193 |
+
You may obtain a copy of the License at
|
194 |
+
|
195 |
+
http://www.apache.org/licenses/LICENSE-2.0
|
196 |
+
|
197 |
+
Unless required by applicable law or agreed to in writing, software
|
198 |
+
distributed under the License is distributed on an "AS IS" BASIS,
|
199 |
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
200 |
+
See the License for the specific language governing permissions and
|
201 |
+
limitations under the License.
|
README.md
CHANGED
@@ -1,10 +1,132 @@
|
|
1 |
---
|
2 |
-
title:
|
3 |
-
emoji:
|
4 |
-
colorFrom:
|
5 |
-
colorTo:
|
6 |
sdk: docker
|
|
|
7 |
pinned: false
|
|
|
8 |
---
|
9 |
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
---
|
2 |
+
title: thomasht86/fasthtml-vespa
|
3 |
+
emoji: 🚀
|
4 |
+
colorFrom: purple
|
5 |
+
colorTo: red
|
6 |
sdk: docker
|
7 |
+
app_file: app.py
|
8 |
pinned: false
|
9 |
+
termination_grace_period: 2m
|
10 |
---
|
11 |
|
12 |
+
## FastHTML Vespa-frontend
|
13 |
+
|
14 |
+
This is a simple frontend for the Vespa search engine. It is built using [FastHTML](https://www.fastht.ml/) and written in pure Python.
|
15 |
+
|
16 |
+
Search page | Login page | Query logs
|
17 |
+
:-------------------------:|:-------------------------:|:-------------------------:
|
18 |
+
![search](assets/search-page.png) | ![admin-login](assets/admin-login.png) | ![query-log](assets/query-log.png)
|
19 |
+
|
20 |
+
### Features
|
21 |
+
|
22 |
+
- Simple search interface, with links to search results.
|
23 |
+
- Accordion with full JSON-response from Vespa.
|
24 |
+
- SQLite DB for storing queries.
|
25 |
+
- Admin authentication for viewing and downloading queries.
|
26 |
+
- Deployment options - Docker + [Huggingface spaces](https://huggingface.co/spaces/).
|
27 |
+
|
28 |
+
### Why?
|
29 |
+
|
30 |
+
We have recognized the need, both for ourselves and others, to be able to set up a simple frontend for Vespa, without having to navigate the frontend framework jungle.
|
31 |
+
|
32 |
+
This sample-app can serve as an example of how you can build and deploy a simple frontend for Vespa, using FastHTML.
|
33 |
+
|
34 |
+
### How to use
|
35 |
+
|
36 |
+
#### 1. Clone this folder to your local machine 📂
|
37 |
+
|
38 |
+
The command below will clone the repository and only fetch the `fasthtml-frontend` folder.
|
39 |
+
|
40 |
+
```bash
|
41 |
+
git clone --depth 1 --filter=blob:none --sparse https://github.com/vespa-engine/sample-apps.git temp-sample-apps && cd temp-sample-apps && git sparse-checkout set fasthtml-frontend && mkdir -p ../fasthtml-frontend && mv fasthtml-frontend/* ../fasthtml-frontend/ && cd .. && rm -rf temp-sample-apps
|
42 |
+
```
|
43 |
+
|
44 |
+
#### 2. Install dependencies 🔧
|
45 |
+
|
46 |
+
```bash
|
47 |
+
pip install -r requirements.txt
|
48 |
+
```
|
49 |
+
|
50 |
+
#### 3. Run the app locally 💻
|
51 |
+
|
52 |
+
```bash
|
53 |
+
python main.py
|
54 |
+
```
|
55 |
+
|
56 |
+
At this point, you should be able to access the app at [http://localhost:5001](http://localhost:5001).
|
57 |
+
|
58 |
+
But, you will _not_ be able to search for anything, as your environment variables are not set up.
|
59 |
+
|
60 |
+
#### 4. Deploy and feed your Vespa application ▶️
|
61 |
+
|
62 |
+
By running the `deploy_app.ipynb` notebook, you will deploy a Vespa application to the Vespa Cloud. The application is just a sample hybrid search application using the [BEIR/nfcorpus](https://huggingface.co/datasets/BeIR/nfcorpus) dataset.
|
63 |
+
Feel free to replace the dataset and application with your own.
|
64 |
+
|
65 |
+
Make sure to replace these variables at the top of the notebook with your own values:
|
66 |
+
|
67 |
+
```python
|
68 |
+
# Replace with your tenant name from the Vespa Cloud Console
|
69 |
+
tenant_name = "mytenant"
|
70 |
+
# Replace with your application name (does not need to exist yet)
|
71 |
+
application = "fasthtml"
|
72 |
+
# Token id (from Vespa Cloud Console)
|
73 |
+
token_id = "fasthtmltoken"
|
74 |
+
```
|
75 |
+
|
76 |
+
#### 5. Set up environment variables 🔐
|
77 |
+
|
78 |
+
Make sure to add the output of the `token_endpoint` from the `deploy_app.ipynb` notebook to your `.env.example` file.
|
79 |
+
This value should be placed in the `VESPA_APP_URL` environment variable.
|
80 |
+
|
81 |
+
At the same time, you should rename the `.env.example` file to `.env`. The `.env` file is listed in `.gitignore`, so it will not be committed.
|
82 |
+
|
83 |
+
#### 6. Run the app locally 🚀
|
84 |
+
|
85 |
+
Now, you should be able to run the app locally and search for queries.
|
86 |
+
|
87 |
+
```bash
|
88 |
+
python main.py
|
89 |
+
```
|
90 |
+
|
91 |
+
Open your browser and navigate to [http://localhost:5001](http://localhost:5001).
|
92 |
+
|
93 |
+
### Deployment
|
94 |
+
|
95 |
+
If you want to deploy the app, set `DEV_MODE=False` in `main.py`.
|
96 |
+
This will disable loading of environment variables from the `.env` file, and instead use the environment variables set in the deployment environment.
|
97 |
+
|
98 |
+
#### Docker 🐳
|
99 |
+
|
100 |
+
You can build and run the app using Docker.
|
101 |
+
|
102 |
+
Note that there are two Dockerfiles in the repo:
|
103 |
+
|
104 |
+
- `Dockerfile` is for building the image for Huggingface Spaces.
|
105 |
+
- `Dockerfile.nonhf` is for building an image that can be run locally or on any other platform.
|
106 |
+
|
107 |
+
Build the image:
|
108 |
+
|
109 |
+
```bash
|
110 |
+
docker build -t fhtdemoimg . -f Dockerfile.nonhf
|
111 |
+
```
|
112 |
+
|
113 |
+
**Run the container:**
|
114 |
+
|
115 |
+
- Makes the environment variables in the `.env` file available to the container.
|
116 |
+
- Will mount the `db/` folder to the container, so that the SQLite database is persisted between runs.
|
117 |
+
- Sets the hostname to `dockerhost`, so that we can now use that to enable hot-reloading in the FastHTML app.
|
118 |
+
- Maps the default FastHTML port `5001` to `8000` on the host.
|
119 |
+
|
120 |
+
```bash
|
121 |
+
docker run --name fhtdemo --rm --env-file .env -p 8000:5001 -h dockerhost -v $(pwd)/db:/code/db fhtdemoimg
|
122 |
+
```
|
123 |
+
|
124 |
+
#### Huggingface 🤗 Spaces
|
125 |
+
|
126 |
+
This deployment option is free. The deployment script is shamelessly copied from the [fasthtml-hf](https://github.com/AnswerDotAI/fasthtml-hf) repo. Check it out for details on cli-options, configuration and DB-backup options.
|
127 |
+
|
128 |
+
1. Get a Huggingface token with `write` permissions. You can do this by going to your [Huggingface profile](https://huggingface.co/settings/tokens) and creating a new token.
|
129 |
+
2. Set the `HF_TOKEN` environment variable to the token you just created.
|
130 |
+
3. Run `python deploy_hf.py <your-space-name> [--private true]` to deploy the app to Huggingface Spaces.
|
131 |
+
|
132 |
+
### Go build and show us some cool Vespa apps! 🚀
|
assets/admin-login.png
ADDED
assets/query-log.png
ADDED
assets/search-page.png
ADDED
config.ini
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[DEFAULT]
|
2 |
+
dataset_id = space-backup
|
3 |
+
db_dir = data
|
4 |
+
private_backup = True
|
5 |
+
interval = 5
|
deploy_app.ipynb
ADDED
@@ -0,0 +1,621 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"cells": [
|
3 |
+
{
|
4 |
+
"cell_type": "markdown",
|
5 |
+
"id": "given-adoption",
|
6 |
+
"metadata": {
|
7 |
+
"pycharm": {
|
8 |
+
"name": "#%% md\n"
|
9 |
+
}
|
10 |
+
},
|
11 |
+
"source": [
|
12 |
+
"<picture>\n",
|
13 |
+
" <source media=\"(prefers-color-scheme: dark)\" srcset=\"https://vespa.ai/assets/vespa-ai-logo-heather.svg\">\n",
|
14 |
+
" <source media=\"(prefers-color-scheme: light)\" srcset=\"https://vespa.ai/assets/vespa-ai-logo-rock.svg\">\n",
|
15 |
+
" <img alt=\"#Vespa\" width=\"200\" src=\"https://vespa.ai/assets/vespa-ai-logo-rock.svg\" style=\"margin-bottom: 25px;\">\n",
|
16 |
+
"</picture>\n",
|
17 |
+
"\n",
|
18 |
+
"# Deploy a sample app to Vespa Cloud\n",
|
19 |
+
"\n",
|
20 |
+
"This is the same guide as [getting-started-pyvespa](https://pyvespa.readthedocs.io/en/latest/getting-started-pyvespa.html), deploying to Vespa Cloud.\n"
|
21 |
+
]
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"cell_type": "markdown",
|
25 |
+
"id": "4f8c1448",
|
26 |
+
"metadata": {},
|
27 |
+
"source": [
|
28 |
+
"<div class=\"alert alert-info\">\n",
|
29 |
+
" Refer to <a href=\"https://pyvespa.readthedocs.io/en/latest/troubleshooting.html\">troubleshooting</a>\n",
|
30 |
+
" for any problem when running this guide.\n",
|
31 |
+
"</div>\n"
|
32 |
+
]
|
33 |
+
},
|
34 |
+
{
|
35 |
+
"cell_type": "markdown",
|
36 |
+
"id": "148d275b",
|
37 |
+
"metadata": {},
|
38 |
+
"source": [
|
39 |
+
"**Pre-requisite**: Create a tenant at [cloud.vespa.ai](https://cloud.vespa.ai/), save the tenant name.\n",
|
40 |
+
"\n",
|
41 |
+
"[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vespa-engine/pyvespa/blob/master/docs/sphinx/source/getting-started-pyvespa-cloud.ipynb)\n"
|
42 |
+
]
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"cell_type": "markdown",
|
46 |
+
"id": "366b0d83",
|
47 |
+
"metadata": {},
|
48 |
+
"source": [
|
49 |
+
"## Install\n",
|
50 |
+
"\n",
|
51 |
+
"Install [pyvespa](https://pyvespa.readthedocs.io/) >= 0.45\n",
|
52 |
+
"and the [Vespa CLI](https://docs.vespa.ai/en/vespa-cli.html).\n",
|
53 |
+
"The Vespa CLI is used for data and control plane key management ([Vespa Cloud Security Guide](https://cloud.vespa.ai/en/security/guide)).\n"
|
54 |
+
]
|
55 |
+
},
|
56 |
+
{
|
57 |
+
"cell_type": "code",
|
58 |
+
"execution_count": null,
|
59 |
+
"id": "136750de",
|
60 |
+
"metadata": {},
|
61 |
+
"outputs": [],
|
62 |
+
"source": [
|
63 |
+
"!pip3 install pyvespa vespacli datasets"
|
64 |
+
]
|
65 |
+
},
|
66 |
+
{
|
67 |
+
"cell_type": "markdown",
|
68 |
+
"id": "02f706ff",
|
69 |
+
"metadata": {},
|
70 |
+
"source": [
|
71 |
+
"## Configure application\n"
|
72 |
+
]
|
73 |
+
},
|
74 |
+
{
|
75 |
+
"cell_type": "code",
|
76 |
+
"execution_count": 5,
|
77 |
+
"id": "9ca4da83",
|
78 |
+
"metadata": {},
|
79 |
+
"outputs": [],
|
80 |
+
"source": [
|
81 |
+
"# Replace with your tenant name from the Vespa Cloud Console\n",
|
82 |
+
"tenant_name = \"mytenant\"\n",
|
83 |
+
"# Replace with your application name (does not need to exist yet)\n",
|
84 |
+
"application = \"fasthtml\"\n",
|
85 |
+
"# Token id (from Vespa Cloud Console)\n",
|
86 |
+
"token_id = \"fasthtmltoken\""
|
87 |
+
]
|
88 |
+
},
|
89 |
+
{
|
90 |
+
"cell_type": "markdown",
|
91 |
+
"id": "db637322",
|
92 |
+
"metadata": {},
|
93 |
+
"source": [
|
94 |
+
"## Create an application package\n",
|
95 |
+
"\n",
|
96 |
+
"The [application package](https://pyvespa.readthedocs.io/en/latest/reference-api.html#vespa.package.ApplicationPackage)\n",
|
97 |
+
"has all the Vespa configuration files -\n",
|
98 |
+
"create one from scratch:\n"
|
99 |
+
]
|
100 |
+
},
|
101 |
+
{
|
102 |
+
"cell_type": "code",
|
103 |
+
"execution_count": 6,
|
104 |
+
"id": "bd5c2629",
|
105 |
+
"metadata": {},
|
106 |
+
"outputs": [],
|
107 |
+
"source": [
|
108 |
+
"from vespa.package import (\n",
|
109 |
+
" ApplicationPackage,\n",
|
110 |
+
" Field,\n",
|
111 |
+
" Schema,\n",
|
112 |
+
" Document,\n",
|
113 |
+
" HNSW,\n",
|
114 |
+
" RankProfile,\n",
|
115 |
+
" Component,\n",
|
116 |
+
" Parameter,\n",
|
117 |
+
" FieldSet,\n",
|
118 |
+
" GlobalPhaseRanking,\n",
|
119 |
+
" Function,\n",
|
120 |
+
" AuthClient,\n",
|
121 |
+
")\n",
|
122 |
+
"\n",
|
123 |
+
"package = ApplicationPackage(\n",
|
124 |
+
" name=application,\n",
|
125 |
+
" schema=[\n",
|
126 |
+
" Schema(\n",
|
127 |
+
" name=\"doc\",\n",
|
128 |
+
" document=Document(\n",
|
129 |
+
" fields=[\n",
|
130 |
+
" Field(name=\"id\", type=\"string\", indexing=[\"summary\"]),\n",
|
131 |
+
" Field(\n",
|
132 |
+
" name=\"title\",\n",
|
133 |
+
" type=\"string\",\n",
|
134 |
+
" indexing=[\"index\", \"summary\"],\n",
|
135 |
+
" index=\"enable-bm25\",\n",
|
136 |
+
" ),\n",
|
137 |
+
" Field(\n",
|
138 |
+
" name=\"body\",\n",
|
139 |
+
" type=\"string\",\n",
|
140 |
+
" indexing=[\"index\", \"summary\"],\n",
|
141 |
+
" index=\"enable-bm25\",\n",
|
142 |
+
" bolding=True,\n",
|
143 |
+
" ),\n",
|
144 |
+
" Field(\n",
|
145 |
+
" name=\"embedding\",\n",
|
146 |
+
" type=\"tensor<float>(x[384])\",\n",
|
147 |
+
" indexing=[\n",
|
148 |
+
" 'input title . \" \" . input body',\n",
|
149 |
+
" \"embed\",\n",
|
150 |
+
" \"index\",\n",
|
151 |
+
" \"attribute\",\n",
|
152 |
+
" ],\n",
|
153 |
+
" ann=HNSW(distance_metric=\"angular\"),\n",
|
154 |
+
" is_document_field=False,\n",
|
155 |
+
" ),\n",
|
156 |
+
" ]\n",
|
157 |
+
" ),\n",
|
158 |
+
" fieldsets=[FieldSet(name=\"default\", fields=[\"title\", \"body\"])],\n",
|
159 |
+
" rank_profiles=[\n",
|
160 |
+
" RankProfile(\n",
|
161 |
+
" name=\"bm25\",\n",
|
162 |
+
" inputs=[(\"query(q)\", \"tensor<float>(x[384])\")],\n",
|
163 |
+
" functions=[\n",
|
164 |
+
" Function(name=\"bm25sum\", expression=\"bm25(title) + bm25(body)\")\n",
|
165 |
+
" ],\n",
|
166 |
+
" first_phase=\"bm25sum\",\n",
|
167 |
+
" ),\n",
|
168 |
+
" RankProfile(\n",
|
169 |
+
" name=\"semantic\",\n",
|
170 |
+
" inputs=[(\"query(q)\", \"tensor<float>(x[384])\")],\n",
|
171 |
+
" first_phase=\"closeness(field, embedding)\",\n",
|
172 |
+
" ),\n",
|
173 |
+
" RankProfile(\n",
|
174 |
+
" name=\"fusion\",\n",
|
175 |
+
" inherits=\"bm25\",\n",
|
176 |
+
" inputs=[(\"query(q)\", \"tensor<float>(x[384])\")],\n",
|
177 |
+
" first_phase=\"closeness(field, embedding)\",\n",
|
178 |
+
" global_phase=GlobalPhaseRanking(\n",
|
179 |
+
" expression=\"reciprocal_rank_fusion(bm25sum, closeness(field, embedding))\",\n",
|
180 |
+
" rerank_count=1000,\n",
|
181 |
+
" ),\n",
|
182 |
+
" ),\n",
|
183 |
+
" ],\n",
|
184 |
+
" )\n",
|
185 |
+
" ],\n",
|
186 |
+
" components=[\n",
|
187 |
+
" Component(\n",
|
188 |
+
" id=\"e5\",\n",
|
189 |
+
" type=\"hugging-face-embedder\",\n",
|
190 |
+
" parameters=[\n",
|
191 |
+
" Parameter(\n",
|
192 |
+
" \"transformer-model\",\n",
|
193 |
+
" {\n",
|
194 |
+
" \"url\": \"https://github.com/vespa-engine/sample-apps/raw/master/simple-semantic-search/model/e5-small-v2-int8.onnx\"\n",
|
195 |
+
" },\n",
|
196 |
+
" ),\n",
|
197 |
+
" Parameter(\n",
|
198 |
+
" \"tokenizer-model\",\n",
|
199 |
+
" {\n",
|
200 |
+
" \"url\": \"https://raw.githubusercontent.com/vespa-engine/sample-apps/master/simple-semantic-search/model/tokenizer.json\"\n",
|
201 |
+
" },\n",
|
202 |
+
" ),\n",
|
203 |
+
" ],\n",
|
204 |
+
" )\n",
|
205 |
+
" ],\n",
|
206 |
+
" auth_clients=[\n",
|
207 |
+
" AuthClient(\n",
|
208 |
+
" id=\"mtls\",\n",
|
209 |
+
" permissions=[\"read\", \"write\"],\n",
|
210 |
+
" parameters=[Parameter(\"certificate\", {\"file\": \"security/clients.pem\"})],\n",
|
211 |
+
" ),\n",
|
212 |
+
" AuthClient(\n",
|
213 |
+
" id=\"token\",\n",
|
214 |
+
" permissions=[\"read\", \"write\"],\n",
|
215 |
+
" parameters=[Parameter(\"token\", {\"id\": token_id})],\n",
|
216 |
+
" ),\n",
|
217 |
+
" ],\n",
|
218 |
+
")"
|
219 |
+
]
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"cell_type": "markdown",
|
223 |
+
"id": "2c5e2943",
|
224 |
+
"metadata": {},
|
225 |
+
"source": [
|
226 |
+
"Note that the name cannot have `-` or `_`.\n"
|
227 |
+
]
|
228 |
+
},
|
229 |
+
{
|
230 |
+
"cell_type": "markdown",
|
231 |
+
"id": "careful-savage",
|
232 |
+
"metadata": {},
|
233 |
+
"source": [
|
234 |
+
"## Deploy to Vespa Cloud\n",
|
235 |
+
"\n",
|
236 |
+
"The app is now defined and ready to deploy to Vespa Cloud.\n",
|
237 |
+
"\n",
|
238 |
+
"Deploy `package` to Vespa Cloud, by creating an instance of\n",
|
239 |
+
"[VespaCloud](https://pyvespa.readthedocs.io/en/latest/reference-api.html#vespa.deployment.VespaCloud):\n"
|
240 |
+
]
|
241 |
+
},
|
242 |
+
{
|
243 |
+
"cell_type": "code",
|
244 |
+
"execution_count": 7,
|
245 |
+
"id": "canadian-blood",
|
246 |
+
"metadata": {},
|
247 |
+
"outputs": [
|
248 |
+
{
|
249 |
+
"name": "stdout",
|
250 |
+
"output_type": "stream",
|
251 |
+
"text": [
|
252 |
+
"Setting application...\n",
|
253 |
+
"Running: vespa config set application scoober.fasthtml\n",
|
254 |
+
"Setting target cloud...\n",
|
255 |
+
"Running: vespa config set target cloud\n",
|
256 |
+
"\n",
|
257 |
+
"No api-key found for control plane access. Using access token.\n",
|
258 |
+
"Checking for access token in auth.json...\n",
|
259 |
+
"Successfully obtained access token for control plane access.\n"
|
260 |
+
]
|
261 |
+
}
|
262 |
+
],
|
263 |
+
"source": [
|
264 |
+
"from vespa.deployment import VespaCloud\n",
|
265 |
+
"\n",
|
266 |
+
"vespa_cloud = VespaCloud(\n",
|
267 |
+
" tenant=tenant_name,\n",
|
268 |
+
" application=application,\n",
|
269 |
+
" application_package=package,\n",
|
270 |
+
")"
|
271 |
+
]
|
272 |
+
},
|
273 |
+
{
|
274 |
+
"cell_type": "markdown",
|
275 |
+
"id": "197c0a27",
|
276 |
+
"metadata": {},
|
277 |
+
"source": [
|
278 |
+
"The following will upload the application package to Vespa Cloud Dev Zone (`aws-us-east-1c`), read more about [Vespa Zones](https://cloud.vespa.ai/en/reference/zones.html).\n",
|
279 |
+
"The Vespa Cloud Dev Zone is considered a sandbox environment where resources are down-scaled and idle deployments are expired automatically.\n",
|
280 |
+
"For information about production deployments, see the following [example](https://pyvespa.readthedocs.io/en/latest/getting-started-pyvespa-cloud.html#Example:-Deploy-the-app-to-the-prod-environment).\n",
|
281 |
+
"\n",
|
282 |
+
"> Note: Deployments to dev and perf expire after 7 days of inactivity, i.e., 7 days after running deploy. This applies to all plans, not only the Free Trial. Use the Vespa Console to extend the expiry period, or redeploy the application to add 7 more days.\n"
|
283 |
+
]
|
284 |
+
},
|
285 |
+
{
|
286 |
+
"cell_type": "code",
|
287 |
+
"execution_count": 8,
|
288 |
+
"id": "752166fc",
|
289 |
+
"metadata": {},
|
290 |
+
"outputs": [
|
291 |
+
{
|
292 |
+
"name": "stdout",
|
293 |
+
"output_type": "stream",
|
294 |
+
"text": [
|
295 |
+
"Deployment started in run 13 of dev-aws-us-east-1c for scoober.fasthtml. This may take a few minutes the first time.\n",
|
296 |
+
"INFO [10:44:08] Deploying platform version 8.397.20 and application dev build 7 for dev-aws-us-east-1c of default ...\n",
|
297 |
+
"INFO [10:44:08] Using CA signed certificate version 1\n",
|
298 |
+
"INFO [10:44:08] Using 1 nodes in container cluster 'fasthtml_container'\n",
|
299 |
+
"INFO [10:44:11] Validating Onnx models memory usage for container cluster 'fasthtml_container', percentage of available memory too low (10 < 15) to avoid restart, consider a flavor with more memory to avoid this\n",
|
300 |
+
"INFO [10:44:13] Session 4210 for tenant 'scoober' prepared and activated.\n",
|
301 |
+
"INFO [10:44:13] ######## Details for all nodes ########\n",
|
302 |
+
"INFO [10:44:13] h94419b.dev.aws-us-east-1c.vespa-external.aws.oath.cloud: expected to be UP\n",
|
303 |
+
"INFO [10:44:13] --- platform vespa/cloud-tenant-rhel8:8.397.20\n",
|
304 |
+
"INFO [10:44:13] --- storagenode on port 19102 has config generation 4210, wanted is 4210\n",
|
305 |
+
"INFO [10:44:13] --- searchnode on port 19107 has config generation 4210, wanted is 4210\n",
|
306 |
+
"INFO [10:44:13] --- distributor on port 19111 has config generation 4210, wanted is 4210\n",
|
307 |
+
"INFO [10:44:13] --- metricsproxy-container on port 19092 has config generation 4210, wanted is 4210\n",
|
308 |
+
"INFO [10:44:13] h93281d.dev.aws-us-east-1c.vespa-external.aws.oath.cloud: expected to be UP\n",
|
309 |
+
"INFO [10:44:13] --- platform vespa/cloud-tenant-rhel8:8.397.20\n",
|
310 |
+
"INFO [10:44:13] --- container-clustercontroller on port 19050 has config generation 4209, wanted is 4210\n",
|
311 |
+
"INFO [10:44:13] --- metricsproxy-container on port 19092 has config generation 4210, wanted is 4210\n",
|
312 |
+
"INFO [10:44:13] h93281b.dev.aws-us-east-1c.vespa-external.aws.oath.cloud: expected to be UP\n",
|
313 |
+
"INFO [10:44:13] --- platform vespa/cloud-tenant-rhel8:8.397.20\n",
|
314 |
+
"INFO [10:44:13] --- logserver-container on port 4080 has config generation 4209, wanted is 4210\n",
|
315 |
+
"INFO [10:44:13] --- metricsproxy-container on port 19092 has config generation 4210, wanted is 4210\n",
|
316 |
+
"INFO [10:44:13] h95982a.dev.aws-us-east-1c.vespa-external.aws.oath.cloud: expected to be UP\n",
|
317 |
+
"INFO [10:44:13] --- platform vespa/cloud-tenant-rhel8:8.397.20\n",
|
318 |
+
"INFO [10:44:13] --- container on port 4080 has config generation 4209, wanted is 4210\n",
|
319 |
+
"INFO [10:44:13] --- metricsproxy-container on port 19092 has config generation 4209, wanted is 4210\n",
|
320 |
+
"INFO [10:44:23] Found endpoints:\n",
|
321 |
+
"INFO [10:44:23] - dev.aws-us-east-1c\n",
|
322 |
+
"INFO [10:44:23] |-- https://d14d3ce0.ba4a39d8.z.vespa-app.cloud/ (cluster 'fasthtml_container')\n",
|
323 |
+
"INFO [10:44:23] Deployment of new application complete!\n",
|
324 |
+
"Found mtls endpoint for fasthtml_container\n",
|
325 |
+
"URL: https://d14d3ce0.ba4a39d8.z.vespa-app.cloud/\n",
|
326 |
+
"Connecting to https://d14d3ce0.ba4a39d8.z.vespa-app.cloud/\n",
|
327 |
+
"Using Mutual TLS with key and cert to connect to Vespa endpoint https://d14d3ce0.ba4a39d8.z.vespa-app.cloud/\n",
|
328 |
+
"Application is up!\n",
|
329 |
+
"Finished deployment.\n"
|
330 |
+
]
|
331 |
+
}
|
332 |
+
],
|
333 |
+
"source": [
|
334 |
+
"app = vespa_cloud.deploy()"
|
335 |
+
]
|
336 |
+
},
|
337 |
+
{
|
338 |
+
"cell_type": "markdown",
|
339 |
+
"id": "aaae2f91",
|
340 |
+
"metadata": {},
|
341 |
+
"source": [
|
342 |
+
"If the deployment failed, it is possible you forgot to add the key in the Vespa Cloud Console in the `vespa auth api-key` step above.\n",
|
343 |
+
"\n",
|
344 |
+
"If you can authenticate, you should see lines like the following\n",
|
345 |
+
"\n",
|
346 |
+
"```\n",
|
347 |
+
" Deployment started in run 1 of dev-aws-us-east-1c for mytenant.hybridsearch.\n",
|
348 |
+
"```\n",
|
349 |
+
"\n",
|
350 |
+
"The deployment takes a few minutes the first time while Vespa Cloud sets up the resources for your Vespa application\n",
|
351 |
+
"\n",
|
352 |
+
"`app` now holds a reference to a [Vespa](https://pyvespa.readthedocs.io/en/latest/reference-api.html#vespa.application.Vespa) instance. We can access the\n",
|
353 |
+
"mTLS protected endpoint name using the control-plane (vespa_cloud) instance. We can query and feed to this endpoint (data plane access) using the\n",
|
354 |
+
"mTLS certificate generated in previous steps.\n"
|
355 |
+
]
|
356 |
+
},
|
357 |
+
{
|
358 |
+
"cell_type": "markdown",
|
359 |
+
"id": "sealed-mustang",
|
360 |
+
"metadata": {},
|
361 |
+
"source": [
|
362 |
+
"### Feeding documents to Vespa\n",
|
363 |
+
"\n",
|
364 |
+
"In this example we use the [HF Datasets](https://huggingface.co/docs/datasets/index) library to stream the\n",
|
365 |
+
"[BeIR/nfcorpus](https://huggingface.co/datasets/BeIR/nfcorpus) dataset and index in our newly deployed Vespa instance. Read\n",
|
366 |
+
"more about the [NFCorpus](https://www.cl.uni-heidelberg.de/statnlpgroup/nfcorpus/):\n",
|
367 |
+
"\n",
|
368 |
+
"> NFCorpus is a full-text English retrieval data set for Medical Information Retrieval.\n",
|
369 |
+
"\n",
|
370 |
+
"The following uses the [stream](https://huggingface.co/docs/datasets/stream) option of datasets to stream the data without\n",
|
371 |
+
"downloading all the contents locally. The `map` functionality allows us to convert the\n",
|
372 |
+
"dataset fields into the expected feed format for `pyvespa` which expects a dict with the keys `id` and `fields`:\n",
|
373 |
+
"\n",
|
374 |
+
"`{ \"id\": \"vespa-document-id\", \"fields\": {\"vespa_field\": \"vespa-field-value\"}}`\n"
|
375 |
+
]
|
376 |
+
},
|
377 |
+
{
|
378 |
+
"cell_type": "code",
|
379 |
+
"execution_count": 6,
|
380 |
+
"id": "9a49fa8e",
|
381 |
+
"metadata": {},
|
382 |
+
"outputs": [
|
383 |
+
{
|
384 |
+
"name": "stdout",
|
385 |
+
"output_type": "stream",
|
386 |
+
"text": [
|
387 |
+
"Found token endpoint for fasthtml_container\n",
|
388 |
+
"URL: https://d3f601e7.ba4a39d8.z.vespa-app.cloud/\n"
|
389 |
+
]
|
390 |
+
},
|
391 |
+
{
|
392 |
+
"data": {
|
393 |
+
"text/plain": [
|
394 |
+
"'https://d3f601e7.ba4a39d8.z.vespa-app.cloud/'"
|
395 |
+
]
|
396 |
+
},
|
397 |
+
"execution_count": 6,
|
398 |
+
"metadata": {},
|
399 |
+
"output_type": "execute_result"
|
400 |
+
}
|
401 |
+
],
|
402 |
+
"source": [
|
403 |
+
"token_endpoint = vespa_cloud.get_token_endpoint()\n",
|
404 |
+
"token_endpoint"
|
405 |
+
]
|
406 |
+
},
|
407 |
+
{
|
408 |
+
"cell_type": "markdown",
|
409 |
+
"id": "126c0c29",
|
410 |
+
"metadata": {},
|
411 |
+
"source": [
|
412 |
+
"Add this endpoint to your `.env.example` file:\n",
|
413 |
+
"\n",
|
414 |
+
"```bash\n",
|
415 |
+
"VESPA_APP_URL=https://d3f601e7.ba4a39d8.z.vespa-app.cloud/\n",
|
416 |
+
"```\n",
|
417 |
+
"\n",
|
418 |
+
"Remember to rename the file to `.env`.\n"
|
419 |
+
]
|
420 |
+
},
|
421 |
+
{
|
422 |
+
"cell_type": "markdown",
|
423 |
+
"id": "775f3dd4",
|
424 |
+
"metadata": {},
|
425 |
+
"source": [
|
426 |
+
"## Feed data\n"
|
427 |
+
]
|
428 |
+
},
|
429 |
+
{
|
430 |
+
"cell_type": "code",
|
431 |
+
"execution_count": 7,
|
432 |
+
"id": "executed-reservoir",
|
433 |
+
"metadata": {},
|
434 |
+
"outputs": [
|
435 |
+
{
|
436 |
+
"name": "stderr",
|
437 |
+
"output_type": "stream",
|
438 |
+
"text": [
|
439 |
+
"/Users/thomas/.pyenv/versions/3.9.19/envs/pyvespa-dev/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
440 |
+
" from .autonotebook import tqdm as notebook_tqdm\n"
|
441 |
+
]
|
442 |
+
}
|
443 |
+
],
|
444 |
+
"source": [
|
445 |
+
"from datasets import load_dataset\n",
|
446 |
+
"\n",
|
447 |
+
"dataset = load_dataset(\"BeIR/nfcorpus\", \"corpus\", split=\"corpus\", streaming=True)\n",
|
448 |
+
"vespa_feed = dataset.map(\n",
|
449 |
+
" lambda x: {\n",
|
450 |
+
" \"id\": x[\"_id\"],\n",
|
451 |
+
" \"fields\": {\"title\": x[\"title\"], \"body\": x[\"text\"], \"id\": x[\"_id\"]},\n",
|
452 |
+
" }\n",
|
453 |
+
")"
|
454 |
+
]
|
455 |
+
},
|
456 |
+
{
|
457 |
+
"cell_type": "markdown",
|
458 |
+
"id": "4f0ca33f",
|
459 |
+
"metadata": {},
|
460 |
+
"source": [
|
461 |
+
"Now we can feed to Vespa using `feed_iterable` which accepts any `Iterable` and an optional callback function where we can\n",
|
462 |
+
"check the outcome of each operation. The application is configured to use [embedding](https://docs.vespa.ai/en/embedding.html)\n",
|
463 |
+
"functionality, which produces a vector embedding using a concatenation of the title and the body input fields. This step is resource intensive.\n",
|
464 |
+
"\n",
|
465 |
+
"Read more about embedding inference in Vespa in the [Accelerating Transformer-based Embedding Retrieval with Vespa](https://blog.vespa.ai/accelerating-transformer-based-embedding-retrieval-with-vespa/)\n",
|
466 |
+
"blog post.\n",
|
467 |
+
"\n",
|
468 |
+
"Default node resources in Vespa Cloud have 2 v-cpu for the Dev Zone.\n"
|
469 |
+
]
|
470 |
+
},
|
471 |
+
{
|
472 |
+
"cell_type": "code",
|
473 |
+
"execution_count": 8,
|
474 |
+
"id": "bottom-memorabilia",
|
475 |
+
"metadata": {},
|
476 |
+
"outputs": [
|
477 |
+
{
|
478 |
+
"name": "stdout",
|
479 |
+
"output_type": "stream",
|
480 |
+
"text": [
|
481 |
+
"Using mtls_key_cert Authentication against endpoint https://d14d3ce0.ba4a39d8.z.vespa-app.cloud//ApplicationStatus\n"
|
482 |
+
]
|
483 |
+
}
|
484 |
+
],
|
485 |
+
"source": [
|
486 |
+
"from vespa.io import VespaResponse\n",
|
487 |
+
"\n",
|
488 |
+
"\n",
|
489 |
+
"def callback(response: VespaResponse, id: str):\n",
|
490 |
+
" if not response.is_successful():\n",
|
491 |
+
" print(f\"Error when feeding document {id}: {response.get_json()}\")\n",
|
492 |
+
"\n",
|
493 |
+
"\n",
|
494 |
+
"app.feed_iterable(vespa_feed, schema=\"doc\", namespace=\"tutorial\", callback=callback)"
|
495 |
+
]
|
496 |
+
},
|
497 |
+
{
|
498 |
+
"cell_type": "markdown",
|
499 |
+
"id": "336e339d",
|
500 |
+
"metadata": {},
|
501 |
+
"source": [
|
502 |
+
"### Run a test query\n"
|
503 |
+
]
|
504 |
+
},
|
505 |
+
{
|
506 |
+
"cell_type": "code",
|
507 |
+
"execution_count": 9,
|
508 |
+
"id": "11faeacf",
|
509 |
+
"metadata": {},
|
510 |
+
"outputs": [
|
511 |
+
{
|
512 |
+
"data": {
|
513 |
+
"text/plain": [
|
514 |
+
"{'root': {'id': 'toplevel',\n",
|
515 |
+
" 'relevance': 1.0,\n",
|
516 |
+
" 'fields': {'totalCount': 1387},\n",
|
517 |
+
" 'coverage': {'coverage': 100,\n",
|
518 |
+
" 'documents': 3633,\n",
|
519 |
+
" 'full': True,\n",
|
520 |
+
" 'nodes': 1,\n",
|
521 |
+
" 'results': 1,\n",
|
522 |
+
" 'resultsFull': 1},\n",
|
523 |
+
" 'children': [{'id': 'id:tutorial:doc::MED-2464',\n",
|
524 |
+
" 'relevance': 0.03200204813108039,\n",
|
525 |
+
" 'source': 'fasthtml_content',\n",
|
526 |
+
" 'fields': {'sddocname': 'doc',\n",
|
527 |
+
" 'body': \"BACKGROUND: In recent decades, children's diet quality has changed <hi>and</hi> <hi>asthma</hi> prevalence has increased, although it remains unclear if these events are associated. OBJECTIVE: To examine children's total <hi>and</hi> component diet quality <hi>and</hi> <hi>asthma</hi> <hi>and</hi> airway hyperresponsiveness (AHR), a proxy for <hi>asthma</hi> severity. METHODS: Food frequency questionnaires adapted from the Nurses' Health Study <hi>and</hi> supplemented with foods whose nutrients which have garnered interest of late in relation to <hi>asthma</hi> were administered. From these data, diet quality scores (total <hi>and</hi> component), based on the Youth Healthy Eating Index (YHEI adapted) were developed. <hi>Asthma</hi> assessments were performed by pediatric allergists <hi>and</hi> classified by atopic status: Allergic <hi>asthma</hi> (≥1 positive skin prick test to common allergens >3 mm compared to negative control) versus non-allergic <hi>asthma</hi> (negative skin prick test). AHR was assessed via the Cockcroft technique. Participants included 270 boys (30% with <hi>asthma</hi>) <hi>and</hi> 206 girls (33% with <hi>asthma</hi>) involved in the 1995 Manitoba Prospective Cohort Study nested case-control study. Logistic regression was used to examine associations between diet quality <hi>and</hi> <hi>asthma</hi>, <hi>and</hi> multinomial logistic regression was used to examine associations between diet quality <hi>and</hi> AHR. RESULTS: Four hundred seventy six children (56.7% boys) were seen at 12.6 ± 0.5 years. <hi>Asthma</hi> <hi>and</hi> AHR prevalence were 26.2 <hi>and</hi> 53.8%, respectively. In fully adjusted models, high <hi>vegetable</hi> intake was protective against allergic <hi>asthma</hi> (OR 0.49; 95% CI 0.29-0.84; P < 0.009) <hi>and</hi> moderate/severe AHR (OR 0.58; 0.37-0.91; P < 0.019). CONCLUSIONS: <hi>Vegetable</hi> intake is inversely associated with allergic <hi>asthma</hi> <hi>and</hi> moderate/severe AHR. 
Copyright © 2012 Wiley Periodicals, Inc.\",\n",
|
528 |
+
" 'documentid': 'id:tutorial:doc::MED-2464',\n",
|
529 |
+
" 'id': 'MED-2464',\n",
|
530 |
+
" 'title': 'Low vegetable intake is associated with allergic asthma and moderate-to-severe airway hyperresponsiveness.'}},\n",
|
531 |
+
" {'id': 'id:tutorial:doc::MED-2450',\n",
|
532 |
+
" 'relevance': 0.03177805800756621,\n",
|
533 |
+
" 'source': 'fasthtml_content',\n",
|
534 |
+
" 'fields': {'sddocname': 'doc',\n",
|
535 |
+
" 'body': \"Background Atopy is not uncommon among children living in rural Crete, but wheeze <hi>and</hi> rhinitis are rare. A study was undertaken to examine whether this discrepancy could be attributed to a high consumption of fresh fruit <hi>and</hi> <hi>vegetables</hi> or adherence to a traditional Mediterranean diet. Methods A cross‐sectional survey was performed in 690 children aged 7–18\\u2005years in rural Crete. Parents completed a questionnaire on their child's respiratory <hi>and</hi> allergic symptoms <hi>and</hi> a 58‐item food frequency questionnaire. Adherence to a Mediterranean diet was measured using a scale with 12 dietary items. Children underwent skin prick tests with 10 common aeroallergens. Results 80% of children ate fresh fruit (<hi>and</hi> 68% <hi>vegetables</hi>) at least twice a day. The intake of grapes, oranges, apples, <hi>and</hi> fresh tomatoes—the main local products in Crete—had no association with atopy but was protective for wheezing <hi>and</hi> rhinitis. A high consumption of nuts was found to be inversely associated with wheezing (OR 0.46; 95% CI 0.20 to 0.98), whereas margarine increased the risk of both wheeze (OR 2.19; 95% CI 1.01 to 4.82) <hi>and</hi> allergic rhinitis (OR 2.10; 95% CI 1.31 to 3.37). A high level of adherence to the Mediterranean diet was protective for allergic rhinitis (OR 0.34; 95% CI 0.18 to 0.64) while a more modest protection was observed for wheezing <hi>and</hi> atopy. Conclusion The results of this study suggest a beneficial effect of commonly consumed <hi>fruits</hi>, <hi>vegetables</hi> <hi>and</hi> nuts, <hi>and</hi> of a high adherence to a traditional Mediterranean diet during childhood on symptoms of <hi>asthma</hi> <hi>and</hi> rhinitis. Diet may explain the relative lack of allergic symptoms in this population.\",\n",
|
536 |
+
" 'documentid': 'id:tutorial:doc::MED-2450',\n",
|
537 |
+
" 'id': 'MED-2450',\n",
|
538 |
+
" 'title': 'Protective effect of fruits, vegetables and the Mediterranean diet on asthma and allergies among children in Crete'}},\n",
|
539 |
+
" {'id': 'id:tutorial:doc::MED-2458',\n",
|
540 |
+
" 'relevance': 0.030776515151515152,\n",
|
541 |
+
" 'source': 'fasthtml_content',\n",
|
542 |
+
" 'fields': {'sddocname': 'doc',\n",
|
543 |
+
" 'body': 'BACKGROUND: Antioxidant-rich diets are associated with reduced <hi>asthma</hi> prevalence in epidemiologic studies. We previously showed that short-term manipulation of antioxidant defenses leads to changes in <hi>asthma</hi> outcomes. OBJECTIVE: The objective was to investigate the effects of a high-antioxidant diet compared with those of a low-antioxidant diet, with or without lycopene supplementation, in <hi>asthma</hi>. DESIGN: <hi>Asthmatic</hi> adults (n = 137) were randomly assigned to a high-antioxidant diet (5 servings of <hi>vegetables</hi> <hi>and</hi> 2 servings of fruit daily; n = 46) or a low-antioxidant diet (≤2 servings of <hi>vegetables</hi> <hi>and</hi> 1 serving of fruit daily; n = 91) for 14 d <hi>and</hi> then commenced a parallel, randomized, controlled supplementation trial. Subjects who consumed the high-antioxidant diet received placebo. Subjects who consumed the low-antioxidant diet received placebo or tomato extract (45 mg lycopene/d). The intervention continued until week 14 or until an exacerbation occurred. RESULTS: After 14 d, subjects consuming the low-antioxidant diet had a lower percentage predicted forced expiratory volume in 1 s <hi>and</hi> percentage predicted forced vital capacity than did those consuming the high-antioxidant diet. Subjects in the low-antioxidant diet group had increased plasma C-reactive protein at week 14. At the end of the trial, time to exacerbation was greater in the high-antioxidant than in the low-antioxidant diet group, <hi>and</hi> the low-antioxidant diet group was 2.26 (95% CI: 1.04, 4.91; P = 0.039) times as likely to exacerbate. Of the subjects in the low-antioxidant diet group, no difference in airway or systemic inflammation or clinical outcomes was observed between the groups that consumed the tomato extract <hi>and</hi> those who consumed placebo. CONCLUSIONS: Modifying the dietary intake of carotenoids alters clinical <hi>asthma</hi> outcomes. 
Improvements were evident only after increased fruit <hi>and</hi> <hi>vegetable</hi> intake, which suggests that whole-food interventions are most effective. This trial was registered at http://www.actr.org.au as ACTRN012606000286549.',\n",
|
544 |
+
" 'documentid': 'id:tutorial:doc::MED-2458',\n",
|
545 |
+
" 'id': 'MED-2458',\n",
|
546 |
+
" 'title': 'Manipulating antioxidant intake in asthma: a randomized controlled trial.'}},\n",
|
547 |
+
" {'id': 'id:tutorial:doc::MED-2461',\n",
|
548 |
+
" 'relevance': 0.03055037313432836,\n",
|
549 |
+
" 'source': 'fasthtml_content',\n",
|
550 |
+
" 'fields': {'sddocname': 'doc',\n",
|
551 |
+
" 'body': 'This study aimed to evaluate the association of diet with respiratory symptoms <hi>and</hi> <hi>asthma</hi> in schoolchildren in Taipei, Taiwan. An in-class interview survey elicited experiences of <hi>asthma</hi> <hi>and</hi> respiratory symptoms <hi>and</hi> consumption frequencies of the major food categories in 2290 fifth graders. Respiratory symptoms surveyed included persistent cough, chest tightness, wheezing with cold, wheezing without cold, dyspnea-associated wheezing, <hi>and</hi> exercise-induced cough or wheezing. Results showed that the consumption of sweetened beverages had the strongest association with respiratory symptoms <hi>and</hi> was positively associated with six of the seven respiratory symptoms (all p < 0.05). The adjusted odds ratios (aOR) ranged from 1.05 (95% confidence interval (CI = 1.01-1.09) for exercise-induced cough to 1.09 (95% CI = 1.03-1.16) for wheezing without cold. Egg consumption was associated with 5 of the 7 respiratory symptoms. Consumptions of seafood, soy products, <hi>and</hi> <hi>fruits</hi> were each negatively associated with one of the seven respiratory symptoms (all p < 0.05). Consumption of seafood was negatively associated with physician-diagnosed <hi>asthma</hi> <hi>and</hi> consumptions of sweetened beverages <hi>and</hi> eggs were positively associated with suspected <hi>asthma</hi> (p < 0.05). In conclusion, the study suggests that diet is associated with the respiratory symptoms in schoolchildren in Taipei. Consumptions of sweetened beverages <hi>and</hi> eggs are associated with increased risk of respiratory symptoms <hi>and</hi> <hi>asthma</hi> whereas consumptions of soy products <hi>and</hi> <hi>fruits</hi> are associated with reduced risk of respiratory symptoms.',\n",
|
552 |
+
" 'documentid': 'id:tutorial:doc::MED-2461',\n",
|
553 |
+
" 'id': 'MED-2461',\n",
|
554 |
+
" 'title': 'The association of diet with respiratory symptoms and asthma in schoolchildren in Taipei, Taiwan.'}},\n",
|
555 |
+
" {'id': 'id:tutorial:doc::MED-5072',\n",
|
556 |
+
" 'relevance': 0.027757078986587184,\n",
|
557 |
+
" 'source': 'fasthtml_content',\n",
|
558 |
+
" 'fields': {'sddocname': 'doc',\n",
|
559 |
+
" 'body': 'Antioxidant-rich diets are associated with reduced <hi>asthma</hi> prevalence. However, direct evidence that altering intake of antioxidant-rich foods affects <hi>asthma</hi> is lacking. The objective was to investigate changes in <hi>asthma</hi> <hi>and</hi> airway inflammation resulting from a low antioxidant diet <hi>and</hi> subsequent use of lycopene-rich treatments. <hi>Asthmatic</hi> adults (n=32) consumed a low antioxidant diet for 10 days, then commenced a randomized, cross-over trial involving 3 x 7 day treatment arms (placebo, tomato extract (45 mg lycopene/day) <hi>and</hi> tomato juice (45 mg lycopene/day)). With consumption of a low antioxidant diet, plasma carotenoid concentrations decreased, <hi>Asthma</hi> Control Score worsened, %FEV(1) <hi>and</hi> %FVC decreased <hi>and</hi> %sputum neutrophils increased. Treatment with both tomato juice <hi>and</hi> extract reduced airway neutrophil influx. Treatment with tomato extract also reduced sputum neutrophil elastase activity. In conclusion, dietary antioxidant consumption modifies clinical <hi>asthma</hi> outcomes. Changing dietary antioxidant intake may be contributing to rising <hi>asthma</hi> prevalence. Lycopene-rich supplements should be further investigated as a therapeutic intervention.',\n",
|
560 |
+
" 'documentid': 'id:tutorial:doc::MED-5072',\n",
|
561 |
+
" 'id': 'MED-5072',\n",
|
562 |
+
" 'title': 'Lycopene-rich treatments modify noneosinophilic airway inflammation in asthma: proof of concept.'}}]}}"
|
563 |
+
]
|
564 |
+
},
|
565 |
+
"execution_count": 9,
|
566 |
+
"metadata": {},
|
567 |
+
"output_type": "execute_result"
|
568 |
+
}
|
569 |
+
],
|
570 |
+
"source": [
|
571 |
+
"with app.syncio(connections=1) as session:\n",
|
572 |
+
" query = \"How Fruits and Vegetables Can Treat Asthma?\"\n",
|
573 |
+
" response = session.query(\n",
|
574 |
+
" yql=\"select * from sources * where userQuery() or ({targetHits:1000}nearestNeighbor(embedding,q)) limit 5\",\n",
|
575 |
+
" query=query,\n",
|
576 |
+
" ranking=\"fusion\",\n",
|
577 |
+
" body={\"input.query(q)\": f\"embed({query})\"},\n",
|
578 |
+
" )\n",
|
579 |
+
" assert response.is_successful()\n",
|
580 |
+
"response.json"
|
581 |
+
]
|
582 |
+
},
|
583 |
+
{
|
584 |
+
"cell_type": "markdown",
|
585 |
+
"id": "072a12ac",
|
586 |
+
"metadata": {},
|
587 |
+
"source": [
|
588 |
+
"Now, you should be all set to run your frontend against the Vespa Cloud application.\n"
|
589 |
+
]
|
590 |
+
}
|
591 |
+
],
|
592 |
+
"metadata": {
|
593 |
+
"kernelspec": {
|
594 |
+
"display_name": "Python 3 (ipykernel)",
|
595 |
+
"language": "python",
|
596 |
+
"name": "python3"
|
597 |
+
},
|
598 |
+
"language_info": {
|
599 |
+
"codemirror_mode": {
|
600 |
+
"name": "ipython",
|
601 |
+
"version": 3
|
602 |
+
},
|
603 |
+
"file_extension": ".py",
|
604 |
+
"mimetype": "text/x-python",
|
605 |
+
"name": "python",
|
606 |
+
"nbconvert_exporter": "python",
|
607 |
+
"pygments_lexer": "ipython3",
|
608 |
+
"version": "3.9.19"
|
609 |
+
},
|
610 |
+
"nbsphinx": {
|
611 |
+
"allow_errors": true
|
612 |
+
},
|
613 |
+
"vscode": {
|
614 |
+
"interpreter": {
|
615 |
+
"hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
|
616 |
+
}
|
617 |
+
}
|
618 |
+
},
|
619 |
+
"nbformat": 4,
|
620 |
+
"nbformat_minor": 5
|
621 |
+
}
|
main.py
ADDED
@@ -0,0 +1,713 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from fasthtml_hf import setup_hf_backup
|
2 |
+
from fasthtml.common import (
|
3 |
+
picolink,
|
4 |
+
serve,
|
5 |
+
Div,
|
6 |
+
Title,
|
7 |
+
Main,
|
8 |
+
Input,
|
9 |
+
Button,
|
10 |
+
A,
|
11 |
+
Section,
|
12 |
+
H2,
|
13 |
+
Ul,
|
14 |
+
Li,
|
15 |
+
P,
|
16 |
+
Img,
|
17 |
+
Details,
|
18 |
+
MarkdownJS,
|
19 |
+
HighlightJS,
|
20 |
+
Summary,
|
21 |
+
Script,
|
22 |
+
I,
|
23 |
+
Form,
|
24 |
+
RedirectResponse,
|
25 |
+
dataclass,
|
26 |
+
Favicon,
|
27 |
+
database,
|
28 |
+
get_key,
|
29 |
+
Table,
|
30 |
+
Thead,
|
31 |
+
Tr,
|
32 |
+
Th,
|
33 |
+
Tbody,
|
34 |
+
Td,
|
35 |
+
FileResponse,
|
36 |
+
fast_app,
|
37 |
+
Beforeware,
|
38 |
+
Hidden,
|
39 |
+
Request,
|
40 |
+
)
|
41 |
+
from fasthtml.components import Nav, Article, Header, Mark
|
42 |
+
from fasthtml.pico import Search
|
43 |
+
from starlette.middleware import Middleware
|
44 |
+
from starlette.middleware.base import BaseHTTPMiddleware
|
45 |
+
from starlette.middleware.sessions import SessionMiddleware
|
46 |
+
from vespa.application import Vespa
|
47 |
+
import json
|
48 |
+
import os
|
49 |
+
import re
|
50 |
+
import time
|
51 |
+
from hmac import compare_digest
|
52 |
+
from io import StringIO
|
53 |
+
import csv
|
54 |
+
import tempfile
|
55 |
+
|
56 |
+
# Development switch: turns on FastHTML live reload (passed as `live=` to
# `fast_app`) and loads environment variables from a local .env file.
DEV_MODE = True

if DEV_MODE:
    print("Running in DEV_MODE - Hot reload enabled")
    from dotenv import load_dotenv

    load_dotenv()

# Admin credentials come from the environment; both fall back to "admin".
ADMIN_NAME = os.getenv("ADMIN_NAME", "admin")
ADMIN_PWD = os.getenv("ADMIN_PWD", "admin")

# Client for the Vespa application, configured from the environment.
vespa_app: Vespa = Vespa(
    url=os.getenv("VESPA_APP_URL"),
    vespa_cloud_secret_token=os.getenv("VESPA_CLOUD_SECRET_TOKEN"),
)
# Probe the application once at import time and report the outcome.
status = vespa_app.get_application_status()
print(
    "Could not connect to Vespa application"
    if status is None
    else "Connected to Vespa application!"
)

# Page-level assets: Font Awesome kit script and the favicon
# (the same icon URL is passed for both Favicon arguments).
fa = Script(
    src="https://kit.fontawesome.com/664eb1a115.js",
    crossorigin="anonymous",
)
favicon = Favicon(
    "https://search.vespa.ai/favicon.ico",
    "https://search.vespa.ai/favicon.ico",
)
|
82 |
+
DB_FILE = "db/vespa.db"
# The db/ directory is git-ignored, so a fresh checkout does not contain it.
# Create it up front so opening the database file cannot fail on a missing
# parent directory.
os.makedirs(os.path.dirname(DB_FILE), exist_ok=True)
db = database(DB_FILE)
queries = db.t.queries
if queries not in db.t:
    # You can pass a dict, or kwargs, to most MiniDataAPI methods.
    # `qid` is declared as the table's integer primary key here. No follow-up
    # ALTER TABLE is issued: the column already exists, and SQLite does not
    # allow adding a PRIMARY KEY column through ALTER TABLE ... ADD COLUMN.
    queries.create(dict(qid=int, query=str, sess_id=str, timestamp=int), pk="qid")
Query = queries.dataclass()

# Attach an instance method to the Query dataclass that renders the integer
# `timestamp` field (seconds since the epoch) as local time, "YYYY-MM-DD HH:MM:SS".
Query.get_datetime = lambda self: time.strftime(
    "%Y-%m-%d %H:%M:%S", time.localtime(self.timestamp)
)

# Status code 303 is a redirect that can change POST to GET,
# so it's appropriate for a login page.
login_redir = RedirectResponse("/login", status_code=303)
|
100 |
+
|
101 |
+
|
102 |
+
def user_auth_before(req, sess):
    """Beforeware hook that gates routes behind the admin login.

    Copies the session's ``auth`` value into ``req.scope["auth"]`` so that
    handlers declaring an ``auth`` parameter receive it, and returns the
    login redirect when that value is missing or falsy. Returns ``None``
    (letting the request continue) otherwise.
    """
    # The `auth` key in the request scope is automatically provided
    # to any handler which requests it, and can not be injected
    # by the user using query params, cookies, etc, so it should
    # be secure to use.
    print(f"Session Data before route: {sess}")
    auth = sess.get("auth", None)
    req.scope["auth"] = auth
    print(f"Auth: {auth}")
    return None if auth else login_redir
|
112 |
+
|
113 |
+
|
114 |
+
# Header elements passed to `fast_app(hdrs=...)`: Pico CSS, the Markdown and
# syntax-highlighting scripts, the favicon and the Font Awesome kit.
headers = (
    picolink,
    MarkdownJS(),
    HighlightJS(langs=["json", "python"]),
    favicon,
    fa,
)

# Path of the file holding the session key; make sure its directory exists
# before anything tries to read or create the key.
sess_key_path = "session/.sesskey"
os.makedirs("session", exist_ok=True)
|
120 |
+
|
121 |
+
# Middleware
|
122 |
+
|
123 |
+
|
124 |
+
class XFrameOptionsMiddleware(BaseHTTPMiddleware):
    """Middleware that stamps an ``X-Frame-Options`` header on every response."""

    async def dispatch(self, request, call_next):
        """Run the downstream handler, then add the framing header."""
        resp = await call_next(request)
        # NOTE(review): the ALLOW-FROM directive is documented as obsolete and
        # is ignored by current browsers; if framing really must be limited to
        # huggingface.co, a CSP `frame-ancestors` directive is the supported
        # mechanism - confirm the intent before changing.
        resp.headers["X-Frame-Options"] = "ALLOW-FROM https://huggingface.co/"
        return resp
|
129 |
+
|
130 |
+
|
131 |
+
# Extra ASGI middleware: a signed-cookie session that expires after one hour,
# plus the framing header middleware defined above.
middlewares = [
    Middleware(
        SessionMiddleware,
        secret_key=get_key(fname=sess_key_path),
        max_age=3600,
    ),
    Middleware(XFrameOptionsMiddleware),
]

# `user_auth_before` runs before every route except those matching `skip`.
bware = Beforeware(
    user_auth_before,
    skip=[
        r"/favicon\.ico",
        r"/static/.*",
        r".*\.css",
        r".*\.js",
        "/",
        "/login",
        "/search",
        "/document/.*",
        "/expand/.*",
        "/source",
        # The about page is linked from the navbar shown to every visitor,
        # so it must be reachable without logging in.
        "/about",
    ],
)

app, rt = fast_app(
    before=bware,
    live=DEV_MODE,
    hdrs=headers,
    middleware=middlewares,
    key_fname=sess_key_path,
    same_site="None",
)


# The module-level name is kept because other code in this file may read it.
# Its value is the secret used to sign session cookies, so it is deliberately
# not printed or logged.
sesskey = get_key(fname=sess_key_path)
|
167 |
+
|
168 |
+
|
169 |
+
def get_navbar(admin: bool):
    """Build the navigation bar shown at the top of every page.

    ``admin`` is truthy when an admin is logged in: the shield icon then
    links to ``/admin`` instead of ``/login`` and the logout icon becomes
    visible (it is rendered with ``display: none`` otherwise).
    """
    print(f"In get_navbar: {admin}")

    def icon_link(icon_cls, href, title, style="margin: 10px;", target=None):
        # Attributes are emitted in the order href, style, [target], title.
        attrs = {"href": href, "style": style}
        if target is not None:
            attrs["target"] = target
        attrs["title"] = title
        return A(I(cls=icon_cls), **attrs)

    # Vespa logo linking to Vespa Cloud.
    logo = Li(
        A(
            Img(src="https://vespa.ai/assets/vespa-ai-logo-heather.svg"),
            href="https://cloud.vespa.ai",
            target="_blank",
            style="margin: 10px;",
        ),
    )

    icons = Ul(
        # Question mark icon linking to the about page.
        icon_link("fa fa-question-circle fa-2x", "/about", "About this app"),
        icon_link(
            "fab fa-slack fa-2x",
            "https://slack.vespa.ai/",
            "Join Vespa Slack channel",
            target="_blank",
        ),
        icon_link(
            "fab fa-github fa-2x",
            "https://github.com/thomasht86/fasthtml-vespa-demo/",
            "View source code on GitHub",
            target="_blank",
        ),
        icon_link("fa fa-code fa-2x", "/source", "View source code"),
        # Shield icon: goes to the login page, or straight to the admin page
        # when an admin is already logged in.
        icon_link(
            "fa fa-shield fa-2x",
            "/admin" if admin else "/login",
            "Admin login",
        ),
        # Logout icon, hidden unless an admin is logged in.
        icon_link(
            "fa fa-sign-out fa-2x",
            "/logout",
            "Logout",
            style="margin: 10px;" if admin else "display: none;",
        ),
    )

    return Nav(
        Ul(logo),
        Ul(H2("Vespa-fastHTML demo")),
        icons,
        # 10px margin to the right of the navbar.
        style="margin-right: 10px;",
    )
|
230 |
+
|
231 |
+
|
232 |
+
@app.route("/")
def get(sess):
    """Render the landing page: navbar, search bar and an empty results area."""
    # This route is skipped by the beforeware, so the login state has to be
    # read from the session rather than from an `auth` parameter.
    is_admin = sess.get("auth", False)

    query_input = Input(
        type="search",
        placeholder="Ask/search for medical information?",
        id="userquery",
    )
    # Clicking the button issues GET /search with the input's value included
    # and swaps the response into the results section.
    search_button = Button(
        "Search",
        hx_get="/search",
        hx_include="#userquery",
        hx_target="#results",
        hx_indicator="#spinner",
    )

    # Busy indicator displayed while a search request is in flight.
    spinner = Div(
        A(
            id="spinner",
            aria_busy="true",
            cls="htmx-indicator",
            style="font-size: 2em;",
        ),
        style="text-align: center; margin-top: 40px;",
    )
    results_section = Section(
        spinner,
        id="results",
        hx_swap="innerHTML",
        style="margin: 20px;",
    )

    return (
        Title("Vespa demo"),
        get_navbar(is_admin),
        Main(
            Search(
                query_input,
                search_button,
                style="margin: 10% 10px 0 0;",
            ),
            results_section,
            style="margin: 0 auto; width: 70%;",
            id="main",
        ),
    )
|
296 |
+
|
297 |
+
|
298 |
+
@dataclass
class Login:
    """Form payload submitted by the admin login page."""

    # Value of the form's "name" input.
    name: str
    # Value of the form's "pwd" (password) input.
    pwd: str
|
302 |
+
|
303 |
+
|
304 |
+
@app.get("/login")
def get_login_form(sess, error: bool = False):
    """Render the admin login page.

    ``error`` is set through the query string after a failed login attempt
    and makes the page show a red error message above the form.
    """
    is_admin = sess.get("auth", False)

    form_fields = (
        Input(id="name", placeholder="Name"),
        Input(id="pwd", type="password", placeholder="Password"),
        Button("login"),
    )
    login_form = Form(*form_fields, action="/login", method="post")

    if error:
        notice = P("Incorrect password", style="color: red;")
    else:
        notice = ""

    return (
        Title("Admin login"),
        get_navbar(is_admin),
        Main(
            notice,
            login_form,
            style="width: 50%; margin: 10% auto;",
        ),
    )
|
324 |
+
|
325 |
+
|
326 |
+
@app.post("/login")
def post(login: Login, sess):
    """Handle a login form submission.

    Both the submitted name and password must match ``ADMIN_NAME`` and
    ``ADMIN_PWD``. On success the session is marked as authenticated and
    the client is redirected to ``/admin``; otherwise it is redirected back
    to the login page with ``error=True``. Both redirects use status 303 so
    the browser follows them with GET.
    """
    # Evaluate both comparisons unconditionally (no short-circuit) and use
    # compare_digest so the check does not leak, via timing, which of the
    # two fields was wrong.
    name_ok = compare_digest(ADMIN_NAME.encode("utf-8"), login.name.encode("utf-8"))
    pwd_ok = compare_digest(ADMIN_PWD.encode("utf-8"), login.pwd.encode("utf-8"))
    if not (name_ok and pwd_ok):
        # Incorrect credentials - send back to the form with an error flag
        return RedirectResponse("/login?error=True", status_code=303)
    sess["auth"] = True
    print(f"Sess after login: {sess}")
    return RedirectResponse("/admin", status_code=303)
|
334 |
+
|
335 |
+
|
336 |
+
@app.get("/logout")
def logout(sess):
    """Log the admin out and send the browser back to the landing page."""
    # The flag is overwritten rather than removed; every reader treats a
    # falsy value the same as a missing one.
    sess["auth"] = False
    home = RedirectResponse("/")
    return home
|
340 |
+
|
341 |
+
|
342 |
+
def replace_hi_with_strong(text):
    """Split ``text`` on ``<hi>``/``</hi>`` markers into renderable pieces.

    Returns a list in document order where text found between ``<hi>`` and
    ``</hi>`` is wrapped in a ``Mark`` element and all other text is kept as
    a plain string. The markers themselves are dropped; empty segments
    produced by the split are kept.
    """
    pieces = []
    highlighted = False
    # The capturing group makes re.split return the markers as list items too.
    for token in re.split(r"(<hi>|</hi>)", text):
        if token in ("<hi>", "</hi>"):
            highlighted = token == "<hi>"
            continue
        pieces.append(Mark(token) if highlighted else token)
    return pieces
|
356 |
+
|
357 |
+
|
358 |
+
def log_query_to_db(query, sess):
    """Insert one row into the ``queries`` table for a search request.

    Args:
        query: The search string entered by the user.
        sess: The visitor's session mapping. A random identifier is stored
            in it under ``"sess_id"`` on first use and reused afterwards, so
            all queries from one session share the same id.

    Returns:
        Whatever ``queries.insert`` returns for the new row.
    """
    import uuid

    # Use a per-session random id. The application's secret session signing
    # key must never be written to the query log, where it would be shown on
    # the admin page and included in the CSV export.
    sess_id = sess.get("sess_id")
    if not sess_id:
        sess_id = sess["sess_id"] = uuid.uuid4().hex
    return queries.insert(
        Query(query=query, sess_id=sess_id, timestamp=int(time.time()))
    )
|
362 |
+
|
363 |
+
|
364 |
+
def parse_results(records):
    """Turn search records into a list of ``Article`` result cards.

    Each record is a mapping with ``"id"``, ``"title"`` and ``"body"`` keys.
    A card shows the title (which loads the full document when clicked), the
    first 300 characters of the body followed by "...", a "Show more"
    button, and a hidden input carrying the full body for ``/expand``.
    """

    def build_card(result):
        doc_id = result["id"]
        body = result["body"]

        heading = Header(
            H2(
                A(
                    result["title"],
                    hx_get=f"/document/{doc_id}",
                    hx_target="#results",
                )
            )
        )
        # Preview: first 300 characters of the body, highlights marked.
        preview = P(*replace_hi_with_strong(body[:300] + "..."))
        # Full-width toggle that posts the hidden full body to /expand.
        show_more = Button(
            "Show more",
            hx_post=f"/expand/{doc_id}?expand=true",
            hx_target=f"#{doc_id}",
            hx_include=f"#{doc_id}-full",
            cls="outline secondary",
            style="width: 100%;",
        )
        return Article(
            heading,
            Div(
                preview,
                Div(show_more, style="text-align: center;"),
                id=doc_id,
            ),
            Hidden(body, id=f"{doc_id}-full"),
        )

    return [build_card(record) for record in records]
|
401 |
+
|
402 |
+
|
403 |
+
@app.post("/expand/{docid}")
async def expand(request: Request, docid: str, expand: bool):
    """Re-render one result body either expanded or collapsed.

    The full body text is read from the posted form field ``"<docid>-full"``.
    When ``expand`` is true the whole text is shown with a "Show less"
    button; otherwise the first 300 characters plus "..." are shown with a
    "Show more" button. The returned ``Div`` carries ``id=docid`` so it can
    replace the element it was requested for.
    """
    print(f"Expanding {docid}")
    form_data = await request.form()
    # A request without the expected field would otherwise yield None and
    # fail on slicing / regex splitting; fall back to an empty body instead.
    full_text = form_data.get(f"{docid}-full") or ""
    shown_text = full_text if expand else full_text[:300] + "..."
    # The button always requests the opposite of the current state.
    next_state = "false" if expand else "true"
    return (
        Div(
            P(
                *replace_hi_with_strong(shown_text),
            ),
            Div(
                # Full-width toggle button, centered in its container
                Button(
                    "Show less" if expand else "Show more",
                    hx_post=f"/expand/{docid}?expand={next_state}",
                    hx_target=f"#{docid}",
                    hx_include=f"#{docid}-full",
                    cls="outline secondary",
                    style="width: 100%;",
                ),
                style="text-align: center;",
            ),
            id=docid,
        ),
    )
|
432 |
+
|
433 |
+
|
434 |
+
@app.get("/search")
async def search(userquery: str, sess):
    """Run a search against Vespa and render the hits.

    The query is remembered in ``sess["queries"]``, logged to the database,
    and sent to Vespa with the "fusion" ranking profile, combining the text
    query with a nearest-neighbor clause over the embedded query. The result
    is a ``Div`` holding a collapsible dump of the raw JSON response and up
    to 10 result cards.
    """
    print(sess)
    # Record every query, including the first one of a session: the document
    # view reads the last entry to build its "back to search results" link.
    sess.setdefault("queries", []).append(userquery)
    print(f"Searching for: {userquery}")
    log_query_to_db(userquery, sess)
    async with vespa_app.asyncio() as session:
        resp = await session.query(
            yql="select * from sources * where userQuery() or ({targetHits:1000}nearestNeighbor(embedding,q)) limit 10;",
            query=userquery,
            hits=10,
            ranking="fusion",
            body={"input.query(q)": f"embed({userquery})"},
        )
    # Keep only the fields the result cards need.
    fields = ("id", "title", "body")
    records = [{field: hit["fields"][field] for field in fields} for hit in resp.hits]
    results = parse_results(records)
    json_dump = json.dumps(resp.get_json(), indent=4)
    return Div(
        # Accordion (with Details)
        Details(
            Summary("Full JSON response"),
            Div(
                f"""```json\n{json_dump}\n```""",
                cls="marked",
            ),
        ),
        H2(
            "Search Results",
        ),
        Div(
            *results,
            id="all-searchresults",
        ),
    )
|
477 |
+
|
478 |
+
|
479 |
+
@app.get("/download_csv")
def download_csv(auth):
    """Send the whole query log as a CSV attachment named ``queries.csv``.

    The CSV has a header row ("Query", "Session ID", "Timestamp") followed
    by one row per logged query. It is written to a temporary file which is
    removed again once the response has been sent.
    """
    from starlette.background import BackgroundTask

    logged = [Query(**row) for row in db.query("SELECT * FROM queries")]

    # newline="" lets the csv module control line endings itself.
    with tempfile.NamedTemporaryFile(
        mode="w",
        newline="",
        encoding="utf-8",
        suffix=".csv",
        delete=False,
    ) as temp_file:
        writer = csv.writer(temp_file)
        writer.writerow(["Query", "Session ID", "Timestamp"])
        writer.writerows(
            [entry.query, entry.sess_id, entry.timestamp] for entry in logged
        )

    return FileResponse(
        temp_file.name,
        filename="queries.csv",
        media_type="text/csv",
        content_disposition_type="attachment",
        # delete=False keeps the file alive for the response; unlink it after
        # sending so downloads do not accumulate files in the temp directory.
        background=BackgroundTask(os.unlink, temp_file.name),
    )
|
505 |
+
|
506 |
+
|
507 |
+
@app.get("/admin")
def get_admin(auth, page: int = 1):
    """Render the admin page: a paginated table of logged queries.

    Args:
        auth: Login state provided by the beforeware; forwarded to the navbar.
        page: 1-based page number. Values outside the valid range are
            clamped to the first or last page.

    Shows 15 queries per page, newest first, with First/Previous/Next/Last
    links, a window of up to 5 page numbers, and a CSV download link.
    """
    limit = 15
    page_window = 5  # Number of page links to display at once

    total_queries = list(db.query("SELECT COUNT(*) AS count FROM queries"))[0]["count"]
    # Always report at least one page so an empty log reads "Page 1 of 1".
    total_pages = max(1, (total_queries + limit - 1) // limit)
    # Clamp the requested page so offsets and links stay within range.
    page = min(max(page, 1), total_pages)
    offset = (page - 1) * limit

    # `limit` and `offset` are ints computed above, so interpolating them is
    # safe. An explicit ORDER BY keeps paging deterministic (newest first).
    rows = db.query(
        f"SELECT * FROM queries ORDER BY timestamp DESC, qid DESC "
        f"LIMIT {limit} OFFSET {offset}"
    )
    logged = [Query(**row) for row in rows]

    # Window of page numbers centered on the current page where possible,
    # shifted left when it would run past the last page.
    start_page = max(1, page - page_window // 2)
    end_page = min(total_pages, start_page + page_window - 1)
    start_page = max(1, end_page - page_window + 1)

    enabled_style = "margin: 5px;"
    disabled_style = "margin: 5px; color: grey; pointer-events: none;"

    def nav_link(label, target_page, enabled):
        # One pagination link; greyed out and unclickable when disabled.
        return A(
            label,
            href=f"/admin?page={target_page}",
            style=enabled_style if enabled else disabled_style,
        )

    has_prev = page > 1
    has_next = page < total_pages

    # Pagination controls with "First", "Previous", "Next", and "Last"
    pagination_controls = Div(
        nav_link("First", 1, has_prev),
        nav_link("Previous", page - 1, has_prev),
        *[
            A(
                f"{i}",
                href=f"/admin?page={i}",
                style="margin: 5px;" if i != page else "margin: 5px; font-weight: bold;",
            )
            for i in range(start_page, end_page + 1)
        ],
        nav_link("Next", page + 1, has_next),
        nav_link("Last", total_pages, has_next),
        style="text-align: center; margin: 20px;",
    )

    # Total pages indication
    total_pages_indicator = Div(
        f"Page {page} of {total_pages}",
        style="text-align: center; margin: 10px;",
    )

    return (
        Title("Admin"),
        get_navbar(auth),
        Main(
            Div(
                A(
                    I(cls="fa fa-arrow-left"),
                    "Back",
                    href="/",
                    title="Back to main page",
                    style="margin: 10px;",
                ),
                style="margin: 10px;",
            ),
            H2("Queries"),
            # Table of the queries on the current page
            Table(
                Thead(
                    Tr(
                        Th("Query"),
                        Th("Session ID"),
                        Th("Datetime"),
                    )
                ),
                Tbody(
                    *[
                        Tr(
                            Td(entry.query),
                            Td(entry.sess_id),
                            Td(entry.get_datetime()),
                        )
                        for entry in logged
                    ],
                ),
                cls="striped",
            ),
            total_pages_indicator,
            pagination_controls,
            Div(
                A(
                    I(cls="fa fa-download fa-2x"),
                    " Download CSV",
                    href="/download_csv",
                    style="margin: 10px; float: right;",
                    title="Download queries as CSV",
                ),
                style="text-align: right; margin: 20px;",
            ),
            style="width: 80%; margin: 40px auto;",
        ),
    )
|
633 |
+
|
634 |
+
|
635 |
+
@app.get("/source")
def get_source(auth, sess):
    """Render this application's own source file as highlighted Markdown."""
    # This route is skipped by the beforeware, so `auth` may not have been
    # populated for it; fall back to the session so a logged-in admin still
    # gets the admin navbar.
    is_admin = auth or sess.get("auth", False)
    with open("main.py", encoding="utf-8") as f:
        source = f.read()
    return (
        Title("Source code"),
        get_navbar(is_admin),
        Main(
            Div(
                # Back link to the main page, placed above the source listing
                A(
                    I(cls="fa fa-arrow-left"),
                    "Back",
                    href="/",
                    title="Back to main page",
                    style="margin: 10px;",
                ),
                Div(
                    f"""### main.py - This is the whole source code for this app (!)\n```python\n{source}\n```""",
                    cls="marked",
                    style="margin: 10px;",
                ),
                style="width: 80%; margin: 40px auto;",
            ),
        ),
    )
|
661 |
+
|
662 |
+
|
663 |
+
@app.get("/about")
def get_about(auth, sess):
    """Render the static "About this app" page."""
    # Fall back to the session when `auth` was not populated, so the navbar
    # reflects the login state whether or not the beforeware ran for this
    # route (the landing page reads the session the same way).
    is_admin = auth or sess.get("auth", False)
    return (
        Title("About this app"),
        get_navbar(is_admin),
        Main(
            Div(
                A(
                    I(cls="fa fa-arrow-left"),
                    "Back",
                    href="/",
                    title="Back to main page",
                    style="margin: 10px;",
                ),
                Div(
                    """# About this app \n We wanted to see if we could build a simple search interface using Vespa and fastHTML. This is the result!""",
                    cls="marked",
                    style="margin: 10px;",
                ),
                style="width: 80%; margin: 40px auto;",
            ),
        ),
    )
|
686 |
+
|
687 |
+
|
688 |
+
@app.get("/document/{docid}")
def get_document(docid: str, sess):
    """Render a single document fetched from Vespa by its id.

    The page shows the document's title and body, preceded by a link back
    to the results of the most recent query stored in the session. When the
    session holds no query, the link goes to the landing page instead.
    """
    from urllib.parse import urlencode

    resp = vespa_app.get_data(data_id=docid, schema="doc", namespace="tutorial")
    doc = resp.json

    # The query history can be missing or empty (e.g. the document URL was
    # opened directly), so never index into it unguarded.
    history = sess.get("queries") or []
    if history:
        # URL-encode the query so characters such as '&', '#' or spaces
        # survive the round trip through the query string.
        back_attrs = {
            "hx_get": f"/search?{urlencode({'userquery': history[-1]})}",
            "hx_target": "#results",
        }
    else:
        back_attrs = {"href": "/"}

    return Main(
        Div(
            A(
                I(cls="fa fa-arrow-left"),
                "Back to search results",
                **back_attrs,
                style="margin: 10px;",
            ),
            H2(doc["fields"]["title"], style="margin: 10px;"),
            P(doc["fields"]["body"], cls="marked"),
        ),
    )
|
706 |
+
|
707 |
+
|
708 |
+
# Outside development mode, set up the Hugging Face backup for the app.
# A failure here is reported but must not stop the app from starting.
if not DEV_MODE:
    try:
        setup_hf_backup(app)
    except Exception as exc:
        print(f"Error setting up hf backup: {exc}")

# Start the server.
serve()
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
python-fasthtml
|
2 |
+
git+https://github.com/AnswerDotAI/fasthtml-hf@a7ae831a1bd01105a9f771fb3a4e4c454ddc3176 # latest released version did not work
|
3 |
+
pyvespa
|
4 |
+
vespacli
|
5 |
+
python-dotenv
|