Spaces:
Sleeping
Sleeping
Make the project installable, move dependencies to setup.py
Browse files
Simplifies testing and the Dockerfile.
The container now listens on port 80 internally, but is still exposed on host port 8000.
- .dockerignore +1 -1
- Dockerfile +8 -10
- README.md +10 -1
- app.py +1 -2
- __init__.py → commafixer/__init__.py +0 -0
- {routers → commafixer/routers}/__init__.py +0 -0
- {routers → commafixer/routers}/baseline.py +1 -1
- {src → commafixer/src}/__init__.py +0 -0
- {src → commafixer/src}/baseline.py +0 -0
- docker-compose.yml +4 -4
- openapi.yaml +1 -1
- requirements.txt +1 -9
- setup.py +31 -0
- test-requirements.txt +0 -2
- tests/__init__.py +0 -0
- tests/test_baseline.py +1 -1
.dockerignore
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
.idea
|
2 |
data/
|
3 |
.pytest_cache
|
|
|
4 |
.gitignore
|
5 |
README.md
|
6 |
openapi.yaml
|
7 |
-
|
|
|
1 |
.idea
|
2 |
data/
|
3 |
.pytest_cache
|
4 |
+
.eggs
|
5 |
.gitignore
|
6 |
README.md
|
7 |
openapi.yaml
|
|
Dockerfile
CHANGED
@@ -6,22 +6,20 @@ ENV PYTHONUNBUFFERED=1
|
|
6 |
RUN python -m venv /venv
|
7 |
ENV PATH="/venv/bin:$PATH"
|
8 |
|
9 |
-
COPY
|
10 |
-
COPY test-requirements.txt .
|
11 |
RUN pip install --upgrade pip
|
12 |
-
RUN pip install --no-cache-dir --upgrade
|
13 |
|
14 |
-
COPY src/baseline.py src/baseline.py
|
15 |
ENV TRANSFORMERS_CACHE=/coma-fixer/.cache
|
16 |
-
RUN python src/baseline.py # This pre-downloads models and tokenizers
|
17 |
|
18 |
COPY . .
|
19 |
|
20 |
FROM base as test
|
21 |
|
22 |
-
RUN pip install
|
23 |
-
|
24 |
-
RUN python -m pytest ../tests
|
25 |
|
26 |
FROM python:3.10-slim as deploy
|
27 |
|
@@ -33,5 +31,5 @@ ENV PATH="/venv/bin:$PATH"
|
|
33 |
ENV TRANSFORMERS_CACHE=/coma-fixer/.cache
|
34 |
COPY --from=base /coma-fixer/.cache /coma-fixer/.cache
|
35 |
|
36 |
-
EXPOSE
|
37 |
-
CMD uvicorn "app:app" --port
|
|
|
6 |
RUN python -m venv /venv
|
7 |
ENV PATH="/venv/bin:$PATH"
|
8 |
|
9 |
+
COPY setup.py .
|
|
|
10 |
RUN pip install --upgrade pip
|
11 |
+
RUN pip install --no-cache-dir --upgrade .
|
12 |
|
13 |
+
COPY commafixer/src/baseline.py commafixer/src/baseline.py
|
14 |
ENV TRANSFORMERS_CACHE=/coma-fixer/.cache
|
15 |
+
RUN python commafixer/src/baseline.py # This pre-downloads models and tokenizers
|
16 |
|
17 |
COPY . .
|
18 |
|
19 |
FROM base as test
|
20 |
|
21 |
+
RUN pip install .[test]
|
22 |
+
RUN python -m pytest tests
|
|
|
23 |
|
24 |
FROM python:3.10-slim as deploy
|
25 |
|
|
|
31 |
ENV TRANSFORMERS_CACHE=/coma-fixer/.cache
|
32 |
COPY --from=base /coma-fixer/.cache /coma-fixer/.cache
|
33 |
|
34 |
+
EXPOSE 80
|
35 |
+
CMD uvicorn "app:app" --port 80 --host "0.0.0.0"
|
README.md
CHANGED
@@ -10,8 +10,17 @@ pinned: true
|
|
10 |
app_port: 8000
|
11 |
---
|
12 |
|
|
|
|
|
13 |
`docker log [id]` for logs from the container.
|
14 |
|
15 |
`docker build -t comma-fixer --target test .` for tests
|
16 |
|
17 |
-
`git push hub` to deploy to huggingface hub, after adding a remote
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
app_port: 8000
|
11 |
---
|
12 |
|
13 |
+
`sudo service docker start`
|
14 |
+
|
15 |
`docker log [id]` for logs from the container.
|
16 |
|
17 |
`docker build -t comma-fixer --target test .` for tests
|
18 |
|
19 |
+
`git push hub` to deploy to huggingface hub, after adding a remote
|
20 |
+
|
21 |
+
Reported token classification F1 scores on commas for different languages:
|
22 |
+
|
23 |
+
| English | German | French | Italian |
|
24 |
+
|---------|--------|--------|---------|
|
25 |
+
| 0.819 | 0.945 | 0.831 | 0.798 |
|
26 |
+
|
app.py
CHANGED
@@ -1,11 +1,10 @@
|
|
1 |
from os.path import realpath
|
2 |
-
|
3 |
import uvicorn
|
4 |
from fastapi import FastAPI
|
5 |
from fastapi.responses import FileResponse
|
6 |
from fastapi.staticfiles import StaticFiles
|
7 |
|
8 |
-
from routers import baseline
|
9 |
|
10 |
app = FastAPI()
|
11 |
app.include_router(baseline.router, prefix='/baseline')
|
|
|
1 |
from os.path import realpath
|
|
|
2 |
import uvicorn
|
3 |
from fastapi import FastAPI
|
4 |
from fastapi.responses import FileResponse
|
5 |
from fastapi.staticfiles import StaticFiles
|
6 |
|
7 |
+
from commafixer.routers import baseline
|
8 |
|
9 |
app = FastAPI()
|
10 |
app.include_router(baseline.router, prefix='/baseline')
|
__init__.py → commafixer/__init__.py
RENAMED
File without changes
|
{routers → commafixer/routers}/__init__.py
RENAMED
File without changes
|
{routers → commafixer/routers}/baseline.py
RENAMED
@@ -1,7 +1,7 @@
|
|
1 |
from fastapi import APIRouter, HTTPException
|
2 |
import logging
|
3 |
|
4 |
-
from src.baseline import BaselineCommaFixer
|
5 |
|
6 |
|
7 |
logger = logging.Logger(__name__)
|
|
|
1 |
from fastapi import APIRouter, HTTPException
|
2 |
import logging
|
3 |
|
4 |
+
from commafixer.src.baseline import BaselineCommaFixer
|
5 |
|
6 |
|
7 |
logger = logging.Logger(__name__)
|
{src → commafixer/src}/__init__.py
RENAMED
File without changes
|
{src → commafixer/src}/baseline.py
RENAMED
File without changes
|
docker-compose.yml
CHANGED
@@ -18,11 +18,11 @@ services:
|
|
18 |
context: ./
|
19 |
dockerfile: Dockerfile
|
20 |
container_name: comma-fixer
|
21 |
-
command: uvicorn --host 0.0.0.0 --port
|
22 |
ports:
|
23 |
-
- "8000:
|
24 |
-
|
25 |
-
|
26 |
# networks:
|
27 |
# my-network:
|
28 |
# aliases:
|
|
|
18 |
context: ./
|
19 |
dockerfile: Dockerfile
|
20 |
container_name: comma-fixer
|
21 |
+
command: uvicorn --host 0.0.0.0 --port 80 "app:app" --reload
|
22 |
ports:
|
23 |
+
- "8000:80"
|
24 |
+
volumes:
|
25 |
+
- .:/comma-fixer
|
26 |
# networks:
|
27 |
# my-network:
|
28 |
# aliases:
|
openapi.yaml
CHANGED
@@ -2,7 +2,7 @@ openapi: 3.0.3
|
|
2 |
info:
|
3 |
title: Comma fixer
|
4 |
description: Comma fixer, using machine learning to fix placement of commas within a string of text.
|
5 |
-
version: 1.0
|
6 |
servers:
|
7 |
- url: 'https://localhost:5000'
|
8 |
paths:
|
|
|
2 |
info:
|
3 |
title: Comma fixer
|
4 |
description: Comma fixer, using machine learning to fix placement of commas within a string of text.
|
5 |
+
version: 0.1.0
|
6 |
servers:
|
7 |
- url: 'https://localhost:5000'
|
8 |
paths:
|
requirements.txt
CHANGED
@@ -1,9 +1 @@
|
|
1 |
-
|
2 |
-
gunicorn==21.2.0
|
3 |
-
uvicorn==0.23.2
|
4 |
-
torch==2.0.1
|
5 |
-
transformers==4.31.0
|
6 |
-
|
7 |
-
# for the tokenizer of the baseline model
|
8 |
-
protobuf==4.24.0
|
9 |
-
sentencepiece==0.1.99
|
|
|
1 |
+
-e .
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
setup.py
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from setuptools import setup, find_packages
|
2 |
+
|
3 |
+
setup(
|
4 |
+
name='commafixer',
|
5 |
+
version='0.1.0',
|
6 |
+
description='Fixing commas using Deep Learning.',
|
7 |
+
author='Karol Lasocki',
|
8 |
+
author_email='karolasocki@gmail.com',
|
9 |
+
url='https://huggingface.co/spaces/klasocki/comma-fixer',
|
10 |
+
packages=find_packages(include=['commafixer', 'commafixer.*']),
|
11 |
+
install_requires=[
|
12 |
+
"fastapi == 0.101.1",
|
13 |
+
"uvicorn == 0.23.2",
|
14 |
+
"torch == 2.0.1",
|
15 |
+
"transformers == 4.31.0",
|
16 |
+
# for the tokenizer of the baseline model
|
17 |
+
"protobuf == 4.24.0",
|
18 |
+
"sentencepiece == 0.1.99",
|
19 |
+
|
20 |
+
],
|
21 |
+
extras_require={
|
22 |
+
'training': [
|
23 |
+
'datasets==2.14.4',
|
24 |
+
'notebook'
|
25 |
+
],
|
26 |
+
'test': [
|
27 |
+
'pytest',
|
28 |
+
'httpx'
|
29 |
+
]
|
30 |
+
},
|
31 |
+
)
|
test-requirements.txt
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
pytest
|
2 |
-
httpx
|
|
|
|
|
|
tests/__init__.py
DELETED
File without changes
|
tests/test_baseline.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
import pytest
|
2 |
-
from baseline import BaselineCommaFixer, _remove_punctuation
|
3 |
|
4 |
|
5 |
@pytest.fixture()
|
|
|
1 |
import pytest
|
2 |
+
from commafixer.src.baseline import BaselineCommaFixer, _remove_punctuation
|
3 |
|
4 |
|
5 |
@pytest.fixture()
|