Luca Foppiano committed on
Commit
0b28b48
1 Parent(s): 6551eca

Setuptools build, PyPI release, GitHub Actions improvements (#5)

Browse files
.github/workflows/ci-build.yml CHANGED
@@ -14,9 +14,12 @@ jobs:
14
  steps:
15
  - uses: actions/checkout@v2
16
  - name: Set up Python 3.9
17
- uses: actions/setup-python@v2
18
  with:
19
  python-version: "3.9"
 
 
 
20
  - name: Install dependencies
21
  run: |
22
  python -m pip install --upgrade pip
@@ -30,16 +33,4 @@ jobs:
30
  flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
31
  # - name: Test with pytest
32
  # run: |
33
- # pytest
34
-
35
- docker-build-documentqa:
36
- needs: [build]
37
-
38
- runs-on: ubuntu-latest
39
-
40
- steps:
41
- - uses: actions/checkout@v2
42
- - name: Build the Docker image
43
- run: docker build . --file Dockerfile --tag lfoppiano/documentqa:develop
44
- - name: Cleanup older than 24h images and containers
45
- run: docker system prune --filter "until=24h" --force
 
14
  steps:
15
  - uses: actions/checkout@v2
16
  - name: Set up Python 3.9
17
+ uses: actions/setup-python@v4
18
  with:
19
  python-version: "3.9"
20
+ cache: 'pip'
21
+ - name: Cleanup more disk space
22
+ run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY"
23
  - name: Install dependencies
24
  run: |
25
  python -m pip install --upgrade pip
 
33
  flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
34
  # - name: Test with pytest
35
  # run: |
36
+ # pytest
 
 
 
 
 
 
 
 
 
 
 
 
.github/workflows/ci-release.yml ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Build release
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ push:
6
+ tags:
7
+ - 'v*'
8
+
9
+ concurrency:
10
+ group: docker
11
+ cancel-in-progress: true
12
+
13
+ jobs:
14
+ build:
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: actions/checkout@v2
18
+ - name: Set up Python 3.9
19
+ uses: actions/setup-python@v4
20
+ with:
21
+ python-version: "3.9"
22
+ cache: 'pip'
23
+ - name: Cleanup more disk space
24
+ run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY"
25
+ - name: Install dependencies
26
+ run: |
27
+ python -m pip install --upgrade pip
28
+ pip install --upgrade flake8 pytest pycodestyle
29
+ if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
30
+ - name: Lint with flake8
31
+ run: |
32
+ # stop the build if there are Python syntax errors or undefined names
33
+ flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
34
+ # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
35
+ flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
36
+ # - name: Test with pytest
37
+ # run: |
38
+ # pytest
39
+
40
+ - name: Build and Publish to PyPI
41
+ uses: conchylicultor/pypi-build-publish@v1
42
+ with:
43
+ pypi-token: ${{ secrets.PYPI_API_TOKEN }}
44
+
45
+
46
+ docker-build:
47
+ needs: [build]
48
+ runs-on: ubuntu-latest
49
+
50
+ steps:
51
+ - name: Set tags
52
+ id: set_tags
53
+ run: |
54
+ DOCKER_IMAGE=lfoppiano/document-insights-qa
55
+ VERSION=""
56
+ if [[ $GITHUB_REF == refs/tags/v* ]]; then
57
+ VERSION=${GITHUB_REF#refs/tags/v}
58
+ fi
59
+ if [[ $VERSION =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
60
+ TAGS="${VERSION}"
61
+ else
62
+ TAGS="latest"
63
+ fi
64
+ echo "TAGS=${TAGS}"
65
+ echo ::set-output name=tags::${TAGS}
66
+ - name: Create more disk space
67
+ run: sudo rm -rf /usr/share/dotnet && sudo rm -rf /opt/ghc && sudo rm -rf "/usr/local/share/boost" && sudo rm -rf "$AGENT_TOOLSDIRECTORY"
68
+ - uses: actions/checkout@v2
69
+ - name: Build and push
70
+ id: docker_build
71
+ uses: mr-smithers-excellent/docker-build-push@v5
72
+ with:
73
+ username: ${{ secrets.DOCKERHUB_USERNAME }}
74
+ password: ${{ secrets.DOCKERHUB_TOKEN }}
75
+ image: lfoppiano/document-insights-qa
76
+ registry: docker.io
77
+ pushImage: ${{ github.event_name != 'pull_request' }}
78
+ tags: ${{ steps.set_tags.outputs.tags }}
79
+ - name: Image digest
80
+ run: echo ${{ steps.docker_build.outputs.digest }}
CHANGELOG.md ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
6
+
7
+ ## [0.0.1] – 2023-05-13
8
+
9
+ ### Added
10
+
11
+ + Kick off application
12
+ + Support for GPT-3.5
13
+ + Support for Mistral + SentenceTransformer
14
+ + Streamlit application
15
+ + Docker image
16
+ + PyPI package
17
+
18
+ <!-- markdownlint-disable-file MD024 MD033 -->
README.md CHANGED
@@ -43,6 +43,22 @@ Allow to change the number of embedding chunks that are considered for respondin
43
  By default, the mode is set to LLM (Language Model) which enables question/answering. You can directly ask questions related to the document content, and the system will answer the question using content from the document.
44
  If you switch the mode to "Embedding," the system will return specific chunks from the document that are semantically related to your query. This mode helps to test why sometimes the answers are not satisfying or incomplete.
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  ## Acknowledgement
48
 
 
43
  By default, the mode is set to LLM (Language Model) which enables question/answering. You can directly ask questions related to the document content, and the system will answer the question using content from the document.
44
  If you switch the mode to "Embedding," the system will return specific chunks from the document that are semantically related to your query. This mode helps to test why sometimes the answers are not satisfying or incomplete.
45
 
46
+ ## Development notes
47
+
48
+ To release a new version:
49
+
50
+ - `bump-my-version bump patch`
51
+ - `git push --tags`
52
+
53
+ To use docker:
54
+
55
+ - `docker run lfoppiano/document-insights-qa:latest`
56
+
57
+ To install the library from PyPI:
58
+
59
+ - `pip install document-qa-engine`
60
+
61
+
62
 
63
  ## Acknowledgement
64
 
document_qa_engine.py → document_qa/document_qa_engine.py RENAMED
File without changes
grobid_processors.py → document_qa/grobid_processors.py RENAMED
File without changes
pyproject.toml ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools", "setuptools-scm"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [tool.bumpversion]
6
+ current_version = "0.0.1"
7
+ commit = "true"
8
+ tag = "true"
9
+ tag_name = "v{new_version}"
10
+
11
+ #[[tool.bumpversion.files]]
12
+ #filename = "version.txt"
13
+ #search = "{current_version}"
14
+ #replace = "{new_version}"
15
+
16
+ [project]
17
+ name = "document-qa-engine"
18
+ license = { file = "LICENSE" }
19
+ authors = [
20
+ { name = "Luca Foppiano", email = "lucanoro@duck.com" },
21
+ ]
22
+ maintainers = [
23
+ { name = "Luca Foppiano", email = "lucanoro@duck.com" }
24
+ ]
25
+ description = "Scientific Document Insight Q/A"
26
+ readme = "README.md"
27
+
28
+ dynamic = ['version']
29
+
30
+ [tool.setuptools_scm]
31
+
32
+ [project.urls]
33
+ Homepage = "https://document-insights.streamlit.app"
34
+ Repository = "https://github.com/lfoppiano/document-qa"
35
+ Changelog = "https://github.com/lfoppiano/document-qa/blob/main/CHANGELOG.md"
pytest.ini ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ [pytest]
2
+ testpaths = tests
requirements.txt CHANGED
@@ -1,15 +1,13 @@
1
  # Grobid
2
- grobid-quantities-client
3
- grobid-client-python
4
- grobid_tei_xml
5
 
6
  # Utils
7
  tqdm
8
- textdistance[extras]
9
- pyyaml
10
- dateparser
11
  pytest
12
- streamlit
13
  lxml
14
  Beautifulsoup4
15
  python-dotenv
@@ -17,11 +15,12 @@ watchdog
17
 
18
  # LLM
19
  chromadb==0.3.25
20
- tiktoken
21
- openai
22
- langchain==0.0.244
23
- promptlayer
24
- typing-inspect==0.8.0
25
- typing_extensions==4.5.0
26
- pydantic==1.10.8
27
- sentence_transformers
 
 
1
  # Grobid
2
+ grobid-quantities-client==0.4.0
3
+ grobid-client-python==0.0.5
4
+ grobid_tei_xml==0.1.3
5
 
6
  # Utils
7
  tqdm
8
+ pyyaml==6.0
 
 
9
  pytest
10
+ streamlit==1.27.1
11
  lxml
12
  Beautifulsoup4
13
  python-dotenv
 
15
 
16
  # LLM
17
  chromadb==0.3.25
18
+ tiktoken==0.4.0
19
+ openai==0.27.7
20
+ langchain==0.0.314
21
+ promptlayer==0.2.4
22
+ typing-inspect==0.9.0
23
+ typing_extensions==4.8.0
24
+ pydantic==2.4.2
25
+ sentence_transformers==2.2.2
26
+ bump-my-version
streamlit_app.py CHANGED
@@ -13,8 +13,8 @@ import streamlit as st
13
  from langchain.chat_models import PromptLayerChatOpenAI
14
  from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
15
 
16
- from document_qa_engine import DocumentQAEngine
17
- from grobid_processors import GrobidAggregationProcessor, decorate_text_with_annotations
18
  from grobid_client_generic import GrobidClientGeneric
19
 
20
  if 'rqa' not in st.session_state:
 
13
  from langchain.chat_models import PromptLayerChatOpenAI
14
  from langchain.embeddings import OpenAIEmbeddings, HuggingFaceEmbeddings
15
 
16
+ from document_qa.document_qa_engine import DocumentQAEngine
17
+ from document_qa.grobid_processors import GrobidAggregationProcessor, decorate_text_with_annotations
18
  from grobid_client_generic import GrobidClientGeneric
19
 
20
  if 'rqa' not in st.session_state: