iamtatsuki05 committed on
Commit
514c4e1
1 Parent(s): db72a24

Synced repo using 'sync_with_huggingface' Github Action

Browse files
.dockerignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ html/*
2
+ .DS_Store
3
+
4
+ .venv
5
+ *.swp
6
+ .mypy_cache
7
+ .pytest_cache
8
+ .ipynb_checkpoints
9
+ __pycache__
Dockerfile ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM ubuntu:22.04 AS base
2
+
3
+ ARG PYTHON_VERSION=3.10
4
+
5
+ ENV DEBIAN_FRONTEND=noninteractive
6
+ ENV WORKDIR /app/
7
+
8
+ WORKDIR /opt
9
+
10
+ # install dev tools
11
+ RUN apt-get update && apt-get install -y \
12
+ vim neovim nano \
13
+ git git-lfs \
14
+ zip unzip \
15
+ curl wget make build-essential xz-utils file tree \
16
+ sudo \
17
+ dnsutils \
18
+ tzdata language-pack-ja \
19
+ && apt-get clean \
20
+ && rm -rf /var/lib/apt/lists/*
21
+
22
+ # for Japanese settings
23
+ # ENV TZ Asia/Tokyo
24
+ # ENV LANG ja_JP.utf8
25
+
26
+ # for US settings
27
+ ENV LANG en_US.UTF-8
28
+ ENV LANGUAGE en_US
29
+
30
+ # install Python
31
+ RUN apt-get update && apt-get -yV upgrade && DEBIAN_FRONTEND=noninteractive apt-get -yV install \
32
+ build-essential libssl-dev libffi-dev \
33
+ python${PYTHON_VERSION} python${PYTHON_VERSION}-distutils python${PYTHON_VERSION}-dev \
34
+ && ln -s /usr/bin/python${PYTHON_VERSION} /usr/local/bin/python3 \
35
+ && ln -s /usr/bin/python${PYTHON_VERSION} /usr/local/bin/python \
36
+ && apt-get clean \
37
+ && rm -rf /var/lib/apt/lists/*
38
+
39
+ ## install pip
40
+ RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py \
41
+ && python3 get-pip.py \
42
+ && pip3 --no-cache-dir install --upgrade pip
43
+
44
+ ## install Poetry
45
+ RUN curl -sSL https://install.python-poetry.org | python3 -
46
+ ENV PATH $PATH:/root/.local/bin
47
+ RUN poetry config virtualenvs.create true \
48
+ && poetry config virtualenvs.in-project false
49
+
50
+ WORKDIR ${WORKDIR}
51
+
52
# install python packages (runtime dependencies only)
COPY poetry.lock pyproject.toml ./
COPY src ./src
# `--no-dev` was deprecated in Poetry 1.2 and removed in Poetry 2.0. This image
# installs the latest Poetry release and pyproject.toml declares the dev
# dependencies as a dependency group, so exclude that group explicitly.
RUN poetry install --without dev
56
+
57
+ FROM base AS dev
58
+ WORKDIR ${WORKDIR}
59
+
60
+ # install python packages
61
+ COPY poetry.lock pyproject.toml ./
62
+ COPY src ./src
63
+ RUN poetry install
64
+
65
+ # Hugging Face Hub Settings
66
+ CMD ["poetry", "run", "streamlit", "run", "src/app.py", "--server.port", "7860"]
Makefile ADDED
File without changes
compose.yaml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: "3.2"
2
+ services:
3
+ unlock_pdf_password:
4
+ tty: true
5
+ stdin_open: true
6
+ user: root
7
+ working_dir: /app
8
+ build:
9
+ context: .
10
+ dockerfile: docker/cpu/Dockerfile
11
+ target: dev
12
+ # secrets:
13
+ # - github_token
14
+ args:
15
+ progress: plain
16
+ volumes:
17
+ - type: bind
18
+ source: ./
19
+ target: /app
20
+ ports:
21
+ - "8501:8501"
22
+ command:
23
+ poetry run streamlit run src/app.py
24
+ environment:
25
+ PYTHONPATH: "/app/src"
26
+ PYTHONUNBUFFERED: 1
27
+ # secrets:
28
+ # github_token:
29
+ # file: ${HOME}/.git-credentials
docker/cpu/Dockerfile ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM ubuntu:22.04 AS base
2
+
3
+ ARG PYTHON_VERSION=3.10
4
+
5
+ ENV DEBIAN_FRONTEND=noninteractive
6
+ ENV WORKDIR /app/
7
+
8
+ WORKDIR /opt
9
+
10
+ # install dev tools
11
+ RUN apt-get update && apt-get install -y \
12
+ vim neovim nano \
13
+ git git-lfs \
14
+ zip unzip \
15
+ curl wget make build-essential xz-utils file tree \
16
+ sudo \
17
+ dnsutils \
18
+ tzdata language-pack-ja \
19
+ && apt-get clean \
20
+ && rm -rf /var/lib/apt/lists/*
21
+
22
+ # for Japanese settings
23
+ # ENV TZ Asia/Tokyo
24
+ # ENV LANG ja_JP.utf8
25
+
26
+ # for US settings
27
+ ENV LANG en_US.UTF-8
28
+ ENV LANGUAGE en_US
29
+
30
+ # install Python
31
+ RUN apt-get update && apt-get -yV upgrade && DEBIAN_FRONTEND=noninteractive apt-get -yV install \
32
+ build-essential libssl-dev libffi-dev \
33
+ python${PYTHON_VERSION} python${PYTHON_VERSION}-distutils python${PYTHON_VERSION}-dev \
34
+ && ln -s /usr/bin/python${PYTHON_VERSION} /usr/local/bin/python3 \
35
+ && ln -s /usr/bin/python${PYTHON_VERSION} /usr/local/bin/python \
36
+ && apt-get clean \
37
+ && rm -rf /var/lib/apt/lists/*
38
+
39
+ ## install pip
40
+ RUN curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py \
41
+ && python3 get-pip.py \
42
+ && pip3 --no-cache-dir install --upgrade pip
43
+
44
+ ## install Poetry
45
+ RUN curl -sSL https://install.python-poetry.org | python3 -
46
+ ENV PATH $PATH:/root/.local/bin
47
+ RUN poetry config virtualenvs.create true \
48
+ && poetry config virtualenvs.in-project false
49
+
50
+ WORKDIR ${WORKDIR}
51
+
52
# install python packages (runtime dependencies only)
COPY poetry.lock pyproject.toml ./
COPY src ./src
# `--no-dev` was deprecated in Poetry 1.2 and removed in Poetry 2.0. This image
# installs the latest Poetry release and pyproject.toml declares the dev
# dependencies as a dependency group, so exclude that group explicitly.
RUN poetry install --without dev
56
+
57
+ FROM base AS dev
58
+ WORKDIR ${WORKDIR}
59
+
60
+ # install python packages
61
+ COPY poetry.lock pyproject.toml ./
62
+ COPY src ./src
63
+ RUN poetry install
images/app_sample.png ADDED
notebook/helloworld.ipynb ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "18edcb70-64a5-4d17-94c0-a86ecc435be4",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "name": "stdout",
11
+ "output_type": "stream",
12
+ "text": [
13
+ "hello world\n"
14
+ ]
15
+ }
16
+ ],
17
+ "source": [
18
+ "print(\"hello world\")"
19
+ ]
20
+ },
21
+ {
22
+ "cell_type": "code",
23
+ "execution_count": null,
24
+ "id": "b90e3e1f-5aa1-43a0-b9e2-1d1bcaf9ff94",
25
+ "metadata": {},
26
+ "outputs": [],
27
+ "source": []
28
+ }
29
+ ],
30
+ "metadata": {
31
+ "kernelspec": {
32
+ "display_name": "Python 3 (ipykernel)",
33
+ "language": "python",
34
+ "name": "python3"
35
+ },
36
+ "language_info": {
37
+ "codemirror_mode": {
38
+ "name": "ipython",
39
+ "version": 3
40
+ },
41
+ "file_extension": ".py",
42
+ "mimetype": "text/x-python",
43
+ "name": "python",
44
+ "nbconvert_exporter": "python",
45
+ "pygments_lexer": "ipython3",
46
+ "version": "3.9.5"
47
+ }
48
+ },
49
+ "nbformat": 4,
50
+ "nbformat_minor": 5
51
+ }
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "unlock_pdf_password"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["iamtatsuki05 <tatsukio0522@gmail.com>"]
6
+ packages = [
7
+ { include = "unlock_pdf_password", from = "src/" },
8
+ ]
9
+
10
+ [tool.poetry.dependencies]
11
+ python = "^3.10"
12
+ python-dotenv = "^1.0.0"
13
+ setuptools = "^69.0.3"
14
+ fire = "^0.5.0"
15
+ pydantic = "^2.5.3"
16
+ beautifulsoup4 = "^4.12.2"
17
+ selenium = "^4.16.0"
18
+ fastapi = "^0.108.0"
19
+ uvicorn = "^0.25.0"
20
+ matplotlib = "^3.5.1"
21
+ pandas = "^1.4.2"
22
+ seaborn = "^0.11.2"
23
+ japanize-matplotlib = "^1.1.3"
24
+ numpy = "^1.22.3"
25
+ jupyterlab = "^3.3.4"
26
+ tqdm = "^4.64.0"
27
+ scikit-learn = "^1.1.1"
28
+ openpyxl = "^3.1.2"
29
+ pikepdf = "^8.15.1"
30
+ streamlit = "^1.34.0"
31
+
32
+ [tool.poetry.group.dev.dependencies]
33
+ pytest = "^7.0.0"
34
+ ipykernel = ">=6.13.0"
35
+ autopep8 = ">=1.6.0"
36
+ autoflake = ">=1.4"
37
+ flake8 = ">=4.0.1"
38
+ flake8-isort = ">=4.1.1"
39
+ flake8-quotes = ">=3.3.1"
40
+ flake8-print = ">=4.0.0"
41
+ isort = ">=5.10.1"
42
+ black = ">=22.10.0"
43
+ mypy = ">=0.971"
44
+ tox = ">=3.25.1"
45
+ pre-commit = ">=3.3.3"
46
+ nbstripout = "0.6.1"
47
+
48
+ [tool.isort]
49
+ line_length = 88
50
+ multi_line_output = 3
51
+ include_trailing_comma = true
52
+
53
+ [tool.black]
54
+ skip-string-normalization = true
55
+
56
+ [build-system]
57
+ requires = ["poetry-core>=1.0.0"]
58
+ build-backend = "poetry.core.masonry.api"
scripts/unlock_pdf_password.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ from typing import Union, Optional
3
+ from pathlib import Path
4
+
5
+ import fire
6
+
7
+ from unlock_pdf_password.unlock_pdf_password import unlock_pdf_password, unlock_pdf
8
+
9
+ logging.basicConfig(level=logging.INFO)
10
+ logger = logging.getLogger(__name__)
11
+
12
def unlock_pdf_password_wrapper(
    input_pdf_path: Union[str, Path],
    output_pdf_path: Union[str, Path],
    password_length: Optional[int] = None,
    password_start_length: Optional[int] = None,
    password_in_number: Optional[bool] = True,
    password_in_lower: Optional[bool] = True,
    password_in_upper: Optional[bool] = True,
    password_in_sign: Optional[bool] = True,
) -> None:
    """Search for a PDF's password and write an unlocked copy.

    Args:
        input_pdf_path: Path of the password-protected PDF.
        output_pdf_path: Destination of the unlocked PDF. Its parent
            directory is created if it does not exist.
        password_length: Maximum candidate password length.
        password_start_length: Candidate length at which the search starts.
        password_in_number: Include digits in the candidate alphabet.
        password_in_lower: Include lowercase ASCII letters.
        password_in_upper: Include uppercase ASCII letters.
        password_in_sign: Include punctuation characters.
    """
    input_pdf_path = Path(input_pdf_path)
    output_pdf_path = Path(output_pdf_path)
    output_pdf_path.parent.mkdir(parents=True, exist_ok=True)

    password = unlock_pdf_password(
        input_pdf_path=input_pdf_path,
        password_length=password_length,
        password_start_length=password_start_length,
        password_in_number=password_in_number,
        password_in_lower=password_in_lower,
        password_in_upper=password_in_upper,
        password_in_sign=password_in_sign,
    )

    # A falsy result means the search failed (or its arguments were rejected).
    # Report the failure and stop, so the error is only logged when no
    # password was actually found.
    if not password:
        logger.error('Password not found')
        return

    unlock_pdf(
        input_pdf_path=input_pdf_path,
        output_pdf_path=output_pdf_path,
        password=password,
    )
    logger.info(f'PDF unlocked successfully! Password: {password}')
44
+
45
if __name__ == '__main__':
    # Expose the wrapper rather than the bare search function: only the
    # wrapper accepts `output_pdf_path` and writes the unlocked PDF to disk.
    fire.Fire(unlock_pdf_password_wrapper)
src/__init__.py ADDED
File without changes
src/app.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ from pathlib import Path
3
+
4
+ import streamlit as st
5
+
6
+ from unlock_pdf_password.unlock_pdf_password import unlock_pdf, unlock_pdf_password
7
+
8
+
9
def app():
    """Render the Streamlit page that brute-forces and removes a PDF password.

    The uploaded PDF is spooled to a temporary file so it can be opened by
    path, the password search is run with the options chosen in the form,
    and on success the unlocked document is offered as a download.
    """
    import io  # local import: only needed for the in-memory download buffer

    st.title("PDF Unlocker")

    uploaded_file = st.file_uploader("Upload your PDF", type="pdf")
    password_length = st.number_input(
        "Enter maximum password length:", min_value=1, value=4
    )
    password_start_length = st.number_input(
        "Enter starting password length:", min_value=0, value=0
    )
    password_in_number = st.checkbox("Include numbers", True)
    password_in_lower = st.checkbox("Include lowercase letters", True)
    password_in_upper = st.checkbox("Include uppercase letters", True)
    password_in_sign = st.checkbox("Include special characters", True)

    if not st.button("Unlock PDF"):
        return
    if uploaded_file is None:
        st.error("Please upload a PDF file.")
        return

    # delete=False so the file can be reopened by name after this handle is
    # closed; it is removed explicitly in the `finally` block below.
    with tempfile.NamedTemporaryFile(
        delete=False, suffix=".pdf"
    ) as temp_input_file:
        temp_input_file.write(uploaded_file.getvalue())
        input_pdf_path = temp_input_file.name

    try:
        password = unlock_pdf_password(
            input_pdf_path=input_pdf_path,
            password_length=password_length,
            password_start_length=password_start_length,
            password_in_number=password_in_number,
            password_in_lower=password_in_lower,
            password_in_upper=password_in_upper,
            password_in_sign=password_in_sign,
        )
        if not password:
            st.error("Password not found.")
            return

        result = unlock_pdf(
            input_pdf_path=input_pdf_path,
            password=password,
            output_pdf_path=None,
        )
        if not result:
            st.error("Failed to unlock PDF.")
            return

        st.success(f"PDF unlocked successfully! Password: {password}")

        # Serialize into memory instead of a second temp file: nothing is
        # left on disk and no file is reopened by name while still open.
        pdf_buffer = io.BytesIO()
        result.save(pdf_buffer)
        st.download_button(
            label="Download unlocked PDF",
            data=pdf_buffer.getvalue(),
            # Name the download after the uploaded file, not the random
            # temporary file name.
            file_name=f"unlocked_{uploaded_file.name}",
            mime="application/octet-stream",
        )
    finally:
        # Always remove the spooled upload, whatever the outcome.
        Path(input_pdf_path).unlink(missing_ok=True)
68
+
69
+ if __name__ == "__main__":
70
+ app()
src/unlock_pdf_password/__init__.py ADDED
File without changes
src/unlock_pdf_password/unlock_pdf_password.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import itertools
2
+ import logging
3
+ import string
4
+ from pathlib import Path
5
+ from typing import Optional, Union
6
+
7
+ import pikepdf
8
+ from tqdm.auto import tqdm
9
+
10
+ logging.basicConfig(level=logging.INFO)
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
def create_char_set(
    numbers: bool, lowercase: bool, uppercase: bool, special_chars: bool
) -> list[str]:
    """Build the candidate alphabet for the password search.

    Each flag enables one character group. Groups are emitted in a fixed
    order: digits, lowercase letters, uppercase letters, punctuation.

    Returns:
        A list of single-character strings; empty when every flag is off.
    """
    groups = (
        (numbers, string.digits),
        (lowercase, string.ascii_lowercase),
        (uppercase, string.ascii_uppercase),
        (special_chars, string.punctuation),
    )
    alphabet: list[str] = []
    for enabled, group in groups:
        if enabled:
            alphabet += list(group)
    return alphabet
27
+
28
+
29
def find_password(
    input_pdf_path: Union[str, Path],
    chars: list[str],
    password_length: int,
    password_start_length: int,
) -> str:
    """Brute-force the password of a PDF.

    Every combination of ``chars`` is tried, shortest length first, from
    ``password_start_length`` up to and including ``password_length``.

    Args:
        input_pdf_path: Path of the password-protected PDF.
        chars: Candidate alphabet (single-character strings).
        password_length: Maximum candidate length (inclusive).
        password_start_length: Candidate length at which the search starts;
            values below zero are treated as zero.

    Returns:
        The first password that opens the PDF, or ``''`` when none does.
        NOTE: a start length of 0 also tries the empty password; a match on
        it is returned as ``''`` and so looks the same as "not found".
    """
    # itertools.product raises ValueError on a negative repeat, and lengths
    # below zero are meaningless, so begin the search at length 0 at the least.
    first_length = max(password_start_length, 0)

    for length in tqdm(range(first_length, password_length + 1)):
        for candidate in itertools.product(chars, repeat=length):
            password = ''.join(candidate)
            try:
                with pikepdf.open(input_pdf_path, password=password):
                    logger.info(f'Password found: {password}')
                    return password
            except pikepdf.PasswordError:
                # Lazy %-formatting: this runs once per candidate, so skip
                # building the message when debug logging is disabled.
                logger.debug('Tried password: %s - Incorrect', password)
    logger.error('Password not found')
    return ''
46
+
47
+
48
def unlock_pdf(
    input_pdf_path: Union[str, Path],
    password: str,
    output_pdf_path: Optional[Union[str, Path]] = None,
) -> Union[pikepdf.Pdf, bool]:
    """Copy the pages of an encrypted PDF into a new, unencrypted document.

    Args:
        input_pdf_path: Path of the password-protected PDF.
        password: Password used to open ``input_pdf_path``.
        output_pdf_path: When given, the new document is also saved here.

    Returns:
        The new ``pikepdf.Pdf`` holding the copied pages, or ``False`` when
        ``password`` is rejected.
    """
    # NOTE(review): the returned document is handed back after the source PDF
    # has been closed by the `with` block; confirm that pikepdf allows saving
    # it later in that state.
    try:
        with pikepdf.open(input_pdf_path, password=password) as source_pdf:
            decrypted = pikepdf.new()
            decrypted.pages.extend(source_pdf.pages)
            if output_pdf_path is None:
                return decrypted
            decrypted.save(output_pdf_path)
            return decrypted
    except pikepdf.PasswordError:
        return False
62
+
63
+
64
def unlock_pdf_password(
    input_pdf_path: Union[str, Path],
    password_length: int = None,
    password_start_length: int = None,
    password_in_number: Optional[bool] = True,
    password_in_lower: Optional[bool] = True,
    password_in_upper: Optional[bool] = True,
    password_in_sign: Optional[bool] = True,
) -> Union[str, bool]:
    """Search for the password of a PDF using the selected character groups.

    Args:
        input_pdf_path: Path of the password-protected PDF.
        password_length: Maximum candidate length; required.
        password_start_length: Candidate length to start from; required.
        password_in_number: Include digits in the candidate alphabet.
        password_in_lower: Include lowercase ASCII letters.
        password_in_upper: Include uppercase ASCII letters.
        password_in_sign: Include punctuation characters.

    Returns:
        ``False`` when either length argument is missing; otherwise whatever
        ``find_password`` returns (the password, or ``''`` when not found).
    """
    input_pdf_path = Path(input_pdf_path)

    # Both bounds are mandatory; bail out before building the alphabet.
    lengths_missing = password_length is None or password_start_length is None
    if lengths_missing:
        logger.error('Password length and start length must be specified')
        return False

    candidate_chars = create_char_set(
        numbers=password_in_number,
        lowercase=password_in_lower,
        uppercase=password_in_upper,
        special_chars=password_in_sign,
    )
    return find_password(
        input_pdf_path=input_pdf_path,
        chars=candidate_chars,
        password_length=password_length,
        password_start_length=password_start_length,
    )