update
Browse files- Dockerfile +1 -0
- docker_as_a_service/docker_as_a_service.py +54 -9
- docker_as_a_service/experimental_mods/config.json +6 -0
- docker_as_a_service/experimental_mods/docker_as_a_service.py +151 -0
- docker_as_a_service/experimental_mods/docker_to_api copy.py +97 -0
- docker_as_a_service/experimental_mods/get_bilibili_resource copy.py +37 -0
- docker_as_a_service/experimental_mods/get_bilibili_resource.py +34 -0
- docker_as_a_service/experimental_mods/get_search_kw_api_stop.py +138 -0
- docker_as_a_service/experimental_mods/test_docker_to_api.py +69 -0
- docker_as_a_service/shared_utils/docker_as_service_api.py +2 -2
Dockerfile
CHANGED
@@ -5,6 +5,7 @@ RUN apt update && apt-get install -y python3 python3-dev python3-pip
|
|
5 |
RUN python3 -m pip install fastapi pydantic loguru --break-system-packages
|
6 |
RUN python3 -m pip install requests python-multipart --break-system-packages
|
7 |
RUN python3 -m pip install uvicorn --break-system-packages
|
|
|
8 |
|
9 |
# 为了让user用户可以访问/root目录
|
10 |
RUN useradd -m -u 1000 user
|
|
|
5 |
RUN python3 -m pip install fastapi pydantic loguru --break-system-packages
|
6 |
RUN python3 -m pip install requests python-multipart --break-system-packages
|
7 |
RUN python3 -m pip install uvicorn --break-system-packages
|
8 |
+
RUN python3 -m pip install tenacity --break-system-packages
|
9 |
|
10 |
# 为了让user用户可以访问/root目录
|
11 |
RUN useradd -m -u 1000 user
|
docker_as_a_service/docker_as_a_service.py
CHANGED
@@ -51,14 +51,19 @@ async def stream_generator(request_obj):
|
|
51 |
# Get list of existing files before download
|
52 |
existing_file_before_download = []
|
53 |
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
|
|
|
|
|
|
|
|
|
|
62 |
cmd = ' '.join(cmd)
|
63 |
yield yield_message(cmd)
|
64 |
process = subprocess.Popen(cmd,
|
@@ -125,6 +130,12 @@ async def stream_generator(request_obj):
|
|
125 |
yield python_obj_to_pickle_file_bytes(dsacm)
|
126 |
|
127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
128 |
@app.post("/stream")
|
129 |
async def stream_response(file: UploadFile = File(...)):
|
130 |
# read the file in memory, treat it as pickle file, and unpickle it
|
@@ -134,12 +145,46 @@ async def stream_response(file: UploadFile = File(...)):
|
|
134 |
with io.BytesIO(content) as f:
|
135 |
request_obj = pickle.load(f)
|
136 |
# process the request_obj
|
|
|
137 |
return StreamingResponse(stream_generator(request_obj), media_type="application/octet-stream")
|
138 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
139 |
@app.get("/")
|
140 |
async def hi():
|
141 |
return "Hello, this is Docker as a Service (DaaS)!"
|
142 |
-
|
143 |
if __name__ == "__main__":
|
144 |
import uvicorn
|
145 |
uvicorn.run(app, host="0.0.0.0", port=49000)
|
|
|
51 |
# Get list of existing files before download
|
52 |
existing_file_before_download = []
|
53 |
|
54 |
+
if request_obj.endpoint == "down_bilibili":
|
55 |
+
video_id = request_obj.client_command
|
56 |
+
cmd = [
|
57 |
+
'/root/.dotnet/tools/BBDown',
|
58 |
+
video_id,
|
59 |
+
'--use-app-api',
|
60 |
+
'--work-dir',
|
61 |
+
f'{os.path.abspath(temp_dir)}'
|
62 |
+
]
|
63 |
+
else:
|
64 |
+
assert False, "Invalid endpoint"
|
65 |
+
|
66 |
+
|
67 |
cmd = ' '.join(cmd)
|
68 |
yield yield_message(cmd)
|
69 |
process = subprocess.Popen(cmd,
|
|
|
130 |
yield python_obj_to_pickle_file_bytes(dsacm)
|
131 |
|
132 |
|
133 |
+
def simple_generator(return_obj):
|
134 |
+
dsacm = DockerServiceApiComModel(
|
135 |
+
server_message=return_obj,
|
136 |
+
)
|
137 |
+
yield python_obj_to_pickle_file_bytes(dsacm)
|
138 |
+
|
139 |
@app.post("/stream")
|
140 |
async def stream_response(file: UploadFile = File(...)):
|
141 |
# read the file in memory, treat it as pickle file, and unpickle it
|
|
|
145 |
with io.BytesIO(content) as f:
|
146 |
request_obj = pickle.load(f)
|
147 |
# process the request_obj
|
148 |
+
request_obj.endpoint = "down_bilibili"
|
149 |
return StreamingResponse(stream_generator(request_obj), media_type="application/octet-stream")
|
150 |
|
151 |
+
@app.post("/search")
|
152 |
+
async def stream_response(file: UploadFile = File(...)):
|
153 |
+
# read the file in memory, treat it as pickle file, and unpickle it
|
154 |
+
import pickle
|
155 |
+
import io
|
156 |
+
content = await file.read()
|
157 |
+
with io.BytesIO(content) as f:
|
158 |
+
request_obj = pickle.load(f)
|
159 |
+
|
160 |
+
# process the request_obj
|
161 |
+
keyword = request_obj.client_command
|
162 |
+
|
163 |
+
from experimental_mods.get_search_kw_api_stop import search_videos
|
164 |
+
# Default parameters for video search
|
165 |
+
csrf_token = '40a227fcf12c380d7d3c81af2cd8c5e8' # Using default from main()
|
166 |
+
search_type = 'default'
|
167 |
+
max_pages = 1
|
168 |
+
output_path = 'search_results'
|
169 |
+
config_path = 'experimental_mods/config.json'
|
170 |
+
|
171 |
+
# Search for videos and return the first result
|
172 |
+
videos = search_videos(
|
173 |
+
keyword=keyword,
|
174 |
+
csrf_token=csrf_token,
|
175 |
+
search_type=search_type,
|
176 |
+
max_pages=max_pages,
|
177 |
+
output_path=output_path,
|
178 |
+
config_path=config_path,
|
179 |
+
early_stop=True
|
180 |
+
)
|
181 |
+
|
182 |
+
return StreamingResponse(simple_generator(videos), media_type="application/octet-stream")
|
183 |
+
|
184 |
@app.get("/")
|
185 |
async def hi():
|
186 |
return "Hello, this is Docker as a Service (DaaS)!"
|
187 |
+
|
188 |
if __name__ == "__main__":
|
189 |
import uvicorn
|
190 |
uvicorn.run(app, host="0.0.0.0", port=49000)
|
docker_as_a_service/experimental_mods/config.json
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
|
3 |
+
"cookie": "buvid3=902A16D7-B19F-790B-FF85-197C38F3227462114infoc; b_nut=1731952262; b_lsid=10DF102ED6_19340664283; _uuid=84EEC61010-CE55-DAC4-68C10-CFA6108F89C8963816infoc; buvid_fp=b2f71cc1058da966a62a2caf13596b1f; buvid4=0C27B28C-406E-B88B-D232-51167891712B62902-024111817-wg%2Bfug1OO8Jl5lXoeCp0dw%3D%3D; enable_web_push=DISABLE; home_feed_column=4; browser_resolution=1313-699; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MzIyMTE0NjMsImlhdCI6MTczMTk1MjIwMywicGx0IjotMX0.xKiEBdcpGFZy7Qv2wCExcBoRK-LGtvv_wvmCbuDoCN8; bili_ticket_expires=1732211403; CURRENT_FNVAL=4048; sid=5h1fpj7o; rpdid=|(k|kmJk)Y|u0J'u~JumlkkY)"
|
4 |
+
}
|
5 |
+
|
6 |
+
|
docker_as_a_service/experimental_mods/docker_as_a_service.py
ADDED
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
DaaS (Docker as a Service) is a service
|
3 |
+
that allows users to run docker commands on the server side.
|
4 |
+
"""
|
5 |
+
|
6 |
+
from fastapi import FastAPI
|
7 |
+
from fastapi.responses import StreamingResponse
|
8 |
+
from fastapi import FastAPI, File, UploadFile, HTTPException
|
9 |
+
from pydantic import BaseModel, Field
|
10 |
+
from typing import Optional, Dict
|
11 |
+
import time
|
12 |
+
import os
|
13 |
+
import asyncio
|
14 |
+
import subprocess
|
15 |
+
import uuid
|
16 |
+
import threading
|
17 |
+
import queue
|
18 |
+
|
19 |
+
app = FastAPI()
|
20 |
+
|
21 |
+
class DockerServiceApiComModel(BaseModel):
|
22 |
+
client_command: Optional[str] = Field(default=None, title="Client command", description="The command to be executed on the client side")
|
23 |
+
client_file_attach: Optional[dict] = Field(default=None, title="Client file attach", description="The file to be attached to the client side")
|
24 |
+
server_message: Optional[str] = Field(default=None, title="Server standard error", description="The standard error from the server side")
|
25 |
+
server_std_err: Optional[str] = Field(default=None, title="Server standard error", description="The standard error from the server side")
|
26 |
+
server_std_out: Optional[str] = Field(default=None, title="Server standard output", description="The standard output from the server side")
|
27 |
+
server_file_attach: Optional[dict] = Field(default=None, title="Server file attach", description="The file to be attached to the server side")
|
28 |
+
|
29 |
+
|
30 |
+
def python_obj_to_pickle_file_bytes(obj):
|
31 |
+
import pickle
|
32 |
+
import io
|
33 |
+
with io.BytesIO() as f:
|
34 |
+
pickle.dump(obj, f)
|
35 |
+
return f.getvalue()
|
36 |
+
|
37 |
+
def yield_message(message):
|
38 |
+
dsacm = DockerServiceApiComModel(server_message=message)
|
39 |
+
return python_obj_to_pickle_file_bytes(dsacm)
|
40 |
+
|
41 |
+
def read_output(stream, output_queue):
|
42 |
+
while True:
|
43 |
+
line_stdout = stream.readline()
|
44 |
+
print('recv')
|
45 |
+
if line_stdout:
|
46 |
+
output_queue.put(line_stdout)
|
47 |
+
else:
|
48 |
+
break
|
49 |
+
|
50 |
+
|
51 |
+
async def stream_generator(request_obj):
|
52 |
+
import tempfile
|
53 |
+
# Create a temporary directory
|
54 |
+
with tempfile.TemporaryDirectory() as temp_dir:
|
55 |
+
|
56 |
+
# Construct the docker command
|
57 |
+
download_folder = temp_dir
|
58 |
+
|
59 |
+
# Get list of existing files before download
|
60 |
+
existing_file_before_download = []
|
61 |
+
|
62 |
+
video_id = request_obj.client_command
|
63 |
+
cmd = [
|
64 |
+
# 'docker', 'run', '--rm',
|
65 |
+
# '-v', f'{download_folder}:/downloads',
|
66 |
+
# 'bbdown',
|
67 |
+
# video_id,
|
68 |
+
# '--use-app-api',
|
69 |
+
# '--work-dir', '/downloads'
|
70 |
+
"while true; do date; sleep 1; done"
|
71 |
+
]
|
72 |
+
cmd = ' '.join(cmd)
|
73 |
+
yield yield_message(cmd)
|
74 |
+
process = subprocess.Popen(cmd,
|
75 |
+
stdout=subprocess.PIPE,
|
76 |
+
stderr=subprocess.PIPE,
|
77 |
+
shell=True,
|
78 |
+
text=True)
|
79 |
+
|
80 |
+
stdout_queue = queue.Queue()
|
81 |
+
thread = threading.Thread(target=read_output, args=(process.stdout, stdout_queue))
|
82 |
+
thread.daemon = True
|
83 |
+
thread.start()
|
84 |
+
stderr_queue = queue.Queue()
|
85 |
+
thread = threading.Thread(target=read_output, args=(process.stderr, stderr_queue))
|
86 |
+
thread.daemon = True
|
87 |
+
thread.start()
|
88 |
+
|
89 |
+
while True:
|
90 |
+
print("looping")
|
91 |
+
# Check if there is any output in the queue
|
92 |
+
try:
|
93 |
+
output_stdout = stdout_queue.get_nowait() # Non-blocking get
|
94 |
+
if output_stdout:
|
95 |
+
print(output_stdout)
|
96 |
+
yield yield_message(output_stdout)
|
97 |
+
|
98 |
+
output_stderr = stderr_queue.get_nowait() # Non-blocking get
|
99 |
+
if output_stderr:
|
100 |
+
print(output_stdout)
|
101 |
+
yield yield_message(output_stderr)
|
102 |
+
except queue.Empty:
|
103 |
+
pass # No output available
|
104 |
+
|
105 |
+
# Break the loop if the process has finished
|
106 |
+
if process.poll() is not None:
|
107 |
+
break
|
108 |
+
|
109 |
+
await asyncio.sleep(0.25)
|
110 |
+
|
111 |
+
# Get the return code
|
112 |
+
return_code = process.returncode
|
113 |
+
yield yield_message("(return code:) " + str(return_code))
|
114 |
+
|
115 |
+
# print(f"Successfully downloaded video {video_id}")
|
116 |
+
existing_file_after_download = list(os.listdir(download_folder))
|
117 |
+
# get the difference
|
118 |
+
downloaded_files = [
|
119 |
+
f for f in existing_file_after_download if f not in existing_file_before_download
|
120 |
+
]
|
121 |
+
downloaded_files_path = [
|
122 |
+
os.path.join(download_folder, f) for f in existing_file_after_download if f not in existing_file_before_download
|
123 |
+
]
|
124 |
+
# read file
|
125 |
+
server_file_attach = {}
|
126 |
+
for fp, fn in zip(downloaded_files_path, downloaded_files):
|
127 |
+
with open(fp, "rb") as f:
|
128 |
+
file_bytes = f.read()
|
129 |
+
server_file_attach[fn] = file_bytes
|
130 |
+
|
131 |
+
dsacm = DockerServiceApiComModel(
|
132 |
+
server_message="complete",
|
133 |
+
server_file_attach=server_file_attach,
|
134 |
+
)
|
135 |
+
yield python_obj_to_pickle_file_bytes(dsacm)
|
136 |
+
|
137 |
+
|
138 |
+
@app.post("/stream")
|
139 |
+
async def stream_response(file: UploadFile = File(...)):
|
140 |
+
# read the file in memory, treat it as pickle file, and unpickle it
|
141 |
+
import pickle
|
142 |
+
import io
|
143 |
+
content = await file.read()
|
144 |
+
with io.BytesIO(content) as f:
|
145 |
+
request_obj = pickle.load(f)
|
146 |
+
# process the request_obj
|
147 |
+
return StreamingResponse(stream_generator(request_obj), media_type="application/octet-stream")
|
148 |
+
|
149 |
+
if __name__ == "__main__":
|
150 |
+
import uvicorn
|
151 |
+
uvicorn.run(app, host="127.0.0.1", port=48000)
|
docker_as_a_service/experimental_mods/docker_to_api copy.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import pickle
|
3 |
+
from fastapi import FastAPI, File, UploadFile, HTTPException
|
4 |
+
from fastapi.responses import StreamingResponse
|
5 |
+
from io import BytesIO
|
6 |
+
import subprocess
|
7 |
+
|
8 |
+
app = FastAPI()
|
9 |
+
|
10 |
+
@app.post("/container_task")
|
11 |
+
async def container_task(file: UploadFile = File(...)):
|
12 |
+
# Save the uploaded file to disk
|
13 |
+
input_filepath = "input.pkl"
|
14 |
+
with open(input_filepath, "wb") as f:
|
15 |
+
f.write(await file.read())
|
16 |
+
|
17 |
+
# Process the unpickle_param from the file
|
18 |
+
try:
|
19 |
+
with open(input_filepath, 'rb') as f:
|
20 |
+
unpickle_param = pickle.load(f)
|
21 |
+
except Exception as e:
|
22 |
+
raise HTTPException(status_code=400, detail=f"Failed to unpickle the input file: {str(e)}")
|
23 |
+
|
24 |
+
# Execute the Docker command
|
25 |
+
command = ["docker", "run", "--rm", "bbdown", str(unpickle_param)]
|
26 |
+
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
|
27 |
+
|
28 |
+
# Stream the output of the command
|
29 |
+
stdout_stream = BytesIO()
|
30 |
+
stderr_stream = BytesIO()
|
31 |
+
|
32 |
+
# Create a generator to stream output
|
33 |
+
def stream_output():
|
34 |
+
while True:
|
35 |
+
output = process.stdout.readline()
|
36 |
+
if output == b"" and process.poll() is not None:
|
37 |
+
break
|
38 |
+
if output:
|
39 |
+
stdout_stream.write(output)
|
40 |
+
|
41 |
+
stderr_output = process.stderr.read()
|
42 |
+
stderr_stream.write(stderr_output)
|
43 |
+
yield ""
|
44 |
+
|
45 |
+
# Return the StreamingResponse for the current output
|
46 |
+
async def response_stream():
|
47 |
+
for _ in stream_output():
|
48 |
+
yield stdout_stream.getvalue()
|
49 |
+
stdout_stream.seek(0) # Rewind for next read
|
50 |
+
stdout_stream.truncate() # Clear for next fill
|
51 |
+
|
52 |
+
# Run the process and wait for completion
|
53 |
+
process.wait()
|
54 |
+
|
55 |
+
# Check for errors
|
56 |
+
if process.returncode != 0:
|
57 |
+
raise HTTPException(status_code=500, detail=f"Docker command failed with error: {stderr_stream.getvalue().decode()}")
|
58 |
+
|
59 |
+
# Create a new pickle file as output
|
60 |
+
output_filepath = "output.pkl"
|
61 |
+
with open(output_filepath, 'wb') as f:
|
62 |
+
f.write(b"Your output data here.") # Replace this with actual output data
|
63 |
+
|
64 |
+
# Return the output file
|
65 |
+
return StreamingResponse(open(output_filepath, "rb"), media_type='application/octet-stream',
|
66 |
+
headers={"Content-Disposition": f"attachment; filename={os.path.basename(output_filepath)}"})
|
67 |
+
|
68 |
+
# To run the application, use: uvicorn your_file_name:app --reload
|
69 |
+
from fastapi import FastAPI
|
70 |
+
from fastapi.responses import StreamingResponse
|
71 |
+
import time
|
72 |
+
import asyncio
|
73 |
+
|
74 |
+
app = FastAPI()
|
75 |
+
|
76 |
+
async def stream_generator():
|
77 |
+
for i in range(10):
|
78 |
+
yield f"Data chunk {i}\n"
|
79 |
+
await asyncio.sleep(1) # Simulating some delay
|
80 |
+
|
81 |
+
@app.get("/stream")
|
82 |
+
async def stream_response():
|
83 |
+
return StreamingResponse(stream_generator(), media_type="text/plain")
|
84 |
+
|
85 |
+
if __name__ == "__main__":
|
86 |
+
import uvicorn
|
87 |
+
uvicorn.run(app, host="127.0.0.1", port=8000)
|
88 |
+
|
89 |
+
|
90 |
+
def client_call(*args, **kwargs):
|
91 |
+
|
92 |
+
result = execute(*args, **kwargs)
|
93 |
+
|
94 |
+
result.text
|
95 |
+
result.file_manifest
|
96 |
+
result.files
|
97 |
+
|
docker_as_a_service/experimental_mods/get_bilibili_resource copy.py
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from toolbox import update_ui, get_conf, promote_file_to_downloadzone, update_ui_lastest_msg, generate_file_link
|
2 |
+
|
3 |
+
|
4 |
+
def download_bilibili(video_id, only_audio, user_name, chatbot, history):
|
5 |
+
# run : docker run --rm -v $(pwd)/downloads:/downloads bbdown BV1LSSHYXEtv --use-app-api --work-dir /downloads
|
6 |
+
import os
|
7 |
+
import subprocess
|
8 |
+
from toolbox import get_log_folder
|
9 |
+
|
10 |
+
download_folder_rel = get_log_folder(user=user_name, plugin_name="shared")
|
11 |
+
download_folder = os.path.abspath(download_folder_rel)
|
12 |
+
|
13 |
+
# Get list of existing files before download
|
14 |
+
existing_file_before_download = list(os.listdir(download_folder))
|
15 |
+
|
16 |
+
# Construct the docker command
|
17 |
+
cmd = [
|
18 |
+
'docker', 'run', '--rm',
|
19 |
+
'-v', f'{download_folder}:/downloads',
|
20 |
+
'bbdown',
|
21 |
+
video_id,
|
22 |
+
'--use-app-api',
|
23 |
+
'--work-dir', '/downloads'
|
24 |
+
]
|
25 |
+
if only_audio:
|
26 |
+
cmd.append('--audio-only')
|
27 |
+
|
28 |
+
|
29 |
+
# Execute the command
|
30 |
+
result = subprocess.run(cmd, check=True, capture_output=True, text=True)
|
31 |
+
# print(f"Successfully downloaded video {video_id}")
|
32 |
+
existing_file_after_download = list(os.listdir(download_folder))
|
33 |
+
# get the difference
|
34 |
+
downloaded_files = [os.path.join(download_folder_rel, f) for f in existing_file_after_download if f not in existing_file_before_download]
|
35 |
+
|
36 |
+
return downloaded_files
|
37 |
+
|
docker_as_a_service/experimental_mods/get_bilibili_resource.py
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from toolbox import update_ui, get_conf, promote_file_to_downloadzone, update_ui_lastest_msg, generate_file_link
|
2 |
+
from shared_utils.docker_as_service_api import stream_daas
|
3 |
+
from shared_utils.docker_as_service_api import DockerServiceApiComModel
|
4 |
+
|
5 |
+
def download_bilibili(video_id, only_audio, user_name, chatbot, history):
|
6 |
+
# run : docker run --rm -v $(pwd)/downloads:/downloads bbdown BV1LSSHYXEtv --use-app-api --work-dir /downloads
|
7 |
+
import os
|
8 |
+
import subprocess
|
9 |
+
from toolbox import get_log_folder
|
10 |
+
|
11 |
+
chatbot.append([None, "Processing..."])
|
12 |
+
yield from update_ui(chatbot, history)
|
13 |
+
|
14 |
+
client_command = f'{video_id} --audio-only' if only_audio else video_id
|
15 |
+
server_url = get_conf('DAAS_SERVER_URL')
|
16 |
+
docker_service_api_com_model = DockerServiceApiComModel(client_command=client_command)
|
17 |
+
save_file_dir = get_log_folder(user_name, plugin_name='media_downloader')
|
18 |
+
for output_manifest in stream_daas(docker_service_api_com_model, server_url, save_file_dir):
|
19 |
+
status_buf = ""
|
20 |
+
status_buf += "DaaS message: \n\n"
|
21 |
+
status_buf += output_manifest['server_message'].replace('\n', '<br/>')
|
22 |
+
status_buf += "\n\n"
|
23 |
+
status_buf += "DaaS standard error: \n\n"
|
24 |
+
status_buf += output_manifest['server_std_err'].replace('\n', '<br/>')
|
25 |
+
status_buf += "\n\n"
|
26 |
+
status_buf += "DaaS standard output: \n\n"
|
27 |
+
status_buf += output_manifest['server_std_out'].replace('\n', '<br/>')
|
28 |
+
status_buf += "\n\n"
|
29 |
+
status_buf += "DaaS file attach: \n\n"
|
30 |
+
status_buf += str(output_manifest['server_file_attach'])
|
31 |
+
yield from update_ui_lastest_msg(status_buf, chatbot, history)
|
32 |
+
|
33 |
+
return output_manifest['server_file_attach']
|
34 |
+
|
docker_as_a_service/experimental_mods/get_search_kw_api_stop.py
ADDED
@@ -0,0 +1,138 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import re
|
3 |
+
import time
|
4 |
+
import json
|
5 |
+
import random
|
6 |
+
import requests
|
7 |
+
import argparse
|
8 |
+
from loguru import logger
|
9 |
+
from datetime import datetime, timezone, timedelta
|
10 |
+
from typing import List, Dict
|
11 |
+
from tenacity import retry, stop_after_attempt, wait_random
|
12 |
+
|
13 |
+
def update_and_save_data(new_data: List[Dict], filename: str):
|
14 |
+
if os.path.exists(filename):
|
15 |
+
with open(filename, 'r', encoding='utf-8') as f:
|
16 |
+
existing_data = json.load(f)
|
17 |
+
else:
|
18 |
+
existing_data = []
|
19 |
+
|
20 |
+
existing_bvids = set(video['bvid'] for video in existing_data)
|
21 |
+
|
22 |
+
for video in new_data:
|
23 |
+
if video['bvid'] not in existing_bvids:
|
24 |
+
existing_data.append(video)
|
25 |
+
existing_bvids.add(video['bvid'])
|
26 |
+
|
27 |
+
with open(filename, 'w', encoding='utf-8') as f:
|
28 |
+
json.dump(existing_data, f, ensure_ascii=False, indent=4)
|
29 |
+
|
30 |
+
print(f"数据已更新并保存到 {filename}")
|
31 |
+
return existing_data
|
32 |
+
|
33 |
+
def extract_and_combine(text):
|
34 |
+
match = re.search(r'(.*?)<em class="keyword">(.*?)</em>(.*)', text)
|
35 |
+
if match:
|
36 |
+
combined = match.group(1) + match.group(2) + match.group(3)
|
37 |
+
return combined
|
38 |
+
return text
|
39 |
+
|
40 |
+
def convert_timestamp_to_beijing_time(timestamp):
|
41 |
+
utc_time = datetime.fromtimestamp(timestamp, timezone.utc)
|
42 |
+
beijing_time = utc_time + timedelta(hours=8)
|
43 |
+
return beijing_time.strftime('%Y-%m-%d %H:%M:%S')
|
44 |
+
|
45 |
+
def load_headers(config_path):
|
46 |
+
with open(config_path, 'r', encoding='utf-8') as f:
|
47 |
+
config = json.load(f)
|
48 |
+
print(f"已从 {config_path} 加载配置,请求头为:{config}")
|
49 |
+
return config
|
50 |
+
|
51 |
+
|
52 |
+
@retry(stop=stop_after_attempt(3), wait=wait_random(min=1, max=3))
|
53 |
+
def make_api_request(url, headers):
|
54 |
+
response = requests.get(url=url, headers=headers)
|
55 |
+
response.raise_for_status()
|
56 |
+
return response.json()
|
57 |
+
|
58 |
+
def search_videos(keyword, csrf_token, search_type, max_pages=5, output_path=None, config_path='config.json', early_stop=False):
|
59 |
+
url_template = "https://api.bilibili.com/x/web-interface/search/type?search_type=video&keyword={keyword}&page={page}&order={search_type}&duration=0&tids=0"
|
60 |
+
headers = load_headers(config_path)
|
61 |
+
videos = []
|
62 |
+
existing_bvids = set()
|
63 |
+
|
64 |
+
if early_stop and output_path:
|
65 |
+
output_file = f"search_results_{keyword.replace(' ', '_')}_{search_type}.json"
|
66 |
+
file_path = os.path.join(output_path, output_file)
|
67 |
+
if os.path.exists(file_path):
|
68 |
+
with open(file_path, 'r', encoding='utf-8') as f:
|
69 |
+
existing_data = json.load(f)
|
70 |
+
existing_bvids = set(video['bvid'] for video in existing_data)
|
71 |
+
|
72 |
+
for page in range(1, max_pages + 1):
|
73 |
+
url = url_template.format(keyword=keyword, page=page, search_type=search_type)
|
74 |
+
try:
|
75 |
+
data = make_api_request(url, headers)
|
76 |
+
|
77 |
+
if data['code'] != 0:
|
78 |
+
logger.error(f"Error fetching page {page}: {data['message']}")
|
79 |
+
break
|
80 |
+
|
81 |
+
if 'result' not in data['data']:
|
82 |
+
logger.info(f"No more results found on page {page}")
|
83 |
+
break
|
84 |
+
|
85 |
+
result = data['data']['result']
|
86 |
+
|
87 |
+
if not result:
|
88 |
+
logger.info(f"No more results found on page {page}")
|
89 |
+
break
|
90 |
+
|
91 |
+
new_videos = []
|
92 |
+
for video in result:
|
93 |
+
video_data = {
|
94 |
+
'title': extract_and_combine(video['title']),
|
95 |
+
'author': video['author'],
|
96 |
+
'author_id': video['mid'],
|
97 |
+
'bvid': video['bvid'],
|
98 |
+
'播放量': video['play'],
|
99 |
+
'弹幕': video['danmaku'],
|
100 |
+
'评论': video['review'],
|
101 |
+
'点赞': video['favorites'],
|
102 |
+
'发布时间': convert_timestamp_to_beijing_time(video['pubdate']),
|
103 |
+
'视频时长': video['duration'],
|
104 |
+
'tag': video['tag'],
|
105 |
+
'description': video['description']
|
106 |
+
}
|
107 |
+
new_videos.append(video_data)
|
108 |
+
|
109 |
+
new_bvids = set(video['bvid'] for video in new_videos)
|
110 |
+
duplicate_count = len(new_bvids.intersection(existing_bvids))
|
111 |
+
logger.info(f"Page {page}: {duplicate_count} out of {len(new_videos)} videos already exist in the dataset.")
|
112 |
+
|
113 |
+
videos.extend(new_videos)
|
114 |
+
logger.info(f"Collected {len(videos)} videos from {page} pages")
|
115 |
+
time.sleep(random.uniform(1, 3)) # Random delay between 1 and 3 seconds
|
116 |
+
|
117 |
+
except Exception as e:
|
118 |
+
logger.error(f"Error on page {page}: {str(e)}")
|
119 |
+
break
|
120 |
+
|
121 |
+
return videos
|
122 |
+
|
123 |
+
def main():
|
124 |
+
parser = argparse.ArgumentParser(description="Search for videos on Bilibili")
|
125 |
+
parser.add_argument("--keyword", default='天文馆的猫', help="Search keyword")
|
126 |
+
parser.add_argument("--csrf_token", default='40a227fcf12c380d7d3c81af2cd8c5e8', help="CSRF token for authentication")
|
127 |
+
parser.add_argument("--search_type", default='default', choices=['pubdate', 'default', 'stow', 'dm', 'click'], help="Search order type")
|
128 |
+
parser.add_argument("--max_pages", default=1, type=int, help="Maximum number of pages to fetch")
|
129 |
+
parser.add_argument("--output_path", default='search_results', help="Output directory for search results")
|
130 |
+
parser.add_argument("--interval", default=1, type=int, help="Interval in hours between searches")
|
131 |
+
parser.add_argument("--early_stop", default=True, help="Enable early stopping if all videos on a page already exist in the dataset")
|
132 |
+
args = parser.parse_args()
|
133 |
+
|
134 |
+
videos = search_videos(args.keyword, args.csrf_token, args.search_type, args.max_pages, args.output_path, early_stop=args.early_stop)
|
135 |
+
print(videos)
|
136 |
+
|
137 |
+
if __name__ == "__main__":
|
138 |
+
main()
|
docker_as_a_service/experimental_mods/test_docker_to_api.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import pickle
|
3 |
+
import io
|
4 |
+
import os
|
5 |
+
from pydantic import BaseModel, Field
|
6 |
+
from typing import Optional, Dict
|
7 |
+
|
8 |
+
class DockerServiceApiComModel(BaseModel):
|
9 |
+
client_command: Optional[str] = Field(default=None, title="Client command", description="The command to be executed on the client side")
|
10 |
+
client_file_attach: Optional[dict] = Field(default=None, title="Client file attach", description="The file to be attached to the client side")
|
11 |
+
server_message: Optional[str] = Field(default=None, title="Server standard error", description="The standard error from the server side")
|
12 |
+
server_std_err: Optional[str] = Field(default=None, title="Server standard error", description="The standard error from the server side")
|
13 |
+
server_std_out: Optional[str] = Field(default=None, title="Server standard output", description="The standard output from the server side")
|
14 |
+
server_file_attach: Optional[dict] = Field(default=None, title="Server file attach", description="The file to be attached to the server side")
|
15 |
+
|
16 |
+
def process_received(received: DockerServiceApiComModel, save_file_dir="./daas_output"):
|
17 |
+
# Process the received data
|
18 |
+
if received.server_message:
|
19 |
+
print(f"Recv message: {received.server_message}")
|
20 |
+
if received.server_std_err:
|
21 |
+
print(f"Recv standard error: {received.server_std_err}")
|
22 |
+
if received.server_std_out:
|
23 |
+
print(f"Recv standard output: {received.server_std_out}")
|
24 |
+
if received.server_file_attach:
|
25 |
+
# print(f"Recv file attach: {received.server_file_attach}")
|
26 |
+
for file_name, file_content in received.server_file_attach.items():
|
27 |
+
new_fp = os.path.join(save_file_dir, file_name)
|
28 |
+
new_fp_dir = os.path.dirname(new_fp)
|
29 |
+
if not os.path.exists(new_fp_dir):
|
30 |
+
os.makedirs(new_fp_dir, exist_ok=True)
|
31 |
+
with open(new_fp, 'wb') as f:
|
32 |
+
f.write(file_content)
|
33 |
+
print(f"Saved file attach to {save_file_dir}")
|
34 |
+
|
35 |
+
def send_file_and_stream_response(docker_service_api_com_model, server_url):
|
36 |
+
# Prepare the file
|
37 |
+
# Pickle the object
|
38 |
+
pickled_data = pickle.dumps(docker_service_api_com_model)
|
39 |
+
|
40 |
+
# Create a file-like object from the pickled data
|
41 |
+
file_obj = io.BytesIO(pickled_data)
|
42 |
+
|
43 |
+
# Prepare the file for sending
|
44 |
+
files = {'file': ('docker_service_api_com_model.pkl', file_obj, 'application/octet-stream')}
|
45 |
+
|
46 |
+
# Send the POST request
|
47 |
+
response = requests.post(server_url, files=files, stream=True)
|
48 |
+
|
49 |
+
max_full_package_size = 1024 * 1024 * 1024 * 1 # 1 GB
|
50 |
+
|
51 |
+
# Check if the request was successful
|
52 |
+
if response.status_code == 200:
|
53 |
+
# Process the streaming response
|
54 |
+
for chunk in response.iter_content(max_full_package_size):
|
55 |
+
if chunk:
|
56 |
+
received = pickle.loads(chunk)
|
57 |
+
process_received(received)
|
58 |
+
|
59 |
+
else:
|
60 |
+
print(f"Error: Received status code {response.status_code}")
|
61 |
+
print(response.text)
|
62 |
+
|
63 |
+
# Usage
|
64 |
+
if __name__ == "__main__":
|
65 |
+
server_url = "http://localhost:49000/stream" # Replace with your server URL
|
66 |
+
docker_service_api_com_model = DockerServiceApiComModel(
|
67 |
+
client_command='BV1LSSHYXEtv --audio-only',
|
68 |
+
)
|
69 |
+
send_file_and_stream_response(docker_service_api_com_model, server_url)
|
docker_as_a_service/shared_utils/docker_as_service_api.py
CHANGED
@@ -3,13 +3,13 @@ import pickle
|
|
3 |
import io
|
4 |
import os
|
5 |
from pydantic import BaseModel, Field
|
6 |
-
from typing import Optional, Dict
|
7 |
from loguru import logger
|
8 |
|
9 |
class DockerServiceApiComModel(BaseModel):
|
10 |
client_command: Optional[str] = Field(default=None, title="Client command", description="The command to be executed on the client side")
|
11 |
client_file_attach: Optional[dict] = Field(default=None, title="Client file attach", description="The file to be attached to the client side")
|
12 |
-
server_message: Optional[
|
13 |
server_std_err: Optional[str] = Field(default=None, title="Server standard error", description="The standard error from the server side")
|
14 |
server_std_out: Optional[str] = Field(default=None, title="Server standard output", description="The standard output from the server side")
|
15 |
server_file_attach: Optional[dict] = Field(default=None, title="Server file attach", description="The file to be attached to the server side")
|
|
|
3 |
import io
|
4 |
import os
|
5 |
from pydantic import BaseModel, Field
|
6 |
+
from typing import Optional, Dict, Any
|
7 |
from loguru import logger
|
8 |
|
9 |
class DockerServiceApiComModel(BaseModel):
|
10 |
client_command: Optional[str] = Field(default=None, title="Client command", description="The command to be executed on the client side")
|
11 |
client_file_attach: Optional[dict] = Field(default=None, title="Client file attach", description="The file to be attached to the client side")
|
12 |
+
server_message: Optional[Any] = Field(default=None, title="Server standard error", description="The standard error from the server side")
|
13 |
server_std_err: Optional[str] = Field(default=None, title="Server standard error", description="The standard error from the server side")
|
14 |
server_std_out: Optional[str] = Field(default=None, title="Server standard output", description="The standard output from the server side")
|
15 |
server_file_attach: Optional[dict] = Field(default=None, title="Server file attach", description="The file to be attached to the server side")
|