qingxu98 commited on
Commit
da6e788
·
1 Parent(s): f3a9d55
Dockerfile CHANGED
@@ -5,6 +5,7 @@ RUN apt update && apt-get install -y python3 python3-dev python3-pip
5
  RUN python3 -m pip install fastapi pydantic loguru --break-system-packages
6
  RUN python3 -m pip install requests python-multipart --break-system-packages
7
  RUN python3 -m pip install uvicorn --break-system-packages
 
8
 
9
  # 为了让user用户可以访问/root目录
10
  RUN useradd -m -u 1000 user
 
5
  RUN python3 -m pip install fastapi pydantic loguru --break-system-packages
6
  RUN python3 -m pip install requests python-multipart --break-system-packages
7
  RUN python3 -m pip install uvicorn --break-system-packages
8
+ RUN python3 -m pip install tenacity --break-system-packages
9
 
10
  # 为了让user用户可以访问/root目录
11
  RUN useradd -m -u 1000 user
docker_as_a_service/docker_as_a_service.py CHANGED
@@ -51,14 +51,19 @@ async def stream_generator(request_obj):
51
  # Get list of existing files before download
52
  existing_file_before_download = []
53
 
54
- video_id = request_obj.client_command
55
- cmd = [
56
- '/root/.dotnet/tools/BBDown',
57
- video_id,
58
- '--use-app-api',
59
- '--work-dir',
60
- f'{os.path.abspath(temp_dir)}'
61
- ]
 
 
 
 
 
62
  cmd = ' '.join(cmd)
63
  yield yield_message(cmd)
64
  process = subprocess.Popen(cmd,
@@ -125,6 +130,12 @@ async def stream_generator(request_obj):
125
  yield python_obj_to_pickle_file_bytes(dsacm)
126
 
127
 
 
 
 
 
 
 
128
  @app.post("/stream")
129
  async def stream_response(file: UploadFile = File(...)):
130
  # read the file in memory, treat it as pickle file, and unpickle it
@@ -134,12 +145,46 @@ async def stream_response(file: UploadFile = File(...)):
134
  with io.BytesIO(content) as f:
135
  request_obj = pickle.load(f)
136
  # process the request_obj
 
137
  return StreamingResponse(stream_generator(request_obj), media_type="application/octet-stream")
138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
  @app.get("/")
140
  async def hi():
141
  return "Hello, this is Docker as a Service (DaaS)!"
142
-
143
  if __name__ == "__main__":
144
  import uvicorn
145
  uvicorn.run(app, host="0.0.0.0", port=49000)
 
51
  # Get list of existing files before download
52
  existing_file_before_download = []
53
 
54
+ if request_obj.endpoint == "down_bilibili":
55
+ video_id = request_obj.client_command
56
+ cmd = [
57
+ '/root/.dotnet/tools/BBDown',
58
+ video_id,
59
+ '--use-app-api',
60
+ '--work-dir',
61
+ f'{os.path.abspath(temp_dir)}'
62
+ ]
63
+ else:
64
+ assert False, "Invalid endpoint"
65
+
66
+
67
  cmd = ' '.join(cmd)
68
  yield yield_message(cmd)
69
  process = subprocess.Popen(cmd,
 
130
  yield python_obj_to_pickle_file_bytes(dsacm)
131
 
132
 
133
+ def simple_generator(return_obj):
134
+ dsacm = DockerServiceApiComModel(
135
+ server_message=return_obj,
136
+ )
137
+ yield python_obj_to_pickle_file_bytes(dsacm)
138
+
139
  @app.post("/stream")
140
  async def stream_response(file: UploadFile = File(...)):
141
  # read the file in memory, treat it as pickle file, and unpickle it
 
145
  with io.BytesIO(content) as f:
146
  request_obj = pickle.load(f)
147
  # process the request_obj
148
+ request_obj.endpoint = "down_bilibili"
149
  return StreamingResponse(stream_generator(request_obj), media_type="application/octet-stream")
150
 
151
+ @app.post("/search")
152
+ async def stream_response(file: UploadFile = File(...)):
153
+ # read the file in memory, treat it as pickle file, and unpickle it
154
+ import pickle
155
+ import io
156
+ content = await file.read()
157
+ with io.BytesIO(content) as f:
158
+ request_obj = pickle.load(f)
159
+
160
+ # process the request_obj
161
+ keyword = request_obj.client_command
162
+
163
+ from experimental_mods.get_search_kw_api_stop import search_videos
164
+ # Default parameters for video search
165
+ csrf_token = '40a227fcf12c380d7d3c81af2cd8c5e8' # Using default from main()
166
+ search_type = 'default'
167
+ max_pages = 1
168
+ output_path = 'search_results'
169
+ config_path = 'experimental_mods/config.json'
170
+
171
+ # Search for videos and return the first result
172
+ videos = search_videos(
173
+ keyword=keyword,
174
+ csrf_token=csrf_token,
175
+ search_type=search_type,
176
+ max_pages=max_pages,
177
+ output_path=output_path,
178
+ config_path=config_path,
179
+ early_stop=True
180
+ )
181
+
182
+ return StreamingResponse(simple_generator(videos), media_type="application/octet-stream")
183
+
184
  @app.get("/")
185
  async def hi():
186
  return "Hello, this is Docker as a Service (DaaS)!"
187
+
188
  if __name__ == "__main__":
189
  import uvicorn
190
  uvicorn.run(app, host="0.0.0.0", port=49000)
docker_as_a_service/experimental_mods/config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
3
+ "cookie": "buvid3=902A16D7-B19F-790B-FF85-197C38F3227462114infoc; b_nut=1731952262; b_lsid=10DF102ED6_19340664283; _uuid=84EEC61010-CE55-DAC4-68C10-CFA6108F89C8963816infoc; buvid_fp=b2f71cc1058da966a62a2caf13596b1f; buvid4=0C27B28C-406E-B88B-D232-51167891712B62902-024111817-wg%2Bfug1OO8Jl5lXoeCp0dw%3D%3D; enable_web_push=DISABLE; home_feed_column=4; browser_resolution=1313-699; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3MzIyMTE0NjMsImlhdCI6MTczMTk1MjIwMywicGx0IjotMX0.xKiEBdcpGFZy7Qv2wCExcBoRK-LGtvv_wvmCbuDoCN8; bili_ticket_expires=1732211403; CURRENT_FNVAL=4048; sid=5h1fpj7o; rpdid=|(k|kmJk)Y|u0J'u~JumlkkY)"
4
+ }
5
+
6
+
docker_as_a_service/experimental_mods/docker_as_a_service.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DaaS (Docker as a Service) is a service
3
+ that allows users to run docker commands on the server side.
4
+ """
5
+
6
+ from fastapi import FastAPI
7
+ from fastapi.responses import StreamingResponse
8
+ from fastapi import FastAPI, File, UploadFile, HTTPException
9
+ from pydantic import BaseModel, Field
10
+ from typing import Optional, Dict
11
+ import time
12
+ import os
13
+ import asyncio
14
+ import subprocess
15
+ import uuid
16
+ import threading
17
+ import queue
18
+
19
+ app = FastAPI()
20
+
21
+ class DockerServiceApiComModel(BaseModel):
22
+ client_command: Optional[str] = Field(default=None, title="Client command", description="The command to be executed on the client side")
23
+ client_file_attach: Optional[dict] = Field(default=None, title="Client file attach", description="The file to be attached to the client side")
24
+ server_message: Optional[str] = Field(default=None, title="Server standard error", description="The standard error from the server side")
25
+ server_std_err: Optional[str] = Field(default=None, title="Server standard error", description="The standard error from the server side")
26
+ server_std_out: Optional[str] = Field(default=None, title="Server standard output", description="The standard output from the server side")
27
+ server_file_attach: Optional[dict] = Field(default=None, title="Server file attach", description="The file to be attached to the server side")
28
+
29
+
30
+ def python_obj_to_pickle_file_bytes(obj):
31
+ import pickle
32
+ import io
33
+ with io.BytesIO() as f:
34
+ pickle.dump(obj, f)
35
+ return f.getvalue()
36
+
37
+ def yield_message(message):
38
+ dsacm = DockerServiceApiComModel(server_message=message)
39
+ return python_obj_to_pickle_file_bytes(dsacm)
40
+
41
+ def read_output(stream, output_queue):
42
+ while True:
43
+ line_stdout = stream.readline()
44
+ print('recv')
45
+ if line_stdout:
46
+ output_queue.put(line_stdout)
47
+ else:
48
+ break
49
+
50
+
51
+ async def stream_generator(request_obj):
52
+ import tempfile
53
+ # Create a temporary directory
54
+ with tempfile.TemporaryDirectory() as temp_dir:
55
+
56
+ # Construct the docker command
57
+ download_folder = temp_dir
58
+
59
+ # Get list of existing files before download
60
+ existing_file_before_download = []
61
+
62
+ video_id = request_obj.client_command
63
+ cmd = [
64
+ # 'docker', 'run', '--rm',
65
+ # '-v', f'{download_folder}:/downloads',
66
+ # 'bbdown',
67
+ # video_id,
68
+ # '--use-app-api',
69
+ # '--work-dir', '/downloads'
70
+ "while true; do date; sleep 1; done"
71
+ ]
72
+ cmd = ' '.join(cmd)
73
+ yield yield_message(cmd)
74
+ process = subprocess.Popen(cmd,
75
+ stdout=subprocess.PIPE,
76
+ stderr=subprocess.PIPE,
77
+ shell=True,
78
+ text=True)
79
+
80
+ stdout_queue = queue.Queue()
81
+ thread = threading.Thread(target=read_output, args=(process.stdout, stdout_queue))
82
+ thread.daemon = True
83
+ thread.start()
84
+ stderr_queue = queue.Queue()
85
+ thread = threading.Thread(target=read_output, args=(process.stderr, stderr_queue))
86
+ thread.daemon = True
87
+ thread.start()
88
+
89
+ while True:
90
+ print("looping")
91
+ # Check if there is any output in the queue
92
+ try:
93
+ output_stdout = stdout_queue.get_nowait() # Non-blocking get
94
+ if output_stdout:
95
+ print(output_stdout)
96
+ yield yield_message(output_stdout)
97
+
98
+ output_stderr = stderr_queue.get_nowait() # Non-blocking get
99
+ if output_stderr:
100
+ print(output_stdout)
101
+ yield yield_message(output_stderr)
102
+ except queue.Empty:
103
+ pass # No output available
104
+
105
+ # Break the loop if the process has finished
106
+ if process.poll() is not None:
107
+ break
108
+
109
+ await asyncio.sleep(0.25)
110
+
111
+ # Get the return code
112
+ return_code = process.returncode
113
+ yield yield_message("(return code:) " + str(return_code))
114
+
115
+ # print(f"Successfully downloaded video {video_id}")
116
+ existing_file_after_download = list(os.listdir(download_folder))
117
+ # get the difference
118
+ downloaded_files = [
119
+ f for f in existing_file_after_download if f not in existing_file_before_download
120
+ ]
121
+ downloaded_files_path = [
122
+ os.path.join(download_folder, f) for f in existing_file_after_download if f not in existing_file_before_download
123
+ ]
124
+ # read file
125
+ server_file_attach = {}
126
+ for fp, fn in zip(downloaded_files_path, downloaded_files):
127
+ with open(fp, "rb") as f:
128
+ file_bytes = f.read()
129
+ server_file_attach[fn] = file_bytes
130
+
131
+ dsacm = DockerServiceApiComModel(
132
+ server_message="complete",
133
+ server_file_attach=server_file_attach,
134
+ )
135
+ yield python_obj_to_pickle_file_bytes(dsacm)
136
+
137
+
138
+ @app.post("/stream")
139
+ async def stream_response(file: UploadFile = File(...)):
140
+ # read the file in memory, treat it as pickle file, and unpickle it
141
+ import pickle
142
+ import io
143
+ content = await file.read()
144
+ with io.BytesIO(content) as f:
145
+ request_obj = pickle.load(f)
146
+ # process the request_obj
147
+ return StreamingResponse(stream_generator(request_obj), media_type="application/octet-stream")
148
+
149
+ if __name__ == "__main__":
150
+ import uvicorn
151
+ uvicorn.run(app, host="127.0.0.1", port=48000)
docker_as_a_service/experimental_mods/docker_to_api copy.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pickle
3
+ from fastapi import FastAPI, File, UploadFile, HTTPException
4
+ from fastapi.responses import StreamingResponse
5
+ from io import BytesIO
6
+ import subprocess
7
+
8
+ app = FastAPI()
9
+
10
+ @app.post("/container_task")
11
+ async def container_task(file: UploadFile = File(...)):
12
+ # Save the uploaded file to disk
13
+ input_filepath = "input.pkl"
14
+ with open(input_filepath, "wb") as f:
15
+ f.write(await file.read())
16
+
17
+ # Process the unpickle_param from the file
18
+ try:
19
+ with open(input_filepath, 'rb') as f:
20
+ unpickle_param = pickle.load(f)
21
+ except Exception as e:
22
+ raise HTTPException(status_code=400, detail=f"Failed to unpickle the input file: {str(e)}")
23
+
24
+ # Execute the Docker command
25
+ command = ["docker", "run", "--rm", "bbdown", str(unpickle_param)]
26
+ process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
27
+
28
+ # Stream the output of the command
29
+ stdout_stream = BytesIO()
30
+ stderr_stream = BytesIO()
31
+
32
+ # Create a generator to stream output
33
+ def stream_output():
34
+ while True:
35
+ output = process.stdout.readline()
36
+ if output == b"" and process.poll() is not None:
37
+ break
38
+ if output:
39
+ stdout_stream.write(output)
40
+
41
+ stderr_output = process.stderr.read()
42
+ stderr_stream.write(stderr_output)
43
+ yield ""
44
+
45
+ # Return the StreamingResponse for the current output
46
+ async def response_stream():
47
+ for _ in stream_output():
48
+ yield stdout_stream.getvalue()
49
+ stdout_stream.seek(0) # Rewind for next read
50
+ stdout_stream.truncate() # Clear for next fill
51
+
52
+ # Run the process and wait for completion
53
+ process.wait()
54
+
55
+ # Check for errors
56
+ if process.returncode != 0:
57
+ raise HTTPException(status_code=500, detail=f"Docker command failed with error: {stderr_stream.getvalue().decode()}")
58
+
59
+ # Create a new pickle file as output
60
+ output_filepath = "output.pkl"
61
+ with open(output_filepath, 'wb') as f:
62
+ f.write(b"Your output data here.") # Replace this with actual output data
63
+
64
+ # Return the output file
65
+ return StreamingResponse(open(output_filepath, "rb"), media_type='application/octet-stream',
66
+ headers={"Content-Disposition": f"attachment; filename={os.path.basename(output_filepath)}"})
67
+
68
+ # To run the application, use: uvicorn your_file_name:app --reload
69
+ from fastapi import FastAPI
70
+ from fastapi.responses import StreamingResponse
71
+ import time
72
+ import asyncio
73
+
74
+ app = FastAPI()
75
+
76
+ async def stream_generator():
77
+ for i in range(10):
78
+ yield f"Data chunk {i}\n"
79
+ await asyncio.sleep(1) # Simulating some delay
80
+
81
+ @app.get("/stream")
82
+ async def stream_response():
83
+ return StreamingResponse(stream_generator(), media_type="text/plain")
84
+
85
+ if __name__ == "__main__":
86
+ import uvicorn
87
+ uvicorn.run(app, host="127.0.0.1", port=8000)
88
+
89
+
90
+ def client_call(*args, **kwargs):
91
+
92
+ result = execute(*args, **kwargs)
93
+
94
+ result.text
95
+ result.file_manifest
96
+ result.files
97
+
docker_as_a_service/experimental_mods/get_bilibili_resource copy.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import update_ui, get_conf, promote_file_to_downloadzone, update_ui_lastest_msg, generate_file_link
2
+
3
+
4
+ def download_bilibili(video_id, only_audio, user_name, chatbot, history):
5
+ # run : docker run --rm -v $(pwd)/downloads:/downloads bbdown BV1LSSHYXEtv --use-app-api --work-dir /downloads
6
+ import os
7
+ import subprocess
8
+ from toolbox import get_log_folder
9
+
10
+ download_folder_rel = get_log_folder(user=user_name, plugin_name="shared")
11
+ download_folder = os.path.abspath(download_folder_rel)
12
+
13
+ # Get list of existing files before download
14
+ existing_file_before_download = list(os.listdir(download_folder))
15
+
16
+ # Construct the docker command
17
+ cmd = [
18
+ 'docker', 'run', '--rm',
19
+ '-v', f'{download_folder}:/downloads',
20
+ 'bbdown',
21
+ video_id,
22
+ '--use-app-api',
23
+ '--work-dir', '/downloads'
24
+ ]
25
+ if only_audio:
26
+ cmd.append('--audio-only')
27
+
28
+
29
+ # Execute the command
30
+ result = subprocess.run(cmd, check=True, capture_output=True, text=True)
31
+ # print(f"Successfully downloaded video {video_id}")
32
+ existing_file_after_download = list(os.listdir(download_folder))
33
+ # get the difference
34
+ downloaded_files = [os.path.join(download_folder_rel, f) for f in existing_file_after_download if f not in existing_file_before_download]
35
+
36
+ return downloaded_files
37
+
docker_as_a_service/experimental_mods/get_bilibili_resource.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from toolbox import update_ui, get_conf, promote_file_to_downloadzone, update_ui_lastest_msg, generate_file_link
2
+ from shared_utils.docker_as_service_api import stream_daas
3
+ from shared_utils.docker_as_service_api import DockerServiceApiComModel
4
+
5
+ def download_bilibili(video_id, only_audio, user_name, chatbot, history):
6
+ # run : docker run --rm -v $(pwd)/downloads:/downloads bbdown BV1LSSHYXEtv --use-app-api --work-dir /downloads
7
+ import os
8
+ import subprocess
9
+ from toolbox import get_log_folder
10
+
11
+ chatbot.append([None, "Processing..."])
12
+ yield from update_ui(chatbot, history)
13
+
14
+ client_command = f'{video_id} --audio-only' if only_audio else video_id
15
+ server_url = get_conf('DAAS_SERVER_URL')
16
+ docker_service_api_com_model = DockerServiceApiComModel(client_command=client_command)
17
+ save_file_dir = get_log_folder(user_name, plugin_name='media_downloader')
18
+ for output_manifest in stream_daas(docker_service_api_com_model, server_url, save_file_dir):
19
+ status_buf = ""
20
+ status_buf += "DaaS message: \n\n"
21
+ status_buf += output_manifest['server_message'].replace('\n', '<br/>')
22
+ status_buf += "\n\n"
23
+ status_buf += "DaaS standard error: \n\n"
24
+ status_buf += output_manifest['server_std_err'].replace('\n', '<br/>')
25
+ status_buf += "\n\n"
26
+ status_buf += "DaaS standard output: \n\n"
27
+ status_buf += output_manifest['server_std_out'].replace('\n', '<br/>')
28
+ status_buf += "\n\n"
29
+ status_buf += "DaaS file attach: \n\n"
30
+ status_buf += str(output_manifest['server_file_attach'])
31
+ yield from update_ui_lastest_msg(status_buf, chatbot, history)
32
+
33
+ return output_manifest['server_file_attach']
34
+
docker_as_a_service/experimental_mods/get_search_kw_api_stop.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import time
4
+ import json
5
+ import random
6
+ import requests
7
+ import argparse
8
+ from loguru import logger
9
+ from datetime import datetime, timezone, timedelta
10
+ from typing import List, Dict
11
+ from tenacity import retry, stop_after_attempt, wait_random
12
+
13
+ def update_and_save_data(new_data: List[Dict], filename: str):
14
+ if os.path.exists(filename):
15
+ with open(filename, 'r', encoding='utf-8') as f:
16
+ existing_data = json.load(f)
17
+ else:
18
+ existing_data = []
19
+
20
+ existing_bvids = set(video['bvid'] for video in existing_data)
21
+
22
+ for video in new_data:
23
+ if video['bvid'] not in existing_bvids:
24
+ existing_data.append(video)
25
+ existing_bvids.add(video['bvid'])
26
+
27
+ with open(filename, 'w', encoding='utf-8') as f:
28
+ json.dump(existing_data, f, ensure_ascii=False, indent=4)
29
+
30
+ print(f"数据已更新并保存到 {filename}")
31
+ return existing_data
32
+
33
+ def extract_and_combine(text):
34
+ match = re.search(r'(.*?)<em class="keyword">(.*?)</em>(.*)', text)
35
+ if match:
36
+ combined = match.group(1) + match.group(2) + match.group(3)
37
+ return combined
38
+ return text
39
+
40
+ def convert_timestamp_to_beijing_time(timestamp):
41
+ utc_time = datetime.fromtimestamp(timestamp, timezone.utc)
42
+ beijing_time = utc_time + timedelta(hours=8)
43
+ return beijing_time.strftime('%Y-%m-%d %H:%M:%S')
44
+
45
+ def load_headers(config_path):
46
+ with open(config_path, 'r', encoding='utf-8') as f:
47
+ config = json.load(f)
48
+ print(f"已从 {config_path} 加载配置,请求头为:{config}")
49
+ return config
50
+
51
+
52
+ @retry(stop=stop_after_attempt(3), wait=wait_random(min=1, max=3))
53
+ def make_api_request(url, headers):
54
+ response = requests.get(url=url, headers=headers)
55
+ response.raise_for_status()
56
+ return response.json()
57
+
58
+ def search_videos(keyword, csrf_token, search_type, max_pages=5, output_path=None, config_path='config.json', early_stop=False):
59
+ url_template = "https://api.bilibili.com/x/web-interface/search/type?search_type=video&keyword={keyword}&page={page}&order={search_type}&duration=0&tids=0"
60
+ headers = load_headers(config_path)
61
+ videos = []
62
+ existing_bvids = set()
63
+
64
+ if early_stop and output_path:
65
+ output_file = f"search_results_{keyword.replace(' ', '_')}_{search_type}.json"
66
+ file_path = os.path.join(output_path, output_file)
67
+ if os.path.exists(file_path):
68
+ with open(file_path, 'r', encoding='utf-8') as f:
69
+ existing_data = json.load(f)
70
+ existing_bvids = set(video['bvid'] for video in existing_data)
71
+
72
+ for page in range(1, max_pages + 1):
73
+ url = url_template.format(keyword=keyword, page=page, search_type=search_type)
74
+ try:
75
+ data = make_api_request(url, headers)
76
+
77
+ if data['code'] != 0:
78
+ logger.error(f"Error fetching page {page}: {data['message']}")
79
+ break
80
+
81
+ if 'result' not in data['data']:
82
+ logger.info(f"No more results found on page {page}")
83
+ break
84
+
85
+ result = data['data']['result']
86
+
87
+ if not result:
88
+ logger.info(f"No more results found on page {page}")
89
+ break
90
+
91
+ new_videos = []
92
+ for video in result:
93
+ video_data = {
94
+ 'title': extract_and_combine(video['title']),
95
+ 'author': video['author'],
96
+ 'author_id': video['mid'],
97
+ 'bvid': video['bvid'],
98
+ '播放量': video['play'],
99
+ '弹幕': video['danmaku'],
100
+ '评论': video['review'],
101
+ '点赞': video['favorites'],
102
+ '发布时间': convert_timestamp_to_beijing_time(video['pubdate']),
103
+ '视频时长': video['duration'],
104
+ 'tag': video['tag'],
105
+ 'description': video['description']
106
+ }
107
+ new_videos.append(video_data)
108
+
109
+ new_bvids = set(video['bvid'] for video in new_videos)
110
+ duplicate_count = len(new_bvids.intersection(existing_bvids))
111
+ logger.info(f"Page {page}: {duplicate_count} out of {len(new_videos)} videos already exist in the dataset.")
112
+
113
+ videos.extend(new_videos)
114
+ logger.info(f"Collected {len(videos)} videos from {page} pages")
115
+ time.sleep(random.uniform(1, 3)) # Random delay between 1 and 3 seconds
116
+
117
+ except Exception as e:
118
+ logger.error(f"Error on page {page}: {str(e)}")
119
+ break
120
+
121
+ return videos
122
+
123
+ def main():
124
+ parser = argparse.ArgumentParser(description="Search for videos on Bilibili")
125
+ parser.add_argument("--keyword", default='天文馆的猫', help="Search keyword")
126
+ parser.add_argument("--csrf_token", default='40a227fcf12c380d7d3c81af2cd8c5e8', help="CSRF token for authentication")
127
+ parser.add_argument("--search_type", default='default', choices=['pubdate', 'default', 'stow', 'dm', 'click'], help="Search order type")
128
+ parser.add_argument("--max_pages", default=1, type=int, help="Maximum number of pages to fetch")
129
+ parser.add_argument("--output_path", default='search_results', help="Output directory for search results")
130
+ parser.add_argument("--interval", default=1, type=int, help="Interval in hours between searches")
131
+ parser.add_argument("--early_stop", default=True, help="Enable early stopping if all videos on a page already exist in the dataset")
132
+ args = parser.parse_args()
133
+
134
+ videos = search_videos(args.keyword, args.csrf_token, args.search_type, args.max_pages, args.output_path, early_stop=args.early_stop)
135
+ print(videos)
136
+
137
+ if __name__ == "__main__":
138
+ main()
docker_as_a_service/experimental_mods/test_docker_to_api.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import pickle
3
+ import io
4
+ import os
5
+ from pydantic import BaseModel, Field
6
+ from typing import Optional, Dict
7
+
8
+ class DockerServiceApiComModel(BaseModel):
9
+ client_command: Optional[str] = Field(default=None, title="Client command", description="The command to be executed on the client side")
10
+ client_file_attach: Optional[dict] = Field(default=None, title="Client file attach", description="The file to be attached to the client side")
11
+ server_message: Optional[str] = Field(default=None, title="Server standard error", description="The standard error from the server side")
12
+ server_std_err: Optional[str] = Field(default=None, title="Server standard error", description="The standard error from the server side")
13
+ server_std_out: Optional[str] = Field(default=None, title="Server standard output", description="The standard output from the server side")
14
+ server_file_attach: Optional[dict] = Field(default=None, title="Server file attach", description="The file to be attached to the server side")
15
+
16
+ def process_received(received: DockerServiceApiComModel, save_file_dir="./daas_output"):
17
+ # Process the received data
18
+ if received.server_message:
19
+ print(f"Recv message: {received.server_message}")
20
+ if received.server_std_err:
21
+ print(f"Recv standard error: {received.server_std_err}")
22
+ if received.server_std_out:
23
+ print(f"Recv standard output: {received.server_std_out}")
24
+ if received.server_file_attach:
25
+ # print(f"Recv file attach: {received.server_file_attach}")
26
+ for file_name, file_content in received.server_file_attach.items():
27
+ new_fp = os.path.join(save_file_dir, file_name)
28
+ new_fp_dir = os.path.dirname(new_fp)
29
+ if not os.path.exists(new_fp_dir):
30
+ os.makedirs(new_fp_dir, exist_ok=True)
31
+ with open(new_fp, 'wb') as f:
32
+ f.write(file_content)
33
+ print(f"Saved file attach to {save_file_dir}")
34
+
35
+ def send_file_and_stream_response(docker_service_api_com_model, server_url):
36
+ # Prepare the file
37
+ # Pickle the object
38
+ pickled_data = pickle.dumps(docker_service_api_com_model)
39
+
40
+ # Create a file-like object from the pickled data
41
+ file_obj = io.BytesIO(pickled_data)
42
+
43
+ # Prepare the file for sending
44
+ files = {'file': ('docker_service_api_com_model.pkl', file_obj, 'application/octet-stream')}
45
+
46
+ # Send the POST request
47
+ response = requests.post(server_url, files=files, stream=True)
48
+
49
+ max_full_package_size = 1024 * 1024 * 1024 * 1 # 1 GB
50
+
51
+ # Check if the request was successful
52
+ if response.status_code == 200:
53
+ # Process the streaming response
54
+ for chunk in response.iter_content(max_full_package_size):
55
+ if chunk:
56
+ received = pickle.loads(chunk)
57
+ process_received(received)
58
+
59
+ else:
60
+ print(f"Error: Received status code {response.status_code}")
61
+ print(response.text)
62
+
63
+ # Usage
64
+ if __name__ == "__main__":
65
+ server_url = "http://localhost:49000/stream" # Replace with your server URL
66
+ docker_service_api_com_model = DockerServiceApiComModel(
67
+ client_command='BV1LSSHYXEtv --audio-only',
68
+ )
69
+ send_file_and_stream_response(docker_service_api_com_model, server_url)
docker_as_a_service/shared_utils/docker_as_service_api.py CHANGED
@@ -3,13 +3,13 @@ import pickle
3
  import io
4
  import os
5
  from pydantic import BaseModel, Field
6
- from typing import Optional, Dict
7
  from loguru import logger
8
 
9
  class DockerServiceApiComModel(BaseModel):
10
  client_command: Optional[str] = Field(default=None, title="Client command", description="The command to be executed on the client side")
11
  client_file_attach: Optional[dict] = Field(default=None, title="Client file attach", description="The file to be attached to the client side")
12
- server_message: Optional[str] = Field(default=None, title="Server standard error", description="The standard error from the server side")
13
  server_std_err: Optional[str] = Field(default=None, title="Server standard error", description="The standard error from the server side")
14
  server_std_out: Optional[str] = Field(default=None, title="Server standard output", description="The standard output from the server side")
15
  server_file_attach: Optional[dict] = Field(default=None, title="Server file attach", description="The file to be attached to the server side")
 
3
  import io
4
  import os
5
  from pydantic import BaseModel, Field
6
+ from typing import Optional, Dict, Any
7
  from loguru import logger
8
 
9
  class DockerServiceApiComModel(BaseModel):
10
  client_command: Optional[str] = Field(default=None, title="Client command", description="The command to be executed on the client side")
11
  client_file_attach: Optional[dict] = Field(default=None, title="Client file attach", description="The file to be attached to the client side")
12
+ server_message: Optional[Any] = Field(default=None, title="Server standard error", description="The standard error from the server side")
13
  server_std_err: Optional[str] = Field(default=None, title="Server standard error", description="The standard error from the server side")
14
  server_std_out: Optional[str] = Field(default=None, title="Server standard output", description="The standard output from the server side")
15
  server_file_attach: Optional[dict] = Field(default=None, title="Server file attach", description="The file to be attached to the server side")