File size: 1,825 Bytes
a8b3f00
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
from flask_restful import Resource, reqparse

from controllers.console import api
from controllers.console.datasets.error import WebsiteCrawlError
from controllers.console.wraps import account_initialization_required, setup_required
from libs.login import login_required
from services.website_service import WebsiteService


class WebsiteCrawlApi(Resource):
    """Console resource that starts a website crawl via ``WebsiteService``."""

    @setup_required
    @login_required
    @account_initialization_required
    def post(self):
        """Parse the JSON crawl request, validate it, and start the crawl.

        The JSON body must contain the keys ``provider`` (``"firecrawl"`` or
        ``"jinareader"``), ``url`` and ``options`` (a JSON object).

        Returns:
            A ``(result, 200)`` tuple, where ``result`` is whatever
            ``WebsiteService.crawl_url`` returned.

        Raises:
            WebsiteCrawlError: If ``WebsiteService.crawl_url`` raises any
                ``Exception``. The message is ``str()`` of the original
                error, which is chained as ``__cause__``.
        """
        parser = reqparse.RequestParser()
        # NOTE(review): nullable=True lets an explicit JSON null through the
        # parser for these required keys; presumably
        # document_create_args_validate rejects it -- confirm.
        parser.add_argument(
            "provider", type=str, choices=["firecrawl", "jinareader"], required=True, nullable=True, location="json"
        )
        parser.add_argument("url", type=str, required=True, nullable=True, location="json")
        parser.add_argument("options", type=dict, required=True, nullable=True, location="json")
        args = parser.parse_args()
        # Validation runs outside the try block, so whatever it raises
        # propagates unchanged instead of being wrapped in WebsiteCrawlError.
        WebsiteService.document_create_args_validate(args)
        # crawl url
        try:
            result = WebsiteService.crawl_url(args)
        except Exception as e:
            # Chain the original exception so its traceback is preserved.
            raise WebsiteCrawlError(str(e)) from e
        return result, 200


class WebsiteCrawlStatusApi(Resource):
    """Console resource that reports the status of a crawl job."""

    @setup_required
    @login_required
    @account_initialization_required
    def get(self, job_id: str):
        """Look up the status of crawl job ``job_id``.

        The ``provider`` query-string parameter is required and must be
        ``"firecrawl"`` or ``"jinareader"``.

        Args:
            job_id: Identifier of the crawl job, taken from the URL path.

        Returns:
            A ``(result, 200)`` tuple, where ``result`` is whatever
            ``WebsiteService.get_crawl_status`` returned.

        Raises:
            WebsiteCrawlError: If ``WebsiteService.get_crawl_status`` raises
                any ``Exception``. The message is ``str()`` of the original
                error, which is chained as ``__cause__``.
        """
        parser = reqparse.RequestParser()
        parser.add_argument("provider", type=str, choices=["firecrawl", "jinareader"], required=True, location="args")
        args = parser.parse_args()
        # get crawl status
        try:
            result = WebsiteService.get_crawl_status(job_id, args["provider"])
        except Exception as e:
            # Chain the original exception so its traceback is preserved.
            raise WebsiteCrawlError(str(e)) from e
        return result, 200


# Register both resources on the console API. The <string:job_id> path
# segment is passed to WebsiteCrawlStatusApi.get as its job_id argument.
api.add_resource(WebsiteCrawlApi, "/website/crawl")
api.add_resource(WebsiteCrawlStatusApi, "/website/crawl/status/<string:job_id>")