Spaces:

btrunghieu
/

detect-emotions-comment-tiktok

Sleeping

App Files Community

btrunghieu committed on Jun 28

Commit

45d9117

•

1 Parent(s): 75392c3

Update app.py

Browse files

Files changed (1) hide show

app.py +7 -2

app.py CHANGED Viewed

@@ -28,12 +28,14 @@ def parse_comments(response: Response) -> Dict:
     try:
         data = json.loads(response.text)
     except json.JSONDecodeError:
         return {"comments": [], "total_comments": 0}
     comments_data = data.get("comments", [])
     total_comments = data.get("total", 0)
     if not comments_data:
         return {"comments": [], "total_comments": total_comments}
     parsed_comments = []
@@ -58,16 +60,19 @@ async def scrape_comments(post_id: int, comments_count: int = 20, max_comments:
         }
         return base_url + urlencode(params)
     first_page = await client.get(form_api_url(0))
     data = parse_comments(first_page)
     comments_data = data["comments"]
     total_comments = data["total_comments"]
     if not comments_data:
         return []
     if max_comments and max_comments < total_comments:
         total_comments = max_comments
     _other_pages = [
         client.get(form_api_url(cursor=cursor))
         for cursor in range(comments_count, total_comments + comments_count, comments_count)
@@ -82,6 +87,8 @@ async def scrape_comments(post_id: int, comments_count: int = 20, max_comments:
         if max_comments and len(comments_data) >= max_comments:
             comments_data = comments_data[:max_comments]
             break
     return comments_data
 class SentimentClassifier(nn.Module):
@@ -172,8 +179,6 @@ model = SentimentClassifier(n_classes=3)
 model.to(device)
 model.load_state_dict(torch.load('phobert_fold1.pth', map_location=torch.device('cpu')))
-tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")
 class_names = ['CLEAN', 'OFFENSIVE', 'HATE']

     try:
         data = json.loads(response.text)
     except json.JSONDecodeError:
+        log.error(f"Failed to parse JSON response: {response.text}")
         return {"comments": [], "total_comments": 0}
     comments_data = data.get("comments", [])
     total_comments = data.get("total", 0)
     if not comments_data:
+        log.warning(f"No comments found in response: {response.text}")
         return {"comments": [], "total_comments": total_comments}
     parsed_comments = []
         }
         return base_url + urlencode(params)
+    log.info(f"Scraping comments from post ID: {post_id}")
     first_page = await client.get(form_api_url(0))
     data = parse_comments(first_page)
     comments_data = data["comments"]
     total_comments = data["total_comments"]
     if not comments_data:
+        log.warning(f"No comments found for post ID {post_id}")
         return []
     if max_comments and max_comments < total_comments:
         total_comments = max_comments
+    log.info(f"Scraping comments pagination, remaining {total_comments // comments_count - 1} more pages")
     _other_pages = [
         client.get(form_api_url(cursor=cursor))
         for cursor in range(comments_count, total_comments + comments_count, comments_count)
         if max_comments and len(comments_data) >= max_comments:
             comments_data = comments_data[:max_comments]
             break
+    log.success(f"Scraped {len(comments_data)} comments from post ID {post_id}")
     return comments_data
 class SentimentClassifier(nn.Module):
 model.to(device)
 model.load_state_dict(torch.load('phobert_fold1.pth', map_location=torch.device('cpu')))
 class_names = ['CLEAN', 'OFFENSIVE', 'HATE']