btrunghieu committed on
Commit
45d9117
1 Parent(s): 75392c3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -2
app.py CHANGED
@@ -28,12 +28,14 @@ def parse_comments(response: Response) -> Dict:
28
  try:
29
  data = json.loads(response.text)
30
  except json.JSONDecodeError:
 
31
  return {"comments": [], "total_comments": 0}
32
 
33
  comments_data = data.get("comments", [])
34
  total_comments = data.get("total", 0)
35
 
36
  if not comments_data:
 
37
  return {"comments": [], "total_comments": total_comments}
38
 
39
  parsed_comments = []
@@ -58,16 +60,19 @@ async def scrape_comments(post_id: int, comments_count: int = 20, max_comments:
58
  }
59
  return base_url + urlencode(params)
60
 
 
61
  first_page = await client.get(form_api_url(0))
62
  data = parse_comments(first_page)
63
  comments_data = data["comments"]
64
  total_comments = data["total_comments"]
65
 
66
  if not comments_data:
 
67
  return []
68
  if max_comments and max_comments < total_comments:
69
  total_comments = max_comments
70
 
 
71
  _other_pages = [
72
  client.get(form_api_url(cursor=cursor))
73
  for cursor in range(comments_count, total_comments + comments_count, comments_count)
@@ -82,6 +87,8 @@ async def scrape_comments(post_id: int, comments_count: int = 20, max_comments:
82
  if max_comments and len(comments_data) >= max_comments:
83
  comments_data = comments_data[:max_comments]
84
  break
 
 
85
  return comments_data
86
 
87
  class SentimentClassifier(nn.Module):
@@ -172,8 +179,6 @@ model = SentimentClassifier(n_classes=3)
172
  model.to(device)
173
  model.load_state_dict(torch.load('phobert_fold1.pth', map_location=torch.device('cpu')))
174
 
175
- tokenizer = AutoTokenizer.from_pretrained("vinai/phobert-base")
176
-
177
  class_names = ['CLEAN', 'OFFENSIVE', 'HATE']
178
 
179
 
 
28
  try:
29
  data = json.loads(response.text)
30
  except json.JSONDecodeError:
31
+ log.error(f"Failed to parse JSON response: {response.text}")
32
  return {"comments": [], "total_comments": 0}
33
 
34
  comments_data = data.get("comments", [])
35
  total_comments = data.get("total", 0)
36
 
37
  if not comments_data:
38
+ log.warning(f"No comments found in response: {response.text}")
39
  return {"comments": [], "total_comments": total_comments}
40
 
41
  parsed_comments = []
 
60
  }
61
  return base_url + urlencode(params)
62
 
63
+ log.info(f"Scraping comments from post ID: {post_id}")
64
  first_page = await client.get(form_api_url(0))
65
  data = parse_comments(first_page)
66
  comments_data = data["comments"]
67
  total_comments = data["total_comments"]
68
 
69
  if not comments_data:
70
+ log.warning(f"No comments found for post ID {post_id}")
71
  return []
72
  if max_comments and max_comments < total_comments:
73
  total_comments = max_comments
74
 
75
+ log.info(f"Scraping comments pagination, remaining {total_comments // comments_count - 1} more pages")
76
  _other_pages = [
77
  client.get(form_api_url(cursor=cursor))
78
  for cursor in range(comments_count, total_comments + comments_count, comments_count)
 
87
  if max_comments and len(comments_data) >= max_comments:
88
  comments_data = comments_data[:max_comments]
89
  break
90
+
91
+ log.success(f"Scraped {len(comments_data)} comments from post ID {post_id}")
92
  return comments_data
93
 
94
  class SentimentClassifier(nn.Module):
 
179
  model.to(device)
180
  model.load_state_dict(torch.load('phobert_fold1.pth', map_location=torch.device('cpu')))
181
 
 
 
182
  class_names = ['CLEAN', 'OFFENSIVE', 'HATE']
183
 
184