Spaces:

elopezlopez
/

socialboost

Sleeping

App Files Files

ezequiellopez committed on Apr 17, 2024

Commit

c8df78e

1 Parent(s): c6dd11e

debugging integration tests

Browse files

Files changed (3) hide show

app/app.py +2 -9
app/modules/classify.py +45 -5
app/modules/redistribute.py +18 -3

app/app.py CHANGED Viewed

@@ -3,9 +3,8 @@ from fastapi import FastAPI, HTTPException
 #import redis
 from dotenv import load_dotenv
 import os
-import torch
-from modules.redistribute import redistribute, insert_element_at_position
 #from modules.models.api import Input, Output, NewItem, UUID
 from modules.database import BoostDatabase, UserDatabase, User
 from _models.request import RankingRequest
@@ -19,10 +18,6 @@ load_dotenv('../.env')
 redis_port = os.getenv("REDIS_PORT")
 fastapi_port = os.getenv("FASTAPI_PORT")
-print(f"Is CUDA available: {torch.cuda.is_available()}")
-#print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
 #print("Redis port:", redis_port)
 print("FastAPI port:", fastapi_port)
@@ -47,10 +42,8 @@ async def rerank_items(input_data: RankingRequest) -> RankingResponse:
     # TODO consider sampling them?
     print(items)
-    reranked_ids, first_topic, insertion_pos = redistribute(items=items)
     #reranked_ids = [ for id_ in reranked_ids]
-    print("here!")
-    print(reranked_ids)
     user_in_db = user_db.get_user(user_id=user)

 #import redis
 from dotenv import load_dotenv
 import os
+from modules.redistribute import redistribute, insert_element_at_position, handle_text_content
 #from modules.models.api import Input, Output, NewItem, UUID
 from modules.database import BoostDatabase, UserDatabase, User
 from _models.request import RankingRequest
 redis_port = os.getenv("REDIS_PORT")
 fastapi_port = os.getenv("FASTAPI_PORT")
 #print("Redis port:", redis_port)
 print("FastAPI port:", fastapi_port)
     # TODO consider sampling them?
     print(items)
+    reranked_ids, first_topic, insertion_pos = redistribute(platform=platform, items=items)
     #reranked_ids = [ for id_ in reranked_ids]
     user_in_db = user_db.get_user(user_id=user)

app/modules/classify.py CHANGED Viewed

@@ -1,10 +1,17 @@
 from transformers import pipeline
 from typing import List
-#model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
-model = pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-9", device=0)
 label_map = {
     "something else": "non-civic",
@@ -13,6 +20,7 @@ label_map = {
     "health are and public health": "health",
     "religious": "news" # CONSCIOUS DECISION
 }
 def map_scores(predicted_labels: List[dict], default_label: str):
     mapped_scores = [item['scores'][0] if item['labels'][0]!= default_label else 0 for item in predicted_labels]
@@ -26,7 +34,39 @@ def get_first_relevant_label(predicted_labels, mapped_scores: List[float], defau
 def classify(texts: List[str], labels: List[str]):
-    predicted_labels = model(texts, labels, multi_label=False)
     print(predicted_labels)
     return predicted_labels

 from transformers import pipeline
 from typing import List
# Pick the device index handed to the transformers pipeline: 0 when torch
# reports a usable CUDA device, otherwise None.
try:
    import torch
except ImportError:
    # A missing torch is treated the same as having no GPU.
    torch = None

# Ask torch explicitly instead of relying on a CUDA call raising an exception.
_cuda_available = torch is not None and torch.cuda.is_available()
print(f"Is CUDA available: {_cuda_available}")
if _cuda_available:
    print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
    device = 0
else:
    print("No GPU available, running on CPU")
    device = None
+#model = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
+model = pipeline("zero-shot-classification", model="valhalla/distilbart-mnli-12-9", device=device)
 label_map = {
     "something else": "non-civic",
     "health are and public health": "health",
     "religious": "news" # CONSCIOUS DECISION
 }
+default_label = "something else"
 def map_scores(predicted_labels: List[dict], default_label: str):
     mapped_scores = [item['scores'][0] if item['labels'][0]!= default_label else 0 for item in predicted_labels]
 def classify(texts: List[str], labels: List[str]):
+    predicted_labels = model(texts, labels, multi_label=False, batch_size=16)
     print(predicted_labels)
     return predicted_labels
+def classify(texts: List[str], labels: List[str]):
+    results = []
+    # Lists to hold texts and indices for model processing
+    model_texts = []
+    model_indices = []
+    # Iterate through each text to check for special cases
+    for index, text in enumerate(texts):
+        if text == "NON-VALID":
+            # If text is "X", directly assign the label and score
+            results.append({
+                "sequence": text,
+                "labels": [default_label],  # Assuming the first label is the correct one for "X"
+                "scores": [1.0]  # Assign a full score
+            })
+        else:
+            # Otherwise, prepare for model processing
+            model_texts.append(text)
+            model_indices.append(index)
+    if model_texts:
+        # Process texts through the model if there are any
+        predicted_labels = model(model_texts, labels, multi_label=False, batch_size=16)
+        # Insert model results into the correct positions
+        for pred, idx in zip(predicted_labels, model_indices):
+            results.insert(idx, pred)
+    print(results)
+    return results

app/modules/redistribute.py CHANGED Viewed

@@ -4,12 +4,27 @@ from modules.classify import classify, map_scores, get_first_relevant_label
 labels = ["something else", "headlines, news channels, news articles, breaking news", "politics, policy and politicians", "health care and public health", "religious"]
-def redistribute(items):
-    predicted_labels = classify(texts=[item.text for item in items], labels=labels)
     mapped_scores = map_scores(predicted_labels=predicted_labels, default_label="something else")
     first_topic, insertion_pos = get_first_relevant_label(predicted_labels=predicted_labels, mapped_scores=mapped_scores, default_label="something else")
     # TODO include parent linking
-    print("OK?")
     reranked_ids, _ = distribute_evenly(ids=[item.id for item in items], scores=mapped_scores)
     print(reranked_ids)
     return reranked_ids, first_topic, insertion_pos

 labels = ["something else", "headlines, news channels, news articles, breaking news", "politics, policy and politicians", "health care and public health", "religious"]
def handle_text_content(platform, items):
    """Build the text to classify for each item.

    For ``platform == "reddit"`` an item with a non-empty ``title`` yields
    ``title + "\\n" + text``; every other item yields its ``text``. A missing
    (``None``) ``text`` is treated as an empty string. Any resulting text of
    five characters or fewer is replaced by the ``"NON-VALID"`` placeholder.

    Args:
        platform: Platform name; only ``"reddit"`` triggers title handling.
        items: Iterable of objects exposing ``text`` (and ``title`` when the
            platform is ``"reddit"``).

    Returns:
        A list of strings, one per item, in input order.
    """
    texts = []
    for item in items:
        # Guard against None so concatenation and len() below cannot fail.
        body = item.text or ""
        # `title` is only read for reddit items; other platforms may lack it.
        if platform == "reddit" and item.title:
            text = item.title + "\n" + body
        else:
            text = body
        # Too short to classify meaningfully: emit the placeholder instead.
        if len(text) <= 5:
            text = "NON-VALID"
        texts.append(text)
    return texts
def redistribute(platform, items):
    """Classify the items' texts and derive a re-ranked id order.

    The texts produced by ``handle_text_content`` are classified against the
    module-level ``labels``; the predictions are turned into scores by
    ``map_scores`` and the item ids are reordered by ``distribute_evenly``.

    Args:
        platform: Platform name, forwarded to ``handle_text_content``.
        items: Sequence of objects exposing ``id`` plus the text fields read
            by ``handle_text_content``.

    Returns:
        A tuple ``(reranked_ids, first_topic, insertion_pos)``; the last two
        values come from ``get_first_relevant_label``.
    """
    fallback_label = "something else"

    texts = handle_text_content(platform=platform, items=items)
    predicted_labels = classify(texts=texts, labels=labels)

    mapped_scores = map_scores(
        predicted_labels=predicted_labels,
        default_label=fallback_label,
    )
    first_topic, insertion_pos = get_first_relevant_label(
        predicted_labels=predicted_labels,
        mapped_scores=mapped_scores,
        default_label=fallback_label,
    )

    # TODO include parent linking
    print("OK--", predicted_labels)

    item_ids = [item.id for item in items]
    reranked_ids, _ = distribute_evenly(ids=item_ids, scores=mapped_scores)
    print(reranked_ids)

    return reranked_ids, first_topic, insertion_pos