Spaces:

joao-oak
/

classificacao-media-pt

Sleeping

App Files Files Community

joao-oak committed on Jun 14

Commit

b97b213

1 Parent(s): e9d1ac9

initial commit

Browse files

Files changed (29) hide show

.gitattributes +2 -0
Dockerfile +26 -0
app.py +943 -0
demo_data.csv +3 -0
logos/bombeiros.png +3 -0
logos/cm.jpg +0 -0
logos/default.png +3 -0
logos/direita.png +3 -0
logos/dn.png +3 -0
logos/expresso.png +3 -0
logos/extra.png +3 -0
logos/jn.png +3 -0
logos/lusa.jpg +0 -0
logos/magazine.png +3 -0
logos/negocios.png +3 -0
logos/publico.png +3 -0
logos/sic.png +3 -0
logos/tsf.png +3 -0
logos/tugapress.png +3 -0
logos/x.png +3 -0
objectivity_db.csv +3 -0
political_db.csv +3 -0
reliability_db.csv +3 -0
requirements.txt +25 -0
scores_ari.npy +3 -0
scores_fk.npy +3 -0
scores_fre.npy +3 -0
scores_g.npy +3 -0
utils.py +248 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.csv filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text

Dockerfile ADDED Viewed

	@@ -0,0 +1,26 @@

+FROM python:3.10-slim
+# Set environment variables: PORT is the value exposed below; TRANSFORMERS_CACHE and
+# HF_HOME both point at /app/models, the directory created by the RUN step that follows.
+ENV PORT=7860
+ENV TRANSFORMERS_CACHE=/app/models
+ENV HF_HOME=/app/models
+# Create app and model directories
+# NOTE(review): the working directory is /code while the model cache lives under /app/models — presumably intentional; confirm.
+WORKDIR /code
+# World-writable cache directory — presumably so a non-root runtime user can write to it; confirm.
+RUN mkdir -p /app/models && chmod -R 777 /app/models
+# Install dependencies (copied on their own, before the rest of the code)
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Pre-download the SentenceTransformer model at build time; this is the same model name app.py loads at import
+RUN python3 -c "from sentence_transformers import SentenceTransformer; SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')"
+# Copy the rest of the app code
+COPY . .
+# Expose port
+EXPOSE $PORT
+# Run the app (the code that starts the server is not visible in this part of app.py)
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,943 @@

+import dash
+from dash import dcc, html, Input, Output, State
+import dash_bootstrap_components as dbc
+import plotly.express as px
+import pandas as pd
+import dash_daq as daq
+import plotly.graph_objects as go
+import base64
+from openai import OpenAI
+from dash.exceptions import PreventUpdate
+import os
+# os.environ["TRANSFORMERS_CACHE"] = "/app/models"
+# os.environ["HF_HOME"] = "/app/models"
+import utils as u
+import requests
+import bs4
+import re
+from langchain_huggingface import HuggingFaceEmbeddings
+info_text = """
+### Funcionalidades:
+- A secção das definições permite-lhe escolher o tipo gráfico de dispersão: 1D, 2D, ou 3D.
+- A ferramenta classifica artigos consoante quatro eixos: viés político, fiabilidade, objetividade e legibilidade.
+- Todos os eixos podem ser escolhidos na secção das definições, bem como o seu intervalo de valores.
+- Um filtro de fontes está também disponível para melhor análise.
+- Na última parte da secção, um botão permite agrupar os dados e calcular a média por fonte, em vez dos dados de nível de artigo.
+- O gráfico em si oferece duas funções interativas:
+    - Dados específicos da notícia são mostrados com a passagem do rato por cima do respetivo ponto.
+    - Ao clicar num ponto, o corpo do artigo é exibido numa janela pop-up.
+- Por baixo do gráfico, um artigo pode ser adicionado para classificação em tempo real através de um ficheiro .txt ou upload de URL. Certifique-se de que qualquer artigo de uma URL é de uma fontes indicadas e não está bloqueado a subscrição.
+### Classificação:
+- A classificação de cada eixo é realizada usando um modelo de linguagem de grande escala (LLM).
+- O modelo gera um descritor para cada artigo e para cada eixo.
+- Exemplos de descritores podem ser encontrados nos dados de passagem dos pontos já presentes no gráfico.
+- O descritor gerado para cada eixo é então comparado com uma base de dados de descritores obtidos a partir de um conjunto de dados etiquetados.
+- Com base na similaridade com os descritores na base de dados, o valor do eixo é calculado para cada eixo de forma independente.
+- Para o eixo de legibilidade, o valor é obtido usando métricas de legibilidade estabelecidas e mapeando-as para a escala do eixo.
+"""
+# Dash app
+# Created with the Bootstrap theme stylesheet; the layout below uses dash_bootstrap_components widgets.
+app = dash.Dash(__name__, title='Media Bias Chart', external_stylesheets=[dbc.themes.BOOTSTRAP])
+# Module-level handle on the app's underlying server object (presumably for an external WSGI runner; confirm).
+server = app.server
+# Seed dataset displayed in the charts and copied into the session store by the layout.
+data = pd.read_csv("./demo_data.csv")
+# Fix the column order: the layout builds its axis choices from data.columns[6:], i.e. the four score columns.
+data = data[["Título", "Texto", "Fonte", "Descritor de Viés Político", "Descritor de Fiabilidade", "Descritor de Objetividade", "Viés Político", "Fiabilidade", "Objetividade", "Legibilidade"]]
+# Embedding model loaded once at import and passed to u.get_score by the classification callback.
+embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
+# Descriptor databases for the three LLM-scored axes, also passed to u.get_score.
+political_bias_db, reliability_db, objectivity_db = u.setup_db()
+app.layout = html.Div([
+    dcc.Store(id="data-store", data=data.to_dict("records"), storage_type="session"),
+    html.Div([
+        # dcc.Store(id='news-data', data=data.to_dict('records')),
+        html.H2("Definições", style={'textAlign': 'center'}),
+        # Dropdown for selecting chart type
+        html.Label("Selecione o tipo de gráfico:"),
+        dcc.Dropdown(
+            id="chart-type",
+            options=[
+                {"label": "Gráfico de Dispersão 1D", "value": "1D"},
+                {"label": "Gráfico de Dispersão 2D", "value": "2D"},
+                {"label": "Gráfico de Dispersão 3D", "value": "3D"}
+            ],
+            value="2D",
+            clearable=False,
+            style={'marginBottom': '40px'}
+        ),
+        # Dropdowns for selecting axes
+        html.Div([
+            html.Label("Selecione o eixo X:"),
+            dcc.Dropdown(
+                id="x-axis",
+                options=[{"label": col, "value": col} for col in data.columns[6:]],
+                value="Viés Político",
+                clearable=False,
+                style={'marginBottom': '10px'},
+            ),
+            dcc.RangeSlider(
+                id="x-filter",
+                min=-100,
+                max=100,
+                step=5,
+                marks={-100: '-100', -50: '-50', 0: '0', 50: '50', 100: '100'},
+                value=[-100, 100],
+                allowCross=False,
+                className="range-slider"
+            )
+        ], id="x-axis-container", style={'marginBottom': '20px'}),
+        html.Div([
+            html.Label("Selecione o eixo Y:"),
+            dcc.Dropdown(
+                id="y-axis",
+                options=[{"label": col, "value": col} for col in data.columns[6:]],
+                value="Fiabilidade",
+                clearable=False,
+                style={'marginBottom': '10px'},
+            ),
+            dcc.RangeSlider(
+                id="y-filter",
+                min=-100,
+                max=100,
+                step=5,
+                marks={-100: '-100', -50: '-50', 0: '0', 50: '50', 100: '100'},
+                value=[-100, 100],
+                allowCross=False,
+                className="range-slider"
+            )
+        ], id="y-axis-container", style={'marginBottom': '20px'}),
+        html.Div([
+            html.Label("Selecione o eixo Z:"),
+            dcc.Dropdown(
+                id="z-axis",
+                options=[{"label": col, "value": col} for col in data.columns[6:]],
+                value="Objetividade",
+                clearable=False,
+                style={'marginBottom': '10px'},
+            ),
+            dcc.RangeSlider(
+                id="z-filter",
+                min=-100,
+                max=100,
+                step=5,
+                marks={-100: '-100', -50: '-50', 0: '0', 50: '50', 100: '100'},
+                value=[-100, 100],
+                allowCross=False,
+                className="range-slider"
+            )
+        ], id="z-axis-container", style={'marginBottom': '30px'}),
+        # Checkbox filter for sources
+        html.Label("Filtrar por Fonte:"),
+        dcc.Dropdown(
+            id="source-filter",
+            options=([{"label": "Selecionar Todos", "value": "ALL"}] +
+                    [{"label": src, "value": src} for src in data["Fonte"].unique()]),
+            value=data["Fonte"].unique().tolist(),
+            multi=True,
+            placeholder="Selectione a fonte...",
+            style={'font-family' : 'Arial', 'marginBottom': '20px'}
+        ),
+        # Toggle button to group by source
+        daq.ToggleSwitch(
+            id="group-toggle",
+            label="Agrupar por fonte (Média)",
+            size=60
+        ),
+    ],
+        style={
+        'width': '25%', 'padding': '20px', 'backgroundColor': '#f8f9fa',
+        'position': 'fixed', 'height': '100vh', 'overflowY': 'auto', 'font-family': 'Calibri'
+        }
+    ),
+    html.Div([
+        html.Div(
+            dbc.Button("Informação", id="open-info", size="lg", n_clicks=0,
+                       style={
+                        "backgroundColor": "#E5ECF6",
+                        "color": "black",
+                        "border": "none",
+                        # "font-weight": "bold"
+                    }),
+            className="d-flex justify-content-end"
+        ),
+        dbc.Modal(
+            [
+                # dbc.ModalHeader(dbc.ModalTitle("How to Use This App")),
+                dbc.ModalBody(dcc.Markdown(info_text)),
+                dbc.ModalFooter(
+                    dbc.Button("Fechar", id="close-info", className="ms-auto", n_clicks=0)
+                ),
+            ],
+            id="modal-info",
+            is_open=False,
+        ),
+        # Graph
+        html.H1("Portuguese Media Charts", style={'textAlign': 'center', 'font-family': 'Calibri', 'font-weight': 'bold'}),
+        dcc.Graph(id="news-plot", style={'height': '800px'}),
+        dbc.Modal(
+            [
+                dbc.ModalHeader(dbc.ModalTitle(id="modal-title")),
+                dbc.ModalBody(id='modal-body'),
+                dbc.ModalFooter(
+                    dbc.Button("Fechar", id="close", className="ms-auto", n_clicks=0)
+                ),
+            ],
+            id="modal",
+            is_open=False,
+        ),
+        dcc.Loading(
+            id="upload-loading",
+            type="circle",
+            fullscreen=False,
+            children=[
+                # for article input in .txt
+                dcc.Upload(
+                    id='upload-article',
+                    children=html.Div([
+                        '📄 Arraste ou Selecione um Artigo para Classificar (formato .txt)'
+                    ]),
+                    style={
+                        'width': '100%',
+                        'height': '60px',
+                        'lineHeight': '60px',
+                        'borderWidth': '1px',
+                        'borderStyle': 'dashed',
+                        'borderRadius': '5px',
+                        'textAlign': 'center',
+                        'margin': '10px 0',
+                        'font-family': 'Calibri',
+                        'margin-bottom': '30px'
+                    },
+                    accept='.txt'
+                ),
+                # for article input in url format
+                html.Div([
+                html.Label([
+                    html.Span("Ou Insira um Link ", style={'font-size': '18px', 'font-family': 'Calibri'}),
+                    html.Span("(Apenas para Expresso, Público, Eco Sapo, e Diário de Notícias)", style={'font-size': '12px', 'font-family': 'Calibri'})
+                ]),
+                dcc.Input(
+                    id='url-input',
+                    type='url',
+                    placeholder='https://exemplo.com/artigo-notícias',
+                    style={
+                        'width': '98.3%',
+                        'padding': '10px',
+                        'margin': '10px 0',
+                        'border': '1px solid #ccc',
+                        'borderRadius': '5px',
+                        'font-family': 'Calibri'
+                        }
+                    ),
+                html.Button('Submeter Link', id='submit-url-button', n_clicks=0, style={
+                    'margin': '10px 0',
+                    'font-family': 'Calibri'
+                    }),
+                ]),
+                html.Div(id='upload-feedback', style={'margin': '10px 0'}),
+            ]
+        )
+    ],
+    style={'marginLeft': '27%', 'padding': '20px'}
+    #style={'width': '75%', 'padding': '20px', 'backgroundColor': '#f8f9fa', 'position': 'fixed', 'left': '25%', 'top': '0', 'bottom': '0', 'overflowY': 'auto'}
+    )
+])
+# to update the chart dynamically
+@app.callback(
+    Output("news-plot", "figure", allow_duplicate=True),
+    [Input("chart-type", "value"),
+     Input("x-axis", "value"),
+     Input("y-axis", "value"),
+     Input("z-axis", "value"),
+     Input("x-filter", "value"),
+     Input("y-filter", "value"),
+     Input("z-filter", "value"),
+     Input("source-filter", "value"),
+     Input("group-toggle", "value")],
+     State("data-store", "data"),
+     prevent_initial_call=True
+)
+def update_chart(chart_type, x_axis, y_axis, z_axis, x_range, y_range, z_range, selected_sources, group_toggle, data_records):
+    data = pd.DataFrame(data_records)
+    axis_extremities ={
+        "Viés Político" : ["Enviesado à Esquerda", "Enviesado à Direita", -100, 100],
+        "Fiabilidade" : ["Não fiável", "Fiável", -100, 100],
+        "Objetividade" : ["Baseado em Opinião", "Factual/Objetivo", -100, 100],
+        "Legibilidade" : ["Difícil de ler", "Fácil de ler", 0, 100]
+    }
+    all_sources = data["Fonte"].unique().tolist()
+    if set(selected_sources) == set(all_sources) or not selected_sources:
+        filtered_data = data
+    else:
+        filtered_data = data[data["Fonte"].isin(selected_sources)]
+    if group_toggle:
+        filtered_data_1 = filtered_data[filtered_data["Fonte"] != "Utilizador"].groupby("Fonte").mean().reset_index()
+        filtered_data_2 = filtered_data[filtered_data["Fonte"] == "Utilizador"][["Fonte", "Viés Político", "Fiabilidade", "Objetividade", "Legibilidade"]]
+        filtered_data = pd.concat([filtered_data_1, filtered_data_2], ignore_index=True)
+        filtered_data["Título"] = filtered_data["Fonte"]
+    # Apply range filters for each axis
+    filtered_data = filtered_data[(filtered_data[x_axis] >= x_range[0]) &
+                                 (filtered_data[x_axis] <= x_range[1])]
+    # Only apply Y and Z filters for the relevant chart types
+    if chart_type in ["2D", "3D"]:
+        filtered_data = filtered_data[(filtered_data[y_axis] >= y_range[0]) &
+                                     (filtered_data[y_axis] <= y_range[1])]
+    if chart_type == "3D":
+        filtered_data = filtered_data[(filtered_data[z_axis] >= z_range[0]) &
+                                     (filtered_data[z_axis] <= z_range[1])]
+    if chart_type == "1D":
+        to_hover = ["Fonte"]
+        if not group_toggle:
+            if x_axis != "Legibilidade":
+                to_hover.append(f"Descritor de {x_axis}")
+        # 1D plot
+        filtered_data['Custom Size'] = filtered_data["Fonte"].apply(lambda x: 0 if x == "Utilizador" else 12)
+        fig = px.scatter(filtered_data, x=x_axis, y=[0] * len(filtered_data), #text="Título",
+                         # color="reliability score", # size="size_fixed",
+                         color="Fonte",
+                         color_continuous_scale="Viridis",
+                         title=f"1D Scatter Plot: {x_axis}",
+                         hover_name="Título",
+                         hover_data={col: True for col in to_hover} | {"Custom Size": False},
+                         size='Custom Size',
+                         size_max=10,
+                         opacity=0.8)
+        fig.update_yaxes(visible=False, showticklabels=False)  # Hide Y-axis
+        # fig.update_traces(marker=dict(opacity=0.7, line=dict(width=1, color='black')))
+        if group_toggle:
+            fig.update_traces(marker=dict(opacity=0, size=0)) # to remove the original marker dots
+            for i, row in filtered_data.iterrows():
+                img_path = u.get_source_image(row["Fonte"])
+                img_data = u.get_img_data(img_path)
+                fig.add_layout_image(
+                    dict(
+                        source=f'data:image/png;base64,{img_data}',
+                        x=row[x_axis],
+                        y=0,
+                        xref="x",
+                        yref="y",
+                        sizex=6,
+                        sizey=6,
+                        xanchor="center",
+                        yanchor="middle",
+                        opacity=0.8,
+                    )
+                )
+        else:
+            for _, row in filtered_data.iterrows():
+                if row["Fonte"] == "Utilizador":
+                    img_data = u.get_img_data("logos/x.png")
+                    fig.add_layout_image(
+                        dict(
+                            source=f'data:image/png;base64,{img_data}',
+                            x=row[x_axis],
+                            y=0,
+                            xref="x",
+                            yref="y",
+                            sizex=7,
+                            sizey=7,
+                            xanchor="center",
+                            yanchor="middle"
+                        )
+                    )
+        # X-axis left annotation (outside)
+        fig.add_annotation(
+            x=0.13,   # Far left in paper coords
+            y=0,   # Bottom
+            xref="paper",
+            yref="paper",
+            text=f"⬅️ {axis_extremities[x_axis][0]}",
+            showarrow=False,
+            xanchor="right",
+            yanchor="top",
+            font=dict(size=12),
+            yshift=-25  # Further outside the plot
+        )
+        # X-axis right annotation (outside)
+        fig.add_annotation(
+            x=0.87,   # Far right
+            y=0,
+            xref="paper",
+            yref="paper",
+            text=f"{axis_extremities[x_axis][1]} ➡️",
+            showarrow=False,
+            xanchor="left",
+            yanchor="top",
+            font=dict(size=12),
+            yshift=-25
+        )
+        fig.update_layout(
+            xaxis=dict(
+                range=[axis_extremities[x_axis][2], axis_extremities[x_axis][3]]
+            ))
+    elif chart_type == "2D":
+        to_hover = ["Fonte"]
+        if not group_toggle:
+            if x_axis != "Legibilidade":
+                to_hover.append(f"Descritor de {x_axis}")
+            if y_axis != "Legibilidade":
+                to_hover.append(f"Descritor de {y_axis}")
+        # 2D plot
+        filtered_data['Custom Size'] = filtered_data["Fonte"].apply(lambda x: 0 if x == "Utilizador" else 10)
+        fig = px.scatter(filtered_data, x=x_axis, y=y_axis, # text="Título",
+            # color="reliability score", # size="size_fixed",
+            color_continuous_scale="Viridis",
+            color="Fonte",
+            title=f"2D Scatter: {x_axis} vs {y_axis}",
+            hover_name="Título",
+            size='Custom Size',
+            hover_data={col: True for col in to_hover} | {"Custom Size": False},
+            size_max=10,
+            opacity=0.8)
+        if group_toggle:
+            fig.update_traces(marker=dict(opacity=0, size=0)) # to remove the original marker dots
+            for i, row in filtered_data.iterrows():
+                img_path = u.get_source_image(row["Fonte"])
+                img_data = u.get_img_data(img_path)
+                fig.add_layout_image(
+                    dict(
+                        source=f'data:image/png;base64,{img_data}',
+                        x=row[x_axis],
+                        y=row[y_axis],
+                        xref="x",
+                        yref="y",
+                        sizex=10,
+                        sizey=10,
+                        xanchor="center",
+                        yanchor="middle",
+                        opacity=0.8,
+                    )
+                )
+        else:
+            for _, row in filtered_data.iterrows():
+                if row["Fonte"] == "Utilizador":
+                    img_data = u.get_img_data("logos/x.png")
+                    fig.add_layout_image(
+                        dict(
+                            source=f'data:image/png;base64,{img_data}',
+                            x=row[x_axis],
+                            y=row[y_axis],
+                            xref="x",
+                            yref="y",
+                            sizex=7,
+                            sizey=7,
+                            xanchor="center",
+                            yanchor="middle"
+                        )
+                    )
+        # X-axis left annotation (outside)
+        fig.add_annotation(
+            x=0.15,   # Far left in paper coords
+            y=0,   # Bottom
+            xref="paper",
+            yref="paper",
+            text=f"⬅️ {axis_extremities[x_axis][0]}",
+            showarrow=False,
+            xanchor="right",
+            yanchor="top",
+            font=dict(size=12),
+            yshift=-25  # Further outside the plot
+        )
+        # X-axis right annotation (outside)
+        fig.add_annotation(
+            x=0.85,   # Far right
+            y=0,
+            xref="paper",
+            yref="paper",
+            text=f"{axis_extremities[x_axis][1]} ➡️",
+            showarrow=False,
+            xanchor="left",
+            yanchor="top",
+            font=dict(size=12),
+            yshift=-25
+        )
+        # Y-axis bottom annotation (outside)
+        fig.add_annotation(
+            x=0,
+            y=0,   # Bottom
+            xref="paper",
+            yref="paper",
+            text=f"⬅️ {axis_extremities[y_axis][0]}",
+            showarrow=False,
+            xanchor="right",
+            yanchor="bottom",
+            font=dict(size=12),
+            xshift=-40,  # Move left outside
+            textangle=-90
+        )
+        # Y-axis top annotation (outside)
+        fig.add_annotation(
+            x=0,
+            y=1,   # Top
+            xref="paper",
+            yref="paper",
+            text=f"{axis_extremities[y_axis][1]} ➡️",
+            showarrow=False,
+            xanchor="right",
+            yanchor="top",
+            font=dict(size=12),
+            xshift=-40,
+            textangle=-90
+        )
+        fig.update_layout(
+            xaxis=dict(
+                range=[axis_extremities[x_axis][2], axis_extremities[x_axis][3]]
+            ),
+            yaxis=dict(
+                range=[axis_extremities[y_axis][2], axis_extremities[y_axis][3]]
+            )
+        )
+    elif chart_type == "3D":
+        to_hover = ["Fonte"]
+        if not group_toggle:
+            if x_axis != "Legibilidade":
+                to_hover.append(f"Descritor de {x_axis}")
+            if y_axis != "Legibilidade":
+                to_hover.append(f"Descritor de {y_axis}")
+            if z_axis != "Legibilidade":
+                to_hover.append(f"Descritor de {z_axis}")
+        user_upload = "Utilizador"
+        highlight_df = filtered_data[filtered_data["Fonte"] == user_upload]
+        other_df = filtered_data[filtered_data["Fonte"] != user_upload]
+        source_list = other_df["Fonte"].unique()
+        color_map = {source: f"hsl({i * 360 / len(source_list)}, 70%, 50%)" for i, source in enumerate(source_list)}
+        color_array = other_df["Fonte"].map(color_map)
+        fig = go.Figure()
+        # 3D plot
+        if group_toggle:
+            fig = px.scatter_3d(other_df, x=x_axis, y=y_axis, z=z_axis,
+                color="Fonte",  # Color points by source
+                color_continuous_scale="Viridis",
+                # symbol="Fonte",
+                title=f"Gráfico de Dispersão 3D: {x_axis} vs {y_axis} vs {z_axis}",
+                hover_name='Title')
+            fig.update_traces(marker=dict(size=10, opacity=0.8, line=dict(width=1, color='black')))
+            highlight_trace = px.scatter_3d(highlight_df, x=x_axis, y=y_axis, z=z_axis,
+                                            color_discrete_sequence=["black"],
+                                            hover_name="Título")
+            highlight_trace.update_traces(
+                marker=dict(size=10),
+                name="Utilizador",
+                showlegend=True)
+            for trace in highlight_trace.data:
+                fig.add_trace(trace)
+        else:
+            fig = px.scatter_3d(other_df, x=x_axis, y=y_axis, z=z_axis,
+                    color="Fonte",
+                    color_continuous_scale="Viridis",
+                    title=f"Gráfico de Dispersão 3D: {x_axis} vs {y_axis} vs {z_axis}",
+                    hover_name="Título",
+                    hover_data=to_hover)
+            fig.update_traces(marker=dict(size=5, opacity=0.8))
+            highlight_trace = px.scatter_3d(highlight_df, x=x_axis, y=y_axis, z=z_axis,
+                                            color_discrete_sequence=["black"],
+                                            hover_name="Título",
+                                            hover_data=to_hover)
+            highlight_trace.update_traces(
+                marker=dict(size=10),
+                name="Utilizador",
+                showlegend=True)
+            for trace in highlight_trace.data:
+                fig.add_trace(trace)
+        fig.update_layout(
+            scene=dict(
+                xaxis=dict(
+                    title=x_axis,
+                    range=[axis_extremities[x_axis][2], axis_extremities[x_axis][3]]
+                ),
+                yaxis=dict(
+                    title=y_axis,
+                    range=[axis_extremities[y_axis][2], axis_extremities[y_axis][3]]
+                ),
+                zaxis=dict(
+                    title=z_axis,
+                    range=[axis_extremities[z_axis][2], axis_extremities[z_axis][3]]
+                )
+            )
+        )
+    return fig
+# # To update the sources in the source filter
+# @app.callback(
+#     Output("source-filter", "options"),
+#     Output("source-filter", "value"),
+#     Input("news-data", "data")
+# )
+# def update_source_dropdown(data):
+#     df = pd.DataFrame(data)
+#     unique_sources = df["Fonte"].unique().tolist()
+#     options = [{"label": "Select All", "value": "ALL"}] + [
+#         {"label": src, "value": src} for src in unique_sources
+#     ]
+#     return options, unique_sources
+# To update the source filter
+@app.callback(
+    Output("source-filter", "value", allow_duplicate=True),
+    [Input("source-filter", "value")],
+    State("source-filter", "options"),
+    prevent_initial_call=True
+)
+def update_source_selection(selected_sources, options):
+    all_sources = data["Fonte"].unique().tolist()
+    if "ALL" in selected_sources:
+        # If "Select All" is clicked, return all sources
+        if "Utilizador" in [src["value"] for src in options]:
+            return all_sources + ["Utilizador"]
+        return all_sources
+    else:
+        # else return selected sources normally
+        return selected_sources
+# To updated the graph size based on chart type
+@app.callback(
+    Output("news-plot", "style"),
+    Input("chart-type", "value")
+)
+def update_graph_height(chart_type):
+    if chart_type == "1D":
+        return {"height": "400px"}
+    elif chart_type == "2D":
+        return {"height": "800px"}
+    elif chart_type == "3D":
+        return {"height": "1000px"}
+    return {"height": "800px"}
+# To disable Y-axis and Z-axis dropdowns based on chart type
+@app.callback(
+    [Output("y-axis", "disabled"),
+     Output("z-axis", "disabled")],
+    [Input("chart-type", "value")]
+)
+def update_dropdown_states(chart_type):
+    if chart_type == "1D":
+        return True, True
+    elif chart_type == "2D":
+        return False, True
+    else:
+        return False, False
+# To hide Y-axis and Z-axis dropdowns based on chart type
+@app.callback(
+    [Output("y-axis-container", "style"),
+     Output("z-axis-container", "style")],
+    [Input("chart-type", "value")]
+)
+def update_dropdown_visibility(chart_type):
+    base_style = {'marginBottom': '10px'}
+    disabled_style = {'marginBottom': '10px', 'opacity': '0.5'}
+    if chart_type == "1D":
+        return disabled_style, disabled_style
+    elif chart_type == "2D":
+        return base_style, disabled_style
+    else:
+        return base_style, base_style
+# show/hide filters based on chart type
+@app.callback(
+    [Output("x-filter", "disabled"),
+     Output("y-filter", "disabled"),
+     Output("z-filter", "disabled")],
+    [Input("chart-type", "value"),
+     Input("x-axis", "value"),
+     Input("y-axis", "value"),
+     Input("z-axis", "value")]
+)
+def update_filter_availability(chart_type, x_axis, y_axis, z_axis):
+    x_disabled = False
+    y_disabled = chart_type == "1D"
+    z_disabled = chart_type != "3D"
+    return x_disabled, y_disabled, z_disabled
+# To update the range sliders based on selected axis
+@app.callback(
+    [Output("x-filter", "min"), Output("x-filter", "max"),
+     Output("x-filter", "marks"), Output("x-filter", "value"),
+     Output("y-filter", "min"), Output("y-filter", "max"),
+     Output("y-filter", "marks"), Output("y-filter", "value"),
+     Output("z-filter", "min"), Output("z-filter", "max"),
+     Output("z-filter", "marks"), Output("z-filter", "value")],
+    [Input("x-axis", "value"), Input("y-axis", "value"), Input("z-axis", "value")]
+)
+def update_range_sliders(x_axis, y_axis, z_axis):
+    axis_data = {
+        "Viés Político": [-100, 100, {-100: "-100", -50: "-50", 0: "0", 50: "50", 100: "100"}],
+        "Fiabilidade": [-100, 100, {-100: "-100", -50: "-50", 0: "0", 50: "50", 100: "100"}],
+        "Objetividade": [-100, 100, {-100: "-100", -50: "-50", 0: "0", 50: "50", 100: "100"}],
+        "Legibilidade": [0, 100, {0: "0", 25: "25", 50: "50", 75: "75", 100: "100"}]
+    }
+    x_min, x_max, x_marks = axis_data[x_axis]
+    y_min, y_max, y_marks = axis_data[y_axis]
+    z_min, z_max, z_marks = axis_data[z_axis]
+    return (x_min, x_max, x_marks, [x_min, x_max],
+            y_min, y_max, y_marks, [y_min, y_max],
+            z_min, z_max, z_marks, [z_min, z_max])
+@app.callback(
+    Output('upload-feedback', 'children', allow_duplicate=True),
+    Output("news-plot", "figure", allow_duplicate=True),
+    Output("source-filter", "options", allow_duplicate=True),
+    Output("source-filter", "value", allow_duplicate=True),
+    Output("data-store", "data", allow_duplicate=True),
+    Input('upload-article', 'contents'),
+    State('upload-article', 'filename'),
+    State("source-filter", "options"),
+    State("source-filter", "value"),
+    State("data-store", "data"),
+    prevent_initial_call=True
+)
+def classify_article(contents, filename, options, selected_sources, data_records):
+    data = pd.DataFrame(data_records)
+    if contents is None:
+        raise PreventUpdate
+    # Decode .txt content
+    content_type, content_string = contents.split(',')
+    decoded = base64.b64decode(content_string).decode('utf-8')
+    openai = OpenAI(
+        api_key=os.environ.get("API_KEY"),
+        base_url="https://api.deepinfra.com/v1/openai",
+    )
+    try:
+        # political_bias_db, reliability_db, objectivity_db = u.setup_db()
+        descriptor_political_bias = u.get_descriptor(decoded, "political_bias", u.prompts, openai)
+        descriptor_reliability = u.get_descriptor(decoded, "reliability", u.prompts, openai)
+        descriptor_objectivity = u.get_descriptor(decoded, "objectivity", u.prompts, openai)
+        political_bias_score = u.get_score(descriptor_political_bias, embedding_model, 0.5, political_bias_db, 50)
+        reliability_score = u.get_score(descriptor_reliability, embedding_model, 0.2, reliability_db, 7505)
+        objectivity_score = u.get_score(descriptor_objectivity, embedding_model, 0.2, objectivity_db, 9000)
+        reliability_score = u.classify_readability(decoded)
+        new_point = {
+            "Título": filename,
+            "Texto": decoded,
+            "Fonte": "Utilizador",
+            "Descritor de Viés Político": descriptor_political_bias,
+            "Descritor de Fiabilidade": descriptor_reliability,
+            "Descritor de Objetividade": descriptor_objectivity,
+            "Viés Político": political_bias_score,
+            "Fiabilidade": reliability_score,
+            "Objetividade": objectivity_score,
+            "Legibilidade": reliability_score
+        }
+        data = pd.concat([pd.DataFrame(data), pd.DataFrame([new_point])], ignore_index=True)
+        unique_sources = selected_sources + [new_point["Fonte"]]
+        options = options + [{"label": new_point["Fonte"], "value": new_point["Fonte"]}]
+        return f"✅ Classificado e adicionado '{filename}'", dash.no_update, options, unique_sources, data.to_dict("records")
+    except Exception as e:
+        return f"❌ Erro a classificar o artigo: {e}", dash.no_update, dash.no_update, dash.no_update, dash.no_update
+@app.callback(
+    Output('upload-feedback', 'children'),
+    Output("news-plot", "figure"),
+    Output("source-filter", "options"),
+    Output("source-filter", "value"),
+    Output("data-store", "data"),
+    Input('submit-url-button', 'n_clicks'),
+    State('url-input', 'value'),
+    State("source-filter", "options"),
+    State("source-filter", "value"),
+    State("data-store", "data")
+)
+def classify_url(n_clicks, url, options, selected_sources, data_records):
+    data = pd.DataFrame(data_records)
+    if n_clicks > 0 and url:
+        res = requests.get(url)
+        soup = bs4.BeautifulSoup(res.text, 'lxml')
+        if "expresso.pt" in url:
+            outlet = "Expresso"
+            title = soup.select('h1')[0].text
+            text = soup.select('.full-article-fragment.full-article-body.article-content.first')[0].getText()
+        elif "publico.pt" in url:
+            outlet = "Público"
+            title = soup.select('.headline.story__headline')[0].getText()
+            match = re.search(r'\s*(.*?)\s*$', title.strip())
+            if match:
+                title = match.group(1)
+            text = soup.select('.story__body')[0].getText()
+        elif "eco.sapo.pt" in url:
+            outlet = "Eco Sapo"
+            title = soup.select('.title')[0].get_text()
+            title = re.sub(r'\s+', ' ', title).strip()
+            text = soup.select('.entry__content')[0].get_text()
+        elif "dn.pt" in url:
+            outlet = "Diário de Notícias"
+            title = soup.select('.arrow-component.arr--story-headline.story-headline-m_wrapper__1Wey6')[0].getText()
+            text = soup.select('.arr--story-page-card-wrapper')[0].getText()
+        openai = OpenAI(
+            api_key=os.environ.get("API_KEY"),
+            base_url="https://api.deepinfra.com/v1/openai",
+        )
+        try:
+            # political_bias_db, reliability_db, objectivity_db = u.setup_db()
+            descriptor_political_bias = u.get_descriptor(text, "political_bias", u.prompts, openai)
+            descriptor_reliability = u.get_descriptor(text, "reliability", u.prompts, openai)
+            descriptor_objectivity = u.get_descriptor(text, "objectivity", u.prompts, openai)
+            political_bias_score = u.get_score(descriptor_political_bias, embedding_model, 0.5, political_bias_db, 50)
+            reliability_score = u.get_score(descriptor_reliability, embedding_model, 0.2, reliability_db, 7505)
+            objectivity_score = u.get_score(descriptor_objectivity, embedding_model, 0.2, objectivity_db, 9000)
+            reliability_score = u.classify_readability(text)
+            new_point = {
+                "Título": f"{outlet}: {title}",
+                "Text": text,
+                "Fonte": "Utilizador",
+                "Descritor de Viés Político": descriptor_political_bias,
+                "Descritor de Fiabilidade": descriptor_reliability,
+                "Descritor de Objetividade": descriptor_objectivity,
+                "Viés Político": political_bias_score,
+                "Fiabilidade": reliability_score,
+                "Objetividade": objectivity_score,
+                "Legibilidade": reliability_score
+            }
+            data = pd.concat([pd.DataFrame(data), pd.DataFrame([new_point])], ignore_index=True)
+            unique_sources = selected_sources + [new_point["Fonte"]]
+            options = options + [{"label": new_point["Fonte"], "value": new_point["Fonte"]}]
+            return f"✅ O artigo pedido foi classificao e adicionado.", dash.no_update, options, unique_sources, data.to_dict("records")
+        except Exception as e:
+            return f"❌ Erro a classificar o artigo: {e}", dash.no_update, dash.no_update, dash.no_update, dash.no_update
+    else:
+        raise PreventUpdate
+# to open window with article text
+@app.callback(
+    Output("modal", "is_open"),
+    Output("modal-body", "children"),
+    Output("modal-title", "children"),
+    Input("news-plot", "clickData"),
+    Input("close", "n_clicks"),
+    State("modal", "is_open"),
+    State("data-store", "data")
+)
+def display_modal(clickData, close_clicks, is_open, data_records):
+    data = pd.DataFrame(data_records)
+    ctx = dash.callback_context
+    if ctx.triggered_id == "news-plot" and clickData:
+        title = clickData['points'][0]['hovertext']
+        data_index = data[data["Título"] == title].index
+        news_text = data.loc[data_index, 'Texto']
+        return True, news_text, title
+    elif ctx.triggered_id == "close" and is_open:
+        return False, None, None
+    return is_open, None, None
+# to open informational window
+@app.callback(
+    Output("modal-info", "is_open"),
+    [Input("open-info", "n_clicks"), Input("close-info", "n_clicks")],
+    [State("modal-info", "is_open")],
+)
+def toggle_modal(n1, n2, is_open):
+    if n1 or n2:
+        return not is_open
+    return is_open
+if __name__ == '__main__':
+    app.run(host="0.0.0.0", port=7860)

demo_data.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:14de21cb200a398ff83263e34fa24006684e51a5e832c44721f962f976d6f790
+size 534135

logos/bombeiros.png ADDED Viewed

Git LFS Details

SHA256: 22a4d35fbe283c65f1eb800a95d443f5c69a27c0bb526bffe0dd719d75af0790
Pointer size: 131 Bytes
Size of remote file: 163 kB

logos/cm.jpg ADDED Viewed

logos/default.png ADDED Viewed

Git LFS Details

SHA256: a77ab8f3f6073fcc1185d3cb1e4cec41996a1155b96ac498ea8814eea6c9bba8
Pointer size: 129 Bytes
Size of remote file: 2.88 kB

logos/direita.png ADDED Viewed

Git LFS Details

SHA256: dede9e58850a69554b268414cc10c0f00e34a88e794194d8de6d367a27e686a6
Pointer size: 130 Bytes
Size of remote file: 74.5 kB

logos/dn.png ADDED Viewed

Git LFS Details

SHA256: 82c3fbead3a5c942743542b2599d633ae96efa3151eba5798fee9e80f66d0477
Pointer size: 131 Bytes
Size of remote file: 132 kB

logos/expresso.png ADDED Viewed

Git LFS Details

SHA256: b249594569d58127538b0ad923fe0d38b675e4192cfc83f30387037e47b40a03
Pointer size: 130 Bytes
Size of remote file: 11.1 kB

logos/extra.png ADDED Viewed

Git LFS Details

SHA256: 5676502e58388e95f050c216dfd9c96bec546b806ba5b70d264c7ea8cff6c300
Pointer size: 130 Bytes
Size of remote file: 16.1 kB

logos/jn.png ADDED Viewed

Git LFS Details

SHA256: d9251da217b7974cd08fc7ded1c6613d57bd35d3460b62ea4ca1332f7accb58e
Pointer size: 129 Bytes
Size of remote file: 8.09 kB

logos/lusa.jpg ADDED Viewed

logos/magazine.png ADDED Viewed

Git LFS Details

SHA256: 624af680e5715807a8a7544b1e5c26b7a0e8aacf2fb592128827258b34838f9c
Pointer size: 129 Bytes
Size of remote file: 1.84 kB

logos/negocios.png ADDED Viewed

Git LFS Details

SHA256: 7d818b07dbb3dd1560acbce3ac7fb4cb7c939ed035872b3fb7b208aaae491b58
Pointer size: 129 Bytes
Size of remote file: 4.84 kB

logos/publico.png ADDED Viewed

Git LFS Details

SHA256: 72236da1aad33e35035e615338c2e4c08779f00a2c12bd9fbd610ce0f62c4f62
Pointer size: 130 Bytes
Size of remote file: 49.8 kB

logos/sic.png ADDED Viewed

Git LFS Details

SHA256: a069718023a6584c6646df478e696830648f8db45d8681745d00557513ee5a83
Pointer size: 129 Bytes
Size of remote file: 5.65 kB

logos/tsf.png ADDED Viewed

Git LFS Details

SHA256: cf0bc71b97b008168535ad7e7e26c9a15e0ebdaf878c607738e231e9dc54df3d
Pointer size: 131 Bytes
Size of remote file: 103 kB

logos/tugapress.png ADDED Viewed

Git LFS Details

SHA256: 3e8882a41507b204a11e37be46f5e5920a02b6146e7f254f0e376d9f96a2f0ea
Pointer size: 130 Bytes
Size of remote file: 35.1 kB

logos/x.png ADDED Viewed

Git LFS Details

SHA256: 29f843226f3f8f09846ecefbdb251b2b4a1801c3ded7f4bab8afab547a922f44
Pointer size: 131 Bytes
Size of remote file: 216 kB

objectivity_db.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c97962c1d7d6ba4fd74e6fc4f1e5e64aeef91f8afed600f0711d934ce315f008
+size 155334033

political_db.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0af893206be882deb5064dd06dedeca80d4a2667c036d4006b269af3b2eaafe6
+size 78119550

reliability_db.csv ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8b1f2e8a361aa57a684fc757724a8f79b9667d851297f3075a357632b6b8fa85
+size 125966914

requirements.txt ADDED Viewed

	@@ -0,0 +1,25 @@

+dash==3.0.4
+dash-daq==0.6.0
+pandas==1.5.3
+plotly==6.0.1
+openai==1.65.5
+requests==2.32.3
+requests-oauthlib==2.0.0
+requests-toolbelt==1.0.0
+beautifulsoup4==4.12.3
+regex==2024.9.11
+dataclasses-json==0.6.5
+jsonpatch==1.33
+jsonpointer==3.0.0
+orjson==3.10.15
+numpy==1.26.4
+langchain-community==0.3.12
+langchain==0.3.18
+langchain-core==0.3.34
+langchain-huggingface==0.1.2
+langchain-text-splitters==0.3.6
+gunicorn
+lxml==5.2.1
+sentence_transformers==3.4.1
+chromadb==0.6.3
+dash-bootstrap-components==2.0.3

scores_ari.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:073be0035a1ef26cced046a59d4689032630a9d8d17a906f84450546f756ad73
+size 48104

scores_fk.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ffd40c12d0d88792d747ad2392ffa0ff92f354dc20bae787af87b17ac2b9a619
+size 48104

scores_fre.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1f036c041a38492f6d8b2cbff869f58ac8c695f4c7de6e641f131669c783b8dc
+size 48104

scores_g.npy ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7b9940d479497f47666caed5e9db6f5943208a706a3e60354931319fa7ba0df4
+size 48128

utils.py ADDED Viewed

	@@ -0,0 +1,248 @@

+import base64
+import json
+import pandas as pd
+import re
+import numpy as np
+import ast
+def get_img_data(img_path):
+    with open(img_path, 'rb') as f:
+        img_data = f.read()
+    return base64.b64encode(img_data).decode('ascii')
+def get_source_image(source):
+    image_map = {
+        "Jornal de Notícias": "./logos/jn.png",
+        "CM Jornal": "./logos/cm.jpg",
+        "Jornal de Negócios": "./logos/negocios.png",
+        "Diário de Notícias": "./logos/dn.png",
+        "Expresso": "./logos/expresso.png",
+        "SIC Notícias": "./logos/sic.png",
+        "Público": "./logos/publico.png",
+        "TSF": "./logos/tsf.png",
+        "Lusa": "./logos/lusa.jpg",
+        "Direita Política": "./logos/direita.png",
+        "Magazine Lusa": "./logos/magazine.png",
+        "Tuga Press": "./logos/tugapress.png",
+        "Semanário Extra": "./logos/extra.png",
+        "Bombeiros24": "./logos/bombeiros.png",
+        "User Upload": "./logos/x.png",
+    }
+    return image_map.get(source, "./logos/default.png")
+def process_json(output):
+    json_object = json.loads(re.findall(r'\{.*?\}', output)[0])
+    return json_object
+# Prompt templates for the chat model, keyed by analysis axis. Each entry holds a
+# "system" message and a "user" message prefix; get_descriptor appends the article
+# text directly after the "user" prefix.
+prompts = { "political_bias" : {
+                "system" : ("És especialista em política Portuguesa. Vais interpretar texto e identificar enviesamento político. "
+                      "Em Portugal, o Partido Socialista (PS), Bloco de Esquerda (BE), Livre, e Partido Comunista Português (PCP) são os partidos de esquerda. "
+                      "O Partido Social Democrata (PSD), Iniciativa Liberal (IL), CDS, e Chega são os partidos de direita."),
+                "user" : ('Um indicador de viés político é aqui definido como um rótulo descritivo que represente a presença de enviesamento político num texto.\n '
+                    'O que vais detetar é especificamente enviesamento político. \n\n'
+                    'A saída deve ser sempre um JSON com o seguinte formato:\n'
+                    # NOTE(review): the next literal has no trailing "\n", so it runs straight into
+                    # 'Exemplos:'; the reliability and objectivity prompts do end this line with "\n".
+                    '{"indicador": "descrição do viés identificado"}'
+                    'Exemplos:\n'
+                    '{"indicador": "Refere-se à imigração como algo negativo para a sociedade."}\n'
+                    # NOTE(review): "economida" looks like a typo for "economia"; left untouched
+                    # because changing the prompt text may change the model output.
+                    '{"indicador": "Apoia a intervenção do governo na economida e sociedade."}\n'
+                    '{"indicador": "Menciona as vantagens de impostos baixos para o crescimento económico."}\n'
+                    '{"indicador": "Critica o capitalismo."}\n'
+                    '{"indicador": "Desvaloriza o estado social e valoriza o mérito individual."}\n'
+                    '{"indicador": "Critica negativamente a posição de certos partidos de direita."}\n\n'
+                    'Extrai **apenas um novo indicador** de viés político do seguinte texto e formata a saída **exatamente como nos exemplos**:\n\n')
+            },
+            "reliability" : {
+                "system" : ('Tu és um assistente especializado em avaliar a fiabilidade de artigos com base na sua linguagem, estrutura e transparência. '
+                    'A tua tarefa é identificar indicadores de fiabilidade e apresentar a resposta num formato JSON bem definido.\n'
+                    'Diretrizes:\n'
+                    '- Considera **apenas elementos formais** do texto (linguagem, tom, estrutura, uso de fontes). **Não avalies a veracidade do conteúdo.**\n'
+                    '- Identifica um único indicador por vez.\n'
+                    '- A resposta deve ter o seguinte formato: {"indicador": "descrição do indicador identificado"}\n'
+                    "Quando receberes um artigo, analisa a sua forma e extrai apenas um indicador do nível de fiabilidade, seguindo rigorosamente o formato especificado."),
+                "user" : ('Um indicador de fiabilidade é aqui definido como um rótulo descritivo que represente a presença de elementos num artigo que podem indicar maior ou menor credibilidade.\n'
+                    'O que vais detetar são especificamente sinais linguísticos, estruturais ou estilísticos que afetam credibilidade percebida do artigo.\n'
+                    'A saída deve ser sempre um JSON com o seguinte formato:\n'
+                    '{"indicador": "descrição do indicador identificado"}\n'
+                    'Exemplos:\n'
+                    '{"indicador": "Apresenta fontes verificáveis para as informações mencionadas."}\n'
+                    '{"indicador": "Utiliza linguagem sensacionalista para atrair atenção."}\n'
+                    '{"indicador": "Utiliza uma linguagem neutra e objetiva."}\n'
+                    '{"indicador": "Faz afirmações fortes sem citar fontes verificáveis."}\n'
+                    '{"indicador": "Evita exageros ou distorções ao apresentar os fatos."}\n'
+                    '{"indicador": "Apresenta erros gramaticais e ortográficos."}\n'
+                    '{"indicador": "O texto está bem estruturado e sem erros gramaticais."}\n'
+                    '{"indicador": "Uso excessivo de linguagem emocional e adjetivos carregados."}\n\n'
+                    'Extrai **apenas um novo indicador** do nível de fiabilidade do seguinte texto e formata a saída **exatamente como nos exemplos**:\n')
+            },
+            "objectivity" : {
+                "system" : ('Tu és um assistente especializado em avaliar objetividade de artigos com base na sua linguagem, estrutura e transparência. '
+                    'A tua tarefa é identificar indicadores de objetividade/subjetividade e apresentar a resposta num formato JSON bem definido.\n'
+                    'Diretrizes:\n'
+                    '- Considera **apenas elementos formais** do texto (linguagem, terminologia, tom, estrutura). **Não avalies a veracidade do conteúdo.**\n'
+                    '- Identifica um único indicador por vez.\n'
+                    '- A resposta deve ter o seguinte formato: {"indicador": "descrição do indicador identificado"}\n'
+                    "Quando receberes um artigo, analisa a sua forma e extrai apenas um indicador de objetividade/subjetividade, seguindo rigorosamente o formato especificado."),
+                "user" : ('Um indicador de objetividade é aqui definido como um rótulo descritivo que represente a presença de elementos num artigo que contribuem para a sua maior ou menor imparcialidade e rigor.\n'
+                    'O que vais detetar são especificamente sinais linguísticos, estruturais ou estilísticos que afetam a objetividade percebida do artigo.\n'
+                    'A saída deve ser sempre um JSON com o seguinte formato:\n'
+                    '{"indicador": "descrição do indicador identificado"}\n'
+                    'Exemplos:\n'
+                    '{"indicador": "Apresenta dados concretos e verificáveis para fundamentar as informações."}\n'
+                    '{"indicador": "Utiliza linguagem opinativa, expressando juízos de valor."}\n'
+                    '{"indicador": "Evita linguagem emocional ou adjetivos subjetivos."}\n'
+                    '{"indicador": "Revela preferência explícita por um ponto de vista sem apresentar contrapontos."}\n'
+                    '{"indicador": "Utiliza um tom neutro e descritivo, sem expressar opinião."}\n'
+                    '{"indicador": "Inclui suposições ou generalizações sem suporte em dados verificáveis."}\n'
+                    '{"indicador": "Inclui referências a fontes credíveis e verificáveis."}\n'
+                    '{"indicador": "Apresenta argumentos persuasivos em vez de informações neutras."}\n\n'
+                    'Extrai **apenas um novo indicador** de objetividade do seguinte texto e formata a saída **exatamente como nos exemplos**:\n')
+            },
+            # Unlike the three axes above, this prompt asks for a JSON list of five
+            # titles rather than a single {"indicador": ...} object.
+            "sensationalism" : {
+                "system" : ('Gere exatamente cinco títulos jornalísticos para a notícia fornecida, com níveis crescentes de clickbait.\n'
+                       'Segue estas diretrizes rigorosamente:\n'
+                       '- O primeiro título deve ser puramente factual, sem qualquer clickbait.\n'
+                       '- O terceiro título deve ter um leve grau de clickbait, mas ainda parecer um título convencional.\n'
+                       '- O quinto título deve ter um nível claramente elevado de clickbait, mas sem exageros irreais ou sensacionalismo extremo. Deve continuar adequado a um jornal minimamente credível.\n'
+                       '- Os títulos devem ser usáveis em meios jornalísticos reais e manter a coerência com o conteúdo da notícia.\n'
+                       '- Retorna a resposta exclusivamente no seguinte formato JSON, sem qualquer outro texto adicional:\n'
+                       '[{"1" : "título gerado"}, {"2" : "título gerado"}, {"3" : "título gerado"}, {"4" : "título gerado"}, {"5" : "título gerado"}]'),
+                "user" : ('Analisa a notícia abaixo e gera exatamente 5 títulos, cada um com nível crescente de clickbait. '
+                    'Como referência, o primeiro título deve ser factual e sem qualquer clickbait, o terceiro deve ter ligeiro clickbait e o '
+                    'quinto seria o único título com nível claramente elevado de clickbait. Ainda assim, todos os títulos devem ser passíveis '
+                    'de serem usados como título de uma notícia num jornal minimamente credível, por isso nenhum dos títulos deve ter um nível '
+                    'de clickbait quase satírico de tão exagerado e irrealista que é.\n'
+                    'A saída deve ser **só** um JSON com apenas os 5 títulos gerados usando o **exatamente** seguinte formato:\n'
+                    '[{"1" : "título gerado"}, {"2" : "título gerado"}, {"3" : "título gerado"}, {"4" : "título gerado"}, {"5" : "título gerado"}].\n\n')
+            }
+        }
+def process_descriptor_json(descriptor):
+    json_object = json.loads(re.findall(r'\{.*?\}', descriptor)[0])
+    return json_object["indicador"]
+def cosine_similarity(vec1, vec2):
+    vec1 = np.array(vec1)
+    vec2 = np.array(vec2)
+    dot_product = np.dot(vec1, vec2)
+    norm1 = np.linalg.norm(vec1)
+    norm2 = np.linalg.norm(vec2)
+    if norm1 == 0 or norm2 == 0:
+        return 0.0
+    return dot_product / (norm1 * norm2)
+def get_score(descriptor, model, threshold, db_df, k):
+    # embedding descriptor
+    descriptor_embedding = model.embed_query(descriptor)
+    db_df["similarity_scores"] = db_df["embeddings"].apply(lambda x:cosine_similarity(x, descriptor_embedding))
+    # filtering out indicators with similarity lower than threshold
+    db_df = db_df[db_df["similarity_scores"] >= threshold].sort_values(by="similarity_scores", ascending=False).head(k)
+    if len(db_df) == 0:
+        return 0
+    count_unbiased = (db_df["score"] == 0).sum()
+    if count_unbiased >= 3:
+        return 0
+    # Axis score
+    db_df["scores"] = db_df["score"] * db_df["similarity_scores"]
+    return db_df["scores"].sum() / db_df["similarity_scores"].sum()
+# for political_bias, objectivity, and reliability
+def get_descriptor(article, bias_axis, prompts, openai):
+    system_prompt = prompts[bias_axis]["system"]
+    user_prompt = prompts[bias_axis]["user"]
+    user_prompt_complete = user_prompt + article
+    chat_completion = openai.chat.completions.create(
+    model="meta-llama/Llama-3.3-70B-Instruct",
+    messages=[
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt_complete},
+    ],
+    temperature=0
+    )
+    descriptor = chat_completion.choices[0].message.content
+    try:
+        processed_descriptor = process_descriptor_json(descriptor)
+    except:
+        processed_descriptor = descriptor
+    return processed_descriptor
+def setup_db():
+    political_bias_db = pd.read_csv('political_db.csv', converters={"embeddings": lambda x: np.array(ast.literal_eval(x))})
+    reliability_db = pd.read_csv('reliability_db.csv', converters={"embeddings": lambda x: np.array(ast.literal_eval(x))})
+    objectivity_db = pd.read_csv('objectivity_db.csv', converters={"embeddings": lambda x: np.array(ast.literal_eval(x))})
+    return political_bias_db, reliability_db, objectivity_db
+# Readability
+def count_words(text):
+    words = text.split()
+    return len(words)
+def count_syllables(word):
+    vogal = ['a', 'ã', 'â', 'á', 'à', 'e', 'é', 'ê', 'i', 'í', 'o', 'ô', 'õ', 'ó', 'ò', 'u', 'ú']
+    ditongo = ['ae', 'ãe', 'ai', 'ao', 'ão', 'au', 'ea', 'ei', 'eo', 'eu', 'éu', 'ia', 'ie', 'io', 'iu', 'õe', 'oi', 'ói', 'ou', 'ua', 'ue', 'uê', 'ui', 'uo']
+    tritongo = ['uai', 'uei', 'uão', 'uõe', 'uiu', 'uou']
+    count = 0
+    for i in range(len(word)):
+        if word[i].lower() in vogal:
+            count += 1
+            if i > 1 and word[i-2:i+1].lower() in tritongo:
+                count -= 2
+            elif i > 0 and word[i-1:i+1].lower() in ditongo:
+                count -= 1
+    return count
+def count_sentences(text):
+    return text.count('.') + text.count('!') + text.count('?') - 3*text.count('...')
+def percentile_of_number(num, lst, inverted=False):
+    count = sum(1 for i in lst if i < num)
+    percentile = (count / len(lst)) * 100
+    if inverted:
+        percentile = 100 - percentile
+    return percentile
+def classify_readability(article):
+    scores_fre = np.load("./scores_fre.npy")
+    scores_ari = np.load("./scores_ari.npy")
+    scores_fk = np.load("./scores_fk.npy")
+    scores_g = np.load("./scores_g.npy")
+    words = count_words(article)
+    syllables = count_syllables(article)
+    sentences = count_sentences(article)
+    characters = len(article)
+    # The higher, the more readable
+    flesch_reading_ease =  227 - 1.04 * (words / sentences) - 72 * (syllables / words)
+    gulpease = 89 + 300 * (sentences/words) - 10 * (characters / words)
+    # The lower, the more readable
+    automated_readability_index = 0.44 * (words/sentences) + 4.6 * (characters / words) - 20
+    flesch_kincaid = 0.36 * (words / sentences) + 10.4 * (syllables / words) - 18
+    p_fre = percentile_of_number(flesch_reading_ease, scores_fre)
+    p_ari = percentile_of_number(automated_readability_index, scores_ari, inverted=True)
+    p_fk = percentile_of_number(flesch_kincaid, scores_fk, inverted=True)
+    p_g = percentile_of_number(gulpease, scores_g)
+    return np.mean([p_fre, p_ari, p_fk, p_g])