# joao-oak's picture
# added support for more .txt encodings
# c2f95ec
import dash
from dash import dcc, html, Input, Output, State
import dash_bootstrap_components as dbc
import plotly.express as px
import pandas as pd
import dash_daq as daq
import plotly.graph_objects as go
import base64
from openai import OpenAI
from dash.exceptions import PreventUpdate
import os
# os.environ["TRANSFORMERS_CACHE"] = "/app/models"
# os.environ["HF_HOME"] = "/app/models"
import utils as u
import requests
import bs4
import re
from langchain_huggingface import HuggingFaceEmbeddings
# Markdown help text rendered by dcc.Markdown inside the "modal-info" pop-up.
# It is user-facing copy, so it stays in Portuguese like the rest of the UI.
info_text = """
### Funcionalidades:
- A secção das definições permite-lhe escolher o tipo gráfico de dispersão: 1D, 2D, ou 3D.
- A ferramenta classifica artigos consoante quatro eixos: viés político, fiabilidade, objetividade e legibilidade.
- Todos os eixos podem ser escolhidos na secção das definições, bem como o seu intervalo de valores.
- Um filtro de fontes está também disponível para melhor análise.
- Na última parte da secção, um botão permite agrupar os dados e calcular a média por fonte, em vez dos dados de nível de artigo.
- O gráfico em si oferece duas funções interativas:
- Dados específicos da notícia são mostrados com a passagem do rato por cima do respetivo ponto.
- Ao clicar num ponto, o corpo do artigo é exibido numa janela pop-up.
- Por baixo do gráfico, um artigo pode ser adicionado para classificação em tempo real através de um ficheiro .txt ou upload de URL. Certifique-se de que qualquer artigo de uma URL é de uma fontes indicadas e não está bloqueado a subscrição.
### Classificação:
- A classificação de cada eixo é realizada usando um modelo de linguagem de grande escala (LLM).
- O modelo gera um descritor para cada artigo e para cada eixo.
- Exemplos de descritores podem ser encontrados nos dados de passagem dos pontos já presentes no gráfico.
- O descritor gerado para cada eixo é então comparado com uma base de dados de descritores obtidos a partir de um conjunto de dados etiquetados.
- Com base na similaridade com os descritores na base de dados, o valor do eixo é calculado para cada eixo de forma independente.
- Para o eixo de legibilidade, o valor é obtido usando métricas de legibilidade estabelecidas e mapeando-as para a escala do eixo.
"""
# Dash application object; `server` re-exports `app.server` at module level.
app = dash.Dash(
    __name__,
    external_stylesheets=[dbc.themes.BOOTSTRAP],
    title='Media Bias Chart',
)
server = app.server

# Demo articles, restricted to the columns the app uses. Column order matters:
# the layout builds the axis dropdowns from `data.columns[6:]`, i.e. the four
# score columns listed last.
data = pd.read_csv("./demo_data.csv")[
    [
        "Título",
        "Texto",
        "Fonte",
        "Descritor de Viés Político",
        "Descritor de Fiabilidade",
        "Descritor de Objetividade",
        "Viés Político",
        "Fiabilidade",
        "Objetividade",
        "Legibilidade",
    ]
]

# Sentence-embedding model and the three descriptor databases, loaded once at
# import time and shared by the classification callbacks.
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2"
)
political_bias_db, reliability_db, objectivity_db = u.setup_db()
def _axis_controls(letter, default_axis, bottom_margin):
    """Build the label, axis dropdown and range slider for one chart axis.

    ``letter`` is the upper-case axis letter shown in the label ("X", "Y" or
    "Z"); its lower-case form prefixes the component ids
    (``x-axis``, ``x-filter``, ``x-axis-container``).
    """
    prefix = letter.lower()
    return html.Div(
        [
            html.Label(f"Selecione o eixo {letter}:"),
            dcc.Dropdown(
                id=f"{prefix}-axis",
                # The score columns are the ones from position 6 onwards.
                options=[{"label": col, "value": col} for col in data.columns[6:]],
                value=default_axis,
                clearable=False,
                style={'marginBottom': '10px'},
            ),
            dcc.RangeSlider(
                id=f"{prefix}-filter",
                min=-100,
                max=100,
                step=5,
                marks={-100: '-100', -50: '-50', 0: '0', 50: '50', 100: '100'},
                value=[-100, 100],
                allowCross=False,
                className="range-slider",
            ),
        ],
        id=f"{prefix}-axis-container",
        style={'marginBottom': bottom_margin},
    )


def _build_sidebar():
    """Build the fixed settings panel (chart type, axes, source filter, grouping)."""
    source_names = data["Fonte"].unique()
    return html.Div(
        [
            html.H2("Definições", style={'textAlign': 'center'}),
            # Chart type selector
            html.Label("Selecione o tipo de gráfico:"),
            dcc.Dropdown(
                id="chart-type",
                options=[
                    {"label": f"Gráfico de Dispersão {kind}", "value": kind}
                    for kind in ("1D", "2D", "3D")
                ],
                value="2D",
                clearable=False,
                style={'marginBottom': '40px'},
            ),
            # One dropdown + range slider per axis
            _axis_controls("X", "Viés Político", '20px'),
            _axis_controls("Y", "Fiabilidade", '20px'),
            _axis_controls("Z", "Objetividade", '30px'),
            # Source filter, with every source selected by default
            html.Label("Filtrar por Fonte:"),
            dcc.Dropdown(
                id="source-filter",
                options=(
                    [{"label": "Selecionar Todos", "value": "ALL"}]
                    + [{"label": src, "value": src} for src in source_names]
                ),
                value=source_names.tolist(),
                multi=True,
                placeholder="Selectione a fonte...",
                style={'font-family': 'Arial', 'marginBottom': '20px'},
            ),
            # Switch between article-level points and per-source means
            daq.ToggleSwitch(
                id="group-toggle",
                label="Agrupar por fonte (Média)",
                size=60,
            ),
        ],
        style={
            'width': '25%',
            'padding': '20px',
            'backgroundColor': '#f8f9fa',
            'position': 'fixed',
            'height': '100vh',
            'overflowY': 'auto',
            'font-family': 'Calibri',
        },
    )


def _build_main_panel():
    """Build the right-hand panel: info button, chart, pop-ups and article inputs."""
    info_button = html.Div(
        dbc.Button(
            "Informação",
            id="open-info",
            size="lg",
            n_clicks=0,
            style={
                "backgroundColor": "#E5ECF6",
                "color": "black",
                "border": "none",
            },
        ),
        className="d-flex justify-content-end",
    )
    info_modal = dbc.Modal(
        [
            dbc.ModalBody(dcc.Markdown(info_text)),
            dbc.ModalFooter(
                dbc.Button("Fechar", id="close-info", className="ms-auto", n_clicks=0)
            ),
        ],
        id="modal-info",
        is_open=False,
    )
    # Pop-up that shows the body of the article whose point was clicked.
    article_modal = dbc.Modal(
        [
            dbc.ModalHeader(dbc.ModalTitle(id="modal-title")),
            dbc.ModalBody(id='modal-body'),
            dbc.ModalFooter(
                dbc.Button("Fechar", id="close", className="ms-auto", n_clicks=0)
            ),
        ],
        id="modal",
        is_open=False,
    )
    # Article input as a .txt file
    txt_upload = dcc.Upload(
        id='upload-article',
        children=html.Div([
            '📄 Arraste ou Selecione um Artigo para Classificar (formato .txt)'
        ]),
        style={
            'width': '100%',
            'height': '60px',
            'lineHeight': '60px',
            'borderWidth': '1px',
            'borderStyle': 'dashed',
            'borderRadius': '5px',
            'textAlign': 'center',
            'margin': '10px 0',
            'font-family': 'Calibri',
            'margin-bottom': '30px',
        },
        accept='.txt',
    )
    # Article input as a URL
    url_form = html.Div([
        html.Label([
            html.Span(
                "Ou Insira um Link ",
                style={'font-size': '18px', 'font-family': 'Calibri'},
            ),
            html.Span(
                "(Apenas para Expresso, Público, Eco Sapo, e Diário de Notícias)",
                style={'font-size': '12px', 'font-family': 'Calibri'},
            ),
        ]),
        dcc.Input(
            id='url-input',
            type='url',
            placeholder='https://exemplo.com/artigo-notícias',
            style={
                'width': '98.3%',
                'padding': '10px',
                'margin': '10px 0',
                'border': '1px solid #ccc',
                'borderRadius': '5px',
                'font-family': 'Calibri',
            },
        ),
        html.Button(
            'Submeter Link',
            id='submit-url-button',
            n_clicks=0,
            style={'margin': '10px 0', 'font-family': 'Calibri'},
        ),
    ])
    return html.Div(
        [
            info_button,
            info_modal,
            html.H1(
                "Classificação dos Media Portugueses",
                style={'textAlign': 'center', 'font-family': 'Calibri', 'font-weight': 'bold'},
            ),
            dcc.Graph(id="news-plot", style={'height': '800px'}),
            article_modal,
            dcc.Loading(
                id="upload-loading",
                type="circle",
                fullscreen=False,
                children=[
                    txt_upload,
                    url_form,
                    html.Div(id='upload-feedback', style={'margin': '10px 0'}),
                ],
            ),
        ],
        # Left margin leaves room for the fixed 25%-wide sidebar.
        style={'marginLeft': '27%', 'padding': '20px'},
    )


# Page layout: a session-scoped store holding the article records, the
# settings sidebar, and the main chart panel.
app.layout = html.Div([
    dcc.Store(id="data-store", data=data.to_dict("records"), storage_type="session"),
    _build_sidebar(),
    _build_main_panel(),
])
# --- Chart rendering -------------------------------------------------------

# Pseudo-source under which user-submitted articles are stored.
_USER_SOURCE = "Utilizador"

# Per-axis metadata: (low-end label, high-end label, axis minimum, axis maximum).
_AXIS_EXTREMITIES = {
    "Viés Político": ("Enviesado à Esquerda", "Enviesado à Direita", -100, 100),
    "Fiabilidade": ("Não fiável", "Fiável", -100, 100),
    "Objetividade": ("Baseado em Opinião", "Factual/Objetivo", -100, 100),
    "Legibilidade": ("Difícil de ler", "Fácil de ler", 0, 100),
}

# Numeric score columns kept for user rows when grouping by source.
_SCORE_COLUMNS = ["Viés Político", "Fiabilidade", "Objetividade", "Legibilidade"]


def _axis_range(axis):
    """Return the fixed ``[min, max]`` plotting range configured for *axis*."""
    return list(_AXIS_EXTREMITIES[axis][2:4])


def _hover_columns(axes, group_toggle):
    """Return the hover columns: the source plus, for article-level data,
    the descriptor of every plotted axis except readability (which has none)."""
    columns = ["Fonte"]
    if not group_toggle:
        columns.extend(
            f"Descritor de {axis}" for axis in axes if axis != "Legibilidade"
        )
    return columns


def _group_by_source(frame):
    """Collapse articles to one mean point per source, keeping user rows as-is."""
    is_user = frame["Fonte"] == _USER_SOURCE
    # numeric_only=True: the frame also holds text columns (title, body,
    # descriptors), which cannot be averaged and raise TypeError on pandas >= 2.
    means = frame[~is_user].groupby("Fonte").mean(numeric_only=True).reset_index()
    user_rows = frame.loc[is_user, ["Fonte", *_SCORE_COLUMNS]]
    grouped = pd.concat([means, user_rows], ignore_index=True)
    # Grouped points have no article title; show the source name on hover.
    grouped["Título"] = grouped["Fonte"]
    return grouped


def _add_image(fig, img_data, x, y, size, opacity=None):
    """Overlay an image centred on data coordinates (x, y).

    ``img_data`` is interpolated into a ``data:image/png;base64,`` URI, so it
    is presumably base64-encoded PNG data as returned by ``u.get_img_data``.
    """
    image = dict(
        source=f'data:image/png;base64,{img_data}',
        x=x,
        y=y,
        xref="x",
        yref="y",
        sizex=size,
        sizey=size,
        xanchor="center",
        yanchor="middle",
    )
    if opacity is not None:
        image["opacity"] = opacity
    fig.add_layout_image(image)


def _overlay_images(fig, frame, x_axis, y_axis, group_toggle, logo_size):
    """Draw source logos (grouped mode) or a cross on each user article.

    ``y_axis`` is ``None`` for the 1D chart, where every point sits at y=0.
    """
    def y_of(row):
        return 0 if y_axis is None else row[y_axis]

    if group_toggle:
        # Hide the scatter dots: each source is represented by its logo.
        fig.update_traces(marker=dict(opacity=0, size=0))
        for _, row in frame.iterrows():
            img_data = u.get_img_data(u.get_source_image(row["Fonte"]))
            _add_image(fig, img_data, row[x_axis], y_of(row), logo_size, opacity=0.8)
        return

    user_rows = frame[frame["Fonte"] == _USER_SOURCE]
    if user_rows.empty:
        return
    # Load the cross image once instead of once per user article.
    cross = u.get_img_data("logos/x.png")
    for _, row in user_rows.iterrows():
        _add_image(fig, cross, row[x_axis], y_of(row), 7)


def _annotate_x_axis(fig, x_axis, left_x, right_x):
    """Label both ends of the X axis, just below the plot area."""
    low_label, high_label = _AXIS_EXTREMITIES[x_axis][:2]
    shared = dict(
        y=0,
        xref="paper",
        yref="paper",
        showarrow=False,
        yanchor="top",
        font=dict(size=12),
        yshift=-25,  # push the text outside the plot
    )
    fig.add_annotation(x=left_x, text=f"⬅️ {low_label}", xanchor="right", **shared)
    fig.add_annotation(x=right_x, text=f"{high_label} ➡️", xanchor="left", **shared)


def _annotate_y_axis(fig, y_axis):
    """Label both ends of the Y axis, rotated, to the left of the plot area."""
    low_label, high_label = _AXIS_EXTREMITIES[y_axis][:2]
    shared = dict(
        x=0,
        xref="paper",
        yref="paper",
        showarrow=False,
        xanchor="right",
        font=dict(size=12),
        xshift=-40,  # push the text outside the plot
        textangle=-90,
    )
    fig.add_annotation(y=0, text=f"⬅️ {low_label}", yanchor="bottom", **shared)
    fig.add_annotation(y=1, text=f"{high_label} ➡️", yanchor="top", **shared)


def _with_marker_size(frame, size):
    """Return a copy of *frame* with a 'Custom Size' column.

    User rows get size 0 because they are drawn as an image overlay instead.
    Working on a copy avoids writing into a filtered slice of the source data.
    """
    sized = frame.copy()
    sized['Custom Size'] = sized["Fonte"].apply(
        lambda src: 0 if src == _USER_SOURCE else size
    )
    return sized


def _scatter_1d(frame, x_axis, group_toggle):
    """Build the 1D chart: every point on a hidden, constant Y axis."""
    frame = _with_marker_size(frame, 12)
    hover = _hover_columns([x_axis], group_toggle)
    fig = px.scatter(
        frame,
        x=x_axis,
        y=[0] * len(frame),
        color="Fonte",
        color_continuous_scale="Viridis",
        title=f"Dispersão 1D: {x_axis}",
        hover_name="Título",
        hover_data={col: True for col in hover} | {"Custom Size": False},
        size='Custom Size',
        size_max=10,
        opacity=0.8,
    )
    fig.update_yaxes(visible=False, showticklabels=False)
    _overlay_images(fig, frame, x_axis, None, group_toggle, logo_size=6)
    _annotate_x_axis(fig, x_axis, left_x=0.13, right_x=0.87)
    fig.update_layout(xaxis=dict(range=_axis_range(x_axis)))
    return fig


def _scatter_2d(frame, x_axis, y_axis, group_toggle):
    """Build the 2D scatter chart."""
    frame = _with_marker_size(frame, 10)
    hover = _hover_columns([x_axis, y_axis], group_toggle)
    fig = px.scatter(
        frame,
        x=x_axis,
        y=y_axis,
        color="Fonte",
        color_continuous_scale="Viridis",
        title=f"Dispersão 2D: {x_axis} vs {y_axis}",
        hover_name="Título",
        hover_data={col: True for col in hover} | {"Custom Size": False},
        size='Custom Size',
        size_max=10,
        opacity=0.8,
    )
    _overlay_images(fig, frame, x_axis, y_axis, group_toggle, logo_size=10)
    _annotate_x_axis(fig, x_axis, left_x=0.15, right_x=0.85)
    _annotate_y_axis(fig, y_axis)
    fig.update_layout(
        xaxis=dict(range=_axis_range(x_axis)),
        yaxis=dict(range=_axis_range(y_axis)),
    )
    return fig


def _scatter_3d(frame, x_axis, y_axis, z_axis, group_toggle):
    """Build the 3D scatter chart, with user articles as a separate black trace."""
    user_df = frame[frame["Fonte"] == _USER_SOURCE]
    other_df = frame[frame["Fonte"] != _USER_SOURCE]
    # Descriptor columns only exist on article-level data.
    hover_kwargs = {}
    if not group_toggle:
        hover_kwargs["hover_data"] = _hover_columns([x_axis, y_axis, z_axis], group_toggle)

    fig = px.scatter_3d(
        other_df,
        x=x_axis,
        y=y_axis,
        z=z_axis,
        color="Fonte",
        color_continuous_scale="Viridis",
        title=f"Dispersão 3D: {x_axis} vs {y_axis} vs {z_axis}",
        hover_name="Título",
        **hover_kwargs,
    )
    if group_toggle:
        fig.update_traces(
            marker=dict(size=10, opacity=0.8, line=dict(width=1, color='black'))
        )
    else:
        fig.update_traces(marker=dict(size=5, opacity=0.8))

    highlight = px.scatter_3d(
        user_df,
        x=x_axis,
        y=y_axis,
        z=z_axis,
        color_discrete_sequence=["black"],
        hover_name="Título",
        **hover_kwargs,
    )
    highlight.update_traces(marker=dict(size=10), name="Utilizador", showlegend=True)
    for trace in highlight.data:
        fig.add_trace(trace)

    fig.update_layout(
        scene=dict(
            xaxis=dict(title=x_axis, range=_axis_range(x_axis)),
            yaxis=dict(title=y_axis, range=_axis_range(y_axis)),
            zaxis=dict(title=z_axis, range=_axis_range(z_axis)),
        )
    )
    return fig


# to update the chart dynamically
@app.callback(
    Output("news-plot", "figure", allow_duplicate=True),
    [Input("chart-type", "value"),
     Input("x-axis", "value"),
     Input("y-axis", "value"),
     Input("z-axis", "value"),
     Input("x-filter", "value"),
     Input("y-filter", "value"),
     Input("z-filter", "value"),
     Input("source-filter", "value"),
     Input("group-toggle", "value")],
    State("data-store", "data"),
    prevent_initial_call=True
)
def update_chart(chart_type, x_axis, y_axis, z_axis, x_range, y_range, z_range, selected_sources, group_toggle, data_records):
    """Rebuild the scatter figure from the current settings.

    Args:
        chart_type: "1D", "2D" or "3D".
        x_axis, y_axis, z_axis: score column plotted on each axis.
        x_range, y_range, z_range: ``[low, high]`` slider bounds per axis;
            only the axes used by ``chart_type`` are filtered.
        selected_sources: sources to keep; an empty or missing selection
            keeps every source.
        group_toggle: when truthy, plot one mean point per source instead of
            one point per article (user articles are never averaged).
        data_records: article records from the ``data-store`` component.

    Returns:
        The Plotly figure for the requested chart type.

    Raises:
        PreventUpdate: if ``chart_type`` is not one of the supported values.
    """
    frame = pd.DataFrame(data_records)

    # Check emptiness first: set(None) would raise before the guard is reached.
    if selected_sources and set(selected_sources) != set(frame["Fonte"].unique()):
        frame = frame[frame["Fonte"].isin(selected_sources)]

    if group_toggle:
        frame = _group_by_source(frame)

    # Range filters apply only to the axes the chart type actually shows.
    active_filters = [(x_axis, x_range)]
    if chart_type in ("2D", "3D"):
        active_filters.append((y_axis, y_range))
    if chart_type == "3D":
        active_filters.append((z_axis, z_range))
    for axis, (low, high) in active_filters:
        frame = frame[(frame[axis] >= low) & (frame[axis] <= high)]

    if chart_type == "1D":
        return _scatter_1d(frame, x_axis, group_toggle)
    if chart_type == "2D":
        return _scatter_2d(frame, x_axis, y_axis, group_toggle)
    if chart_type == "3D":
        return _scatter_3d(frame, x_axis, y_axis, z_axis, group_toggle)
    raise PreventUpdate
# # To update the sources in the source filter
# @app.callback(
# Output("source-filter", "options"),
# Output("source-filter", "value"),
# Input("news-data", "data")
# )
# def update_source_dropdown(data):
# df = pd.DataFrame(data)
# unique_sources = df["Fonte"].unique().tolist()
# options = [{"label": "Select All", "value": "ALL"}] + [
# {"label": src, "value": src} for src in unique_sources
# ]
# return options, unique_sources
# To update the source filter
@app.callback(
    Output("source-filter", "value", allow_duplicate=True),
    [Input("source-filter", "value")],
    State("source-filter", "options"),
    prevent_initial_call=True
)
def update_source_selection(selected_sources, options):
    """Expand the "Selecionar Todos" entry into the full list of sources.

    Args:
        selected_sources: current dropdown value; may be empty or ``None``
            when the selection is cleared.
        options: current dropdown options (a list of ``{"label", "value"}``).

    Returns:
        Every source in the demo data (plus "Utilizador" when it is among the
        options) if "ALL" was picked; otherwise the selection unchanged.
    """
    # A cleared dropdown may deliver None, on which `"ALL" in ...` would raise.
    if not selected_sources or "ALL" not in selected_sources:
        return selected_sources

    all_sources = data["Fonte"].unique().tolist()
    # User uploads live in the options only, not in the module-level demo data.
    if any(opt["value"] == "Utilizador" for opt in options or []):
        all_sources.append("Utilizador")
    return all_sources
# Resize the graph to suit the chart type
@app.callback(
    Output("news-plot", "style"),
    Input("chart-type", "value")
)
def update_graph_height(chart_type):
    """Return the graph style for *chart_type*; unknown types get 800px."""
    heights = {"1D": "400px", "2D": "800px", "3D": "1000px"}
    return {"height": heights.get(chart_type, "800px")}
# Disable the Y/Z axis dropdowns that the chart type does not use
@app.callback(
    [Output("y-axis", "disabled"),
     Output("z-axis", "disabled")],
    [Input("chart-type", "value")]
)
def update_dropdown_states(chart_type):
    """Return ``(y_disabled, z_disabled)``: Y is off in 1D, Z is off in 1D and 2D."""
    y_disabled = chart_type == "1D"
    z_disabled = chart_type in ("1D", "2D")
    return y_disabled, z_disabled
# Dim the Y/Z axis containers that the chart type does not use
@app.callback(
    [Output("y-axis-container", "style"),
     Output("z-axis-container", "style")],
    [Input("chart-type", "value")]
)
def update_dropdown_visibility(chart_type):
    """Return the styles for the Y and Z containers; unused axes are half-transparent."""
    active = {'marginBottom': '10px'}
    dimmed = {'marginBottom': '10px', 'opacity': '0.5'}
    y_style = dimmed if chart_type == "1D" else active
    z_style = dimmed if chart_type in ("1D", "2D") else active
    return y_style, z_style
# Enable only the range sliders of the axes in use
@app.callback(
    [Output("x-filter", "disabled"),
     Output("y-filter", "disabled"),
     Output("z-filter", "disabled")],
    [Input("chart-type", "value"),
     Input("x-axis", "value"),
     Input("y-axis", "value"),
     Input("z-axis", "value")]
)
def update_filter_availability(chart_type, x_axis, y_axis, z_axis):
    """Return the ``disabled`` flags for the X, Y and Z sliders.

    The X slider is always enabled, Y is disabled in 1D, and Z is enabled only
    in 3D. The axis values are callback inputs but do not affect the result.
    """
    return False, chart_type == "1D", chart_type != "3D"
# Reset each range slider to the scale of its selected axis
@app.callback(
    [Output("x-filter", "min"), Output("x-filter", "max"),
     Output("x-filter", "marks"), Output("x-filter", "value"),
     Output("y-filter", "min"), Output("y-filter", "max"),
     Output("y-filter", "marks"), Output("y-filter", "value"),
     Output("z-filter", "min"), Output("z-filter", "max"),
     Output("z-filter", "marks"), Output("z-filter", "value")],
    [Input("x-axis", "value"), Input("y-axis", "value"), Input("z-axis", "value")]
)
def update_range_sliders(x_axis, y_axis, z_axis):
    """Return ``min, max, marks, value`` for the X, then Y, then Z slider.

    Each slider is reset to the full range of the axis chosen for it:
    0..100 for "Legibilidade" and -100..100 for the other three axes.
    """
    signed_marks = {-100: "-100", -50: "-50", 0: "0", 50: "50", 100: "100"}
    unsigned_marks = {0: "0", 25: "25", 50: "50", 75: "75", 100: "100"}
    scales = {
        "Viés Político": (-100, 100, signed_marks),
        "Fiabilidade": (-100, 100, signed_marks),
        "Objetividade": (-100, 100, signed_marks),
        "Legibilidade": (0, 100, unsigned_marks),
    }
    slider_props = []
    for axis in (x_axis, y_axis, z_axis):
        low, high, marks = scales[axis]
        slider_props.extend([low, high, marks, [low, high]])
    return tuple(slider_props)
# --- Real-time classification of user-submitted articles --------------------

# Hostnames accepted by the URL form, mapped to the outlet name shown to users.
_SUPPORTED_OUTLETS = {
    "expresso.pt": "Expresso",
    "publico.pt": "Público",
    "eco.sapo.pt": "Eco Sapo",
    "dn.pt": "Diário de Notícias",
}


def _add_user_article(text, title, options, selected_sources, data_records):
    """Classify *text* and append it to the stored articles as a user point.

    Args:
        text: article body to classify.
        title: title stored (and shown on hover) for the new point.
        options: current source-filter options.
        selected_sources: current source-filter value.
        data_records: article records from the ``data-store`` component.

    Returns:
        ``(options, selected_sources, records)`` with the "Utilizador" source
        present exactly once in the first two and the new article appended to
        the third.

    Raises:
        Whatever the LLM client or the ``utils`` helpers raise; callers report
        the failure to the user.
    """
    user_source = "Utilizador"
    client = OpenAI(
        api_key=os.environ.get("API_KEY"),
        base_url="https://api.deepinfra.com/v1/openai",
    )

    descriptor_political_bias = u.get_descriptor(text, "political_bias", u.prompts, client)
    descriptor_reliability = u.get_descriptor(text, "reliability", u.prompts, client)
    descriptor_objectivity = u.get_descriptor(text, "objectivity", u.prompts, client)

    political_bias_score = u.get_score(descriptor_political_bias, embedding_model, 0.5, political_bias_db, 50)
    reliability_score = u.get_score(descriptor_reliability, embedding_model, 0.2, reliability_db, 7505)
    objectivity_score = u.get_score(descriptor_objectivity, embedding_model, 0.2, objectivity_db, 9000)
    # Kept in its own variable: it previously overwrote `reliability_score`,
    # so "Fiabilidade" was stored with the readability value.
    readability_score = u.classify_readability(text)

    new_point = {
        "Título": title,
        "Texto": text,
        "Fonte": user_source,
        "Descritor de Viés Político": descriptor_political_bias,
        "Descritor de Fiabilidade": descriptor_reliability,
        "Descritor de Objetividade": descriptor_objectivity,
        "Viés Político": political_bias_score,
        "Fiabilidade": reliability_score,
        "Objetividade": objectivity_score,
        "Legibilidade": readability_score,
    }
    records = pd.concat(
        [pd.DataFrame(data_records), pd.DataFrame([new_point])],
        ignore_index=True,
    ).to_dict("records")

    # Add the user source once only, so repeated uploads do not create
    # duplicate dropdown entries.
    options = list(options or [])
    if not any(opt.get("value") == user_source for opt in options):
        options.append({"label": user_source, "value": user_source})
    selected = list(selected_sources or [])
    if user_source not in selected:
        selected.append(user_source)
    return options, selected, records


def _outlet_for(url):
    """Return the outlet name for *url*, or raise ValueError if unsupported."""
    from urllib.parse import urlparse

    host = (urlparse(url).hostname or "").lower()
    for domain, outlet in _SUPPORTED_OUTLETS.items():
        if host == domain or host.endswith("." + domain):
            return outlet
    raise ValueError(
        "link não suportado (apenas Expresso, Público, Eco Sapo e Diário de Notícias)"
    )


def _first_text(soup, selector):
    """Return the text of the first element matching *selector*.

    Raises ValueError instead of a bare IndexError when the page does not have
    the expected structure (e.g. a paywall or a redesigned site).
    """
    matches = soup.select(selector)
    if not matches:
        raise ValueError("não foi possível encontrar o conteúdo do artigo na página")
    return matches[0].get_text()


def _scrape_article(url):
    """Download *url* and return ``(outlet, title, text)``.

    The outlet is validated from the hostname before any request is made, so
    the server never fetches arbitrary URLs.
    """
    outlet = _outlet_for(url)
    res = requests.get(url, timeout=15)
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, 'lxml')

    if outlet == "Expresso":
        title = _first_text(soup, 'h1')
        text = _first_text(soup, '.full-article-fragment.full-article-body.article-content.first')
    elif outlet == "Público":
        title = _first_text(soup, '.headline.story__headline')
        match = re.search(r'\s*(.*?)\s*$', title.strip())
        if match:
            title = match.group(1)
        text = _first_text(soup, '.story__body')
    elif outlet == "Eco Sapo":
        title = re.sub(r'\s+', ' ', _first_text(soup, '.title')).strip()
        text = _first_text(soup, '.entry__content')
    else:  # Diário de Notícias
        title = _first_text(soup, '.arrow-component.arr--story-headline.story-headline-m_wrapper__1Wey6')
        text = _first_text(soup, '.arr--story-page-card-wrapper')
    return outlet, title, text


def _classification_error(error):
    """Return the callback outputs that report *error* and change nothing else."""
    return (f"❌ Erro a classificar o artigo: {error}",
            dash.no_update, dash.no_update, dash.no_update, dash.no_update)


@app.callback(
    Output('upload-feedback', 'children', allow_duplicate=True),
    Output("news-plot", "figure", allow_duplicate=True),
    Output("source-filter", "options", allow_duplicate=True),
    Output("source-filter", "value", allow_duplicate=True),
    Output("data-store", "data", allow_duplicate=True),
    Input('upload-article', 'contents'),
    State('upload-article', 'filename'),
    State("source-filter", "options"),
    State("source-filter", "value"),
    State("data-store", "data"),
    prevent_initial_call=True
)
def classify_article(contents, filename, options, selected_sources, data_records):
    """Classify an uploaded .txt article and add it to the chart data.

    Returns the feedback message, an untouched figure, and the updated
    source-filter options, source-filter value and stored records. On any
    failure only the feedback message changes.

    Raises:
        PreventUpdate: if no file content was delivered.
    """
    if contents is None:
        raise PreventUpdate
    try:
        # `contents` is "<content type>,<base64 payload>".
        _, content_string = contents.split(',', 1)
        decoded = u.decode_text_file(content_string)
        options, selected, records = _add_user_article(
            decoded, filename, options, selected_sources, data_records
        )
    except Exception as e:  # callback boundary: report the failure to the user
        return _classification_error(e)
    return f"✅ Classificado e adicionado '{filename}'", dash.no_update, options, selected, records


@app.callback(
    Output('upload-feedback', 'children'),
    Output("news-plot", "figure"),
    Output("source-filter", "options"),
    Output("source-filter", "value"),
    Output("data-store", "data"),
    Input('submit-url-button', 'n_clicks'),
    State('url-input', 'value'),
    State("source-filter", "options"),
    State("source-filter", "value"),
    State("data-store", "data")
)
def classify_url(n_clicks, url, options, selected_sources, data_records):
    """Scrape the article at *url*, classify it and add it to the chart data.

    Only the outlets in ``_SUPPORTED_OUTLETS`` are accepted. Returns the same
    five outputs as ``classify_article``; on any failure (unsupported link,
    network error, unexpected page structure, classification error) only the
    feedback message changes.

    Raises:
        PreventUpdate: if the button has not been clicked or no URL was given.
    """
    if not n_clicks or not url:
        raise PreventUpdate
    try:
        outlet, title, text = _scrape_article(url)
        options, selected, records = _add_user_article(
            text, f"{outlet}: {title}", options, selected_sources, data_records
        )
    except Exception as e:  # callback boundary: report the failure to the user
        return _classification_error(e)
    return "✅ O artigo pedido foi classificado e adicionado.", dash.no_update, options, selected, records
# to open window with article text
@app.callback(
    Output("modal", "is_open"),
    Output("modal-body", "children"),
    Output("modal-title", "children"),
    Input("news-plot", "clickData"),
    Input("close", "n_clicks"),
    State("modal", "is_open"),
    State("data-store", "data")
)
def display_modal(clickData, close_clicks, is_open, data_records):
    """Open the article pop-up for a clicked point, or close it.

    Args:
        clickData: click payload of the graph; the point's ``hovertext`` is
            the article title.
        close_clicks: click count of the close button (only used as trigger).
        is_open: whether the pop-up is currently open.
        data_records: article records from the ``data-store`` component.

    Returns:
        ``(is_open, body, title)`` for the pop-up.

    Raises:
        PreventUpdate: if the clicked point has no matching article text
            (e.g. a per-source mean point), so no empty pop-up is shown.
    """
    trigger = dash.callback_context.triggered_id
    if trigger == "news-plot" and clickData:
        title = clickData['points'][0].get('hovertext')
        # Take the first matching record's text as a plain string rather than
        # returning a pandas Series as the modal body.
        news_text = next(
            (rec.get("Texto") for rec in data_records or [] if rec.get("Título") == title),
            None,
        )
        if news_text is None:
            raise PreventUpdate
        return True, news_text, title
    if trigger == "close" and is_open:
        return False, None, None
    return is_open, None, None
# Open/close the informational pop-up
@app.callback(
    Output("modal-info", "is_open"),
    [Input("open-info", "n_clicks"), Input("close-info", "n_clicks")],
    [State("modal-info", "is_open")],
)
def toggle_modal(n1, n2, is_open):
    """Flip the pop-up state once either button has been clicked at least once."""
    either_clicked = n1 or n2
    return (not is_open) if either_clicked else is_open
# Script entry point: serve the app on every interface, port 7860.
if __name__ == '__main__':
    app.run(port=7860, host="0.0.0.0")