"""Dash app that places Portuguese news articles on a media-bias chart.

Articles are plotted on up to three of four axes (political bias, reliability,
objectivity, readability).  Users can filter by source and value range, group
articles by source, open an article's text by clicking a point, and classify a
new article supplied as a ``.txt`` upload or as a URL from a supported outlet.
"""

# NOTE: import order is kept as in the original file so that the optional
# cache-directory overrides below still precede the model-related imports.
import dash
from dash import dcc, html, Input, Output, State
import dash_bootstrap_components as dbc
import plotly.express as px
import pandas as pd
import dash_daq as daq
import plotly.graph_objects as go
import base64
from openai import OpenAI
from dash.exceptions import PreventUpdate
import os
# os.environ["TRANSFORMERS_CACHE"] = "/app/models"
# os.environ["HF_HOME"] = "/app/models"
import utils as u
import requests
import bs4
import re
from urllib.parse import urlparse
from langchain_huggingface import HuggingFaceEmbeddings

info_text = """
### Funcionalidades:
- A secção das definições permite-lhe escolher o tipo gráfico de dispersão: 1D, 2D, ou 3D.
- A ferramenta classifica artigos consoante quatro eixos: viés político, fiabilidade, objetividade e legibilidade.
- Todos os eixos podem ser escolhidos na secção das definições, bem como o seu intervalo de valores.
- Um filtro de fontes está também disponível para melhor análise.
- Na última parte da secção, um botão permite agrupar os dados e calcular a média por fonte, em vez dos dados de nível de artigo.
- O gráfico em si oferece duas funções interativas:
    - Dados específicos da notícia são mostrados com a passagem do rato por cima do respetivo ponto.
    - Ao clicar num ponto, o corpo do artigo é exibido numa janela pop-up.
- Por baixo do gráfico, um artigo pode ser adicionado para classificação em tempo real através de um ficheiro .txt ou upload de URL. Certifique-se de que qualquer artigo de uma URL é de uma fontes indicadas e não está bloqueado a subscrição.

### Classificação:
- A classificação de cada eixo é realizada usando um modelo de linguagem de grande escala (LLM).
- O modelo gera um descritor para cada artigo e para cada eixo.
- Exemplos de descritores podem ser encontrados nos dados de passagem dos pontos já presentes no gráfico.
- O descritor gerado para cada eixo é então comparado com uma base de dados de descritores obtidos a partir de um conjunto de dados etiquetados.
- Com base na similaridade com os descritores na base de dados, o valor do eixo é calculado para cada eixo de forma independente.
- Para o eixo de legibilidade, o valor é obtido usando métricas de legibilidade estabelecidas e mapeando-as para a escala do eixo.
"""

# ---------------------------------------------------------------------------
# Constants shared by the layout and the callbacks
# ---------------------------------------------------------------------------

# Value of the "Fonte" column used for articles classified at runtime.
USER_SOURCE = "Utilizador"

# Numeric columns that can be plotted on an axis.
SCORE_COLUMNS = ["Viés Político", "Fiabilidade", "Objetividade", "Legibilidade"]

# Per axis: [label of the low end, label of the high end, axis min, axis max].
AXIS_EXTREMITIES = {
    "Viés Político": ["Enviesado à Esquerda", "Enviesado à Direita", -100, 100],
    "Fiabilidade": ["Não fiável", "Fiável", -100, 100],
    "Objetividade": ["Baseado em Opinião", "Factual/Objetivo", -100, 100],
    "Legibilidade": ["Difícil de ler", "Fácil de ler", 0, 100],
}

# Per axis: [slider min, slider max, slider marks].
SLIDER_SETTINGS = {
    "Viés Político": [-100, 100, {-100: "-100", -50: "-50", 0: "0", 50: "50", 100: "100"}],
    "Fiabilidade": [-100, 100, {-100: "-100", -50: "-50", 0: "0", 50: "50", 100: "100"}],
    "Objetividade": [-100, 100, {-100: "-100", -50: "-50", 0: "0", 50: "50", 100: "100"}],
    "Legibilidade": [0, 100, {0: "0", 25: "25", 50: "50", 75: "75", 100: "100"}],
}

# Graph height for each chart type.
GRAPH_HEIGHTS = {"1D": "400px", "2D": "800px", "3D": "1000px"}

# (domain, display name) of the outlets whose pages the URL scraper understands.
SUPPORTED_OUTLETS = (
    ("expresso.pt", "Expresso"),
    ("publico.pt", "Público"),
    ("eco.sapo.pt", "Eco Sapo"),
    ("dn.pt", "Diário de Notícias"),
)

# Seconds to wait for a remote article before giving up.
REQUEST_TIMEOUT = 15

# ---------------------------------------------------------------------------
# Dash app and data loaded once at start-up
# ---------------------------------------------------------------------------

app = dash.Dash(__name__, title='Media Bias Chart', external_stylesheets=[dbc.themes.BOOTSTRAP])
server = app.server

data = pd.read_csv("./demo_data.csv")
data = data[["Título", "Texto", "Fonte",
             "Descritor de Viés Político", "Descritor de Fiabilidade", "Descritor de Objetividade",
             "Viés Político", "Fiabilidade", "Objetividade", "Legibilidade"]]

embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
political_bias_db, reliability_db, objectivity_db = u.setup_db()


# ---------------------------------------------------------------------------
# Layout
# ---------------------------------------------------------------------------

def _axis_selector(axis, default_column, margin_bottom):
    """Build the dropdown + range slider that configure one chart axis.

    ``axis`` is the lowercase axis letter ("x", "y" or "z"); it determines the
    component ids ``<axis>-axis``, ``<axis>-filter`` and ``<axis>-axis-container``.
    """
    return html.Div(
        [
            html.Label(f"Selecione o eixo {axis.upper()}:"),
            dcc.Dropdown(
                id=f"{axis}-axis",
                options=[{"label": col, "value": col} for col in data.columns[6:]],
                value=default_column,
                clearable=False,
                style={'marginBottom': '10px'},
            ),
            dcc.RangeSlider(
                id=f"{axis}-filter",
                min=-100,
                max=100,
                step=5,
                marks={-100: '-100', -50: '-50', 0: '0', 50: '50', 100: '100'},
                value=[-100, 100],
                allowCross=False,
                className="range-slider",
            ),
        ],
        id=f"{axis}-axis-container",
        style={'marginBottom': margin_bottom},
    )


_settings_panel = html.Div(
    [
        html.H2("Definições", style={'textAlign': 'center'}),

        # Dropdown for selecting chart type
        html.Label("Selecione o tipo de gráfico:"),
        dcc.Dropdown(
            id="chart-type",
            options=[
                {"label": "Gráfico de Dispersão 1D", "value": "1D"},
                {"label": "Gráfico de Dispersão 2D", "value": "2D"},
                {"label": "Gráfico de Dispersão 3D", "value": "3D"},
            ],
            value="2D",
            clearable=False,
            style={'marginBottom': '40px'},
        ),

        # Dropdowns and range sliders for the three axes
        _axis_selector("x", "Viés Político", '20px'),
        _axis_selector("y", "Fiabilidade", '20px'),
        _axis_selector("z", "Objetividade", '30px'),

        # Multi-select filter for sources
        html.Label("Filtrar por Fonte:"),
        dcc.Dropdown(
            id="source-filter",
            options=([{"label": "Selecionar Todos", "value": "ALL"}] +
                     [{"label": src, "value": src} for src in data["Fonte"].unique()]),
            value=data["Fonte"].unique().tolist(),
            multi=True,
            placeholder="Selectione a fonte...",
            style={'font-family': 'Arial', 'marginBottom': '20px'},
        ),

        # Toggle button to group by source
        daq.ToggleSwitch(
            id="group-toggle",
            label="Agrupar por fonte (Média)",
            size=60,
        ),
    ],
    style={
        'width': '25%',
        'padding': '20px',
        'backgroundColor': '#f8f9fa',
        'position': 'fixed',
        'height': '100vh',
        'overflowY': 'auto',
        'font-family': 'Calibri',
    },
)

_upload_area = dcc.Loading(
    id="upload-loading",
    type="circle",
    fullscreen=False,
    children=[
        # Article input as a .txt file
        dcc.Upload(
            id='upload-article',
            children=html.Div([
                '📄 Arraste ou Selecione um Artigo para Classificar (formato .txt)'
            ]),
            style={
                'width': '100%',
                'height': '60px',
                'lineHeight': '60px',
                'borderWidth': '1px',
                'borderStyle': 'dashed',
                'borderRadius': '5px',
                'textAlign': 'center',
                'margin': '10px 0',
                'font-family': 'Calibri',
                'margin-bottom': '30px',
            },
            accept='.txt',
        ),
        # Article input as a URL
        html.Div([
            html.Label([
                html.Span("Ou Insira um Link ",
                          style={'font-size': '18px', 'font-family': 'Calibri'}),
                html.Span("(Apenas para Expresso, Público, Eco Sapo, e Diário de Notícias)",
                          style={'font-size': '12px', 'font-family': 'Calibri'}),
            ]),
            dcc.Input(
                id='url-input',
                type='url',
                placeholder='https://exemplo.com/artigo-notícias',
                style={
                    'width': '98.3%',
                    'padding': '10px',
                    'margin': '10px 0',
                    'border': '1px solid #ccc',
                    'borderRadius': '5px',
                    'font-family': 'Calibri',
                },
            ),
            html.Button('Submeter Link', id='submit-url-button', n_clicks=0, style={
                'margin': '10px 0',
                'font-family': 'Calibri',
            }),
        ]),
        html.Div(id='upload-feedback', style={'margin': '10px 0'}),
    ],
)

_main_panel = html.Div(
    [
        html.Div(
            dbc.Button("Informação", id="open-info", size="lg", n_clicks=0, style={
                "backgroundColor": "#E5ECF6",
                "color": "black",
                "border": "none",
            }),
            className="d-flex justify-content-end",
        ),
        # Informational pop-up
        dbc.Modal(
            [
                dbc.ModalBody(dcc.Markdown(info_text)),
                dbc.ModalFooter(
                    dbc.Button("Fechar", id="close-info", className="ms-auto", n_clicks=0)
                ),
            ],
            id="modal-info",
            is_open=False,
        ),
        # Graph
        html.H1("Classificação dos Media Portugueses",
                style={'textAlign': 'center', 'font-family': 'Calibri', 'font-weight': 'bold'}),
        dcc.Graph(id="news-plot", style={'height': '800px'}),
        # Article text pop-up
        dbc.Modal(
            [
                dbc.ModalHeader(dbc.ModalTitle(id="modal-title")),
                dbc.ModalBody(id='modal-body'),
                dbc.ModalFooter(
                    dbc.Button("Fechar", id="close", className="ms-auto", n_clicks=0)
                ),
            ],
            id="modal",
            is_open=False,
        ),
        _upload_area,
    ],
    style={'marginLeft': '27%', 'padding': '20px'},
)

app.layout = html.Div([
    # Per-session copy of the dataset; user-classified articles are appended to it.
    dcc.Store(id="data-store", data=data.to_dict("records"), storage_type="session"),
    _settings_panel,
    _main_panel,
])


# ---------------------------------------------------------------------------
# Chart helpers
# ---------------------------------------------------------------------------

def _select_articles(articles, selected_sources, grouped):
    """Return the rows to plot: filtered by source and optionally averaged per source."""
    # An empty (or missing) selection means "no source filter", as before.
    if selected_sources:
        articles = articles[articles["Fonte"].isin(selected_sources)]

    if grouped:
        is_user = articles["Fonte"] == USER_SOURCE
        # Average only the numeric score columns: calling mean() on the text
        # columns raises TypeError on pandas >= 2.
        outlet_means = (
            articles[~is_user].groupby("Fonte")[SCORE_COLUMNS].mean().reset_index()
        )
        # User-submitted articles are never averaged; each stays its own point.
        user_points = articles.loc[is_user, ["Fonte", *SCORE_COLUMNS]]
        articles = pd.concat([outlet_means, user_points], ignore_index=True)
        articles["Título"] = articles["Fonte"]

    return articles


def _within(frame, column, bounds):
    """Keep the rows whose ``column`` lies inside the inclusive ``bounds`` pair."""
    low, high = bounds
    return frame[frame[column].between(low, high)]


def _hover_columns(axes, grouped):
    """Columns shown on hover: the source, plus each plotted axis' descriptor."""
    columns = ["Fonte"]
    if not grouped:
        # Readability has no descriptor column.
        columns += [f"Descritor de {axis}" for axis in axes if axis != "Legibilidade"]
    # Drop duplicates (same column chosen on two axes) while keeping order.
    return list(dict.fromkeys(columns))


def _axis_range(axis):
    """Return the fixed ``[min, max]`` display range of ``axis``."""
    return [AXIS_EXTREMITIES[axis][2], AXIS_EXTREMITIES[axis][3]]


def _add_logo(fig, img_data, x, y, size, opacity=None):
    """Draw a base64-encoded PNG centred on data coordinates ``(x, y)``."""
    image = dict(
        source=f'data:image/png;base64,{img_data}',
        x=x,
        y=y,
        xref="x",
        yref="y",
        sizex=size,
        sizey=size,
        xanchor="center",
        yanchor="middle",
    )
    if opacity is not None:
        image["opacity"] = opacity
    fig.add_layout_image(image)


def _annotate_x_extremes(fig, axis, left_x, right_x):
    """Label both ends of the X axis, just below the plot area."""
    low_label, high_label = AXIS_EXTREMITIES[axis][:2]
    fig.add_annotation(
        x=left_x, y=0, xref="paper", yref="paper",
        text=f"⬅️ {low_label}",
        showarrow=False, xanchor="right", yanchor="top",
        font=dict(size=12),
        yshift=-25,  # push below the plot
    )
    fig.add_annotation(
        x=right_x, y=0, xref="paper", yref="paper",
        text=f"{high_label} ➡️",
        showarrow=False, xanchor="left", yanchor="top",
        font=dict(size=12),
        yshift=-25,
    )


def _annotate_y_extremes(fig, axis):
    """Label both ends of the Y axis, rotated, just left of the plot area."""
    low_label, high_label = AXIS_EXTREMITIES[axis][:2]
    fig.add_annotation(
        x=0, y=0, xref="paper", yref="paper",
        text=f"⬅️ {low_label}",
        showarrow=False, xanchor="right", yanchor="bottom",
        font=dict(size=12),
        xshift=-40,  # push left of the plot
        textangle=-90,
    )
    fig.add_annotation(
        x=0, y=1, xref="paper", yref="paper",
        text=f"{high_label} ➡️",
        showarrow=False, xanchor="right", yanchor="top",
        font=dict(size=12),
        xshift=-40,
        textangle=-90,
    )


def _flat_scatter(points, x_axis, y_axis, grouped):
    """Build the 1D (``y_axis is None``) or 2D scatter figure."""
    one_dimensional = y_axis is None
    axes = [x_axis] if one_dimensional else [x_axis, y_axis]
    hover_cols = _hover_columns(axes, grouped)

    # User points get size 0 so only their overlaid cross image is visible.
    # assign() returns a new frame, so the filtered slice is never mutated.
    regular_size = 12 if one_dimensional else 10
    points = points.assign(**{
        "Custom Size": [0 if src == USER_SOURCE else regular_size for src in points["Fonte"]]
    })

    if one_dimensional:
        title = f"Dispersão 1D: {x_axis}"
    else:
        title = f"Dispersão 2D: {x_axis} vs {y_axis}"

    fig = px.scatter(
        points,
        x=x_axis,
        y=[0] * len(points) if one_dimensional else y_axis,
        color="Fonte",
        color_continuous_scale="Viridis",
        title=title,
        hover_name="Título",
        hover_data={col: True for col in hover_cols} | {"Custom Size": False},
        size='Custom Size',
        size_max=10,
        opacity=0.8,
    )
    if one_dimensional:
        fig.update_yaxes(visible=False, showticklabels=False)  # hide the dummy Y axis

    def y_of(row):
        return 0 if one_dimensional else row[y_axis]

    if grouped:
        # Hide the markers and draw each source's logo in their place.
        fig.update_traces(marker=dict(opacity=0, size=0))
        logo_size = 6 if one_dimensional else 10
        for _, row in points.iterrows():
            img_data = u.get_img_data(u.get_source_image(row["Fonte"]))
            _add_logo(fig, img_data, row[x_axis], y_of(row), logo_size, opacity=0.8)
    else:
        user_rows = points[points["Fonte"] == USER_SOURCE]
        if not user_rows.empty:
            cross_img = u.get_img_data("logos/x.png")  # loaded once for all user points
            for _, row in user_rows.iterrows():
                _add_logo(fig, cross_img, row[x_axis], y_of(row), 7)

    if one_dimensional:
        _annotate_x_extremes(fig, x_axis, 0.13, 0.87)
        fig.update_layout(xaxis=dict(range=_axis_range(x_axis)))
    else:
        _annotate_x_extremes(fig, x_axis, 0.15, 0.85)
        _annotate_y_extremes(fig, y_axis)
        fig.update_layout(
            xaxis=dict(range=_axis_range(x_axis)),
            yaxis=dict(range=_axis_range(y_axis)),
        )
    return fig


def _scatter_3d(points, x_axis, y_axis, z_axis, grouped):
    """Build the 3D scatter figure, with user articles drawn as black markers."""
    hover_cols = _hover_columns([x_axis, y_axis, z_axis], grouped)
    # Descriptor columns do not exist after grouping, so only pass them when ungrouped.
    hover_kwargs = {} if grouped else {"hover_data": hover_cols}

    is_user = points["Fonte"] == USER_SOURCE
    user_points = points[is_user]
    outlet_points = points[~is_user]

    fig = px.scatter_3d(
        outlet_points,
        x=x_axis,
        y=y_axis,
        z=z_axis,
        color="Fonte",
        color_continuous_scale="Viridis",
        title=f"Dispersão 3D: {x_axis} vs {y_axis} vs {z_axis}",
        hover_name="Título",
        **hover_kwargs,
    )
    if grouped:
        fig.update_traces(marker=dict(size=10, opacity=0.8, line=dict(width=1, color='black')))
    else:
        fig.update_traces(marker=dict(size=5, opacity=0.8))

    # Skip the highlight trace when there is nothing to highlight, so the
    # legend does not show an empty "Utilizador" entry.
    if not user_points.empty:
        highlight = px.scatter_3d(
            user_points,
            x=x_axis,
            y=y_axis,
            z=z_axis,
            color_discrete_sequence=["black"],
            hover_name="Título",
            **hover_kwargs,
        )
        highlight.update_traces(marker=dict(size=10), name=USER_SOURCE, showlegend=True)
        for trace in highlight.data:
            fig.add_trace(trace)

    fig.update_layout(
        scene=dict(
            xaxis=dict(title=x_axis, range=_axis_range(x_axis)),
            yaxis=dict(title=y_axis, range=_axis_range(y_axis)),
            zaxis=dict(title=z_axis, range=_axis_range(z_axis)),
        )
    )
    return fig


# to update the chart dynamically
@app.callback(
    Output("news-plot", "figure", allow_duplicate=True),
    [Input("chart-type", "value"),
     Input("x-axis", "value"),
     Input("y-axis", "value"),
     Input("z-axis", "value"),
     Input("x-filter", "value"),
     Input("y-filter", "value"),
     Input("z-filter", "value"),
     Input("source-filter", "value"),
     Input("group-toggle", "value")],
    State("data-store", "data"),
    prevent_initial_call=True
)
def update_chart(chart_type, x_axis, y_axis, z_axis, x_range, y_range, z_range,
                 selected_sources, group_toggle, data_records):
    """Rebuild the scatter figure from the current settings.

    The stored articles are filtered by source, optionally averaged per source,
    restricted to the slider ranges of the axes in use, and drawn as a 1D, 2D
    or 3D scatter plot.
    """
    articles = _select_articles(pd.DataFrame(data_records), selected_sources, group_toggle)

    # Only the axes that are actually displayed contribute a range filter.
    articles = _within(articles, x_axis, x_range)
    if chart_type in ("2D", "3D"):
        articles = _within(articles, y_axis, y_range)
    if chart_type == "3D":
        articles = _within(articles, z_axis, z_range)

    if chart_type == "1D":
        return _flat_scatter(articles, x_axis, None, group_toggle)
    if chart_type == "2D":
        return _flat_scatter(articles, x_axis, y_axis, group_toggle)
    if chart_type == "3D":
        return _scatter_3d(articles, x_axis, y_axis, z_axis, group_toggle)
    # Unknown chart type: leave the current figure untouched.
    raise PreventUpdate


# To update the source filter
@app.callback(
    Output("source-filter", "value", allow_duplicate=True),
    [Input("source-filter", "value")],
    State("source-filter", "options"),
    prevent_initial_call=True
)
def update_source_selection(selected_sources, options):
    """Expand the "Selecionar Todos" pseudo-option into the full list of sources."""
    # The value can be None/empty once the dropdown is cleared; nothing to expand then.
    if selected_sources and "ALL" in selected_sources:
        all_sources = data["Fonte"].unique().tolist()
        # The user source only exists once an article has been classified.
        if any(opt["value"] == USER_SOURCE for opt in options or []):
            return all_sources + [USER_SOURCE]
        return all_sources
    return selected_sources


# To update the graph size based on chart type
@app.callback(
    Output("news-plot", "style"),
    Input("chart-type", "value")
)
def update_graph_height(chart_type):
    """Return the graph style with the height that suits the chart type."""
    return {"height": GRAPH_HEIGHTS.get(chart_type, "800px")}


# To disable Y-axis and Z-axis dropdowns based on chart type
@app.callback(
    [Output("y-axis", "disabled"),
     Output("z-axis", "disabled")],
    [Input("chart-type", "value")]
)
def update_dropdown_states(chart_type):
    """Disable the axis dropdowns that the chart type does not use."""
    y_disabled = chart_type == "1D"
    z_disabled = chart_type in ("1D", "2D")
    return y_disabled, z_disabled


# To dim Y-axis and Z-axis controls based on chart type
@app.callback(
    [Output("y-axis-container", "style"),
     Output("z-axis-container", "style")],
    [Input("chart-type", "value")]
)
def update_dropdown_visibility(chart_type):
    """Dim the axis containers that the chart type does not use."""
    active = {'marginBottom': '10px'}
    dimmed = {'marginBottom': '10px', 'opacity': '0.5'}
    y_style = dimmed if chart_type == "1D" else active
    z_style = dimmed if chart_type in ("1D", "2D") else active
    return y_style, z_style


# show/hide filters based on chart type
@app.callback(
    [Output("x-filter", "disabled"),
     Output("y-filter", "disabled"),
     Output("z-filter", "disabled")],
    [Input("chart-type", "value"),
     Input("x-axis", "value"),
     Input("y-axis", "value"),
     Input("z-axis", "value")]
)
def update_filter_availability(chart_type, x_axis, y_axis, z_axis):
    """Enable a range slider only when its axis is shown for the chart type."""
    return False, chart_type == "1D", chart_type != "3D"


# To update the range sliders based on selected axis
@app.callback(
    [Output("x-filter", "min"),
     Output("x-filter", "max"),
     Output("x-filter", "marks"),
     Output("x-filter", "value"),
     Output("y-filter", "min"),
     Output("y-filter", "max"),
     Output("y-filter", "marks"),
     Output("y-filter", "value"),
     Output("z-filter", "min"),
     Output("z-filter", "max"),
     Output("z-filter", "marks"),
     Output("z-filter", "value")],
    [Input("x-axis", "value"),
     Input("y-axis", "value"),
     Input("z-axis", "value")]
)
def update_range_sliders(x_axis, y_axis, z_axis):
    """Reset each slider's bounds, marks and value to the full range of its axis."""
    outputs = []
    for axis in (x_axis, y_axis, z_axis):
        low, high, marks = SLIDER_SETTINGS[axis]
        outputs += [low, high, marks, [low, high]]
    return tuple(outputs)


# ---------------------------------------------------------------------------
# Classification helpers
# ---------------------------------------------------------------------------

def _classify_text(text):
    """Return the descriptor and score columns for one article text.

    Descriptors come from the LLM behind the DeepInfra OpenAI-compatible
    endpoint; scores are derived from them by ``u.get_score``.
    """
    client = OpenAI(
        api_key=os.environ.get("API_KEY"),
        base_url="https://api.deepinfra.com/v1/openai",
    )
    descriptor_political_bias = u.get_descriptor(text, "political_bias", u.prompts, client)
    descriptor_reliability = u.get_descriptor(text, "reliability", u.prompts, client)
    descriptor_objectivity = u.get_descriptor(text, "objectivity", u.prompts, client)

    # NOTE(review): the numeric arguments are passed through unchanged from the
    # original code; their meaning is defined by u.get_score — confirm there.
    political_bias_score = u.get_score(descriptor_political_bias, embedding_model, 0.5, political_bias_db, 50)
    reliability_score = u.get_score(descriptor_reliability, embedding_model, 0.2, reliability_db, 7505)
    objectivity_score = u.get_score(descriptor_objectivity, embedding_model, 0.2, objectivity_db, 9000)
    # Kept in its own variable: it used to overwrite reliability_score, so the
    # reliability axis silently showed the readability value.
    readability_score = u.classify_readability(text)

    return {
        "Descritor de Viés Político": descriptor_political_bias,
        "Descritor de Fiabilidade": descriptor_reliability,
        "Descritor de Objetividade": descriptor_objectivity,
        "Viés Político": political_bias_score,
        "Fiabilidade": reliability_score,
        "Objetividade": objectivity_score,
        "Legibilidade": readability_score,
    }


def _add_user_article(title, text, options, selected_sources, data_records):
    """Classify ``text`` and append it to the stored data as a user article.

    Returns the ``(options, selected_sources, records)`` triple to write back
    to the source filter and the data store.
    """
    new_point = {"Título": title, "Texto": text, "Fonte": USER_SOURCE, **_classify_text(text)}
    records = pd.concat(
        [pd.DataFrame(data_records), pd.DataFrame([new_point])], ignore_index=True
    ).to_dict("records")

    # Add the user source to the filter only once, however many articles are classified.
    options = list(options or [])
    if not any(opt["value"] == USER_SOURCE for opt in options):
        options.append({"label": USER_SOURCE, "value": USER_SOURCE})
    selected = list(selected_sources or [])
    if USER_SOURCE not in selected:
        selected.append(USER_SOURCE)

    return options, selected, records


def _detect_outlet(url):
    """Return the supported outlet name for ``url``, or ``None`` if unsupported."""
    parsed = urlparse(url)
    if parsed.scheme not in ("http", "https"):
        return None
    host = (parsed.hostname or "").lower()
    # Match on the hostname rather than anywhere in the URL, so a supported
    # domain appearing in a path or query string does not qualify.
    for domain, outlet in SUPPORTED_OUTLETS:
        if host == domain or host.endswith("." + domain):
            return outlet
    return None


def _first_text(soup, selector):
    """Return the text of the first element matching ``selector``."""
    matches = soup.select(selector)
    if not matches:
        raise ValueError(f"não foi possível encontrar '{selector}' na página")
    return matches[0].get_text()


def _scrape_article(url):
    """Download ``url`` and return ``(outlet, title, text)``.

    Raises ``ValueError`` for unsupported outlets or pages without the expected
    elements, and ``requests.RequestException`` on network/HTTP failures.
    """
    # Validate before fetching: the server must not request arbitrary URLs.
    outlet = _detect_outlet(url)
    if outlet is None:
        raise ValueError(
            "fonte não suportada (apenas Expresso, Público, Eco Sapo e Diário de Notícias)"
        )

    res = requests.get(url, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()
    soup = bs4.BeautifulSoup(res.text, 'lxml')

    if outlet == "Expresso":
        title = _first_text(soup, 'h1')
        text = _first_text(soup, '.full-article-fragment.full-article-body.article-content.first')
    elif outlet == "Público":
        title = _first_text(soup, '.headline.story__headline')
        match = re.search(r'\s*(.*?)\s*$', title.strip())
        if match:
            title = match.group(1)
        text = _first_text(soup, '.story__body')
    elif outlet == "Eco Sapo":
        title = re.sub(r'\s+', ' ', _first_text(soup, '.title')).strip()
        text = _first_text(soup, '.entry__content')
    else:  # Diário de Notícias
        title = _first_text(soup, '.arrow-component.arr--story-headline.story-headline-m_wrapper__1Wey6')
        text = _first_text(soup, '.arr--story-page-card-wrapper')

    return outlet, title, text


def _classification_error(error):
    """Callback return value that reports ``error`` and leaves everything else as is."""
    return (f"❌ Erro a classificar o artigo: {error}",
            dash.no_update, dash.no_update, dash.no_update, dash.no_update)


# to classify an uploaded .txt article
@app.callback(
    Output('upload-feedback', 'children', allow_duplicate=True),
    Output("news-plot", "figure", allow_duplicate=True),
    Output("source-filter", "options", allow_duplicate=True),
    Output("source-filter", "value", allow_duplicate=True),
    Output("data-store", "data", allow_duplicate=True),
    Input('upload-article', 'contents'),
    State('upload-article', 'filename'),
    State("source-filter", "options"),
    State("source-filter", "value"),
    State("data-store", "data"),
    prevent_initial_call=True
)
def classify_article(contents, filename, options, selected_sources, data_records):
    """Classify an uploaded ``.txt`` article and add it to the session data.

    The figure itself is left untouched here; it is redrawn by ``update_chart``
    when the source filter value changes.
    """
    if contents is None:
        raise PreventUpdate

    try:
        # Upload contents look like "<content type>,<base64 payload>".
        _, content_string = contents.split(',', 1)
        decoded = u.decode_text_file(content_string)
        options, selected, records = _add_user_article(
            filename, decoded, options, selected_sources, data_records
        )
    except Exception as e:  # callback boundary: report the failure in the UI
        return _classification_error(e)

    return f"✅ Classificado e adicionado '{filename}'", dash.no_update, options, selected, records


# to classify an article given by URL
@app.callback(
    Output('upload-feedback', 'children'),
    Output("news-plot", "figure"),
    Output("source-filter", "options"),
    Output("source-filter", "value"),
    Output("data-store", "data"),
    Input('submit-url-button', 'n_clicks'),
    State('url-input', 'value'),
    State("source-filter", "options"),
    State("source-filter", "value"),
    State("data-store", "data")
)
def classify_url(n_clicks, url, options, selected_sources, data_records):
    """Scrape an article from a supported outlet, classify it and add it to the session data."""
    if not n_clicks or not url:
        raise PreventUpdate

    try:
        outlet, title, text = _scrape_article(url)
        options, selected, records = _add_user_article(
            f"{outlet}: {title}", text, options, selected_sources, data_records
        )
    except Exception as e:  # callback boundary: report the failure in the UI
        return _classification_error(e)

    return "✅ O artigo pedido foi classificao e adicionado.", dash.no_update, options, selected, records


# to open window with article text
@app.callback(
    Output("modal", "is_open"),
    Output("modal-body", "children"),
    Output("modal-title", "children"),
    Input("news-plot", "clickData"),
    Input("close", "n_clicks"),
    State("modal", "is_open"),
    State("data-store", "data")
)
def display_modal(clickData, close_clicks, is_open, data_records):
    """Show the clicked article's text in a pop-up, or close the pop-up."""
    ctx = dash.callback_context

    if ctx.triggered_id == "news-plot" and clickData:
        title = clickData['points'][0].get('hovertext')
        articles = pd.DataFrame(data_records)
        if title is None or "Título" not in articles or "Texto" not in articles:
            raise PreventUpdate
        texts = articles.loc[articles["Título"] == title, "Texto"].dropna()
        # Grouped points are titled by source and match no article: nothing to show.
        if texts.empty:
            raise PreventUpdate
        # Return a plain string (first match) rather than a pandas Series.
        return True, str(texts.iloc[0]), title

    if ctx.triggered_id == "close" and is_open:
        return False, None, None

    return is_open, None, None


# to open informational window
@app.callback(
    Output("modal-info", "is_open"),
    [Input("open-info", "n_clicks"), Input("close-info", "n_clicks")],
    [State("modal-info", "is_open")],
)
def toggle_modal(n1, n2, is_open):
    """Toggle the informational pop-up when either of its buttons is clicked."""
    if n1 or n2:
        return not is_open
    return is_open


if __name__ == '__main__':
    app.run(host="0.0.0.0", port=7860)