[] [] [{"variableName": "ds_dados", "type": "dictionary", "supportedEngines": ["pandas"], "isLocalVariable": false}] [{"variableName": "ds_dados", "type": "dictionary", "supportedEngines": ["pandas"], "isLocalVariable": false}] dados_tokenizados: DatasetDict({ train: Dataset({ features: ['rotulo', 'rotulo_simples', 'text', 'label', 'input_ids', 'attention_mask'], num_rows: 4000 }) validation: Dataset({ features: ['rotulo', 'rotulo_simples', 'text', 'label', 'input_ids', 'attention_mask'], num_rows: 1000 }) test: Dataset({ features: ['rotulo', 'rotulo_simples', 'text', 'label', 'input_ids', 'attention_mask'], num_rows: 1000 }) }) /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages/transformers/tokenization_utils_base.py:1601: FutureWarning: `clean_up_tokenization_spaces` was not set. It will be set to `True` by default. This behavior will be depracted in transformers v4.45, and will be then set to `False` by default. For more details check this issue: https://github.com/huggingface/transformers/issues/31884 warnings.warn( Some weights of DistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert/distilbert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'pre_classifier.bias', 'pre_classifier.weight'] You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.