from dataPipeline import DataPipeline
from my_tokenize import Database

# Initialize the tokenizer pipeline
tokenizer_name = 'bert-base-cased'
pipeline = DataPipeline(tokenizer_name=tokenizer_name, max_length=100)

# Fetch the input texts from MongoDB
input_texts = [doc["Prompt"] for doc in Database.get_input_texts()]

# Tokenize the texts
tokenized_texts = pipeline.tokenize_texts(input_texts)
print("Tokenized Texts:")
for text, tokens in zip(input_texts, tokenized_texts):
    print(f"Original Text: {text}")
    print(f"Tokenized Text: {tokens}")

# Encode the texts
encoded_texts = pipeline.encode_texts(input_texts)
print("Encoded Texts:")
for text, encoded in zip(input_texts, encoded_texts):
    print(f"Original Text: {text}")
    print(f"Encoded Text: {encoded['input_ids'].squeeze().tolist()}")
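
# --- Illustrative sketch (assumption) ---
# The script above depends on a DataPipeline class from dataPipeline.py that
# is not shown here. A minimal implementation compatible with the calls used
# above (tokenize_texts, encode_texts) might look like the following; the
# actual project code may differ.
from transformers import BertTokenizer

class DataPipeline:
    def __init__(self, tokenizer_name: str, max_length: int = 100):
        # Load the pretrained tokenizer once and reuse it for all texts.
        self.tokenizer = BertTokenizer.from_pretrained(tokenizer_name)
        self.max_length = max_length

    def tokenize_texts(self, texts):
        # Return the wordpiece tokens for each input string.
        return [self.tokenizer.tokenize(text) for text in texts]

    def encode_texts(self, texts):
        # Return one encoding per text, padded/truncated to max_length,
        # as PyTorch tensors so 'input_ids' supports .squeeze().tolist().
        return [
            self.tokenizer(
                text,
                max_length=self.max_length,
                padding="max_length",
                truncation=True,
                return_tensors="pt",
            )
            for text in texts
        ]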