library(tidyverse)

# -----
# Utilities for reading, inspecting and editing image captions.
#
# Every function below works on a "caption table": a data frame with the
# columns `image_path`, `caption_order` and `caption`, one row per caption.
# -----

# Read every .txt caption file in a directory into one caption table.
#
# Only the first line of each file is parsed; it is split on commas and each
# piece is trimmed of surrounding whitespace. The image path is the caption
# file path with its ".txt" extension replaced by ".png".
#
# Args:
#   directory_path: Directory that is searched (non-recursively) for .txt files.
#
# Returns:
#   A tibble with columns image_path, caption_order and caption, or NULL when
#   the directory contains no .txt files.
read_captions_from_directory <- function(directory_path) {
  # List the .txt files in the directory.
  txt_files <- list.files(
    directory_path,
    pattern = "\\.txt$",
    full.names = TRUE
  )

  # Nothing to read: signal it with NULL.
  if (length(txt_files) == 0) {
    return(NULL)
  }

  # Read the captions of each file; empty files contribute nothing.
  data <- lapply(txt_files, function(file) {
    captions <- readLines(file, warn = FALSE)
    if (length(captions) == 0) {
      return(NULL)
    }

    # Strip surrounding whitespace from every caption.
    captions_list <- trimws(strsplit(captions, ",")[[1]])

    tibble(
      image_path = sub("\\.txt$", ".png", file),
      # seq_along() stays empty for an empty first line; 1:0 would not.
      caption_order = seq_along(captions_list),
      caption = captions_list
    )
  })

  # Combine the per-file tibbles (NULL entries are skipped by bind_rows).
  bind_rows(data)
}

# Count how many rows carry each caption.
#
# Args:
#   data: Caption table.
#
# Returns:
#   A tibble with columns caption and frequency, most frequent caption first.
get_caption_frequency <- function(data) {
  data %>%
    group_by(caption) %>%
    summarise(frequency = n()) %>%
    arrange(desc(frequency))
}

# Find the rows whose caption equals `target_caption`.
#
# Args:
#   data: Caption table.
#   target_caption: Caption text to match exactly.
#
# Returns:
#   The distinct matching rows. The result is still grouped by image_path,
#   exactly as before; call ungroup() on it if that matters to the caller.
search_by_caption <- function(data, target_caption) {
  data %>%
    filter(caption == target_caption) %>%
    group_by(image_path) %>%
    distinct()
}

# Remove a caption from one image and close the gap in caption_order.
#
# Every occurrence of the caption on that image is removed. Each remaining
# caption of the image is moved up by the number of removed captions that
# preceded it. Rows of other images are left untouched.
#
# Args:
#   data: Caption table.
#   target_image_path: Image whose caption is removed.
#   target_caption: Caption text to remove.
#
# Returns:
#   The updated caption table. When the caption is not present on the image a
#   message is printed and `data` is returned unchanged.
remove_caption_and_adjust_order <- function(data,
                                            target_image_path,
                                            target_caption) {
  # which() ignores NA comparisons, so NA rows can never match.
  hits <- which(
    data$image_path == target_image_path & data$caption == target_caption
  )

  # Check that the caption exists on this image.
  if (length(hits) == 0) {
    cat(sprintf(
      "The caption '%s' does not exist for image '%s'.\n",
      target_caption, target_image_path
    ))
    return(data)
  }

  # Remember the positions being removed, then drop those rows.
  removed_orders <- data$caption_order[hits]
  data <- data[-hits, , drop = FALSE]

  # Shift every remaining caption of the image by the number of removed
  # positions that were in front of it.
  on_image <- which(data$image_path == target_image_path)
  remaining_orders <- data$caption_order[on_image]
  shift <- vapply(
    remaining_orders,
    function(order) sum(removed_orders < order, na.rm = TRUE),
    integer(1)
  )
  data$caption_order[on_image] <- remaining_orders - shift

  data
}

# Remove every caption whose overall frequency is at or below `threshold`.
#
# Args:
#   data: Caption table.
#   threshold: Captions occurring this many times or fewer are removed.
#
# Returns:
#   The caption table without the low-frequency captions, with caption_order
#   renumbered on every affected image.
remove_low_frequency_captions <- function(data, threshold) {
  # Captions whose frequency does not exceed the threshold.
  low_freq_captions <- get_caption_frequency(data) %>%
    filter(frequency <= threshold) %>%
    pull(caption)

  # Remove each of them from every image it appears on.
  for (low_caption in low_freq_captions) {
    affected_images <- unique(
      data$image_path[which(data$caption == low_caption)]
    )
    for (image in affected_images) {
      data <- remove_caption_and_adjust_order(data, image, low_caption)
    }
  }

  data
}

# Show each image that carries `target_caption` and ask whether to remove it.
#
# The image is opened with the platform's default viewer (`start` on Windows,
# `open` on macOS, `xdg-open` otherwise). The user answers on the console:
# "yes" removes the caption from that image, "end" stops the review, and any
# other answer keeps the caption.
#
# Args:
#   data: Caption table.
#   target_caption: Caption to review.
#
# Returns:
#   The caption table after the removals the user confirmed.
edit_captions_interactively <- function(data, target_caption) {
  # Images carrying the caption; each image is reviewed once.
  image_paths <- unique(search_by_caption(data, target_caption)$image_path)
  os_name <- Sys.info()[["sysname"]]

  for (path in image_paths) {
    # Open the image with the OS-specific command. The path is quoted with
    # shQuote() so spaces and shell metacharacters are passed through safely.
    if (os_name == "Windows") {
      shell(sprintf('start "" %s', shQuote(path, type = "cmd")), intern = TRUE)
    } else if (os_name == "Darwin") {
      system(sprintf("open %s", shQuote(path)))
    } else {
      system(sprintf("xdg-open %s", shQuote(path)))
    }

    # Let the user decide whether the caption is removed.
    cat(sprintf(
      "Do you want to remove the caption '%s' from image '%s'? (yes/no/end): ",
      target_caption, path
    ))
    response <- tolower(trimws(readline()))

    if (response == "end") {
      break
    }
    if (response == "yes") {
      data <- remove_caption_and_adjust_order(data, path, target_caption)
    }
  }

  data
}

# Insert a caption into an image's caption list at a given position.
#
# Captions at or after the insertion position are moved down by one. The new
# row is appended to the end of the table (rows are not re-sorted).
#
# Args:
#   data: Caption table.
#   target_image_path: Image that receives the caption.
#   target_caption: Caption text to add.
#   target_order: Position for the new caption. When NULL, the image's current
#     captions are printed and the position is read from the console. A
#     missing, non-numeric or out-of-range position (not in 1..max + 1) is
#     replaced by max + 1, i.e. the caption is appended.
#
# Returns:
#   The updated caption table, or `data` unchanged when the image already has
#   the caption.
add_caption_at_order <- function(data,
                                 target_image_path,
                                 target_caption,
                                 target_order = NULL) {
  on_image <- which(data$image_path == target_image_path)

  # Duplicate check: leave the data as is when the caption is already there.
  if (target_caption %in% data$caption[on_image]) {
    return(data)
  }

  # Largest caption_order of the image; 0 when it has no captions yet
  # (max() of an empty vector would be -Inf).
  existing_orders <- data$caption_order[on_image]
  existing_orders <- existing_orders[!is.na(existing_orders)]
  max_order <- if (length(existing_orders) > 0) max(existing_orders) else 0

  # No position supplied: show the captions and let the user choose.
  if (is.null(target_order)) {
    print_image_captions_as_csv(data, target_image_path)
    cat(
      "Enter the position (order) to insert the new caption (1 to",
      max_order + 1, "): "
    )
    # Non-numeric input becomes NA and is handled by the check below.
    target_order <- suppressWarnings(as.numeric(readline()))
  }

  # Invalid positions fall back to appending after the last caption.
  if (length(target_order) != 1 || is.na(target_order) ||
    target_order <= 0 || target_order > max_order + 1) {
    target_order <- max_order + 1
  }

  # Make room: move captions at or after the position down by one.
  to_shift <- which(
    data$image_path == target_image_path & data$caption_order >= target_order
  )
  data$caption_order[to_shift] <- data$caption_order[to_shift] + 1L

  # Append the new caption row.
  new_caption <- tibble(
    image_path = target_image_path,
    caption_order = target_order,
    caption = target_caption
  )

  bind_rows(data, new_caption)
}

# Move an existing caption of an image to a new position.
#
# Implemented as a removal followed by an insertion, so the other captions of
# the image are renumbered accordingly.
#
# Args:
#   data: Caption table.
#   target_image_path: Image whose caption is moved.
#   target_caption: Caption text to move.
#   new_order: New position, interpreted as in add_caption_at_order().
#
# Returns:
#   The updated caption table. When the caption is not present on the image a
#   message is printed and `data` is returned unchanged.
move_caption_order <- function(data,
                               target_image_path,
                               target_caption,
                               new_order) {
  # Check that the caption exists on this image.
  if (!is_caption_present(data, target_image_path, target_caption)) {
    cat(sprintf(
      "The caption '%s' does not exist for image '%s'.\n",
      target_caption, target_image_path
    ))
    return(data)
  }

  # Remove the caption, then re-insert it at the new position.
  data %>%
    remove_caption_and_adjust_order(target_image_path, target_caption) %>%
    add_caption_at_order(target_image_path, target_caption, new_order)
}

# Check whether an image carries a caption.
#
# Args:
#   data: Caption table.
#   target_image_path: Image to look at.
#   target_caption: Caption text to look for.
#
# Returns:
#   TRUE when at least one row matches both values, FALSE otherwise.
is_caption_present <- function(data, target_image_path, target_caption) {
  any(
    data$image_path == target_image_path & data$caption == target_caption,
    na.rm = TRUE
  )
}

# Print every distinct caption in the table on one comma-separated line.
#
# Args:
#   data: Caption table.
print_all_unique_captions_as_csv <- function(data) {
  # Collect the captions without duplicates.
  unique_captions <- unique(data$caption)

  # Print them as one comma-separated line.
  cat(paste(unique_captions, collapse = ", "), "\n")
}

# Print the captions of one image, in caption_order, on one line.
#
# Args:
#   data: Caption table.
#   target_image_path: Image whose captions are printed.
print_image_captions_as_csv <- function(data, target_image_path) {
  captions <- data %>%
    filter(image_path == target_image_path) %>%
    arrange(caption_order) %>%
    pull(caption)

  cat(paste(captions, collapse = ", "), "\n")
}

# Collapse a set of related captions on an image into one representative.
#
# Every caption in `related_captions` other than the representative is removed
# from the image. A message is printed for each related caption that the image
# does not carry.
#
# Args:
#   data: Caption table.
#   related_captions: Character vector of captions considered equivalent.
#   representative_caption: The caption that is kept.
#   target_image_path: Image to clean up.
#
# Returns:
#   The updated caption table. When the image does not carry the
#   representative caption a message is printed and `data` is returned
#   unchanged.
remove_related_captions_except_representative <- function(
    data,
    related_captions,
    representative_caption,
    target_image_path) {
  # The representative caption must be attached to the image.
  if (!is_caption_present(data, target_image_path, representative_caption)) {
    cat(sprintf(
      "The representative caption '%s' is not associated with image '%s'.\n",
      representative_caption, target_image_path
    ))
    return(data)
  }

  # Remove every related caption except the representative one.
  for (related in setdiff(related_captions, representative_caption)) {
    data <- remove_caption_and_adjust_order(data, target_image_path, related)
  }

  data
}