# SDtageditoR / script.R
# cella110n's picture
# ver 1.1
# f8dfd01
library(tidyverse)
# ----- #
# Read every comma-separated caption (.txt) file in a directory into one long
# table with one row per tag.
#
# Args:
#   directory_path: directory that is scanned (non-recursively) for files
#     whose name ends in ".txt".
#
# Returns:
#   A tibble with the columns
#     image_path    - the caption file path with ".txt" replaced by ".png"
#     caption_order - 1-based position of the tag within its file
#     caption       - the tag text with surrounding whitespace removed
#   or NULL when the directory contains no .txt files. Files that contain no
#   tags contribute no rows.
read_captions_from_directory <- function(directory_path) {
  txt_files <- list.files(directory_path, pattern = "\\.txt$", full.names = TRUE)

  # Nothing to read: signal this to the caller with NULL.
  if (length(txt_files) == 0) {
    return(NULL)
  }

  per_file <- lapply(txt_files, function(file) {
    lines <- readLines(file, warn = FALSE)

    # Split every line (not only the first one) so tags on later lines are
    # not silently dropped; blank lines simply contribute nothing.
    tags <- unlist(strsplit(lines, ",", fixed = TRUE), use.names = FALSE)
    if (length(tags) == 0) {
      return(NULL)
    }
    tags <- trimws(tags)

    tibble(
      # Escape the dot so only a literal ".txt" suffix is replaced.
      image_path = sub("\\.txt$", ".png", file),
      # seq_along() is safe where 1:length() would yield c(1, 0).
      caption_order = seq_along(tags),
      caption = tags
    )
  })

  # NULL entries (files without tags) are ignored by bind_rows().
  bind_rows(per_file)
}
# Count how many rows each caption has in `data`.
#
# Returns one row per distinct caption with its count in `frequency`,
# sorted from the most to the least frequent caption.
get_caption_frequency <- function(data) {
  by_caption <- group_by(data, caption)
  counts <- summarise(by_caption, frequency = n())
  arrange(counts, desc(frequency))
}
# Find the rows of `data` whose caption equals `target_caption`.
#
# Args:
#   data: caption table with at least a `caption` column.
#   target_caption: caption text to match exactly.
#
# Returns:
#   The matching rows with exact duplicate rows removed, as an ungrouped
#   table. Grouping by image_path added nothing to the de-duplication
#   (distinct() already compares all columns) and used to leak a grouped
#   result to callers, so it is no longer applied.
search_by_caption <- function(data, target_caption) {
  data %>%
    filter(caption == target_caption) %>%
    ungroup() %>%
    distinct()
}
# Remove a caption from one image and close the resulting gap(s) in that
# image's caption_order.
#
# Args:
#   data: caption table with columns image_path, caption_order and caption.
#   target_image_path: image whose caption should be removed.
#   target_caption: caption text to remove.
#
# Returns:
#   `data` without the matching row(s). Remaining captions of the same image
#   are shifted down by the number of removed captions that preceded them.
#   If the caption is not present on the image, a message is printed and
#   `data` is returned unchanged.
remove_caption_and_adjust_order <- function(data, target_image_path, target_caption) {
  # NA-safe masks: rows with a missing path or caption never match, so they
  # can neither break the `if` below nor be dropped by accident.
  on_image <- !is.na(data$image_path) & data$image_path == target_image_path
  is_target <- on_image & !is.na(data$caption) & data$caption == target_caption

  if (!any(is_target)) {
    cat(sprintf(
      "The caption '%s' does not exist for image '%s'.\n",
      target_caption, target_image_path
    ))
    return(data)
  }

  # May hold several positions when the caption is duplicated on the image.
  removed_orders <- data$caption_order[is_target]

  data <- data[!is_target, , drop = FALSE]
  rownames(data) <- NULL
  on_image <- on_image[!is_target]

  # Shift each remaining caption by the number of removed captions that sat
  # before it; with a single removal this is the usual "minus one" shift.
  remaining <- data$caption_order[on_image]
  removed_before <- vapply(
    remaining,
    function(ord) sum(removed_orders < ord, na.rm = TRUE),
    integer(1)
  )
  data$caption_order[on_image] <- remaining - removed_before

  data
}
# Drop every caption whose overall frequency is at or below `threshold`.
#
# The frequency table comes from get_caption_frequency(); each rare caption
# is then removed image by image through remove_caption_and_adjust_order(),
# which also closes the gap in that image's caption_order.
remove_low_frequency_captions <- function(data, threshold) {
  # Captions occurring `threshold` times or fewer across the whole table.
  rare_captions <- pull(
    filter(get_caption_frequency(data), frequency <= threshold),
    caption
  )

  for (rare in rare_captions) {
    # Images that still carry this caption in the current state of `data`.
    affected_images <- unique(data$image_path[data$caption == rare])
    for (img in affected_images) {
      data <- remove_caption_and_adjust_order(data, img, rare)
    }
  }

  data
}
# Interactively review every image that carries `target_caption`.
#
# Each matching image is opened with the platform's default viewer and the
# user is asked whether the caption should be removed from it.
#
# Args:
#   data: caption table with columns image_path, caption_order and caption.
#   target_caption: caption to review.
#
# Returns:
#   `data` with the caption removed from every image the user answered "yes"
#   for. Answering "end" stops the review early; any other answer keeps the
#   caption on that image.
edit_captions_interactively <- function(data, target_caption) {
  image_paths <- search_by_caption(data, target_caption)$image_path

  # The platform does not change while looping, so look it up once.
  os_name <- Sys.info()[["sysname"]]

  for (path in image_paths) {
    if (os_name == "Windows") {
      cmd <- sprintf('start "" "%s"', path)
      shell(cmd, intern = TRUE)
    } else {
      # macOS ships `open`; other systems are assumed to provide `xdg-open`.
      opener <- if (os_name == "Darwin") "open" else "xdg-open"
      # shQuote() keeps quotes, `$` or backticks in a file name from being
      # interpreted by the shell.
      system2(opener, args = shQuote(path))
    }

    cat(sprintf(
      "Do you want to remove the caption '%s' from image '%s'? (yes/no/end): ",
      target_caption, path
    ))
    # Tolerate stray whitespace and any letter case in the answer.
    response <- tolower(trimws(readline()))

    if (response == "end") {
      break
    } else if (response == "yes") {
      data <- remove_caption_and_adjust_order(data, path, target_caption)
    }
  }

  data
}
# Insert a caption into an image's caption list at a given position.
#
# Args:
#   data: caption table with columns image_path, caption_order and caption.
#   target_image_path: image that receives the caption.
#   target_caption: caption text to add.
#   target_order: position to insert at. When NULL the image's current
#     captions are printed and the position is read from the console; an
#     answer that is not a whole number between 1 and (current maximum + 1)
#     appends the caption at the end instead.
#
# Returns:
#   `data` with the new caption appended as a row and the caption_order of
#   the image's captions at or after `target_order` increased by one.
#   `data` is returned unchanged when the image already has the caption.
add_caption_at_order <- function(data, target_image_path, target_caption, target_order = NULL) {
  on_image <- which(data$image_path == target_image_path)

  # Duplicate check: never add the same caption twice to one image.
  if (target_caption %in% data$caption[on_image]) {
    return(data)
  }

  # Highest position currently used by the image; 0 when it has no captions
  # yet (max() on an empty vector would give -Inf and a warning).
  existing_orders <- data$caption_order[on_image]
  existing_orders <- existing_orders[!is.na(existing_orders)]
  max_order <- if (length(existing_orders) > 0) max(existing_orders) else 0

  if (is.null(target_order)) {
    print_image_captions_as_csv(data, target_image_path)
    cat("Enter the position (order) to insert the new caption (1 to", max_order + 1, "): ")
    # Non-numeric input becomes NA here and is handled as invalid below.
    target_order <- suppressWarnings(as.numeric(readline()))

    is_valid <- !is.na(target_order) &&
      target_order == round(target_order) &&
      target_order >= 1 &&
      target_order <= max_order + 1
    if (!is_valid) {
      target_order <- max_order + 1
    }
  }

  # Make room: push captions at or after the insertion point back by one.
  data <- data %>%
    mutate(caption_order = ifelse(
      image_path == target_image_path & caption_order >= target_order,
      caption_order + 1,
      caption_order
    ))

  new_caption <- tibble(
    image_path = target_image_path,
    caption_order = target_order,
    caption = target_caption
  )

  bind_rows(data, new_caption)
}
# Move an existing caption of an image to a new position.
#
# The caption is first taken out via remove_caption_and_adjust_order() and
# then re-inserted at `new_order` via add_caption_at_order(). When the image
# does not have the caption, a message is printed and `data` is returned
# unchanged.
move_caption_order <- function(data, target_image_path, target_caption, new_order) {
  has_caption <- any(
    data$image_path == target_image_path & data$caption == target_caption
  )

  if (has_caption) {
    # Take the caption out, then put it back at the requested position.
    without_caption <- remove_caption_and_adjust_order(
      data, target_image_path, target_caption
    )
    return(add_caption_at_order(
      without_caption, target_image_path, target_caption, new_order
    ))
  }

  cat(sprintf(
    "The caption '%s' does not exist for image '%s'.\n",
    target_caption, target_image_path
  ))
  data
}
# Check whether an image has a given caption (may end up unused).
#
# Returns TRUE when at least one row of `data` has both
# image_path == target_image_path and caption == target_caption.
is_caption_present <- function(data, target_image_path, target_caption) {
  same_image <- data$image_path == target_image_path
  same_caption <- data$caption == target_caption
  any(same_image & same_caption)
}
# Print every distinct caption in `data` on one line, separated by ", ".
#
# Captions appear in order of first occurrence; the line is written to the
# console with cat().
print_all_unique_captions_as_csv <- function(data) {
  # toString() joins the values with ", ".
  csv_line <- toString(unique(data$caption))
  cat(csv_line, "\n")
}
# Print the captions of one image on a single line, separated by ", ".
#
# Captions are listed in ascending caption_order; the line is written to the
# console with cat().
print_image_captions_as_csv <- function(data, target_image_path) {
  image_rows <- filter(data, image_path == target_image_path)
  ordered_rows <- arrange(image_rows, caption_order)
  cat(toString(ordered_rows$caption), "\n")
}
# Collapse a set of related captions on one image into a single
# representative caption.
#
# Every caption in `related_captions` other than `representative_caption` is
# removed from `target_image_path` via remove_caption_and_adjust_order().
# When the image does not carry the representative caption, a message is
# printed and `data` is returned unchanged.
remove_related_captions_except_representative <- function(data, related_captions, representative_caption, target_image_path) {
  has_representative <- any(
    data$image_path == target_image_path & data$caption == representative_caption
  )

  if (!has_representative) {
    cat(sprintf(
      "The representative caption '%s' is not associated with image '%s'.\n",
      representative_caption, target_image_path
    ))
    return(data)
  }

  for (candidate in related_captions) {
    # The representative itself always stays on the image.
    if (candidate == representative_caption) {
      next
    }
    data <- remove_caption_and_adjust_order(data, target_image_path, candidate)
  }

  data
}