# Visual semantic with BERT-CNN

This model can be used to assign an object-to-caption relatedness score, which is valuable for (1) diverse caption re-ranking and (2) generating soft labels for caption filtering when scraping image captions from the internet.

For a quick start, please have a look at this [colab](https://colab.research.google.com/drive/1N0JVa6y8FKGLLSpiG7hd_W75UYhHRe2j?usp=sharing).

For the training data, see the [dataset](https://huggingface.co/datasets/AhmedSSabir/Textual-Image-Caption-Dataset).

Set up the environment:

```
conda create -n BERT_visual python=3.6 anaconda
conda activate BERT_visual
pip install tensorflow==1.15.0
pip install --upgrade tensorflow_hub==0.7.0
```

Clone the helper library:

```
git clone https://github.com/gaphex/bert_experimental/
```

Run inference with the frozen graph:

```python
import tensorflow as tf
import numpy as np
import pandas as pd
import sys

from sklearn.model_selection import train_test_split

sys.path.insert(0, "bert_experimental")

from bert_experimental.finetuning.text_preprocessing import build_preprocessor
from bert_experimental.finetuning.graph_ops import load_graph

df = pd.read_csv("test.tsv", sep='\t')

texts = []
delimiter = " ||| "

for vis, cap in zip(df.visual.tolist(), df.caption.tolist()):
    texts.append(delimiter.join((str(vis), str(cap))))

texts = np.array(texts)

trX, tsX = train_test_split(texts, shuffle=False, test_size=0.01)

restored_graph = load_graph("frozen_graph.pb")

graph_ops = restored_graph.get_operations()
input_op, output_op = graph_ops[0].name, graph_ops[-1].name
print(input_op, output_op)

x = restored_graph.get_tensor_by_name(input_op + ':0')
y = restored_graph.get_tensor_by_name(output_op + ':0')

preprocessor = build_preprocessor("vocab.txt", 64)
py_func = tf.numpy_function(preprocessor, [x], [tf.int32, tf.int32, tf.int32], name='preprocessor')

py_func = tf.numpy_function(preprocessor, [x], [tf.int32, tf.int32, tf.int32])

##predictions
sess = tf.Session(graph=restored_graph)

print(trX[:4])

y = tf.print(y, summarize=-1)
y_out = sess.run(y, feed_dict={
    x: trX[:4].reshape((-1,1))
})

print(y_out)
```