# LMM_sewerML / app.py
# rtaormina — "Add reference" (f8d3973, verified)
import pandas as pd
import gradio as gr
import numpy as np
# Model-generated descriptions, one row per sample. return_image reads the
# columns img_id, defect_type, GPT4, GPT4_basic, CogVLM and LLaVA from it.
df_table_dsc = pd.read_csv("./results/df_table_dsc.csv")

# Model predictions, looked up by the same row position as df_table_dsc.
# return_image reads the columns Xie, GPT4, GPT4_basic, CogVLM and LLaVA.
df_table_prd = pd.read_csv("./results/df_table_prd.csv")

# Number of visible text lines in each model-description textbox.
n_lines = 12

# Translates the defect_type codes stored in df_table_dsc into the label
# shown in the "Defect class" textbox.
defect_dict = {
    "NoDefect": "No defect",
    "RO": "Defect: Roots",
    "OB": "Defect: Surface damage",
    "RB": "Defect: Cracks, breaks, and collapses",
    "PF": "Defect: Production error",
    "DE": "Defect: Deformation",
}

# Stylesheet handed to gr.Blocks; sizes the component whose elem_id is
# "image-out".
css = """
#image-out {
height: 450px;
width: 450px;
}
"""
def conv_int(txt):
    """Convert *txt* to an ``int`` for display, falling back to ``'NaN'``.

    Args:
        txt: Any value accepted by ``int()`` (number, bool, numeric string).

    Returns:
        ``int(txt)`` when the conversion succeeds, otherwise the string
        ``'NaN'`` (e.g. for ``None``, a float NaN/infinity, or non-numeric
        text).
    """
    try:
        return int(txt)
    # Only the failures int() itself produces are treated as "no value":
    # ValueError (NaN, bad text), TypeError (None and other non-numbers),
    # OverflowError (infinity). Anything else, such as KeyboardInterrupt,
    # propagates instead of being silently swallowed.
    except (ValueError, TypeError, OverflowError):
        return 'NaN'
def return_prev(index):
    """Move to the previous sample and return the values for all output widgets.

    Args:
        index: Current sample position, as supplied by the hidden
            ``gr.Number`` component.

    Returns:
        A 10-tuple: the new position twice (once for the hidden state, once
        for the "Sample no." textbox) followed by the eight values produced
        by ``return_image`` for that position.
    """
    # gr.Number can deliver its value as a float, while DataFrame.iloc only
    # accepts integer positions, so normalise before doing any arithmetic.
    index = int(index) - 1
    # Clamp to the valid row range so stepping back from the first sample
    # (or from the initial -1) stays on row 0.
    index = max(0, min(index, len(df_table_dsc) - 1))
    return (index, index, *return_image(index))
def return_next(index):
    """Move to the next sample and return the values for all output widgets.

    Args:
        index: Current sample position, as supplied by the hidden
            ``gr.Number`` component.

    Returns:
        A 10-tuple: the new position twice (once for the hidden state, once
        for the "Sample no." textbox) followed by the eight values produced
        by ``return_image`` for that position.
    """
    # gr.Number can deliver its value as a float, while DataFrame.iloc only
    # accepts integer positions, so normalise before doing any arithmetic.
    index = int(index) + 1
    # Clamp to the valid row range so stepping past the last sample stays on
    # the last row.
    index = max(0, min(index, len(df_table_dsc) - 1))
    return (index, index, *return_image(index))
def return_rand(index):
    """Jump to a randomly chosen sample.

    Args:
        index: Current sample position passed in by the click handler; its
            value is not used, a fresh position is drawn instead.

    Returns:
        A 10-tuple: the drawn position twice, followed by the eight values
        produced by ``return_image`` for that position.
    """
    # The upper bound of randint is exclusive, so every row can be drawn.
    picked = np.random.randint(0, len(df_table_dsc))
    (image_path, gpt4, gpt4b, cogvlm, llava,
     defect_class, predictions, filename) = return_image(picked)
    return (picked, picked, image_path, gpt4, gpt4b, cogvlm, llava,
            defect_class, predictions, filename)
def return_image(index):
    """Gather everything the UI displays for the sample at row *index*.

    Args:
        index: Integer row position into ``df_table_dsc`` / ``df_table_prd``.

    Returns:
        An 8-tuple ``(image_path, gpt4_text, gpt4_basic_text, cogvlm_text,
        llava_text, defect_label, predictions_text, img_id)``.
    """
    # Descriptions and ground-truth defect code for this sample.
    dsc_row = df_table_dsc.iloc[index]
    img_id = dsc_row['img_id']
    image_path = f"./images/{img_id}"
    defect_class = defect_dict[dsc_row['defect_type']]

    # Predictions live in a second table addressed by the same row position.
    prd_row = df_table_prd.iloc[index]
    # The Xie value is thresholded at 0.5 before display; the other columns
    # are shown as stored.
    # NOTE(review): if the Xie column can hold NaN, the comparison is False
    # and the sample is shown as 0 rather than 'NaN' — confirm this is wanted.
    xie_pred = conv_int(prd_row['Xie'] > 0.5)
    gpt4_pred = conv_int(prd_row['GPT4'])
    gpt4_basic_pred = conv_int(prd_row['GPT4_basic'])
    cogvlm_pred = conv_int(prd_row['CogVLM'])
    llava_pred = conv_int(prd_row['LLaVA'])
    predictions = (
        f"XIE:{xie_pred}, GPT4-V:{gpt4_pred}, GPT4-Vs:{gpt4_basic_pred}, "
        f"CogVLM:{cogvlm_pred}, LLaVA:{llava_pred}"
    )

    return (
        image_path,
        dsc_row['GPT4'],
        dsc_row['GPT4_basic'],
        dsc_row['CogVLM'],
        dsc_row['LLaVA'],
        defect_class,
        predictions,
        img_id,
    )
# Gradio UI: image and metadata on the left, the four model descriptions on
# the right, the prompts in collapsible sections, and three navigation buttons.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been lost — confirm the layout against the running
# app, in particular whether the predictions textbox shares the metadata row
# and whether the two prompt accordions sit below both columns.
with gr.Blocks(css=css) as demo:
    # gr.Markdown("""
    # ## Demo for 'The Potential of Generative AI for the Urban Water Sector'
    # ### Riccardo Taormina, Delft University of Technology (TU Delft), Department of Water Management
    # ### email: r.taormina@tudelft.nl""")
    # The backslashes are doubled so the literal contains "\<" without relying
    # on an invalid escape sequence (a SyntaxWarning on recent Python versions).
    gr.Markdown("""
## Testing Large Multimodal Models for Sewer Defect Inspection
Press on \\<Next\\>, \\<Previous\\> or \\<Random\\> to start!
""")

    # Hidden holder of the current sample position. It starts at -1 so the
    # first "Next" click lands on row 0.
    index = gr.Number(value=-1, visible=False)

    with gr.Row():
        # Left column: image, sample metadata, navigation and the reference.
        with gr.Column(scale=1):
            img_out = gr.Image(type="filepath", label="Image", elem_id="image-out")
            with gr.Row():
                txt_item = gr.Textbox(label="Sample no.", min_width=20)
                txt_filename = gr.Textbox(label="SewerML file", min_width=20)
                txt_defect_class = gr.Textbox(label="Defect class", interactive=False, min_width=150)
                txt_xie_pred = gr.Textbox(label="Predictions (XIE = benchmark*, 0 = No Defect, 1 = Defect)", min_width=300, interactive=False)
            with gr.Row():
                prev_btn = gr.Button("Previous")
                next_btn = gr.Button("Next")
                rand_btn = gr.Button("Random")
            with gr.Row():
                gr.Markdown('*Xie, Qian, et al. "Automatic detection and classification of sewer defects via hierarchical deep learning." IEEE Transactions on Automation Science and Engineering 16.4 (2019): 1836-1847.')
        # Right column: one textbox per model description.
        with gr.Column(scale=1):
            gr.Markdown('Multimodal descriptions')
            with gr.Row():
                txt_out_GPT4 = gr.Textbox(label="GPT4-V", lines=n_lines, max_lines=n_lines)
                txt_out_GPT4s = gr.Textbox(label="GPT4-V simple", lines=n_lines, max_lines=n_lines)
            with gr.Row():
                txt_out_CogVLM = gr.Textbox(label="CogVLM", lines=n_lines, max_lines=n_lines)
                txt_out_LLaVa = gr.Textbox(label="LLaVa", lines=n_lines, max_lines=n_lines)

    # Collapsed-by-default sections holding the prompt texts.
    with gr.Row():
        with gr.Accordion("Basic prompt used for GPT4-V", open=False):
            gr.Markdown("""
You are a virtual sewer technician with the capability to analyze images from CCTV cameras taken inside sewer pipes.
Your task is to examine each image and provide a concise, yet accurate, summary for retrieval.
After summarizing, you must classify the image as DEFECTIVE or NON DEFECTIVE.
You will always try to describe the image that you see.
You must provide your output in JSON format with DESCRIPTION: <your description of the image>, PREDICTION: <your prediction, either DEFECTIVE or NON DEFECTIVE>"""
            )
    with gr.Row():
        with gr.Accordion("Prompt used for GPT4-V, CogVLM and LLaVA", open=False):
            gr.Markdown("""
You are a virtual sewer technician with the capability to analyze images from CCTV cameras taken inside sewer pipes.
Your task is to examine each image and provide a concise, yet accurate, summary for retrieval.
After summarizing, you must classify the image as DEFECTIVE or NON DEFECTIVE.
While providing the summary, remember the following guidelines:"
1) Provide a general overview of the image that you see, describing important elements such image clarity, lighting conditions, type of pipe (concrete, PVC, ...), presence of water.
2) Check for defects in the sewer pipes in the image.
3) Pipes in good condition usually show a smooth, unbroken surface, no visible signs of damage like cracks or collapses, and an absence of blockages such as roots.
4) On the other hand, you can have the following defects:
4a)Cracks, Breaks, and Collapses: Identify visible cracks along the pipe, instances where the pipe has fractured or completely broken apart, and areas where the pipe has collapsed.
This includes longitudinal cracks, circumferential breaks, and complete structural failures that compromise the integrity of the sewer system.
4b)Surface Damage: Detect areas of the pipe's interior that exhibit signs of wear, erosion, or damage on the surface.
This includes minor scratches, pitting, scaling, or any form of deterioration that affects the pipe's surface but does not necessarily penetrate deeply into the structure.
4c) Production Error: Identify defects that originated during the pipe's manufacturing process, such as inconsistent pipe thickness, improper joint alignment, or material imperfections.
These are flaws that were introduced before installation and could potentially affect the pipe's performance or longevity.
4d) Deformations: Recognize any alterations in the shape of the pipe, such as bending, sagging, or bulging, that indicate a deformation.
This includes both minor deformations that may affect flow efficiency and major deformations that threaten the pipe's structural integrity.
4e) Roots: Detect the presence of roots infiltrating the sewer pipe, whether through joints, cracks, or other vulnerabilities.
This involves identifying both the initial stages of root intrusion and the more advanced stages where roots have significantly obstructed the pipe.
5) Additional considerations while analyzing the images: do not consider blurred text or user-defined circled areas in the images.
6) You will always try to describe the image that you see.
You must provide your output in JSON format with DESCRIPTION: <your description of the image>, PREDICTION: <your prediction, either DEFECTIVE or NON DEFECTIVE>"""
            )

    # All three buttons refresh the same widgets. The order matches the
    # 10-tuple returned by return_prev / return_next / return_rand.
    nav_outputs = [
        index, txt_item, img_out,
        txt_out_GPT4, txt_out_GPT4s,
        txt_out_CogVLM, txt_out_LLaVa,
        txt_defect_class, txt_xie_pred, txt_filename,
    ]
    prev_btn.click(fn=return_prev, inputs=index, outputs=nav_outputs)
    next_btn.click(fn=return_next, inputs=index, outputs=nav_outputs)
    rand_btn.click(fn=return_rand, inputs=index, outputs=nav_outputs)
if __name__ == "__main__":
    # Start the Gradio server only when this file is executed as a script.
    demo.launch()