Commit 748cc87 committed by achterbrain
1 Parent(s): 1914d22

Transferred files from streamlit cloud repo
- Dashboard.py +156 -0
- Data/Prompt_dir_221128.csv +62 -0
- Graphics/IL_Logo.png +0 -0
- pages/1_⚙️Manual assessment.py +142 -0
- pages/2_🤖Automated assessment.py +101 -0
- pages/3_📊Assessment summary.py +160 -0
- pages/Functions/Assessment_functions.py +208 -0
- pages/Functions/coco-labels-paper.txt +91 -0
- requirements.txt +61 -0
Dashboard.py
ADDED
@@ -0,0 +1,156 @@
import streamlit as st
import pandas as pd
import numpy as np

@st.cache
def prompt_to_csv(df):
    # IMPORTANT: Cache the conversion to prevent computation on every rerun
    df_download = df
    df_download['Filename']='p'+df_download['ID'].astype('str')+'_1.png'
    df_download = df[['Prompt','Filename']].drop_duplicates(subset='Filename')
    return df_download.to_csv().encode('utf-8')


# Setup
## Load prompt directory
prompt_dir = pd.read_csv('Data/Prompt_dir_221128.csv') #second version of prompt_dir
st.session_state['prompt_dir'] = prompt_dir
## Create lists of prompts for manual and automated assessments
st.session_state['automated_tasks'] = ['Multiple object types', 'Single object','Negation']
automated_prompts = prompt_dir.loc[
    (prompt_dir['Auto_assessment']==True)&
    (prompt_dir['Task']).isin(st.session_state['automated_tasks'])].ID.tolist()
manual_prompts = prompt_dir.ID.tolist()

# Page
st.title('Generative Image Benchmark')
st.write('This is an evaluation platform to assess the performance of image generation algorithms developed by Intel Labs. This is the alpha version of the platform.')
st.subheader('User guide')
st.write('To assess a generative image algorithm, download a set of prompts using the prompt downloader below. Generate one image per prompt and use the file names provided to name your images. Upload these generated images in the data upload section below. The pages for manual assessment and automated assessment allow you to systematically assess the generated images. The results will be presented and ready for download on the assessment summary page.')
st.sidebar.image('Graphics/IL_Logo.png')


# Add prompt downloading functions
prompt_download_dict = {}
## Count how many prompts are in database to allow for max value in selection
prompt_task_count = prompt_dir.Task.value_counts(sort=False)
prompt_task_count = prompt_task_count.drop(index='Single object')
prompt_task_select = prompt_task_count.copy()
## Hide downloader in box
with st.expander("Prompt downloader"):
    st.write('Select the number of prompts you want to download for each task category. The set of prompts will automatically also include all single objects appearing in the selected prompts.')
    # Create numerical selector for every task in prompt directory
    for i_task in prompt_task_select.index:
        prompt_task_select[i_task] = st.number_input(
            i_task,
            value = prompt_task_count[i_task],
            max_value=prompt_task_count[i_task],
            min_value=0,
            step = 1)

    # Create df with selected number of prompts per task
    for i_task in prompt_task_select.index:
        temp_df = prompt_dir.loc[prompt_dir['Task']==i_task][0:prompt_task_select[i_task]]
        if len(temp_df)>0:
            prompt_download_dict[i_task]=temp_df

    # Concat all tasks to dataframe
    prompt_download = pd.concat(prompt_download_dict.values())

    # Add relevant single object prompts
    single_object_ids = prompt_download.Linked_prompts.str.split(',').explode().unique().astype('int')
    prompt_download = pd.concat([
        prompt_download,
        prompt_dir.loc[prompt_dir['ID'].isin(single_object_ids)]
    ])

    # Add download button for prompts
    st.download_button(
        label="Download prompts",
        data=prompt_to_csv(prompt_download),
        file_name='prompt_list.csv',
        mime='text/csv',
    )



# Generate empty dataset for results, if it does not exist yet
try:
    num_uploaded_images = st.session_state['eval_df'].shape[0]
except KeyError:
    st.session_state['eval_df'] = pd.DataFrame(
        columns=['File_name','Prompt_no','automated_eval','manual_eval','manual_eval_completed','manual_eval_task_score'])
    st.session_state['uploaded_img'] = []

# Create dict for automated assessment if it does not exist yet
try:
    test_dict = st.session_state['results_dict']
except KeyError:
    st.session_state['results_dict'] = {}

# Data upload setup
st.subheader('Data upload')
#uploaded_files = st.file_uploader('Upload generated images', accept_multiple_files=True)
with st.form("my-form", clear_on_submit=True):
    uploaded_files = st.file_uploader('Select images for upload', accept_multiple_files=True)

    man_assessment_share = st.selectbox(
        'Select share of uploaded images to be used for manual assessment.',
        ('100%', '50%'))

    submitted = st.form_submit_button("Add images")
st.session_state['uploaded_img'] = st.session_state['uploaded_img']+uploaded_files


# Add new uploaded images to session state
## Try to append it to pre-existing list, else create new list in session state
## Always reset uploaded files to empty list after they have been added to state
if len(uploaded_files) != 0:
    try:
        # Extract prompts of uploaded files
        file_names = [x.name for x in uploaded_files]
        files_prompts = [x.split('_')[0][1:] for x in file_names]

        # Create manual evaluation df
        df_dict = {'File_name':file_names, 'Prompt_no':files_prompts}
        eval_df = pd.DataFrame(df_dict)
        eval_df['automated_eval'] = eval_df['Prompt_no'].astype('int').isin(automated_prompts)
        eval_df['manual_eval'] = eval_df['Prompt_no'].astype('int').isin(manual_prompts)
        eval_df['manual_eval_completed'] = False
        eval_df['manual_eval_task_score'] = np.nan

        # Exclude given percentage of uploaded images from manual assessment; with random selection
        if man_assessment_share == '50%':
            reassign_number = int(len(eval_df)/2)
            manual_eval_reassign = eval_df['manual_eval']
            random_image_indices = np.random.choice(len(manual_eval_reassign),reassign_number, replace=False)
            manual_eval_reassign.iloc[random_image_indices]=False
            eval_df['manual_eval'] = manual_eval_reassign

        # Join new uploaded df with existing df
        joint_eval_df = pd.concat([st.session_state['eval_df'], eval_df], ignore_index=True)

        # Add task name to eval_df
        Prompt_no_task_dict = dict(zip(prompt_dir.ID.astype('str').to_list(),prompt_dir.Task.to_list()))
        joint_eval_df['Task'] = joint_eval_df.Prompt_no.map(Prompt_no_task_dict)

        # Save eval_df to session state
        st.session_state['eval_df'] = joint_eval_df

    except KeyError:
        st.session_state['uploaded_img'] = uploaded_files


eval_df = st.session_state['eval_df']
if eval_df.shape[0]!=0:
    # Print current state of uploaded data
    st.write("{0} images uploaded. Reload the page to reset the image upload.".format(str(eval_df.shape[0])))
    st.write("- Available for manual assessment: ", str(sum(eval_df.manual_eval)))
    manual_eval_available = sum(eval_df.manual_eval)
    st.write("- Available for automated assessment: ", str(sum(eval_df.automated_eval)))
else:
    st.write("Upload files to start the assessment.")

#st.write(eval_df)
#st.write(prompt_dir)
#st.session_state['eval_df']
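
Note on the naming contract: prompt_to_csv writes one filename per prompt in the form p{ID}_1.png, and the upload handler recovers the prompt number from everything between the leading "p" and the first underscore. A minimal sketch of that round trip (the ID and prompt below are taken from the CSV for illustration):

# Sketch of the filename convention assumed by Dashboard.py (illustrative values).
prompt_id = 4                                # e.g. "A vase with no flowers in it"
filename = 'p' + str(prompt_id) + '_1.png'   # -> 'p4_1.png', as written by prompt_to_csv
recovered = filename.split('_')[0][1:]       # -> '4', as parsed from uploaded file names
assert recovered == str(prompt_id)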
Data/Prompt_dir_221128.csv
ADDED
@@ -0,0 +1,62 @@
ID,Task,Prompt,Representations,Task_specific_label,Auto_assessment,Linked_prompts
1,Directed Actions,Girl pointing at a dog,"girl, dog",pointing at,FALSE,"29,30"
2,Directed Actions,Monkey hitting a can,"monkey, can",hitting,FALSE,"31,32"
3,Directed Actions,Man petting a cat,"man, cat",petting,FALSE,"33,34"
4,Negation,A vase with no flowers in it,vase,flower,TRUE,35
5,Negation,A street without a car,street,car,TRUE,36
6,Negation,Man without a beard,man,beard,FALSE,33
7,Spatial relationships,A book on top of a chair,"book, chair",on top,FALSE,"37,38"
8,Spatial relationships,Car behind a bus,"car, bus",behind,FALSE,"39,61"
9,Spatial relationships,Cup in front of a vase,"cup, vase",in front,FALSE,"40,35"
10,Numbers (multiple objects),Five apples,apple,5,TRUE,41
11,Numbers (multiple objects),Eight cars,car,8,TRUE,39
12,Numbers (multiple objects),Three cups,cup ,3,TRUE,40
13,Basic Arithmetic,Two apples and twice as many oranges,"apple, orange","2,4",TRUE,"41,42"
14,Basic Arithmetic,One cat and three times as a many dogs,"cat, dog","1,3",TRUE,"34,30"
15,Basic Arithmetic,Five spoons and two less knifes,"spoon, knife","5,3",TRUE,"43,44"
16,Multiple object types,"A cow, an apple and a cat","cow, apple, cat",NA,TRUE,"45,41,34"
17,Multiple object types,"A cup, a knife, a plate and a hat","cup, knife, plate, hat",NA,TRUE,"40,44,46,47"
18,Multiple object types,"A balloon, a lion, a cow, a monkey and a surfboard","balloon, lion, cow, monkey, surfboard",NA,TRUE,"48,49,45,51"
19,Chimeras,A cow with the tail of a snake,"cow, snake",NA,FALSE,"45,52"
20,Chimeras,A fish with the wings of a duck,"fish, duck",NA,FALSE,"53,54"
21,Chimeras,A lion with the head of a goat,"lion, goat",NA,FALSE,"49,55"
22, Conditional generation,A green dinosaur or a red cup. Only render green things.,dinosaur,cup,TRUE,56
23, Conditional generation,A snail or a horse. Only render mammals.,horse,snail,TRUE,57
24, Conditional generation,A girl with a dog or a boy with a cat. Only show females.,"girl, dog","boy, cat",FALSE,"25,29,30"
25,Multiple object types,A girl with a dog,"girl, dog",NA,TRUE,"29,30"
26,Text rendering,"A book titled ""Probabilistic Models""",book ,Probabilistic Models,TRUE,37
27,Text rendering,"A bar sign with the name ""Wish you were beer""",bar sign,Wish you were beer,TRUE,59
28,Text rendering,"A shirt that says ""Sorry I am late. I did not want to come.""",shirt,Sorry I am late. I did not want to come.,TRUE,60
29,Single object,A girl,girl,NA,TRUE,
30,Single object,A dog,dog,NA,TRUE,
31,Single object,A monkey,monkey,NA,TRUE,
32,Single object,A can,can,NA,TRUE,
33,Single object,A man,man,NA,TRUE,
34,Single object,A cat,cat,NA,TRUE,
35,Single object,A vase,vase,NA,TRUE,
36,Single object,A street,street,NA,TRUE,
37,Single object,A book,book,NA,TRUE,
38,Single object,A chair,chair,NA,TRUE,
39,Single object,A car,car,NA,TRUE,
40,Single object,A cup,cup,NA,TRUE,
41,Single object,An apple,apple,NA,TRUE,
42,Single object,An orange,orange,NA,TRUE,
43,Single object,A spoon,spoon,NA,TRUE,
44,Single object,A knife,knife,NA,TRUE,
45,Single object,A cow,cow,NA,TRUE,
46,Single object,A plate,plate,NA,TRUE,
47,Single object,A hat,hat,NA,TRUE,
48,Single object,A balloon,balloon,NA,TRUE,
49,Single object,A lion,lion,NA,TRUE,
50,Single object,A monkey,monkey,NA,TRUE,
51,Single object,A surfboard,surfboard,NA,TRUE,
52,Single object,A snake,snake,NA,TRUE,
53,Single object,A fish,fish,NA,TRUE,
54,Single object,A duck,duck,NA,TRUE,
55,Single object,A goat,goat,NA,TRUE,
56,Single object,A dinosaur,dinosaur,NA,TRUE,
57,Single object,A horse,horse,NA,TRUE,
58,Single object,A girl,girl,NA,TRUE,
59,Single object,A bar sign,bar sign,NA,TRUE,
60,Single object,A shirt,shirt,NA,TRUE,
61,Single object,A bus,bus,NA,TRUE,
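
Each composite prompt lists the IDs of its single-object subprompts in the Linked_prompts column; the dashboard's prompt downloader resolves these with a split/explode over that column. A small sketch of that resolution, using the column names from the CSV above and the repo's file path:

import pandas as pd

prompt_dir = pd.read_csv('Data/Prompt_dir_221128.csv')
negation = prompt_dir.loc[prompt_dir['Task'] == 'Negation']
# Resolve linked single-object prompt IDs, e.g. "29,30" -> [29, 30]
linked_ids = negation.Linked_prompts.dropna().str.split(',').explode().astype('int').unique()
linked_rows = prompt_dir.loc[prompt_dir['ID'].isin(linked_ids)]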
Graphics/IL_Logo.png
ADDED
pages/1_⚙️Manual assessment.py
ADDED
@@ -0,0 +1,142 @@
import streamlit as st
import numpy as np
import pandas as pd
from PIL import Image

st.title('Manual assessment')
st.write('On this page you can rate all uploaded images with regards to how well they match their respective prompts. You can see the outcome of your assessment on the summary page.')
st.write(' ')
side_image = Image.open('Graphics/IL_Logo.png')
st.sidebar.image(side_image)
# Create placeholders for key elements
assessment_progress = st.empty()

# Extract how many images are available for manual assessment in entire uploaded dataset
## Set to zero if the dataset has not been created yet due to starting the app on an assessment page
manual_eval_available = 0
try:
    curr_eval_df = st.session_state['eval_df']
    curr_eval_df['Picture_index']=curr_eval_df.index.values
    curr_manual_eval = curr_eval_df.loc[(curr_eval_df['manual_eval']==True)&(curr_eval_df['manual_eval_completed']==False)]
    manual_eval_available = len(curr_manual_eval)
    curr_prompt_dir = st.session_state['prompt_dir']
except KeyError:
    manual_eval_available = 0
    st.session_state['uploaded_img'] = [] #safety if program is started on manual assessment page and not dashboard


# Main rating loop
## If images are available for rating this creates a form to submit ratings to database
## If subprompt option is selected, it expands the form to include these as well
## If no images are available it prints situation specific instructions
if manual_eval_available > 0:
    # Let user choose whether subprompts should be presented
    include_subprompts = st.checkbox('Show related subprompts if available (uploaded subprompts may not be shown if images have been assessed already).', value=True)

    # Update the progress statement
    assessment_progress.write('{0} images ready / left for assessment.'.format(manual_eval_available))

    # Extract first example for manual assessment which is not rated yet (first meaning the lowest index, for lowest prompt number)
    ## Also extract relevant metadata of this example
    curr_eval_df = st.session_state['eval_df']
    lowest_prompt_no = curr_eval_df.loc[(curr_eval_df['manual_eval']==True)&(curr_eval_df['manual_eval_completed']==False)].Prompt_no.astype('int').min()
    curr_picture_index = curr_eval_df.loc[
        (curr_eval_df['manual_eval']==True)&
        (curr_eval_df['manual_eval_completed']==False)&
        (curr_eval_df['Prompt_no']==str(lowest_prompt_no))].Picture_index.min()
    curr_manual_eval_row = curr_eval_df.iloc[[curr_picture_index]]
    curr_prompt_ID = int(curr_manual_eval_row.Prompt_no.item())
    curr_prompt_row =st.session_state['prompt_dir'].loc[st.session_state['prompt_dir']['ID']==curr_prompt_ID]

    # Extract information about linked subprompts
    curr_linked_prompts = curr_prompt_row.Linked_prompts.item()

    # Set it to nan if the user chose to hide subprompts in evaluation
    if include_subprompts == False:
        curr_linked_prompts = float('nan')

    # Split the subprompt string to get actual list of subprompt IDs
    if pd.notna(curr_linked_prompts):
        curr_linked_prompts = curr_linked_prompts.split(',')

    # Create form to collect assessment
    ## First create main prompt inputs, then render subprompts if subprompt list found
    ## The submit button writes assessment to database
    form_loc = st.empty()
    with form_loc.form("multi_form",clear_on_submit=True):

        # Write main prompt
        st.write('Prompt: {0}'.format(
            curr_prompt_dir.loc[curr_prompt_dir['ID']==int(curr_manual_eval_row.Prompt_no.item())]['Prompt'].item()
        ))
        # Exclude prompt from rating if user chooses to
        include_prompt = st.checkbox('Include this prompt in assessment summary', value=True)

        # Show image of current prompt and rating
        st.image(st.session_state['uploaded_img'][curr_manual_eval_row.Picture_index.item()],width=350)
        curr_manual_eval_row['manual_eval_task_score'] = st.radio(
            "Does the image match the prompt?",('Yes', 'No'), horizontal=True, key='base')

        st.write(' ') # Create whitespace
        st.write(' ') # Create whitespace

        # If there are linked prompts, create df with info
        # Else create empty df which will automatically skip the rating creation for these prompts
        # Here we do not test for (curr_eval_df['manual_eval']==True) as the curr_linked_prompts is already testing for valid prompt number and we want to ignore the exclusion for subprompts
        if type(curr_linked_prompts)==list:
            curr_linked_rows = curr_eval_df.loc[
                (curr_eval_df['manual_eval_completed']==False)&
                (curr_eval_df['Prompt_no'].isin(curr_linked_prompts))]
            curr_linked_rows = curr_linked_rows.groupby('Prompt_no').first()
        else:
            curr_linked_rows = pd.DataFrame()

        # Create rating for subprompts if a df for subprompt info was created
        for row in curr_linked_rows.itertuples():
            # Prompt
            st.write('Prompt: {0}'.format(
                curr_prompt_dir.loc[curr_prompt_dir['ID']==int(row.Index)]['Prompt'].item()
            ))
            # Image
            st.image(st.session_state['uploaded_img'][row.Picture_index],width=350)
            # Rating
            curr_linked_rows.loc[curr_linked_rows['Picture_index']==row.Picture_index,'manual_eval_task_score'] = st.radio(
                "Does the image match the prompt?",('Yes', 'No'), horizontal=True, key=row.Picture_index)
            st.write(' ')
            st.write(' ')


        # Submit assessments to database
        submitted = st.form_submit_button("Submit")
        if submitted:
            # First add main prompt assessment
            st.session_state['eval_df'].loc[
                curr_picture_index,'manual_eval']=include_prompt
            st.session_state['eval_df'].loc[
                curr_picture_index,'manual_eval_completed']=True
            st.session_state['eval_df'].loc[
                curr_picture_index,'manual_eval_task_score']=curr_manual_eval_row['manual_eval_task_score'].item()

            # Add subprompt assessment if dataset was created for subprompts
            # This stage will automatically be skipped if the df for linked prompts is empty
            for row in curr_linked_rows.itertuples():
                st.session_state['eval_df'].loc[
                    row.Picture_index,'manual_eval']=include_prompt
                st.session_state['eval_df'].loc[
                    row.Picture_index,'manual_eval_completed']=True
                st.session_state['eval_df'].loc[
                    row.Picture_index,'manual_eval_task_score']=row.manual_eval_task_score

            # Reset page after ratings were submitted
            st.experimental_rerun()
# If no files are uploaded
elif len(st.session_state['uploaded_img'])==0:
    assessment_progress.write('Upload files on the dashboard starting page to start manual assessment.')
# If files are uploaded but all ratings are completed
else:
    assessment_progress.write('You finished assessing the current batch of uploaded images. Upload more pictures or generate your results on the summary page.')



#st.session_state['eval_df'].loc[curr_manual_eval,'manual_eval_completed']=True
#st.write(st.session_state['eval_df'])
pages/2_🤖Automated assessment.py
ADDED
@@ -0,0 +1,101 @@
import streamlit as st
import numpy as np
from itertools import compress
from PIL import Image
#from pages.Functions.Assessment_functions import Empty_DSwrapper

@st.cache
def Empty_DSwrapper(img, representations = None, Task_specific_label = None):
    '''
    Empty dashboard wrapper used for debugging; always returns True.
    '''
    img_input = img

    return True

# Create dictionary to hold functions
fun_dict = {
    'Multiple object types':Empty_DSwrapper,
    'Single object':Empty_DSwrapper,
    'Negation':Empty_DSwrapper}


st.title('Automated Assessment')
st.write('On this page you can use automated assessment algorithms to assess how well uploaded images match their respective prompts.')
st.write(' ')
side_image = Image.open('Graphics/IL_Logo.png')
st.sidebar.image(side_image)

try:
    # Create necessary variables
    prompt_dir = st.session_state['prompt_dir']
    curr_eval_df = st.session_state['eval_df']
    curr_eval_df['Picture_index']=curr_eval_df.index.values

    # Assess how many images are available for automatic assessment
    automated_eval_available = sum(curr_eval_df['automated_eval'])

    # Add task name to eval_df
    temp_prompt_dir=prompt_dir[['ID','Representations','Task_specific_label']]
    temp_prompt_dir['Prompt_no']=temp_prompt_dir['ID'].astype('str')
    curr_eval_df = curr_eval_df.merge(temp_prompt_dir,on='Prompt_no')
except KeyError:
    automated_eval_available = 0


# If images for assessment available: create form to start assessment
# Else: Note to upload images for assessment
if automated_eval_available > 0:

    with st.form("auto_assessment_form",clear_on_submit=True):
        # Form info statement
        st.write('Select tasks to assess with the automated assessment:')

        # Add selection for available categories
        assess_multi_object = st.checkbox(
            'Multiple object types ({0} images available)'.format(
                len(curr_eval_df.loc[
                    (curr_eval_df['automated_eval']==True)&
                    (curr_eval_df['Task']=='Multiple object types')])
            ))
        assess_single_object = st.checkbox(
            'Single object type ({0} images available)'.format(
                len(curr_eval_df.loc[
                    (curr_eval_df['automated_eval']==True)&
                    (curr_eval_df['Task']=='Single object')])
            ))

        negation = st.checkbox(
            'Negation ({0} images available)'.format(
                len(curr_eval_df.loc[
                    (curr_eval_df['automated_eval']==True)&
                    (curr_eval_df['Task']=='Negation')])
            ))

        submitted = st.form_submit_button("Start automated assessment")
        if submitted:
            # Create list for tasks which were selected for assessment
            selected_tasks = list(
                compress(
                    ['Multiple object types','Single object','Negation'],
                    [assess_multi_object,assess_single_object,negation]))
            # Create dataset to loop over with assessment
            assessed_df = curr_eval_df.loc[
                (curr_eval_df['automated_eval']==True)&
                (curr_eval_df['Task'].isin(selected_tasks))]
            results_column = []

            for row in assessed_df.itertuples():
                # Apply task based classifier and save result in list
                temp_image = Image.open(st.session_state['uploaded_img'][row.Picture_index])
                temp_result = fun_dict[row.Task](
                    temp_image,row.Representations,row.Task_specific_label)
                results_column.append(temp_result)

            assessed_df['Score']=results_column
            st.session_state['auto_eval_df']=assessed_df[['File_name','Prompt_no','Picture_index','Task','Score']]
            st.write('Completed assessment. Access results on the summary page.')
else:
    st.write('Upload files on the dashboard starting page to start automated assessment.')

#st.write(st.session_state['auto_eval_df'])
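
Note that fun_dict above maps every task to the Empty_DSwrapper placeholder, so this page currently scores each image as True. The commented-out import suggests the intent is to point the dictionary at the real classifiers from pages/Functions/Assessment_functions.py; one possible wiring, assuming those wrappers keep their current signatures (and, for Negation, that Representations is the present object and Task_specific_label the absent one, as in the prompt CSV), would be:

# Hypothetical wiring of the real classifiers; the signatures match the
# fun_dict[row.Task](image, Representations, Task_specific_label) call above.
from pages.Functions.Assessment_functions import (
    CLIP_single_object_classifier,
    CLIP_multi_object_recognition_DSwrapper,
    CLIP_object_negation)

fun_dict = {
    'Multiple object types': CLIP_multi_object_recognition_DSwrapper,
    'Single object': CLIP_single_object_classifier,
    'Negation': CLIP_object_negation}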
pages/3_📊Assessment summary.py
ADDED
@@ -0,0 +1,160 @@
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from PIL import Image
side_image = Image.open('Graphics/IL_Logo.png')
st.sidebar.image(side_image)

@st.cache
def convert_df_to_csv(df):
    # IMPORTANT: Cache the conversion to prevent computation on every rerun
    return df[['File_name','Prompt_no','Task','Score']].to_csv().encode('utf-8')

def plot_style_simple(results_df):
    eval_sum = results_df.groupby('Task')['Score'].sum()
    eval_count = results_df.groupby('Task')['Score'].count()
    eval_share = (eval_sum/eval_count)*100

    fig = plt.figure(figsize=(12, 3))
    sns.barplot(x=eval_share.index, y=eval_share.values, palette='GnBu')
    plt.xticks(rotation=-45)
    plt.ylabel('Percentage correct')
    plt.xlabel(' ')
    return fig

def plot_style_combined(results_df, uploaded_df = None):
    # Create joined dataframe of current results and uploaded_df
    uploaded_results_df = pd.read_csv(uploaded_df)
    results_df['Model']='Current'
    uploaded_results_df['Model']='Uploaded'
    results_df = pd.concat([results_df,uploaded_results_df])

    # Create scores for plot
    eval_sum = results_df.groupby(['Model','Task'])['Score'].sum()
    eval_count = results_df.groupby(['Model','Task'])['Score'].count()
    eval_share = (eval_sum/eval_count)*100
    eval_share = eval_share.reset_index()

    # Create plot
    fig = plt.figure(figsize=(12, 3))
    sns.barplot(data=eval_share,x='Task',y='Score',hue='Model', palette='GnBu')
    plt.xticks(rotation=-45)
    plt.ylabel('Percentage correct')
    plt.xlabel(' ')
    return fig

assessment_result_frames = {}



st.title('Assessment Summary')

st.header('Manual assessment')


try:
    if sum(st.session_state['eval_df']['manual_eval_completed'])>0:
        # Display file uploader
        manual_file_upload = st.file_uploader("Upload .csv with saved manual assessment for model comparison")

        # Create dataset for manual summary plots
        manual_eval_df = st.session_state['eval_df']
        manual_eval_df['Score'] = manual_eval_df['manual_eval_task_score'].map({'Yes':True, 'No':False})
        manual_results_df = manual_eval_df.loc[
            (manual_eval_df['manual_eval']==True)&
            (manual_eval_df['manual_eval_completed']==True)]

        assessment_result_frames['Manual assessment'] = manual_results_df

        # If df was uploaded for comparison, we create comparison plot, else simple plot
        if manual_file_upload == None:
            fig = plot_style_simple(manual_results_df)
            st.pyplot(fig)
        else:
            fig = plot_style_combined(manual_results_df,manual_file_upload)
            st.pyplot(fig)

        st.download_button(
            label="Download manual assessment data",
            data=convert_df_to_csv(manual_results_df),
            file_name='manual_assessment.csv',
            mime='text/csv',
        )
    else:
        st.write('Complete manual assessment to generate summary.')
except KeyError:
    st.write('Complete manual assessment to generate summary.')


st.write(' ')
st.header('Automated assessment')
try:
    # Create dataset for automated summary plots
    auto_eval_df = st.session_state['auto_eval_df']
    assessment_result_frames['Automated assessment'] = auto_eval_df

    # Display file uploader
    auto_file_upload = st.file_uploader("Upload .csv with saved automated assessment for model comparison")


    # If df was uploaded for comparison, we create comparison plot, else simple plot
    if auto_file_upload == None:
        fig = plot_style_simple(auto_eval_df)
        st.pyplot(fig)
    else:
        fig = plot_style_combined(auto_eval_df,auto_file_upload)
        st.pyplot(fig)

    st.download_button(
        label="Download automated assessment data",
        data=convert_df_to_csv(auto_eval_df),
        file_name='automated_assessment.csv',
        mime='text/csv',
    )
except KeyError:
    st.write('Complete automated assessment to generate summary.')


try:
    # Start gallery
    st.header('Assessment gallery')

    assessment_method_selected = st.selectbox(
        'Select generation method',
        assessment_result_frames.keys())

    if len(assessment_result_frames.keys())<1:
        st.write('Complete manual or automated assessment to access images in the gallery.')

    # Create needed info frames
    gallery_df = assessment_result_frames[assessment_method_selected]
    curr_prompt_dir = st.session_state['prompt_dir']

    # Select task
    tasks_available = gallery_df.Task.unique().tolist()
    task_selected = st.selectbox('Select task type',tasks_available)
    # Select image type
    type_selected = st.selectbox(
        'Select image type',
        ('Correctly generated images', 'Incorrectly generated images'))
    type_selected_dict = {'Correctly generated images':True, 'Incorrectly generated images':False}
    # Create df for presented images
    gallery_df_print = gallery_df.loc[
        (gallery_df['Score']==type_selected_dict[type_selected])&
        (gallery_df['Task']==task_selected)]
    # Select presented image and prompt
    generation_number = st.number_input('Generation number',min_value=1, max_value=len(gallery_df_print), step=1)
    gallery_row_print = gallery_df_print.iloc[int(generation_number-1)]
    curr_Prompt_no = gallery_row_print.Prompt_no
    curr_Prompt = curr_prompt_dir[curr_prompt_dir['ID']==int(curr_Prompt_no)].Prompt
    curr_Picture_index = gallery_row_print.Picture_index.item()
    # Plot prompt and image
    st.write('Prompt: '+curr_Prompt.item())
    st.image(st.session_state['uploaded_img'][curr_Picture_index],width=350)

    #st.write(auto_df_print)
except IndexError:
    st.write('There is no image available in your selected category.')
except KeyError:
    pass
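
For reference, plot_style_simple reports, per task, the share of images scored True. A toy example of that aggregation (made-up scores, not benchmark results):

import pandas as pd

toy = pd.DataFrame({'Task': ['Negation', 'Negation', 'Single object'],
                    'Score': [True, False, True]})
share = (toy.groupby('Task')['Score'].sum() / toy.groupby('Task')['Score'].count()) * 100
# Negation -> 50.0, Single object -> 100.0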
pages/Functions/Assessment_functions.py
ADDED
@@ -0,0 +1,208 @@
import random
import os
import torch
import pandas as pd
from transformers import CLIPProcessor, CLIPModel, DetrFeatureExtractor, DetrForObjectDetection
from PIL import Image
CLIPmodel_import = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
CLIPprocessor_import = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
DetrFeatureExtractor_import = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
DetrModel_import = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")

# Import list of coco example objects
script_path = os.path.dirname(__file__)
coco_objects = open(script_path+"/coco-labels-paper.txt", "r")
coco_objects = coco_objects.read()
coco_objects = coco_objects.split("\n")

# Example image
#test_image = Image.open('pages/Functions/test_image.png')
#test_image = Image.open('pages/Functions/test_imageIV.png')

###### Empty function for debugging

def Empty_DSwrapper(img, representations = None, Task_specific_label = None):
    '''
    Empty dashboard wrapper used for debugging; always returns True.
    '''
    img_input = img

    return True


###### Helper functions
def Coco_object_set(included_object, set_length=6):
    '''
    Creates a set of objects based on the coco objects and the currently correct object.
    '''
    curr_object_set = set([included_object])

    while len(curr_object_set)<set_length:
        temp_object = random.choice(coco_objects)
        curr_object_set.add(temp_object)

    return list(curr_object_set)


def Object_set_creator(included_object, list_of_all_objects = coco_objects, excluded_objects_list = [], set_length=6):
    '''
    Creates a set of objects based on list_of_all_objects.
    The included object will always be in the list.
    Optional list of objects to be excluded from the set.
    '''
    curr_object_set = set([included_object])

    # Check that the included object is not contained in the excluded objects
    if included_object in excluded_objects_list:
        raise ValueError('The included_object can not be part of the excluded_objects list.')

    while len(curr_object_set)<set_length:
        temp_object = random.choice(list_of_all_objects)
        if temp_object not in excluded_objects_list:
            curr_object_set.add(temp_object)

    return list(curr_object_set)


###### Single object recognition

def CLIP_single_object_classifier(img, object_class, task_specific_label=None):
    '''
    Test presence of object in image by using the "red herring strategy" and CLIP algorithm.

    Note that the task_specific_label is not used for this classifier.
    '''
    # Define model and parameters
    word_list = Coco_object_set(object_class)
    inputs = CLIPprocessor_import(text=word_list, images=img, return_tensors="pt", padding=True)
    # Run inference
    outputs = CLIPmodel_import(**inputs)
    # Get image-text similarity score
    logits_per_image = outputs.logits_per_image
    # Get probabilities
    probs = logits_per_image.softmax(dim=1)
    # Return true if the highest prob value is recognised
    if word_list[probs.argmax().item()]==object_class:
        return True
    else:
        return False


def CLIP_object_recognition(img, object_class, tested_classes):
    '''
    More general CLIP object recognition implementation
    '''
    if object_class not in tested_classes:
        raise ValueError('The object_class has to be part of the tested_classes list.')

    # Define model and parameters
    inputs = CLIPprocessor_import(text=tested_classes, images=img, return_tensors="pt", padding=True)
    # Run inference
    outputs = CLIPmodel_import(**inputs)
    # Get image-text similarity score
    logits_per_image = outputs.logits_per_image
    # Get probabilities
    probs = logits_per_image.softmax(dim=1)
    # Return true if the highest prob value is recognised
    if tested_classes[probs.argmax().item()]==object_class:
        return True
    else:
        return False


###### Multi object recognition
#list_of_objects = ['cat','apple','cow']

def CLIP_multi_object_recognition(img, list_of_objects):
    '''
    Algorithm based on CLIP to test presence of multiple objects.

    Currently has a debugging print call in.
    '''
    # Loop over list of objects, test for presence of each individually, making sure that none of the other objects is part of the test set
    for i_object in list_of_objects:
        # Create list with objects not in test set (all objects which are not i_object)
        untested_objects = [x for x in list_of_objects if x!= i_object]
        # Create set going into clip object recogniser and test this set using standard recognition function
        CLIP_test_classes = Object_set_creator(included_object=i_object, excluded_objects_list=untested_objects)
        i_object_present = CLIP_object_recognition(img, i_object, CLIP_test_classes)
        print(i_object+str(i_object_present))
        # Stop loop and return false if one of the objects is not recognised by CLIP
        if i_object_present == False:
            return False

    # Return true if all objects were recognised
    return True

def CLIP_multi_object_recognition_DSwrapper(img, representations, task_specific_label=None):
    '''
    Dashboard wrapper of CLIP_multi_object_recognition

    Note that the task_specific_label is not used for this classifier.
    '''
    list_of_objects = representations.split(', ')
    return CLIP_multi_object_recognition(img,list_of_objects)

###### Negation
def CLIP_object_negation(img, present_object, absent_object):
    '''
    Algorithm based on CLIP to test negation prompts
    '''
    # Create sets of objects for present and absent object
    tested_classes_present = Object_set_creator(
        included_object=present_object, excluded_objects_list=[absent_object])
    tested_classes_absent = Object_set_creator(
        included_object=absent_object, excluded_objects_list=[present_object],set_length=10)

    # Use CLIP object recognition to test for objects.
    presence_test = CLIP_object_recognition(img, present_object, tested_classes_present)
    absence_test = CLIP_object_recognition(img, absent_object, tested_classes_absent)

    if presence_test==True and absence_test==False:
        return True
    else:
        return False

###### Counting / arithmetic
'''
test_image = Image.open('pages/Functions/test_imageIII.jpeg')
object_classes = ['cat','remote']
object_counts = [2,2]
'''

def DETR_multi_object_counting(img, object_classes, object_counts, confidence_treshold=0.5):
    # Apply Detr to image
    inputs = DetrFeatureExtractor_import(images=img, return_tensors="pt")
    outputs = DetrModel_import(**inputs)

    # Convert outputs (bounding boxes and class logits) to COCO API
    target_sizes = torch.tensor([img.size[::-1]])
    results = DetrFeatureExtractor_import.post_process_object_detection(
        outputs, threshold=confidence_treshold, target_sizes=target_sizes)[0]

    # Create dict with value_counts
    count_dict = pd.Series(results['labels'].numpy())
    count_dict = count_dict.map(DetrModel_import.config.id2label)
    count_dict = count_dict.value_counts().to_dict()

    # Create dict for correct response
    label_dict = dict(zip(object_classes, object_counts))

    # Return False if the count for a given label does not match
    for i_item in label_dict.items():
        if int(count_dict[i_item[0]])==int(i_item[1]): # Adding type control for comparison due to str read in
            print(str(i_item)+'_true')
        else:
            print(str(i_item)+'_false')
            print("observed: "+str(count_dict[i_item[0]]))
            return False
    # If all match, return true
    return True

def DETR_multi_object_counting_DSwrapper(img, representations, Task_specific_label):
    '''
    Dashboard wrapper of DETR_multi_object_counting
    '''
    list_of_objects = representations.split(', ')
    object_counts = Task_specific_label.split(', ')
    return DETR_multi_object_counting(img,list_of_objects, object_counts, confidence_treshold=0.5)
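
The classifiers can also be exercised outside the dashboard; a minimal sketch, assuming a locally available test image (the file path below is illustrative, and importing the module downloads the CLIP and DETR checkpoints):

from PIL import Image
from pages.Functions.Assessment_functions import (
    CLIP_single_object_classifier, DETR_multi_object_counting_DSwrapper)

img = Image.open('my_generation.png')  # hypothetical image, e.g. generated for prompt file p41_1.png
print(CLIP_single_object_classifier(img, 'apple'))             # True if CLIP picks 'apple' out of the sampled object set
print(DETR_multi_object_counting_DSwrapper(img, 'apple', '5')) # True if DETR counts exactly five apples (raises KeyError if none are detected)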
pages/Functions/coco-labels-paper.txt
ADDED
@@ -0,0 +1,91 @@
person
bicycle
car
motorcycle
airplane
bus
train
truck
boat
traffic light
fire hydrant
street sign
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
hat
backpack
umbrella
shoe
eye glasses
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
plate
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
couch
potted plant
bed
mirror
dining table
window
desk
toilet
door
tv
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
blender
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
hair brush
requirements.txt
ADDED
@@ -0,0 +1,61 @@
altair==4.2.0
attrs==22.1.0
blinker==1.5
cachetools==5.2.0
charset-normalizer==2.1.1
click==8.1.3
commonmark==0.9.1
contourpy==1.0.6
cycler==0.11.0
decorator==5.1.1
entrypoints==0.4
filelock==3.8.0
fonttools==4.38.0
gitdb==4.0.10
GitPython==3.1.29
huggingface-hub==0.11.1
idna==3.4
importlib-metadata==5.1.0
Jinja2==3.1.2
jsonschema==4.17.3
kiwisolver==1.4.4
MarkupSafe==2.1.1
matplotlib==3.6.2
numpy==1.23.5
packaging==21.3
pandas==1.5.2
Pillow==9.3.0
protobuf==3.20.3
pyarrow==10.0.1
pydeck==0.8.0
Pygments==2.13.0
Pympler==1.0.1
pyparsing==3.0.9
pyrsistent==0.19.2
python-dateutil==2.8.2
pytz==2022.6
pytz-deprecation-shim==0.1.0.post0
PyYAML==6.0
regex==2022.10.31
requests==2.28.1
rich==12.6.0
seaborn==0.12.1
semver==2.13.0
six==1.16.0
smmap==5.0.0
streamlit==1.15.1
timm==0.6.12
tokenizers==0.13.2
toml==0.10.2
toolz==0.12.0
torch==1.13.0
torchvision==0.14.0
tornado==6.2
tqdm==4.64.1
transformers==4.24.0
typing_extensions==4.4.0
tzdata==2022.7
tzlocal==4.2
urllib3==1.26.13
validators==0.20.0
zipp==3.11.0
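
To run the transferred app from a checkout, the standard Streamlit workflow should apply: install the pinned dependencies with pip install -r requirements.txt and launch with streamlit run Dashboard.py; Streamlit then auto-discovers the assessment pages in the pages/ directory.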