import gradio as gr
from torch.nn import functional as F
from model_loader import ModelType, type_to_transforms, type_to_loaded_model


def get_y(model_type, model, image):
    # The v2 detector expects an explicit batch dimension and is run on CPU;
    # the other detectors get their batch dimension via None-indexing.
    if model_type == ModelType.SYNTHETIC_DETECTOR_V2:
        return model.forward(image.unsqueeze(0).to("cpu"))
    return model.forward(image[None, ...])
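# For reference: image.unsqueeze(0) and image[None, ...] are equivalent ways of
# adding a leading batch dimension; a transformed tensor of shape (3, H, W)
# becomes (1, 3, H, W) before the forward pass.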
def predict(raw_image, model_name):
    if model_name not in ModelType.get_list():
        return {'error': [0.]}
    model_type = ModelType[str(model_name).upper()].value
    model = type_to_loaded_model[model_type]
    tfm = type_to_transforms[model_type]
    image = tfm(raw_image)
    y = get_y(model_type, model, image)
    # Softmax over the two logits: index 1 = generated, index 0 = real.
    probs = F.softmax(y, dim=1).cpu().detach().numpy()
    return {'created by AI': probs[:, 1].tolist(),
            'created by human': probs[:, 0].tolist()}
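# A minimal illustrative sketch of how predict is expected to behave, assuming
# "synthetic_detector_v2" is one of the names returned by ModelType.get_list()
# (the scores below are hypothetical; actual values depend on the loaded weights):
#
#   from PIL import Image
#   predict(Image.open("images/general/img_1.jpg"), "synthetic_detector_v2")
#   # -> {'created by AI': [0.98], 'created by human': [0.02]}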
general_examples = [
    ["images/general/img_1.jpg"],
    ["images/general/img_2.jpg"],
    ["images/general/img_3.jpg"],
    ["images/general/img_4.jpg"],
    ["images/general/img_5.jpg"],
    ["images/general/img_6.jpg"],
    ["images/general/img_7.jpg"],
    ["images/general/img_8.jpg"],
    ["images/general/img_9.jpg"],
    ["images/general/img_10.jpg"],
]

optic_examples = [
    ["images/optic/img_1.jpg"],
    ["images/optic/img_2.jpg"],
    ["images/optic/img_3.jpg"],
    ["images/optic/img_4.jpg"],
    ["images/optic/img_5.jpg"],
]

famous_deepfake_examples = [
    ["images/famous_deepfakes/img_1.jpg"],
    ["images/famous_deepfakes/img_2.jpg"],
    ["images/famous_deepfakes/img_3.jpg"],
    ["images/famous_deepfakes/img_4.webp"],
]
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        <h1 style="text-align: center;">For Fake's Sake: a set of models for detecting generated and synthetic images</h1>

        This is a demo space for synthetic image detectors:
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_200'>midjourney200M</a> (Aug 2023),
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_5'>midjourney5M</a> (Aug 2023),
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_200'>diffusions200M</a> (Aug 2023),
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_5'>diffusions5M</a> (Aug 2023),
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-2.0'>synthetic_detector_v2</a> (Sep 2023).
        <br>We provide several detectors for images produced by popular generation tools such as Midjourney and Stable Diffusion.<br>
        Please refer to the model cards for evaluation metrics and limitations.
        """
    )
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil")
            drop_down = gr.Dropdown(ModelType.get_list(), type="value", label="Model", value=ModelType.SYNTHETIC_DETECTOR_V2)
            with gr.Row():
                gr.ClearButton(components=[image_input])
                submit_button = gr.Button("Submit", variant="primary")
        with gr.Column():
            result_score = gr.Label(label='result', num_top_classes=2)

    with gr.Tab("Examples"):
        gr.Examples(examples=general_examples, inputs=image_input)
    # with gr.Tab("More examples"):
    #     gr.Examples(examples=optic_examples, inputs=image_input)
    with gr.Tab("Widely known deepfakes"):
        gr.Examples(examples=famous_deepfake_examples, inputs=image_input)

    submit_button.click(predict, inputs=[image_input, drop_down], outputs=result_score)
    gr.Markdown(
        """
        <h3>Models</h3>
        <p>The <code>*_200M</code> models are based on <code>convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384</code> with input size <code>640x640</code>.</p>
        <p>The <code>*_5M</code> models are based on <code>tf_mobilenetv3_large_100.in1k</code> with input size <code>224x224</code>.</p>
        <p>The <code>synthetic_detector_v2</code> model is based on <code>convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384</code> with input size <code>384x384</code>.</p>

        <h3>Details</h3>
        <ul>
        <li>Model cards: <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_200'>midjourney200M</a>,
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_5'>midjourney5M</a>,
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_200'>diffusions200M</a>,
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_5'>diffusions5M</a>,
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-2.0'>synthetic_detector_v2</a>.
        </li>
        <li>License: CC-BY-SA-3.0</li>
        </ul>

        <h3>Limitations</h3>
        The model output should be treated only as an indication that an image may have been artificially generated, not as definitive proof.
        The current models can struggle with real-world images that are exceptionally vibrant and of very high quality: the richness of colors and fine detail can lead to misclassification, as the models may focus on visual cues that are not actually indicative of the true class.
        """
    )
demo.launch()
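# Note: on Hugging Face Spaces a bare demo.launch() is sufficient. When running
# this script locally, Gradio also accepts options such as
# demo.launch(share=True) to expose a temporary public URL.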