import gradio as gr
from torch.nn import functional as F
from model_loader import ModelType, type_to_transforms, type_to_loaded_model


def get_y(model_type, model, image):
    # The v2 detector expects an explicit batch dimension and is run on CPU;
    # the other detectors get their batch dimension via None-indexing.
    if model_type == ModelType.SYNTHETIC_DETECTOR_V2:
        return model.forward(image.unsqueeze(0).to("cpu"))
    return model.forward(image[None, ...])
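# For reference: image.unsqueeze(0) and image[None, ...] are equivalent ways of
# adding a leading batch dimension; a transformed tensor of shape (3, H, W)
# becomes (1, 3, H, W) before the forward pass.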
def predict(raw_image, model_name):
    if model_name not in ModelType.get_list():
        return {'error': [0.]}
    model_type = ModelType[str(model_name).upper()].value
    model = type_to_loaded_model[model_type]
    tfm = type_to_transforms[model_type]
    image = tfm(raw_image)
    y = get_y(model_type, model, image)
    # Softmax over the two logits: index 1 = generated, index 0 = real.
    probs = F.softmax(y, dim=1).cpu().detach().numpy()
    return {'created by AI': probs[:, 1].tolist(),
            'created by human': probs[:, 0].tolist()}
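# A minimal illustrative sketch of how predict is expected to behave, assuming
# "synthetic_detector_v2" is one of the names returned by ModelType.get_list()
# (the scores below are hypothetical; actual values depend on the loaded weights):
#
#   from PIL import Image
#   predict(Image.open("images/general/img_1.jpg"), "synthetic_detector_v2")
#   # -> {'created by AI': [0.98], 'created by human': [0.02]}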
general_examples = [
    ["images/general/img_1.jpg"],
    ["images/general/img_2.jpg"],
    ["images/general/img_3.jpg"],
    ["images/general/img_4.jpg"],
    ["images/general/img_5.jpg"],
    ["images/general/img_6.jpg"],
    ["images/general/img_7.jpg"],
    ["images/general/img_8.jpg"],
    ["images/general/img_9.jpg"],
    ["images/general/img_10.jpg"],
]

optic_examples = [
    ["images/optic/img_1.jpg"],
    ["images/optic/img_2.jpg"],
    ["images/optic/img_3.jpg"],
    ["images/optic/img_4.jpg"],
    ["images/optic/img_5.jpg"],
]

famous_deepfake_examples = [
    ["images/famous_deepfakes/img_1.jpg"],
    ["images/famous_deepfakes/img_2.jpg"],
    ["images/famous_deepfakes/img_3.jpg"],
    ["images/famous_deepfakes/img_4.webp"],
]
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        <h1 style="text-align: center;">For Fake's Sake: a set of models for detecting generated and synthetic images</h1>

        This is a demo space for synthetic image detectors:
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_200'>midjourney200M</a> (Aug 2023),
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_5'>midjourney5M</a> (Aug 2023),
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_200'>diffusions200M</a> (Aug 2023),
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_5'>diffusions5M</a> (Aug 2023),
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-2.0'>synthetic_detector_v2</a> (Sep 2023).
        <br>We provide several detectors for images produced by popular generation tools such as Midjourney and Stable Diffusion.<br>
        Please refer to the model cards for evaluation metrics and limitations.
        """
    )
    with gr.Row():
        with gr.Column():
            image_input = gr.Image(type="pil")
            drop_down = gr.Dropdown(ModelType.get_list(), type="value", label="Model", value=ModelType.SYNTHETIC_DETECTOR_V2)
            with gr.Row():
                gr.ClearButton(components=[image_input])
                submit_button = gr.Button("Submit", variant="primary")
        with gr.Column():
            result_score = gr.Label(label='result', num_top_classes=2)

    with gr.Tab("Examples"):
        gr.Examples(examples=general_examples, inputs=image_input)
    # with gr.Tab("More examples"):
    #     gr.Examples(examples=optic_examples, inputs=image_input)
    with gr.Tab("Widely known deepfakes"):
        gr.Examples(examples=famous_deepfake_examples, inputs=image_input)

    submit_button.click(predict, inputs=[image_input, drop_down], outputs=result_score)
    gr.Markdown(
        """
        <h3>Models</h3>
        <p>The <code>*_200M</code> models are based on <code>convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384</code> with input size <code>640x640</code>.</p>
        <p>The <code>*_5M</code> models are based on <code>tf_mobilenetv3_large_100.in1k</code> with input size <code>224x224</code>.</p>
        <p>The <code>synthetic_detector_v2</code> model is based on <code>convnext_large_mlp.clip_laion2b_soup_ft_in12k_in1k_384</code> with input size <code>384x384</code>.</p>

        <h3>Details</h3>
        <ul>
        <li>Model cards: <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_200'>midjourney200M</a>,
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_mj_5'>midjourney5M</a>,
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_200'>diffusions200M</a>,
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-1.0_sd_5'>diffusions5M</a>,
        <a href='https://huggingface.co/Sumsub/Sumsub-ffs-synthetic-2.0'>synthetic_detector_v2</a>.
        </li>
        <li>License: CC-BY-SA-3.0</li>
        </ul>

        <h3>Limitations</h3>
        The model output should be treated only as an indication that an image may have been artificially generated, not as definitive proof.
        The current models can struggle with real-world images that are exceptionally vibrant and of very high quality: the richness of colors and fine detail can lead to misclassification, as the models may focus on visual cues that are not actually indicative of the true class.
        """
    )
demo.launch()
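# Note: on Hugging Face Spaces a bare demo.launch() is sufficient. When running
# this script locally, Gradio also accepts options such as
# demo.launch(share=True) to expose a temporary public URL.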