nouamanetazi (HF staff) committed
Commit 028a426
Parent: ff43e05

add app.py

Files changed (2)
  1. app.py +86 -0
  2. requirements.txt +1 -0
app.py ADDED
@@ -0,0 +1,86 @@
+ import re
+ import glob
+ import pickle
+ import os
+ import torch
+ import numpy as np
+ import gradio as gr  # required by gr.Interface below
+ from utils.audio import load_spectrograms
+ from utils.compute_args import compute_args
+ from utils.tokenize import tokenize, create_dict, sent_to_ix, cmumosei_2, cmumosei_7, pad_feature
+ from model_LA import Model_LA
+
+ device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+
+ working_dir = "."
+
+ # Load model
+ ckpts_path = os.path.join(working_dir, 'ckpt')
+ model_name = "Model_LA_e"
+ # List checkpoints, best first
+ ckpts = sorted(glob.glob(os.path.join(ckpts_path, model_name, 'best*')), reverse=True)
+
+ # Load the checkpoint once and reuse it for both the saved args and the weights
+ ckpt = torch.load(ckpts[0], map_location=device)
+ args = compute_args(ckpt['args'])
+ pretrained_emb = np.load("train_glove.npy")
+ token_to_ix = pickle.load(open("token_to_ix.pkl", "rb"))
+
+ net = Model_LA(args, len(token_to_ix), pretrained_emb).to(device)
+ net.load_state_dict(ckpt['state_dict'])
+ net.eval()  # inference mode
+
+
+
+ def inference(video_path, text):
+     # --- Text preprocessing ---
+     def clean(w):
+         return re.sub(
+             r"([.,'!?\"()*#:;])",
+             '',
+             w.lower()
+         ).replace('-', ' ').replace('/', ' ')
+
+     s = [clean(w) for w in text.split() if clean(w) != '']
+
+     # --- Audio preprocessing ---
+     _, mel, mag = load_spectrograms(video_path)
+
+     l_max_len = args.lang_seq_len
+     a_max_len = args.audio_seq_len
+     v_max_len = args.video_seq_len
+     L = sent_to_ix(s, token_to_ix, max_token=l_max_len)
+     A = pad_feature(mel, a_max_len)
+     # NOTE: the mel spectrogram is also padded into the video slot here;
+     # a real video branch would feed visual features instead.
+     V = pad_feature(mel, v_max_len)
+     print("Processed text shape: ", L.shape)
+     print("Processed audio shape: ", A.shape)
+     print("Processed video shape: ", V.shape)
+
+     # Run the model; the (lang, audio, video) forward signature is assumed
+     # from Model_LA's training code.
+     with torch.no_grad():
+         x = torch.from_numpy(L).unsqueeze(0).to(device)
+         y = torch.from_numpy(A).unsqueeze(0).float().to(device)
+         z = torch.from_numpy(V).unsqueeze(0).float().to(device)
+         out = net(x, y, z)
+     return str(out.cpu().numpy())
+
+
+ title = "Emotion Recognition"
+ description = "This is a demo of multimodal emotion recognition with Model_LA on CMU-MOSEI. \
+ To use it, upload a video and provide its transcript, or click one of the examples to load them."
+
+ # NOTE: gr.Interface below takes two inputs (video, text), so each example
+ # should also include a transcript string alongside the video file.
+ examples = [
+     ['Video1-fake-1-ff.mp4'],
+     ['Video6-real-1-ff.mp4'],
+     ['Video3-fake-3-ff.mp4'],
+     ['Video8-real-3-ff.mp4'],
+     ['real-1.mp4'],
+     ['fake-1.mp4'],
+ ]
+
+ gr.Interface(inference,
+              inputs=["video", "text"],
+              outputs="text",  # inference returns a single text prediction
+              title=title,
+              description=description,
+              examples=examples
+              ).launch()
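
As a sanity check, the pipeline can also be exercised directly, without the Gradio UI. This is a minimal sketch, not part of the commit: the clip name comes from the examples list above, and the transcript is a made-up placeholder.

if __name__ == "__main__":
    # Direct call to inference(); "real-1.mp4" is one of the example clips,
    # and the transcript string is illustrative only.
    print(inference("real-1.mp4", "this is one of the best days of my life"))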
requirements.txt ADDED
@@ -0,0 +1 @@
+ https://github.com/explosion/spacy-models/releases/download/en_vectors_web_lg-2.1.0/en_vectors_web_lg-2.1.0.tar.gz
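
The pinned en_vectors_web_lg archive supplies the 300-d word vectors behind the train_glove.npy and token_to_ix.pkl files loaded in app.py. A rough sketch of how such files could be produced is below; the actual preprocessing presumably lives in utils/tokenize.py (create_dict), and the helper and its tiny corpus here are illustrative assumptions.

import pickle
import numpy as np
import spacy

# Hypothetical reconstruction of the embedding files used by app.py.
nlp = spacy.load("en_vectors_web_lg")

corpus = [["i", "am", "happy"], ["that", "was", "terrifying"]]  # stand-in tokenized corpus

token_to_ix = {"PAD": 0, "UNK": 1}
vectors = [np.zeros(300, dtype=np.float32), np.zeros(300, dtype=np.float32)]
for sent in corpus:
    for word in sent:
        if word not in token_to_ix:
            token_to_ix[word] = len(token_to_ix)
            vectors.append(nlp(word).vector)  # 300-d vector from en_vectors_web_lg

np.save("train_glove.npy", np.stack(vectors))
pickle.dump(token_to_ix, open("token_to_ix.pkl", "wb"))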