emedinac committed on
Commit 2f1078d
1 Parent(s): df0a8b1

adding amass and h36m models

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitattributes +17 -0
  2. amass_h36m_models/CISTGCN_M16_AMASS.tar +3 -0
  3. amass_h36m_models/CISTGCN_M16_H36M.tar +3 -0
  4. amass_h36m_models/CISTGCN_M32_AMASS.tar +3 -0
  5. amass_h36m_models/CISTGCN_M32_H36M.tar +3 -0
  6. amass_h36m_models/CISTGCN_M64_H36M.tar +3 -0
  7. amass_h36m_models/CISTGCN_M8_H36M.tar +3 -0
  8. amass_h36m_models/CISTGCN_best.pth.tar +3 -0
  9. amass_h36m_models/short-CISTGCN-400ms-16-best.pth.tar +3 -0
  10. amass_h36m_models/short-CISTGCN-400ms-32-best.pth.tar +3 -0
  11. h36m_detailed/16/files/CISTGCN-benchmark-best.pth.tar +3 -0
  12. h36m_detailed/16/files/CISTGCN-benchmark-last.pth.tar +3 -0
  13. h36m_detailed/16/files/config-20221118_0919-id0862.yaml +105 -0
  14. h36m_detailed/16/files/model.py +597 -0
  15. h36m_detailed/16/metric_full_original_test.xlsx +3 -0
  16. h36m_detailed/16/metric_original_test.xlsx +3 -0
  17. h36m_detailed/16/metric_test.xlsx +3 -0
  18. h36m_detailed/16/metric_train.xlsx +3 -0
  19. h36m_detailed/16/sample_original_test.xlsx +3 -0
  20. h36m_detailed/32/files/CISTGCN-benchmark-best.pth.tar +3 -0
  21. h36m_detailed/32/files/CISTGCN-benchmark-last.pth.tar +3 -0
  22. h36m_detailed/32/files/config-20221111_1223-id0734.yaml +105 -0
  23. h36m_detailed/32/files/model.py +597 -0
  24. h36m_detailed/32/metrics_original_test.xlsx +3 -0
  25. h36m_detailed/32/samples_original_test.xlsx +3 -0
  26. h36m_detailed/64/files/CISTGCN-benchmark-best.pth.tar +3 -0
  27. h36m_detailed/64/files/CISTGCN-benchmark-last.pth.tar +3 -0
  28. h36m_detailed/64/files/config-20221114_2127-id9542.yaml +105 -0
  29. h36m_detailed/64/files/model.py +597 -0
  30. h36m_detailed/64/metric_full_original_test.xlsx +3 -0
  31. h36m_detailed/64/metric_original_test.xlsx +3 -0
  32. h36m_detailed/64/metric_test.xlsx +3 -0
  33. h36m_detailed/64/metric_train.xlsx +3 -0
  34. h36m_detailed/64/sample_original_test.xlsx +3 -0
  35. h36m_detailed/8/files/CISTGCN-benchmark-best.pth.tar +3 -0
  36. h36m_detailed/8/files/CISTGCN-benchmark-last.pth.tar +3 -0
  37. h36m_detailed/8/files/config-20221116_2202-id6444.yaml +105 -0
  38. h36m_detailed/8/files/model.py +597 -0
  39. h36m_detailed/8/metric_full_original_test.xlsx +3 -0
  40. h36m_detailed/8/metric_original_test.xlsx +3 -0
  41. h36m_detailed/8/metric_test.xlsx +3 -0
  42. h36m_detailed/8/metric_train.xlsx +3 -0
  43. h36m_detailed/8/sample_original_test.xlsx +3 -0
  44. h36m_detailed/short-400ms/16/files/config-20230104_1806-id2293.yaml +106 -0
  45. h36m_detailed/short-400ms/16/files/model.py +597 -0
  46. h36m_detailed/short-400ms/16/files/short-STSGCN-20230104_1806-id2293_best.pth.tar +3 -0
  47. h36m_detailed/short-400ms/16/files/short-STSGCN-20230104_1806-id2293_last.pth.tar +3 -0
  48. h36m_detailed/short-400ms/32/files/config-20230105_1400-id6760.yaml +105 -0
  49. h36m_detailed/short-400ms/32/files/model.py +597 -0
  50. h36m_detailed/short-400ms/32/files/short-STSGCN-20230105_1400-id6760_best.pth.tar +3 -0
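Every binary listed above is stored through Git LFS, so the diffs below show three-line pointer files rather than the payloads themselves. A minimal sketch for fetching one checkpoint programmatically, assuming this repository is hosted on the Hugging Face Hub; the repo_id below is a placeholder, not something stated in this commit:

from huggingface_hub import hf_hub_download

# repo_id is hypothetical -- substitute the actual owner/name of this repo.
ckpt_path = hf_hub_download(
    repo_id="emedinac/CISTGCN-models",
    filename="amass_h36m_models/CISTGCN_best.pth.tar",
)
print(ckpt_path)  # local cache path holding the real (non-pointer) payload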
.gitattributes CHANGED
@@ -33,3 +33,20 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/16/metric_full_original_test.xlsx filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/16/metric_original_test.xlsx filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/16/metric_test.xlsx filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/16/metric_train.xlsx filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/16/sample_original_test.xlsx filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/32/metrics_original_test.xlsx filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/32/samples_original_test.xlsx filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/64/metric_full_original_test.xlsx filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/64/metric_original_test.xlsx filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/64/metric_test.xlsx filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/64/metric_train.xlsx filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/64/sample_original_test.xlsx filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/8/metric_full_original_test.xlsx filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/8/metric_original_test.xlsx filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/8/metric_test.xlsx filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/8/metric_train.xlsx filter=lfs diff=lfs merge=lfs -text
+ h36m_detailed/8/sample_original_test.xlsx filter=lfs diff=lfs merge=lfs -text
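The seventeen new rules above route each added .xlsx report through LFS, on top of the wildcard rules already present. A rough sketch of that matching logic, assuming Python's fnmatch is a close-enough stand-in for gitattributes globbing on these simple patterns (real gitattributes matching is richer):

from fnmatch import fnmatch

lfs_patterns = ["*.zip", "*.zst", "*tfevents*",
                "h36m_detailed/16/metric_test.xlsx"]  # subset of the rules above

def is_lfs_tracked(path: str) -> bool:
    # Bare globs like "*.zip" match against the file name alone.
    name = path.rsplit("/", 1)[-1]
    return any(fnmatch(path, pat) or fnmatch(name, pat) for pat in lfs_patterns)

print(is_lfs_tracked("h36m_detailed/16/metric_test.xlsx"))  # True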
amass_h36m_models/CISTGCN_M16_AMASS.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9f56dbfb8a5f34a0984c6de9f1ee795b94862c8c3c20aac38c0956204503c47a
+ size 4354193
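Every ADDED binary in this commit follows the same three-line Git LFS pointer layout shown above (spec version, sha256 oid, byte size). A minimal sketch, using only the standard library, of parsing a pointer and verifying a separately downloaded payload against it:

import hashlib

def read_lfs_pointer(path: str) -> dict:
    # Each pointer line is "key value": version, oid, size.
    with open(path) as f:
        return dict(line.strip().split(" ", 1) for line in f if line.strip())

def payload_matches(pointer: dict, payload_path: str) -> bool:
    # The oid field is "sha256:<hex digest>" of the full payload.
    digest = hashlib.sha256(open(payload_path, "rb").read()).hexdigest()
    return pointer["oid"] == "sha256:" + digest

ptr = read_lfs_pointer("amass_h36m_models/CISTGCN_M16_AMASS.tar")
print(ptr["size"])  # "4354193", matching the pointer above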
amass_h36m_models/CISTGCN_M16_H36M.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e9d99e17e43f39c998b141adf81c261a8b1eb78084feed8bf9de722a51111aef
+ size 5935307
amass_h36m_models/CISTGCN_M32_AMASS.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:228b649b126a634ed070c7a60bcbeb37504ab5f2b4948acd2556edcf342aac1f
+ size 6327249
amass_h36m_models/CISTGCN_M32_H36M.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2d1d356c1b73f1bc6d0d056e643f345a4727373779fe8e9aabbd23b58c3ca343
+ size 8133899
amass_h36m_models/CISTGCN_M64_H36M.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cb41d06736803c4e7b0aa66e36820440d5125b072739610481d1c06c23cedb5a
+ size 16582347
amass_h36m_models/CISTGCN_M8_H36M.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:47b28248ab629ce18f5908f0c39c1d4700d12c5539f64828ffe4b73ee9c3c5af
+ size 5339339
amass_h36m_models/CISTGCN_best.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2d1d356c1b73f1bc6d0d056e643f345a4727373779fe8e9aabbd23b58c3ca343
+ size 8133899
amass_h36m_models/short-CISTGCN-400ms-16-best.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6c161bc7186d800db0d372133d13ac4bdf01ca89ca7d165e22386890088e64e6
+ size 3827665
amass_h36m_models/short-CISTGCN-400ms-32-best.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:565aa3f07715a52021a481065af53bf6b6f2e438a1fb8ea1cc5ea3ed0ccbd715
+ size 6026705
h36m_detailed/16/files/CISTGCN-benchmark-best.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e9d99e17e43f39c998b141adf81c261a8b1eb78084feed8bf9de722a51111aef
+ size 5935307
h36m_detailed/16/files/CISTGCN-benchmark-last.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:66ed4eb9b213f8b042fff19f85c1b34f7530176d65c52da4e21c96d24692f13f
+ size 5929419
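The best/last pairs above are ordinary PyTorch checkpoints. A minimal loading sketch; the dictionary layout is an assumption (this diff only shows the LFS pointers), so inspect the keys before wiring it into code:

import torch

ckpt = torch.load("h36m_detailed/16/files/CISTGCN-benchmark-best.pth.tar",
                  map_location="cpu")
if isinstance(ckpt, dict):
    print(sorted(ckpt.keys()))       # discover the actual layout first
# model.load_state_dict(ckpt["state_dict"])  # hypothetical key name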
h36m_detailed/16/files/config-20221118_0919-id0862.yaml ADDED
@@ -0,0 +1,105 @@
+ architecture_config:
+   model: MlpMixer_ext
+   model_params:
+     input_n: 10
+     joints: 22
+     output_n: 25
+     n_txcnn_layers: 4
+     txc_kernel_size: 3
+     reduction: 8
+     hidden_dim: 64
+     input_gcn:
+       model_complexity:
+         - 16
+         - 16
+         - 16
+         - 16
+       interpretable:
+         - true
+         - true
+         - true
+         - true
+         - true
+     output_gcn:
+       model_complexity:
+         - 3
+       interpretable:
+         - true
+     clipping: 15
+ learning_config:
+   WarmUp: 100
+   normalize: false
+   dropout: 0.1
+   weight_decay: 1e-4
+   epochs: 50
+   lr: 0.01
+   # max_norm: 3
+   scheduler:
+     type: StepLR
+     params:
+       step_size: 3000
+       gamma: 0.8
+   loss:
+     weights: ""
+     type: "mpjpe"
+   augmentations:
+     random_scale:
+       x:
+         - 0.95
+         - 1.05
+       y:
+         - 0.90
+         - 1.10
+       z:
+         - 0.95
+         - 1.05
+     random_noise: ""
+     random_flip:
+       x: true
+       y: ""
+       z: true
+     random_rotation:
+       x:
+         - -5
+         - 5
+       y:
+         - -180
+         - 180
+       z:
+         - -5
+         - 5
+     random_translation:
+       x:
+         - -0.10
+         - 0.10
+       y:
+         - -0.10
+         - 0.10
+       z:
+         - -0.10
+         - 0.10
+ environment_config:
+   actions: all
+   evaluate_from: 0
+   is_norm: true
+   job: 16
+   sample_rate: 2
+   return_all_joints: true
+   save_grads: false
+   test_batch: 128
+   train_batch: 128
+ general_config:
+   data_dir: /ai-research/datasets/attention/ann_h3.6m/
+   experiment_name: STSGCN-tests
+   load_model_path: ''
+   log_path: /ai-research/notebooks/testing_repos/logdir/
+   model_name_rel_path: STSGCN-benchmark
+   save_all_intermediate_models: false
+   save_models: true
+   tensorboard:
+     num_mesh: 4
+ meta_config:
+   comment: Testing a new architecture based on STSGCN paper.
+   project: Attention
+   task: 3d keypoint prediction
+   version: 0.1.1
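model.py below reads this config through attribute access (arch.model_params.input_n, learn.dropout, and so on), so a plain dict from PyYAML is not enough. A minimal stand-in loader, assuming PyYAML is available; the original training code presumably ships its own:

import types
import yaml

def to_namespace(obj):
    # Recursively wrap dicts so nested keys read as attributes;
    # lists are left alone because model.py mutates them in place.
    if isinstance(obj, dict):
        return types.SimpleNamespace(**{k: to_namespace(v) for k, v in obj.items()})
    return obj

with open("h36m_detailed/16/files/config-20221118_0919-id0862.yaml") as f:
    cfg = to_namespace(yaml.safe_load(f))
print(cfg.architecture_config.model_params.input_n)  # 10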
h36m_detailed/16/files/model.py ADDED
@@ -0,0 +1,597 @@
+ import math
+
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+
+ from ..layers import deformable_conv, SE
+
+ torch.manual_seed(0)
+
+
+ # This is the simple CNN layer that performs a 2-D convolution while maintaining
+ # the dimensions of the input (except for the features dimension).
+ class CNN_layer(nn.Module):
+     def __init__(self,
+                  in_ch,
+                  out_ch,
+                  kernel_size,
+                  dropout,
+                  bias=True):
+         super(CNN_layer, self).__init__()
+         self.kernel_size = kernel_size
+         padding = (
+             (kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2)  # padding so that both dimensions are maintained
+         assert kernel_size[0] % 2 == 1 and kernel_size[1] % 2 == 1
+
+         self.block1 = [nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=padding, dilation=(1, 1)),
+                        nn.BatchNorm2d(out_ch),
+                        nn.Dropout(dropout, inplace=True),
+                        ]
+
+         self.block1 = nn.Sequential(*self.block1)
+
+     def forward(self, x):
+         output = self.block1(x)
+         return output
+
+
+ class FPN(nn.Module):
+     def __init__(self, in_ch,
+                  out_ch,
+                  kernel,  # (3,1)
+                  dropout,
+                  reduction,
+                  ):
+         super(FPN, self).__init__()
+         kernel_size = kernel if isinstance(kernel, (tuple, list)) else (kernel, kernel)
+         padding = ((kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2)
+         pad1 = (padding[0], padding[1])
+         pad2 = (padding[0] + pad1[0], padding[1] + pad1[1])
+         pad3 = (padding[0] + pad2[0], padding[1] + pad2[1])
+         dil1 = (1, 1)
+         dil2 = (1 + pad1[0], 1 + pad1[1])
+         dil3 = (1 + pad2[0], 1 + pad2[1])
+         self.block1 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad1, dilation=dil1),
+                                     nn.BatchNorm2d(out_ch),
+                                     nn.Dropout(dropout, inplace=True),
+                                     nn.PReLU(),
+                                     )
+         self.block2 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad2, dilation=dil2),
+                                     nn.BatchNorm2d(out_ch),
+                                     nn.Dropout(dropout, inplace=True),
+                                     nn.PReLU(),
+                                     )
+         self.block3 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad3, dilation=dil3),
+                                     nn.BatchNorm2d(out_ch),
+                                     nn.Dropout(dropout, inplace=True),
+                                     nn.PReLU(),
+                                     )
+         self.pooling = nn.AdaptiveAvgPool2d((1, 1))  # Action Context.
+         self.compress = nn.Conv2d(out_ch * 3 + in_ch,
+                                   out_ch,
+                                   kernel_size=(1, 1))  # PReLU is outside the loop; check at the end of the code.
+
+     def forward(self, x):
+         b, dim, joints, seq = x.shape
+         global_action = F.interpolate(self.pooling(x), (joints, seq))
+         out = torch.cat((self.block1(x), self.block2(x), self.block3(x), global_action), dim=1)
+         out = self.compress(out)
+         return out
+
+
+ def mish(x):
+     return x * torch.tanh(F.softplus(x))
+
+
+ class ConvTemporalGraphical(nn.Module):
+     # Source: https://github.com/yysijie/st-gcn/blob/master/net/st_gcn.py
+     r"""The basic module for applying a graph convolution.
+     Args:
+     Shape:
+         - Input: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
+         - Output: Output graph sequence in :math:`(N, out_ch, T_{out}, V)` format
+         where
+             :math:`N` is a batch size,
+             :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
+             :math:`T_{in}/T_{out}` is a length of input/output sequence,
+             :math:`V` is the number of graph nodes.
+     """
+
+     def __init__(self, time_dim, joints_dim, domain, interpratable):
+         super(ConvTemporalGraphical, self).__init__()
+
+         if domain == "time":
+             # learnable, graph-agnostic 3-d adjacency matrix (or edge importance matrix)
+             size = joints_dim
+             if not interpratable:
+                 self.A = nn.Parameter(torch.FloatTensor(time_dim, size, size))
+                 self.domain = 'nctv,tvw->nctw'
+             else:
+                 self.domain = 'nctv,ntvw->nctw'
+         elif domain == "space":
+             size = time_dim
+             if not interpratable:
+                 self.A = nn.Parameter(torch.FloatTensor(joints_dim, size, size))
+                 self.domain = 'nctv,vtq->ncqv'
+             else:
+                 self.domain = 'nctv,nvtq->ncqv'
+         if not interpratable:
+             stdv = 1. / math.sqrt(self.A.size(1))
+             self.A.data.uniform_(-stdv, stdv)
+
+     def forward(self, x):
+         x = torch.einsum(self.domain, (x, self.A))
+         return x.contiguous()
+
+
+ class Map2Adj(nn.Module):
+     def __init__(self,
+                  in_ch,
+                  time_dim,
+                  joints_dim,
+                  domain,
+                  dropout,
+                  ):
+         super(Map2Adj, self).__init__()
+         self.domain = domain
+         inter_ch = in_ch // 2
+         self.time_compress = nn.Sequential(nn.Conv2d(in_ch, inter_ch, kernel_size=1, bias=False),
+                                            nn.BatchNorm2d(inter_ch),
+                                            nn.PReLU(),
+                                            nn.Conv2d(inter_ch, inter_ch, kernel_size=(time_dim, 1), bias=False),
+                                            nn.BatchNorm2d(inter_ch),
+                                            nn.Dropout(dropout, inplace=True),
+                                            nn.Conv2d(inter_ch, time_dim, kernel_size=1, bias=False),
+                                            )
+         self.joint_compress = nn.Sequential(nn.Conv2d(in_ch, inter_ch, kernel_size=1, bias=False),
+                                             nn.BatchNorm2d(inter_ch),
+                                             nn.PReLU(),
+                                             nn.Conv2d(inter_ch, inter_ch, kernel_size=(1, joints_dim), bias=False),
+                                             nn.BatchNorm2d(inter_ch),
+                                             nn.Dropout(dropout, inplace=True),
+                                             nn.Conv2d(inter_ch, joints_dim, kernel_size=1, bias=False),
+                                             )
+
+         if self.domain == "space":
+             ch = joints_dim
+             self.perm1 = (0, 1, 2, 3)
+             self.perm2 = (0, 3, 2, 1)
+         if self.domain == "time":
+             ch = time_dim
+             self.perm1 = (0, 2, 1, 3)
+             self.perm2 = (0, 1, 2, 3)
+
+         inter_ch = ch  # // 2
+         self.expansor = nn.Sequential(nn.Conv2d(ch, inter_ch, kernel_size=1, bias=False),
+                                       nn.BatchNorm2d(inter_ch),
+                                       nn.Dropout(dropout, inplace=True),
+                                       nn.PReLU(),
+                                       nn.Conv2d(inter_ch, ch, kernel_size=1, bias=False),
+                                       )
+         self.time_compress.apply(self._init_weights)
+         self.joint_compress.apply(self._init_weights)
+         self.expansor.apply(self._init_weights)
+
+     def _init_weights(self, m, gain=0.05):
+         if isinstance(m, nn.Linear):
+             torch.nn.init.xavier_uniform_(m.weight, gain=gain)
+         if isinstance(m, (nn.Conv2d, nn.Conv1d)):
+             torch.nn.init.xavier_normal_(m.weight, gain=gain)
+         if isinstance(m, nn.PReLU):
+             torch.nn.init.constant_(m.weight, 0.25)
+
+     def forward(self, x):
+         b, dims, seq, joints = x.shape
+         dim_seq = self.time_compress(x)
+         dim_space = self.joint_compress(x)
+         o = torch.matmul(dim_space.permute(self.perm1), dim_seq.permute(self.perm2))
+         Adj = self.expansor(o)
+         return Adj
+
+
+ class Domain_GCNN_layer(nn.Module):
+     """
+     Shape:
+         - Input[0]: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
+         - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
+         - Output[0]: Output graph sequence in :math:`(N, out_ch, T_{out}, V)` format
+         where
+             :math:`N` is a batch size,
+             :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
+             :math:`T_{in}/T_{out}` is a length of input/output sequence,
+             :math:`V` is the number of graph nodes.
+         :in_ch: dimension of coordinates
+         :out_ch: dimension of coordinates
+     """
+
+     def __init__(self,
+                  in_ch,
+                  out_ch,
+                  kernel_size,
+                  stride,
+                  time_dim,
+                  joints_dim,
+                  domain,
+                  interpratable,
+                  dropout,
+                  bias=True):
+
+         super(Domain_GCNN_layer, self).__init__()
+         self.kernel_size = kernel_size
+         assert self.kernel_size[0] % 2 == 1
+         assert self.kernel_size[1] % 2 == 1
+         padding = ((self.kernel_size[0] - 1) // 2, (self.kernel_size[1] - 1) // 2)
+         self.interpratable = interpratable
+         self.domain = domain
+
+         self.gcn = ConvTemporalGraphical(time_dim, joints_dim, domain, interpratable)
+         self.tcn = nn.Sequential(nn.Conv2d(in_ch,
+                                            out_ch,
+                                            (self.kernel_size[0], self.kernel_size[1]),
+                                            (stride, stride),
+                                            padding,
+                                            ),
+                                  nn.BatchNorm2d(out_ch),
+                                  nn.Dropout(dropout, inplace=True),
+                                  )
+
+         if stride != 1 or in_ch != out_ch:
+             self.residual = nn.Sequential(nn.Conv2d(in_ch,
+                                                     out_ch,
+                                                     kernel_size=1,
+                                                     stride=(1, 1)),
+                                           nn.BatchNorm2d(out_ch),
+                                           )
+         else:
+             self.residual = nn.Identity()
+         if self.interpratable:
+             self.map_to_adj = Map2Adj(in_ch,
+                                       time_dim,
+                                       joints_dim,
+                                       domain,
+                                       dropout,
+                                       )
+         else:
+             self.map_to_adj = nn.Identity()
+         self.prelu = nn.PReLU()
+
+     def forward(self, x):
+         # assert A.shape[0] == self.kernel_size[1], print(A.shape[0], self.kernel_size)
+         res = self.residual(x)
+         self.Adj = self.map_to_adj(x)
+         if self.interpratable:
+             self.gcn.A = self.Adj
+         x1 = self.gcn(x)
+         x2 = self.tcn(x1)
+         x3 = x2 + res
+         x4 = self.prelu(x3)
+         return x4
+
+
+ # Dynamic SpatioTemporal Decompose Graph Convolutions (DSTD-GC)
+ class DSTD_GC(nn.Module):
+     """
+     Shape:
+         - Input[0]: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
+         - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
+         - Output[0]: Output graph sequence in :math:`(N, out_ch, T_{out}, V)` format
+         where
+             :math:`N` is a batch size,
+             :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
+             :math:`T_{in}/T_{out}` is a length of input/output sequence,
+             :math:`V` is the number of graph nodes.
+         :in_ch: dimension of coordinates
+         :out_ch: dimension of coordinates
+     """
+
+     def __init__(self,
+                  in_ch,
+                  out_ch,
+                  interpratable,
+                  kernel_size,
+                  stride,
+                  time_dim,
+                  joints_dim,
+                  reduction,
+                  dropout):
+         super(DSTD_GC, self).__init__()
+         self.dsgn = Domain_GCNN_layer(in_ch, out_ch, kernel_size, stride,
+                                       time_dim, joints_dim, "space", interpratable, dropout)
+         self.tsgn = Domain_GCNN_layer(in_ch, out_ch, kernel_size, stride,
+                                       time_dim, joints_dim, "time", interpratable, dropout)
+
+         self.compressor = nn.Sequential(nn.Conv2d(out_ch * 2, out_ch, 1, bias=False),
+                                         nn.BatchNorm2d(out_ch),
+                                         nn.PReLU(),
+                                         SE.SELayer2d(out_ch, reduction=reduction),
+                                         )
+         if stride != 1 or in_ch != out_ch:
+             self.residual = nn.Sequential(nn.Conv2d(in_ch,
+                                                     out_ch,
+                                                     kernel_size=1,
+                                                     stride=(1, 1)),
+                                           nn.BatchNorm2d(out_ch),
+                                           )
+         else:
+             self.residual = nn.Identity()
+
+         # Weighting features
+         out_ch_c = out_ch // 2 if out_ch // 2 > 1 else 1
+         self.global_norm = nn.BatchNorm2d(in_ch)
+         self.conv_s = nn.Sequential(nn.Conv2d(in_ch, out_ch_c, (time_dim, 1), bias=False),
+                                     nn.BatchNorm2d(out_ch_c),
+                                     nn.Dropout(dropout, inplace=True),
+                                     nn.PReLU(),
+                                     nn.Conv2d(out_ch_c, out_ch, (1, joints_dim), bias=False),
+                                     nn.BatchNorm2d(out_ch),
+                                     nn.Dropout(dropout, inplace=True),
+                                     nn.PReLU(),
+                                     )
+         self.conv_t = nn.Sequential(nn.Conv2d(in_ch, out_ch_c, (time_dim, 1), bias=False),
+                                     nn.BatchNorm2d(out_ch_c),
+                                     nn.Dropout(dropout, inplace=True),
+                                     nn.PReLU(),
+                                     nn.Conv2d(out_ch_c, out_ch, (1, joints_dim), bias=False),
+                                     nn.BatchNorm2d(out_ch),
+                                     nn.Dropout(dropout, inplace=True),
+                                     nn.PReLU(),
+                                     )
+         self.map_s = nn.Sequential(nn.Linear(out_ch + 2 + time_dim * 2, out_ch, bias=False),
+                                    nn.BatchNorm1d(out_ch),
+                                    nn.Dropout(dropout, inplace=True),
+                                    nn.PReLU(),
+                                    nn.Linear(out_ch, out_ch, bias=False),
+                                    )
+         self.map_t = nn.Sequential(nn.Linear(out_ch + 2 + time_dim * 2, out_ch, bias=False),
+                                    nn.BatchNorm1d(out_ch),
+                                    nn.Dropout(dropout, inplace=True),
+                                    nn.PReLU(),
+                                    nn.Linear(out_ch, out_ch, bias=False),
+                                    )
+         self.prelu1 = nn.Sequential(nn.BatchNorm2d(out_ch),
+                                     nn.PReLU(),
+                                     )
+         self.prelu2 = nn.Sequential(nn.BatchNorm2d(out_ch),
+                                     nn.PReLU(),
+                                     )
+
+     def _get_stats_(self, x):
+         global_avg_pool = x.mean((3, 2)).mean(1, keepdims=True)
+         global_avg_pool_features = x.mean(3).mean(1)
+         global_std_pool = x.std((3, 2)).std(1, keepdims=True)
+         global_std_pool_features = x.std(3).std(1)
+         return torch.cat((
+             global_avg_pool,
+             global_avg_pool_features,
+             global_std_pool,
+             global_std_pool_features,
+         ),
+             dim=1)
+
+     def forward(self, x):
+         b, dim, seq, joints = x.shape  # 64, 3, 10, 22
+         xn = self.global_norm(x)
+
+         stats = self._get_stats_(xn)
+         w1 = torch.cat((self.conv_s(xn).view(b, -1), stats), dim=1)
+         stats = self._get_stats_(xn)
+         w2 = torch.cat((self.conv_t(xn).view(b, -1), stats), dim=1)
+         self.w1 = self.map_s(w1)
+         self.w2 = self.map_t(w2)
+         w1 = self.w1[..., None, None]
+         w2 = self.w2[..., None, None]
+
+         x1 = self.dsgn(xn)
+         x2 = self.tsgn(xn)
+         out = torch.cat((self.prelu1(w1 * x1), self.prelu2(w2 * x2)), dim=1)
+         out = self.compressor(out)
+         return torch.clip(out + self.residual(xn), -1e5, 1e5)
+
+
+ class ContextLayer(nn.Module):
+     def __init__(self,
+                  in_ch,
+                  hidden_ch,
+                  output_seq,
+                  input_seq,
+                  joints,
+                  dims=3,
+                  reduction=8,
+                  dropout=0.1,
+                  ):
+         super(ContextLayer, self).__init__()
+         self.n_output = output_seq
+         self.n_joints = joints
+         self.n_input = input_seq
+         self.context_conv1 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, 1, bias=False),
+                                            nn.BatchNorm2d(hidden_ch),
+                                            nn.PReLU(),
+                                            )
+
+         self.context_conv2 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, (input_seq, 1), bias=False),
+                                            nn.BatchNorm2d(hidden_ch),
+                                            nn.PReLU(),
+                                            )
+         self.context_conv3 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, 1, bias=False),
+                                            nn.BatchNorm2d(hidden_ch),
+                                            nn.PReLU(),
+                                            )
+         self.map1 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
+                                   nn.Dropout(dropout, inplace=True),
+                                   nn.PReLU(),
+                                   )
+         self.map2 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
+                                   nn.Dropout(dropout, inplace=True),
+                                   nn.PReLU(),
+                                   )
+         self.map3 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
+                                   nn.Dropout(dropout, inplace=True),
+                                   nn.PReLU(),
+                                   )
+
+         self.fmap_s = nn.Sequential(nn.Linear(self.n_output * 3, self.n_joints, bias=False),
+                                     nn.BatchNorm1d(self.n_joints),
+                                     nn.Dropout(dropout, inplace=True), )
+
+         self.fmap_t = nn.Sequential(nn.Linear(self.n_output * 3, self.n_output, bias=False),
+                                     nn.BatchNorm1d(self.n_output),
+                                     nn.Dropout(dropout, inplace=True), )
+
+         # inter_ch = self.n_joints  # // 2
+         self.norm_map = nn.Sequential(nn.Conv1d(self.n_output, self.n_output, 1, bias=False),
+                                       nn.BatchNorm1d(self.n_output),
+                                       nn.Dropout(dropout, inplace=True),
+                                       nn.PReLU(),
+                                       SE.SELayer1d(self.n_output, reduction=reduction),
+                                       nn.Conv1d(self.n_output, self.n_output, 1, bias=False),
+                                       nn.BatchNorm1d(self.n_output),
+                                       nn.Dropout(dropout, inplace=True),
+                                       nn.PReLU(),
+                                       )
+
+         self.fconv = nn.Sequential(nn.Conv2d(1, dims, 1, bias=False),
+                                    nn.BatchNorm2d(dims),
+                                    nn.PReLU(),
+                                    nn.Conv2d(dims, dims, 1, bias=False),
+                                    nn.BatchNorm2d(dims),
+                                    nn.PReLU(),
+                                    )
+         self.SE = SE.SELayer2d(self.n_output, reduction=reduction)
+
+     def forward(self, x):
+         b, _, seq, joint_dim = x.shape
+         y1 = self.context_conv1(x).max(-1)[0].max(-1)[0]
+         y2 = self.context_conv2(x).view(b, -1, joint_dim).max(-1)[0]
+         ym = self.context_conv3(x).mean((2, 3))
+         y = torch.cat((self.map1(y1), self.map2(y2), self.map3(ym)), dim=1)
+         self.joints = self.fmap_s(y)
+         self.displacements = self.fmap_t(y)  # .cumsum(1)
+         self.seq_joints = torch.bmm(self.displacements.unsqueeze(2), self.joints.unsqueeze(1))
+         self.seq_joints_n = self.norm_map(self.seq_joints)
+         self.seq_joints_dims = self.fconv(self.seq_joints_n.view(b, 1, self.n_output, self.n_joints))
+         o = self.SE(self.seq_joints_dims.permute(0, 2, 3, 1))
+         return o
+
+
+ class MlpMixer_ext(nn.Module):
+     """
+     Shape:
+         - Input[0]: Input sequence in :math:`(N, in_ch, T_{in}, V)` format
+         - Output[0]: Output sequence in :math:`(N, T_{out}, in_ch, V)` format
+         where
+             :math:`N` is a batch size,
+             :math:`T_{in}/T_{out}` is a length of input/output sequence,
+             :math:`V` is the number of graph nodes.
+         :in_ch: number of channels for the coordinates (default=3)
+     """
+
+     def __init__(self, arch, learn):
+         super(MlpMixer_ext, self).__init__()
+         self.clipping = arch.model_params.clipping
+
+         self.n_input = arch.model_params.input_n
+         self.n_output = arch.model_params.output_n
+         self.n_joints = arch.model_params.joints
+         self.n_txcnn_layers = arch.model_params.n_txcnn_layers
+         self.txc_kernel_size = [arch.model_params.txc_kernel_size] * 2
+         self.input_gcn = arch.model_params.input_gcn
+         self.output_gcn = arch.model_params.output_gcn
+         self.reduction = arch.model_params.reduction
+         self.hidden_dim = arch.model_params.hidden_dim
+
+         self.st_gcnns = nn.ModuleList()
+         self.txcnns = nn.ModuleList()
+         self.se = nn.ModuleList()
+
+         self.in_conv = nn.ModuleList()
+         self.context_layer = nn.ModuleList()
+         self.trans = nn.ModuleList()
+         self.in_ch = 10
+         self.model_tx = self.input_gcn.model_complexity.copy()
+         self.model_tx.insert(0, 1)  # add 1 in position 0.
+
+         self.input_gcn.model_complexity.insert(0, self.in_ch)
+         self.input_gcn.model_complexity.append(self.in_ch)
+         # self.input_gcn.interpretable.insert(0, True)
+         # self.input_gcn.interpretable.append(False)
+         for i in range(len(self.input_gcn.model_complexity) - 1):
+             self.st_gcnns.append(DSTD_GC(self.input_gcn.model_complexity[i],
+                                          self.input_gcn.model_complexity[i + 1],
+                                          self.input_gcn.interpretable[i],
+                                          [1, 1], 1, self.n_input, self.n_joints, self.reduction, learn.dropout))
+
+         self.context_layer = ContextLayer(1, self.hidden_dim,
+                                           self.n_output, self.n_output, self.n_joints,
+                                           3, self.reduction, learn.dropout
+                                           )
+
+         # at this point, we must permute the dimensions of the gcn network, from (N,C,T,V) into (N,T,C,V)
+         # with kernel_size[3,3] the dimensions of C,V will be maintained
+         self.txcnns.append(FPN(self.n_input, self.n_output, self.txc_kernel_size, 0., self.reduction))
+         for i in range(1, self.n_txcnn_layers):
+             self.txcnns.append(FPN(self.n_output, self.n_output, self.txc_kernel_size, 0., self.reduction))
+
+         self.prelus = nn.ModuleList()
+         for j in range(self.n_txcnn_layers):
+             self.prelus.append(nn.PReLU())
+
+         self.dim_conversor = nn.Sequential(nn.Conv2d(self.in_ch, 3, 1, bias=False),
+                                            nn.BatchNorm2d(3),
+                                            nn.PReLU(),
+                                            nn.Conv2d(3, 3, 1, bias=False),
+                                            nn.PReLU(3), )
+
+         self.st_gcnns_o = nn.ModuleList()
+         self.output_gcn.model_complexity.insert(0, 3)
+         for i in range(len(self.output_gcn.model_complexity) - 1):
+             self.st_gcnns_o.append(DSTD_GC(self.output_gcn.model_complexity[i],
+                                            self.output_gcn.model_complexity[i + 1],
+                                            self.output_gcn.interpretable[i],
+                                            [1, 1], 1, self.n_joints, self.n_output, self.reduction, learn.dropout))
+
+         self.st_gcnns_o.apply(self._init_weights)
+         self.st_gcnns.apply(self._init_weights)
+         self.txcnns.apply(self._init_weights)
+
+     def _init_weights(self, m, gain=0.1):
+         if isinstance(m, nn.Linear):
+             torch.nn.init.xavier_uniform_(m.weight, gain=gain)
+         # if isinstance(m, (nn.Conv2d, nn.Conv1d)):
+         #     torch.nn.init.xavier_normal_(m.weight, gain=gain)
+         if isinstance(m, nn.PReLU):
+             torch.nn.init.constant_(m.weight, 0.25)
+
+     def forward(self, x):
+         b, seq, joints, dim = x.shape
+         vel = torch.zeros_like(x)
+         vel[:, :-1] = torch.diff(x, dim=1)
+         vel[:, -1] = x[:, -1]
+         acc = torch.zeros_like(x)
+         acc[:, :-1] = torch.diff(vel, dim=1)
+         acc[:, -1] = vel[:, -1]
+         x1 = torch.cat((x, acc, vel, torch.norm(vel, dim=-1, keepdim=True)), dim=-1)
+         x2 = x1.permute((0, 3, 1, 2))  # torch.Size([64, 10, 22, 7])
+         x3 = x2
+
+         for i in range(len(self.st_gcnns)):
+             x3 = self.st_gcnns[i](x3)
+
+         x5 = x3.permute(0, 2, 1, 3)  # prepare the input for the Time-Extrapolator-CNN (NCTV->NTCV)
+
+         x6 = self.prelus[0](self.txcnns[0](x5))
+         for i in range(1, self.n_txcnn_layers):
+             x6 = self.prelus[i](self.txcnns[i](x6)) + x6  # residual connection
+
+         x6 = self.dim_conversor(x6.permute(0, 2, 1, 3)).permute(0, 2, 3, 1)
+         x7 = x6.cumsum(1)
+
+         act = self.context_layer(x7.reshape(b, 1, self.n_output, joints * x7.shape[-1]))
+         x8 = x7.permute(0, 3, 2, 1)
+         for i in range(len(self.st_gcnns_o)):
+             x8 = self.st_gcnns_o[i](x8)
+         x9 = x8.permute(0, 3, 2, 1) + act
+
+         return x[:, -1:] + x9,
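A minimal forward-pass sketch for the model above. Two assumptions to note: the relative import `from ..layers import deformable_conv, SE` means model.py must sit inside its original package, and the input layout is (batch, input_n, joints, 3) as implied by the forward method; `cfg` is the namespace produced by the loader sketched after the config above.

import torch

arch = cfg.architecture_config
learn = cfg.learning_config
model = MlpMixer_ext(arch, learn).eval()

x = torch.randn(2, 10, 22, 3)   # (batch, input_n, joints, xyz)
with torch.no_grad():
    pred, = model(x)            # forward returns a 1-tuple
print(pred.shape)               # expected: (2, 25, 22, 3)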
h36m_detailed/16/metric_full_original_test.xlsx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5da750156c6ce72e0a130f4fe2b8610a18bea6966ab8a03dafe39e9349b638cc
+ size 2049706
h36m_detailed/16/metric_original_test.xlsx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:125c475dd472bfa25df2d197c231fbd70efe418418eb5360f8dafaaad7368110
+ size 2052431
h36m_detailed/16/metric_test.xlsx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:527caffa58e94cec2ae96719ef93b2e32360b7c267751ed413c6f7054f2b8c3b
+ size 2052609
h36m_detailed/16/metric_train.xlsx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a890ffa3e5da0b224111a39d9458ca20364090624b0527a9b7acbb8c585e7ecb
+ size 2033364
h36m_detailed/16/sample_original_test.xlsx ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3d3213309871efe19a835db7b26279cdcc7088eb23d153bc150acd6f9f10be31
+ size 29579719
h36m_detailed/32/files/CISTGCN-benchmark-best.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2d1d356c1b73f1bc6d0d056e643f345a4727373779fe8e9aabbd23b58c3ca343
+ size 8133899
h36m_detailed/32/files/CISTGCN-benchmark-last.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9a806dce099cf22d3b2989ae971e7922bfb050f3f134f74e9765c2e37e81ebb7
+ size 8127691
h36m_detailed/32/files/config-20221111_1223-id0734.yaml ADDED
@@ -0,0 +1,105 @@
+ architecture_config:
+   model: MlpMixer_ext_1
+   model_params:
+     input_n: 10
+     joints: 22
+     output_n: 25
+     n_txcnn_layers: 4
+     txc_kernel_size: 3
+     reduction: 8
+     hidden_dim: 64
+     input_gcn:
+       model_complexity:
+         - 32
+         - 32
+         - 32
+         - 32
+       interpretable:
+         - true
+         - true
+         - true
+         - true
+         - true
+     output_gcn:
+       model_complexity:
+         - 3
+       interpretable:
+         - true
+     clipping: 15
+ learning_config:
+   WarmUp: 100
+   normalize: false
+   dropout: 0.1
+   weight_decay: 1e-4
+   epochs: 50
+   lr: 0.01
+   # max_norm: 3
+   scheduler:
+     type: StepLR
+     params:
+       step_size: 3000
+       gamma: 0.8
+   loss:
+     weights: ""
+     type: "mpjpe"
+   augmentations:
+     random_scale:
+       x:
+         - 0.95
+         - 1.05
+       y:
+         - 0.90
+         - 1.10
+       z:
+         - 0.95
+         - 1.05
+     random_noise: ""
+     random_flip:
+       x: true
+       y: ""
+       z: true
+     random_rotation:
+       x:
+         - -5
+         - 5
+       y:
+         - -180
+         - 180
+       z:
+         - -5
+         - 5
+     random_translation:
+       x:
+         - -0.10
+         - 0.10
+       y:
+         - -0.10
+         - 0.10
+       z:
+         - -0.10
+         - 0.10
+ environment_config:
+   actions: all
+   evaluate_from: 0
+   is_norm: true
+   job: 16
+   sample_rate: 2
+   return_all_joints: true
+   save_grads: false
+   test_batch: 128
+   train_batch: 128
+ general_config:
+   data_dir: /ai-research/datasets/attention/ann_h3.6m/
+   experiment_name: STSGCN-tests
+   load_model_path: ''
+   log_path: /ai-research/notebooks/testing_repos/logdir/
+   model_name_rel_path: STSGCN-benchmark
+   save_all_intermediate_models: false
+   save_models: true
+   tensorboard:
+     num_mesh: 4
+ meta_config:
+   comment: Testing a new architecture based on STSGCN paper.
+   project: Attention
+   task: 3d keypoint prediction
+   version: 0.1.1
h36m_detailed/32/files/model.py ADDED
@@ -0,0 +1,597 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import math
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ from torch.nn import functional as F
6
+
7
+ from ..layers import deformable_conv, SE
8
+
9
+ torch.manual_seed(0)
10
+
11
+
12
+ # This is the simple CNN layer,that performs a 2-D convolution while maintaining the dimensions of the input(except for the features dimension)
13
+ class CNN_layer(nn.Module):
14
+ def __init__(self,
15
+ in_ch,
16
+ out_ch,
17
+ kernel_size,
18
+ dropout,
19
+ bias=True):
20
+ super(CNN_layer, self).__init__()
21
+ self.kernel_size = kernel_size
22
+ padding = (
23
+ (kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2) # padding so that both dimensions are maintained
24
+ assert kernel_size[0] % 2 == 1 and kernel_size[1] % 2 == 1
25
+
26
+ self.block1 = [nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=padding, dilation=(1, 1)),
27
+ nn.BatchNorm2d(out_ch),
28
+ nn.Dropout(dropout, inplace=True),
29
+ ]
30
+
31
+ self.block1 = nn.Sequential(*self.block1)
32
+
33
+ def forward(self, x):
34
+ output = self.block1(x)
35
+ return output
36
+
37
+
38
+ class FPN(nn.Module):
39
+ def __init__(self, in_ch,
40
+ out_ch,
41
+ kernel, # (3,1)
42
+ dropout,
43
+ reduction,
44
+ ):
45
+ super(FPN, self).__init__()
46
+ kernel_size = kernel if isinstance(kernel, (tuple, list)) else (kernel, kernel)
47
+ padding = ((kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2)
48
+ pad1 = (padding[0], padding[1])
49
+ pad2 = (padding[0] + pad1[0], padding[1] + pad1[1])
50
+ pad3 = (padding[0] + pad2[0], padding[1] + pad2[1])
51
+ dil1 = (1, 1)
52
+ dil2 = (1 + pad1[0], 1 + pad1[1])
53
+ dil3 = (1 + pad2[0], 1 + pad2[1])
54
+ self.block1 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad1, dilation=dil1),
55
+ nn.BatchNorm2d(out_ch),
56
+ nn.Dropout(dropout, inplace=True),
57
+ nn.PReLU(),
58
+ )
59
+ self.block2 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad2, dilation=dil2),
60
+ nn.BatchNorm2d(out_ch),
61
+ nn.Dropout(dropout, inplace=True),
62
+ nn.PReLU(),
63
+ )
64
+ self.block3 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad3, dilation=dil3),
65
+ nn.BatchNorm2d(out_ch),
66
+ nn.Dropout(dropout, inplace=True),
67
+ nn.PReLU(),
68
+ )
69
+ self.pooling = nn.AdaptiveAvgPool2d((1, 1)) # Action Context.
70
+ self.compress = nn.Conv2d(out_ch * 3 + in_ch,
71
+ out_ch,
72
+ kernel_size=(1, 1)) # PRELU is outside the loop, check at the end of the code.
73
+
74
+ def forward(self, x):
75
+ b, dim, joints, seq = x.shape
76
+ global_action = F.interpolate(self.pooling(x), (joints, seq))
77
+ out = torch.cat((self.block1(x), self.block2(x), self.block3(x), global_action), dim=1)
78
+ out = self.compress(out)
79
+ return out
80
+
81
+
82
+ def mish(x):
83
+ return (x * torch.tanh(F.softplus(x)))
84
+
85
+
86
+ class ConvTemporalGraphical(nn.Module):
87
+ # Source : https://github.com/yysijie/st-gcn/blob/master/net/st_gcn.py
88
+ r"""The basic module for applying a graph convolution.
89
+ Args:
90
+ Shape:
91
+ - Input: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
92
+ - Output: Outpu graph sequence in :math:`(N, out_ch, T_{out}, V)` format
93
+ where
94
+ :math:`N` is a batch size,
95
+ :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
96
+ :math:`T_{in}/T_{out}` is a length of input/output sequence,
97
+ :math:`V` is the number of graph nodes.
98
+ """
99
+
100
+ def __init__(self, time_dim, joints_dim, domain, interpratable):
101
+ super(ConvTemporalGraphical, self).__init__()
102
+
103
+ if domain == "time":
104
+ # learnable, graph-agnostic 3-d adjacency matrix(or edge importance matrix)
105
+ size = joints_dim
106
+ if not interpratable:
107
+ self.A = nn.Parameter(torch.FloatTensor(time_dim, size, size))
108
+ self.domain = 'nctv,tvw->nctw'
109
+ else:
110
+ self.domain = 'nctv,ntvw->nctw'
111
+ elif domain == "space":
112
+ size = time_dim
113
+ if not interpratable:
114
+ self.A = nn.Parameter(torch.FloatTensor(joints_dim, size, size))
115
+ self.domain = 'nctv,vtq->ncqv'
116
+ else:
117
+ self.domain = 'nctv,nvtq->ncqv'
118
+ if not interpratable:
119
+ stdv = 1. / math.sqrt(self.A.size(1))
120
+ self.A.data.uniform_(-stdv, stdv)
121
+
122
+ def forward(self, x):
123
+ x = torch.einsum(self.domain, (x, self.A))
124
+ return x.contiguous()
125
+
126
+
127
+ class Map2Adj(nn.Module):
128
+ def __init__(self,
129
+ in_ch,
130
+ time_dim,
131
+ joints_dim,
132
+ domain,
133
+ dropout,
134
+ ):
135
+ super(Map2Adj, self).__init__()
136
+ self.domain = domain
137
+ inter_ch = in_ch // 2
138
+ self.time_compress = nn.Sequential(nn.Conv2d(in_ch, inter_ch, kernel_size=1, bias=False),
139
+ nn.BatchNorm2d(inter_ch),
140
+ nn.PReLU(),
141
+ nn.Conv2d(inter_ch, inter_ch, kernel_size=(time_dim, 1), bias=False),
142
+ nn.BatchNorm2d(inter_ch),
143
+ nn.Dropout(dropout, inplace=True),
144
+ nn.Conv2d(inter_ch, time_dim, kernel_size=1, bias=False),
145
+ )
146
+ self.joint_compress = nn.Sequential(nn.Conv2d(in_ch, inter_ch, kernel_size=1, bias=False),
147
+ nn.BatchNorm2d(inter_ch),
148
+ nn.PReLU(),
149
+ nn.Conv2d(inter_ch, inter_ch, kernel_size=(1, joints_dim), bias=False),
150
+ nn.BatchNorm2d(inter_ch),
151
+ nn.Dropout(dropout, inplace=True),
152
+ nn.Conv2d(inter_ch, joints_dim, kernel_size=1, bias=False),
153
+ )
154
+
155
+ if self.domain == "space":
156
+ ch = joints_dim
157
+ self.perm1 = (0, 1, 2, 3)
158
+ self.perm2 = (0, 3, 2, 1)
159
+ if self.domain == "time":
160
+ ch = time_dim
161
+ self.perm1 = (0, 2, 1, 3)
162
+ self.perm2 = (0, 1, 2, 3)
163
+
164
+ inter_ch = ch # // 2
165
+ self.expansor = nn.Sequential(nn.Conv2d(ch, inter_ch, kernel_size=1, bias=False),
166
+ nn.BatchNorm2d(inter_ch),
167
+ nn.Dropout(dropout, inplace=True),
168
+ nn.PReLU(),
169
+ nn.Conv2d(inter_ch, ch, kernel_size=1, bias=False),
170
+ )
171
+ self.time_compress.apply(self._init_weights)
172
+ self.joint_compress.apply(self._init_weights)
173
+ self.expansor.apply(self._init_weights)
174
+
175
+ def _init_weights(self, m, gain=0.05):
176
+ if isinstance(m, nn.Linear):
177
+ torch.nn.init.xavier_uniform_(m.weight, gain=gain)
178
+ if isinstance(m, (nn.Conv2d, nn.Conv1d)):
179
+ torch.nn.init.xavier_normal_(m.weight, gain=gain)
180
+ if isinstance(m, nn.PReLU):
181
+ torch.nn.init.constant_(m.weight, 0.25)
182
+
183
+ def forward(self, x):
184
+ b, dims, seq, joints = x.shape
185
+ dim_seq = self.time_compress(x)
186
+ dim_space = self.joint_compress(x)
187
+ o = torch.matmul(dim_space.permute(self.perm1), dim_seq.permute(self.perm2))
188
+ Adj = self.expansor(o)
189
+ return Adj
190
+
191
+
192
+ class Domain_GCNN_layer(nn.Module):
193
+ """
194
+ Shape:
195
+ - Input[0]: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
196
+ - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
197
+ - Output[0]: Outpu graph sequence in :math:`(N, out_ch, T_{out}, V)` format
198
+ where
199
+ :math:`N` is a batch size,
200
+ :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
201
+ :math:`T_{in}/T_{out}` is a length of input/output sequence,
202
+ :math:`V` is the number of graph nodes.
203
+ :in_ch= dimension of coordinates
204
+ : out_ch=dimension of coordinates
205
+ +
206
+ """
207
+
208
+ def __init__(self,
209
+ in_ch,
210
+ out_ch,
211
+ kernel_size,
212
+ stride,
213
+ time_dim,
214
+ joints_dim,
215
+ domain,
216
+ interpratable,
217
+ dropout,
218
+ bias=True):
219
+
220
+ super(Domain_GCNN_layer, self).__init__()
221
+ self.kernel_size = kernel_size
222
+ assert self.kernel_size[0] % 2 == 1
223
+ assert self.kernel_size[1] % 2 == 1
224
+ padding = ((self.kernel_size[0] - 1) // 2, (self.kernel_size[1] - 1) // 2)
225
+ self.interpratable = interpratable
226
+ self.domain = domain
227
+
228
+ self.gcn = ConvTemporalGraphical(time_dim, joints_dim, domain, interpratable)
229
+ self.tcn = nn.Sequential(nn.Conv2d(in_ch,
230
+ out_ch,
231
+ (self.kernel_size[0], self.kernel_size[1]),
232
+ (stride, stride),
233
+ padding,
234
+ ),
235
+ nn.BatchNorm2d(out_ch),
236
+ nn.Dropout(dropout, inplace=True),
237
+ )
238
+
239
+ if stride != 1 or in_ch != out_ch:
240
+ self.residual = nn.Sequential(nn.Conv2d(in_ch,
241
+ out_ch,
242
+ kernel_size=1,
243
+ stride=(1, 1)),
244
+ nn.BatchNorm2d(out_ch),
245
+ )
246
+ else:
247
+ self.residual = nn.Identity()
248
+ if self.interpratable:
249
+ self.map_to_adj = Map2Adj(in_ch,
250
+ time_dim,
251
+ joints_dim,
252
+ domain,
253
+ dropout,
254
+ )
255
+ else:
256
+ self.map_to_adj = nn.Identity()
257
+ self.prelu = nn.PReLU()
258
+
259
+ def forward(self, x):
260
+ # assert A.shape[0] == self.kernel_size[1], print(A.shape[0],self.kernel_size)
261
+ res = self.residual(x)
262
+ self.Adj = self.map_to_adj(x)
263
+ if self.interpratable:
264
+ self.gcn.A = self.Adj
265
+ x1 = self.gcn(x)
266
+ x2 = self.tcn(x1)
267
+ x3 = x2 + res
268
+ x4 = self.prelu(x3)
269
+ return x4
270
+
271
+
272
+ # Dynamic SpatioTemporal Decompose Graph Convolutions (DSTD-GC)
273
+ class DSTD_GC(nn.Module):
274
+ """
275
+ Shape:
276
+ - Input[0]: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
277
+ - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
278
+ - Output[0]: Outpu graph sequence in :math:`(N, out_ch, T_{out}, V)` format
279
+ where
280
+ :math:`N` is a batch size,
281
+ :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
282
+ :math:`T_{in}/T_{out}` is a length of input/output sequence,
283
+ :math:`V` is the number of graph nodes.
284
+ : in_ch= dimension of coordinates
285
+ : out_ch=dimension of coordinates
286
+ +
287
+ """
288
+
289
+ def __init__(self,
290
+ in_ch,
291
+ out_ch,
292
+ interpratable,
293
+ kernel_size,
294
+ stride,
295
+ time_dim,
296
+ joints_dim,
297
+ reduction,
298
+ dropout):
299
+ super(DSTD_GC, self).__init__()
300
+ self.dsgn = Domain_GCNN_layer(in_ch, out_ch, kernel_size, stride,
301
+ time_dim, joints_dim, "space", interpratable, dropout)
302
+ self.tsgn = Domain_GCNN_layer(in_ch, out_ch, kernel_size, stride,
303
+ time_dim, joints_dim, "time", interpratable, dropout)
304
+
305
+ self.compressor = nn.Sequential(nn.Conv2d(out_ch * 2, out_ch, 1, bias=False),
306
+ nn.BatchNorm2d(out_ch),
307
+ nn.PReLU(),
308
+ SE.SELayer2d(out_ch, reduction=reduction),
309
+ )
310
+ if stride != 1 or in_ch != out_ch:
311
+ self.residual = nn.Sequential(nn.Conv2d(in_ch,
312
+ out_ch,
313
+ kernel_size=1,
314
+ stride=(1, 1)),
315
+ nn.BatchNorm2d(out_ch),
316
+ )
317
+ else:
318
+ self.residual = nn.Identity()
319
+
320
+ # Weighting features
321
+ out_ch_c = out_ch // 2 if out_ch // 2 > 1 else 1
322
+ self.global_norm = nn.BatchNorm2d(in_ch)
323
+ self.conv_s = nn.Sequential(nn.Conv2d(in_ch, out_ch_c, (time_dim, 1), bias=False),
324
+ nn.BatchNorm2d(out_ch_c),
325
+ nn.Dropout(dropout, inplace=True),
326
+ nn.PReLU(),
327
+ nn.Conv2d(out_ch_c, out_ch, (1, joints_dim), bias=False),
328
+ nn.BatchNorm2d(out_ch),
329
+ nn.Dropout(dropout, inplace=True),
330
+ nn.PReLU(),
331
+ )
332
+ self.conv_t = nn.Sequential(nn.Conv2d(in_ch, out_ch_c, (time_dim, 1), bias=False),
333
+ nn.BatchNorm2d(out_ch_c),
334
+ nn.Dropout(dropout, inplace=True),
335
+ nn.PReLU(),
336
+ nn.Conv2d(out_ch_c, out_ch, (1, joints_dim), bias=False),
337
+ nn.BatchNorm2d(out_ch),
338
+ nn.Dropout(dropout, inplace=True),
339
+ nn.PReLU(),
340
+ )
341
+ self.map_s = nn.Sequential(nn.Linear(out_ch + 2 + time_dim * 2, out_ch, bias=False),
342
+ nn.BatchNorm1d(out_ch),
343
+ nn.Dropout(dropout, inplace=True),
344
+ nn.PReLU(),
345
+ nn.Linear(out_ch, out_ch, bias=False),
346
+ )
347
+ self.map_t = nn.Sequential(nn.Linear(out_ch + 2 + time_dim * 2, out_ch, bias=False),
348
+ nn.BatchNorm1d(out_ch),
349
+ nn.Dropout(dropout, inplace=True),
350
+ nn.PReLU(),
351
+ nn.Linear(out_ch, out_ch, bias=False),
352
+ )
353
+ self.prelu1 = nn.Sequential(nn.BatchNorm2d(out_ch),
354
+ nn.PReLU(),
355
+ )
356
+ self.prelu2 = nn.Sequential(nn.BatchNorm2d(out_ch),
357
+ nn.PReLU(),
358
+ )
359
+
360
+ def _get_stats_(self, x):
361
+ global_avg_pool = x.mean((3, 2)).mean(1, keepdims=True)
362
+ global_avg_pool_features = x.mean(3).mean(1)
363
+ global_std_pool = x.std((3, 2)).std(1, keepdims=True)
364
+ global_std_pool_features = x.std(3).std(1)
365
+ return torch.cat((
366
+ global_avg_pool,
367
+ global_avg_pool_features,
368
+ global_std_pool,
369
+ global_std_pool_features,
370
+ ),
371
+ dim=1)
372
+
373
+ def forward(self, x):
374
+ b, dim, seq, joints = x.shape # 64, 3, 10, 22
375
+ xn = self.global_norm(x)
376
+
377
+ stats = self._get_stats_(xn)
378
+ w1 = torch.cat((self.conv_s(xn).view(b, -1), stats), dim=1)
379
+ stats = self._get_stats_(xn)
380
+ w2 = torch.cat((self.conv_t(xn).view(b, -1), stats), dim=1)
381
+ self.w1 = self.map_s(w1)
382
+ self.w2 = self.map_t(w2)
383
+ w1 = self.w1[..., None, None]
384
+ w2 = self.w2[..., None, None]
385
+
386
+ x1 = self.dsgn(xn)
387
+ x2 = self.tsgn(xn)
388
+ out = torch.cat((self.prelu1(w1 * x1), self.prelu2(w2 * x2)), dim=1)
389
+ out = self.compressor(out)
390
+ return out + self.residual(xn)
391
+
392
+
393
+ class ContextLayer(nn.Module):
394
+ def __init__(self,
395
+ in_ch,
396
+ hidden_ch,
397
+ output_seq,
398
+ input_seq,
399
+ joints,
400
+ dims=3,
401
+ reduction=8,
402
+ dropout=0.1,
403
+ ):
404
+ super(ContextLayer, self).__init__()
405
+ self.n_output = output_seq
406
+ self.n_joints = joints
407
+ self.n_input = input_seq
408
+ self.context_conv1 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, 1, bias=False),
409
+ nn.BatchNorm2d(hidden_ch),
410
+ nn.PReLU(),
411
+ )
412
+
413
+ self.context_conv2 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, (input_seq, 1), bias=False),
414
+ nn.BatchNorm2d(hidden_ch),
415
+ nn.PReLU(),
416
+ )
417
+ self.context_conv3 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, 1, bias=False),
418
+ nn.BatchNorm2d(hidden_ch),
419
+ nn.PReLU(),
420
+ )
421
+ self.map1 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
422
+ nn.Dropout(dropout, inplace=True),
423
+ nn.PReLU(),
424
+ )
425
+ self.map2 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
426
+ nn.Dropout(dropout, inplace=True),
427
+ nn.PReLU(),
428
+ )
429
+ self.map3 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
430
+ nn.Dropout(dropout, inplace=True),
431
+ nn.PReLU(),
432
+ )
433
+
434
+ self.fmap_s = nn.Sequential(nn.Linear(self.n_output * 3, self.n_joints, bias=False),
435
+ nn.BatchNorm1d(self.n_joints),
436
+ nn.Dropout(dropout, inplace=True), )
437
+
438
+ self.fmap_t = nn.Sequential(nn.Linear(self.n_output * 3, self.n_output, bias=False),
439
+ nn.BatchNorm1d(self.n_output),
440
+ nn.Dropout(dropout, inplace=True), )
441
+
442
+ # inter_ch = self.n_joints # // 2
443
+ self.norm_map = nn.Sequential(nn.Conv1d(self.n_output, self.n_output, 1, bias=False),
444
+ nn.BatchNorm1d(self.n_output),
445
+ nn.Dropout(dropout, inplace=True),
446
+ nn.PReLU(),
447
+ SE.SELayer1d(self.n_output, reduction=reduction),
448
+ nn.Conv1d(self.n_output, self.n_output, 1, bias=False),
449
+ nn.BatchNorm1d(self.n_output),
450
+ nn.Dropout(dropout, inplace=True),
451
+ nn.PReLU(),
452
+ )
453
+
454
+ self.fconv = nn.Sequential(nn.Conv2d(1, dims, 1, bias=False),
455
+ nn.BatchNorm2d(dims),
456
+ nn.PReLU(),
457
+ nn.Conv2d(dims, dims, 1, bias=False),
458
+ nn.BatchNorm2d(dims),
459
+ nn.PReLU(),
460
+ )
461
+ self.SE = SE.SELayer2d(self.n_output, reduction=reduction)
462
+
463
+ def forward(self, x):
464
+ b, _, seq, joint_dim = x.shape
465
+ y1 = self.context_conv1(x).max(-1)[0].max(-1)[0]
466
+ y2 = self.context_conv2(x).view(b, -1, joint_dim).max(-1)[0]
467
+ ym = self.context_conv3(x).mean((2, 3))
468
+ y = torch.cat((self.map1(y1), self.map2(y2), self.map3(ym)), dim=1)
469
+ self.joints = self.fmap_s(y)
470
+ self.displacements = self.fmap_t(y) # .cumsum(1)
471
+ self.seq_joints = torch.bmm(self.displacements.unsqueeze(2), self.joints.unsqueeze(1))
472
+ self.seq_joints_n = self.norm_map(self.seq_joints)
473
+ self.seq_joints_dims = self.fconv(self.seq_joints_n.view(b, 1, self.n_output, self.n_joints))
474
+ o = self.SE(self.seq_joints_dims.permute(0, 2, 3, 1))
475
+ return o
476
+
477
+
478
+ class MlpMixer_ext(nn.Module):
479
+ """
480
+ Shape:
481
+ - Input[0]: Input sequence in :math:`(N, in_ch,T_in, V)` format
482
+ - Output[0]: Output sequence in :math:`(N,T_out,in_ch, V)` format
483
+ where
484
+ :math:`N` is a batch size,
485
+ :math:`T_{in}/T_{out}` is a length of input/output sequence,
486
+ :math:`V` is the number of graph nodes.
487
+ :in_ch=number of channels for the coordiantes(default=3)
488
+ +
489
+ """
490
+
491
+ def __init__(self, arch, learn):
492
+ super(MlpMixer_ext, self).__init__()
493
+ self.clipping = arch.model_params.clipping
494
+
495
+ self.n_input = arch.model_params.input_n
496
+ self.n_output = arch.model_params.output_n
497
+ self.n_joints = arch.model_params.joints
498
+ self.n_txcnn_layers = arch.model_params.n_txcnn_layers
499
+ self.txc_kernel_size = [arch.model_params.txc_kernel_size] * 2
500
+ self.input_gcn = arch.model_params.input_gcn
501
+ self.output_gcn = arch.model_params.output_gcn
502
+ self.reduction = arch.model_params.reduction
503
+ self.hidden_dim = arch.model_params.hidden_dim
504
+
505
+ self.st_gcnns = nn.ModuleList()
506
+ self.txcnns = nn.ModuleList()
507
+ self.se = nn.ModuleList()
508
+
509
+ self.in_conv = nn.ModuleList()
510
+ self.context_layer = nn.ModuleList()
511
+ self.trans = nn.ModuleList()
512
+ self.in_ch = 10
513
+ self.model_tx = self.input_gcn.model_complexity.copy()
514
+ self.model_tx.insert(0, 1) # add 1 in the position 0.
515
+
516
+ self.input_gcn.model_complexity.insert(0, self.in_ch)
517
+ self.input_gcn.model_complexity.append(self.in_ch)
518
+ # self.input_gcn.interpretable.insert(0, True)
519
+ # self.input_gcn.interpretable.append(False)
520
+ for i in range(len(self.input_gcn.model_complexity) - 1):
521
+ self.st_gcnns.append(DSTD_GC(self.input_gcn.model_complexity[i],
522
+ self.input_gcn.model_complexity[i + 1],
523
+ self.input_gcn.interpretable[i],
524
+ [1, 1], 1, self.n_input, self.n_joints, self.reduction, learn.dropout))
525
+
526
+ self.context_layer = ContextLayer(1, self.hidden_dim,
527
+ self.n_output, self.n_output, self.n_joints,
528
+ 3, self.reduction, learn.dropout
529
+ )
530
+
531
+ # at this point, we must permute the dimensions of the gcn network, from (N,C,T,V) into (N,T,C,V)
532
+ # with kernel_size[3,3] the dimensions of C,V will be maintained
533
+ self.txcnns.append(FPN(self.n_input, self.n_output, self.txc_kernel_size, 0., self.reduction))
534
+ for i in range(1, self.n_txcnn_layers):
535
+ self.txcnns.append(FPN(self.n_output, self.n_output, self.txc_kernel_size, 0., self.reduction))
536
+
537
+ self.prelus = nn.ModuleList()
538
+ for j in range(self.n_txcnn_layers):
539
+ self.prelus.append(nn.PReLU())
540
+
541
+ self.dim_conversor = nn.Sequential(nn.Conv2d(self.in_ch, 3, 1, bias=False),
542
+ nn.BatchNorm2d(3),
543
+ nn.PReLU(),
544
+ nn.Conv2d(3, 3, 1, bias=False),
545
+ nn.PReLU(3), )
546
+
547
+ self.st_gcnns_o = nn.ModuleList()
548
+ self.output_gcn.model_complexity.insert(0, 3)
549
+ for i in range(len(self.output_gcn.model_complexity) - 1):
550
+ self.st_gcnns_o.append(DSTD_GC(self.output_gcn.model_complexity[i],
551
+ self.output_gcn.model_complexity[i + 1],
552
+ self.output_gcn.interpretable[i],
553
+ [1, 1], 1, self.n_joints, self.n_output, self.reduction, learn.dropout))
554
+
555
+ self.st_gcnns_o.apply(self._init_weights)
556
+ self.st_gcnns.apply(self._init_weights)
557
+ self.txcnns.apply(self._init_weights)
558
+
559
+ def _init_weights(self, m, gain=0.1):
560
+ if isinstance(m, nn.Linear):
561
+ torch.nn.init.xavier_uniform_(m.weight, gain=gain)
562
+ # if isinstance(m, (nn.Conv2d, nn.Conv1d)):
563
+ # torch.nn.init.xavier_normal_(m.weight, gain=gain)
564
+ if isinstance(m, nn.PReLU):
565
+ torch.nn.init.constant_(m.weight, 0.25)
566
+
567
+ def forward(self, x):
568
+ b, seq, joints, dim = x.shape
569
+ vel = torch.zeros_like(x)
570
+ vel[:, :-1] = torch.diff(x, dim=1)
571
+ vel[:, -1] = x[:, -1]
572
+ acc = torch.zeros_like(x)
573
+ acc[:, :-1] = torch.diff(vel, dim=1)
574
+ acc[:, -1] = vel[:, -1]
575
+ x1 = torch.cat((x, acc, vel, torch.norm(vel, dim=-1, keepdim=True)), dim=-1)
576
+ x2 = x1.permute((0, 3, 1, 2)) # (N, C, T, V), e.g. torch.Size([64, 10, 10, 22])
577
+ x3 = x2
578
+
579
+ for i in range(len(self.st_gcnns)):
580
+ x3 = self.st_gcnns[i](x3)
581
+
582
+ x5 = x3.permute(0, 2, 1, 3) # prepare the input for the Time-Extrapolator-CNN (NCTV->NTCV)
583
+
584
+ x6 = self.prelus[0](self.txcnns[0](x5))
585
+ for i in range(1, self.n_txcnn_layers):
586
+ x6 = self.prelus[i](self.txcnns[i](x6)) + x6 # residual connection
587
+
588
+ x6 = self.dim_conversor(x6.permute(0, 2, 1, 3)).permute(0, 2, 3, 1)
589
+ x7 = x6.cumsum(1)
590
+
591
+ act = self.context_layer(x7.reshape(b, 1, self.n_output, joints * x7.shape[-1]))
592
+ x8 = x7.permute(0, 3, 2, 1)
593
+ for i in range(len(self.st_gcnns_o)):
594
+ x8 = self.st_gcnns_o[i](x8)
595
+ x9 = x8.permute(0, 3, 2, 1) + act
596
+
597
+ return x[:, -1:] + x9,
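The forward pass above augments the raw poses with first- and second-order differences: positions, accelerations, velocities, and the velocity norm are stacked into the 10 channels that self.in_ch expects. A minimal standalone sketch of just that feature construction (shapes assume the H3.6M settings input_n=10, joints=22; the batch size is arbitrary):

import torch

b, seq, joints, dim = 4, 10, 22, 3
x = torch.randn(b, seq, joints, dim)    # (N, T_in, V, 3) pose sequence

vel = torch.zeros_like(x)
vel[:, :-1] = torch.diff(x, dim=1)      # frame-to-frame displacement
vel[:, -1] = x[:, -1]                   # last slot reuses the final pose
acc = torch.zeros_like(x)
acc[:, :-1] = torch.diff(vel, dim=1)
acc[:, -1] = vel[:, -1]

# 3 + 3 + 3 + 1 = 10 channels, matching self.in_ch = 10 above
feats = torch.cat((x, acc, vel, torch.norm(vel, dim=-1, keepdim=True)), dim=-1)
print(feats.permute(0, 3, 1, 2).shape)  # torch.Size([4, 10, 10, 22]) = (N, C, T, V)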
h36m_detailed/32/metrics_original_test.xlsx ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ec4d54347d739ccaab307244384406ffcc96b7f4b44e68ffc2704f11b38d1200
3
+ size 2052735
h36m_detailed/32/samples_original_test.xlsx ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1db05cc9b6ffb40208811b40ab486490755702331ff3e22355f100d963f984dd
3
+ size 28078149
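The .xlsx and .pth.tar entries in this commit are Git LFS pointer files: only the LFS spec version, the SHA-256 of the payload, and its byte size live in the repository, and the real file is resolved at download time. A hedged sketch of fetching one of them with huggingface_hub (the repo id below is a placeholder, not taken from this diff):

from huggingface_hub import hf_hub_download

# "user/repo" is hypothetical -- substitute the actual model repository id.
path = hf_hub_download(repo_id="user/repo",
                       filename="h36m_detailed/32/metrics_original_test.xlsx")
print(path)  # local cache path of the resolved (non-pointer) file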
h36m_detailed/64/files/CISTGCN-benchmark-best.pth.tar ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb41d06736803c4e7b0aa66e36820440d5125b072739610481d1c06c23cedb5a
3
+ size 16582347
h36m_detailed/64/files/CISTGCN-benchmark-last.pth.tar ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8b5ce6e7fc0cbfceacf731ab896290352f7b4d929b80b3bf8d516bdfc02e704
3
+ size 16584139
h36m_detailed/64/files/config-20221114_2127-id9542.yaml ADDED
@@ -0,0 +1,105 @@
1
+ architecture_config:
2
+ model: MlpMixer_ext_1
3
+ model_params:
4
+ input_n: 10
5
+ joints: 22
6
+ output_n: 25
7
+ n_txcnn_layers: 4
8
+ txc_kernel_size: 3
9
+ reduction: 8
10
+ hidden_dim: 64
11
+ input_gcn:
12
+ model_complexity:
13
+ - 64
14
+ - 64
15
+ - 64
16
+ - 64
17
+ interpretable:
18
+ - true
19
+ - true
20
+ - true
21
+ - true
22
+ - true
23
+ output_gcn:
24
+ model_complexity:
25
+ - 3
26
+ interpretable:
27
+ - true
28
+ clipping: 15
29
+ learning_config:
30
+ WarmUp: 100
31
+ normalize: false
32
+ dropout: 0.1
33
+ weight_decay: 1e-4
34
+ epochs: 50
35
+ lr: 0.01
36
+ # max_norm: 3
37
+ scheduler:
38
+ type: StepLR
39
+ params:
40
+ step_size: 3000
41
+ gamma: 0.8
42
+ loss:
43
+ weights: ""
44
+ type: "mpjpe"
45
+ augmentations:
46
+ random_scale:
47
+ x:
48
+ - 0.95
49
+ - 1.05
50
+ y:
51
+ - 0.90
52
+ - 1.10
53
+ z:
54
+ - 0.95
55
+ - 1.05
56
+ random_noise: ""
57
+ random_flip:
58
+ x: true
59
+ y: ""
60
+ z: true
61
+ random_rotation:
62
+ x:
63
+ - -5
64
+ - 5
65
+ y:
66
+ - -180
67
+ - 180
68
+ z:
69
+ - -5
70
+ - 5
71
+ random_translation:
72
+ x:
73
+ - -0.10
74
+ - 0.10
75
+ y:
76
+ - -0.10
77
+ - 0.10
78
+ z:
79
+ - -0.10
80
+ - 0.10
81
+ environment_config:
82
+ actions: all
83
+ evaluate_from: 0
84
+ is_norm: true
85
+ job: 16
86
+ sample_rate: 2
87
+ return_all_joints: true
88
+ save_grads: false
89
+ test_batch: 128
90
+ train_batch: 128
91
+ general_config:
92
+ data_dir: /ai-research/datasets/attention/ann_h3.6m/
93
+ experiment_name: STSGCN-tests
94
+ load_model_path: ''
95
+ log_path: /ai-research/notebooks/testing_repos/logdir/
96
+ model_name_rel_path: STSGCN-benchmark
97
+ save_all_intermediate_models: false
98
+ save_models: true
99
+ tensorboard:
100
+ num_mesh: 4
101
+ meta_config:
102
+ comment: Testing a new architecture based on STSGCN paper.
103
+ project: Attention
104
+ task: 3d keypoint prediction
105
+ version: 0.1.1
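MlpMixer_ext reads this file through attribute access (arch.model_params.input_n, learn.dropout, ...), so the parsed YAML has to be wrapped in namespace-like objects. A minimal loading sketch, assuming PyYAML; the recursive wrapper is an illustration, not part of the repository:

import yaml
from types import SimpleNamespace

def to_ns(obj):
    # Turn nested dicts into SimpleNamespace so keys read as attributes;
    # lists (e.g. model_complexity) stay plain lists.
    if isinstance(obj, dict):
        return SimpleNamespace(**{k: to_ns(v) for k, v in obj.items()})
    if isinstance(obj, list):
        return [to_ns(v) for v in obj]
    return obj

with open("config-20221114_2127-id9542.yaml") as f:
    cfg = to_ns(yaml.safe_load(f))

arch = cfg.architecture_config
print(arch.model, arch.model_params.input_n, arch.model_params.output_n)
# MlpMixer_ext_1 10 25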
h36m_detailed/64/files/model.py ADDED
@@ -0,0 +1,597 @@
1
+ import math
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ from torch.nn import functional as F
6
+
7
+ from ..layers import deformable_conv, SE
8
+
9
+ torch.manual_seed(0)
10
+
11
+
12
+ # This is a simple CNN layer that performs a 2-D convolution while maintaining the dimensions of the input (except for the feature dimension)
13
+ class CNN_layer(nn.Module):
14
+ def __init__(self,
15
+ in_ch,
16
+ out_ch,
17
+ kernel_size,
18
+ dropout,
19
+ bias=True):
20
+ super(CNN_layer, self).__init__()
21
+ self.kernel_size = kernel_size
22
+ padding = (
23
+ (kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2) # padding so that both dimensions are maintained
24
+ assert kernel_size[0] % 2 == 1 and kernel_size[1] % 2 == 1
25
+
26
+ self.block1 = [nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=padding, dilation=(1, 1)),
27
+ nn.BatchNorm2d(out_ch),
28
+ nn.Dropout(dropout, inplace=True),
29
+ ]
30
+
31
+ self.block1 = nn.Sequential(*self.block1)
32
+
33
+ def forward(self, x):
34
+ output = self.block1(x)
35
+ return output
36
+
37
+
38
+ class FPN(nn.Module):
39
+ def __init__(self, in_ch,
40
+ out_ch,
41
+ kernel, # (3,1)
42
+ dropout,
43
+ reduction,
44
+ ):
45
+ super(FPN, self).__init__()
46
+ kernel_size = kernel if isinstance(kernel, (tuple, list)) else (kernel, kernel)
47
+ padding = ((kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2)
48
+ pad1 = (padding[0], padding[1])
49
+ pad2 = (padding[0] + pad1[0], padding[1] + pad1[1])
50
+ pad3 = (padding[0] + pad2[0], padding[1] + pad2[1])
51
+ dil1 = (1, 1)
52
+ dil2 = (1 + pad1[0], 1 + pad1[1])
53
+ dil3 = (1 + pad2[0], 1 + pad2[1])
54
+ self.block1 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad1, dilation=dil1),
55
+ nn.BatchNorm2d(out_ch),
56
+ nn.Dropout(dropout, inplace=True),
57
+ nn.PReLU(),
58
+ )
59
+ self.block2 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad2, dilation=dil2),
60
+ nn.BatchNorm2d(out_ch),
61
+ nn.Dropout(dropout, inplace=True),
62
+ nn.PReLU(),
63
+ )
64
+ self.block3 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad3, dilation=dil3),
65
+ nn.BatchNorm2d(out_ch),
66
+ nn.Dropout(dropout, inplace=True),
67
+ nn.PReLU(),
68
+ )
69
+ self.pooling = nn.AdaptiveAvgPool2d((1, 1)) # Action Context.
70
+ self.compress = nn.Conv2d(out_ch * 3 + in_ch,
71
+ out_ch,
72
+ kernel_size=(1, 1)) # PRELU is outside the loop, check at the end of the code.
73
+
74
+ def forward(self, x):
75
+ b, dim, joints, seq = x.shape
76
+ global_action = F.interpolate(self.pooling(x), (joints, seq))
77
+ out = torch.cat((self.block1(x), self.block2(x), self.block3(x), global_action), dim=1)
78
+ out = self.compress(out)
79
+ return out
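+ # With txc_kernel_size=3 this is a three-branch pyramid: 3x3 convolutions
+ # with dilation 1, 2, 3 and matching padding 1, 2, 3 (so both trailing
+ # dimensions are preserved), plus a globally pooled "action context" map,
+ # all concatenated and fused back to out_ch by the 1x1 compress conv.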
80
+
81
+
82
+ def mish(x):
83
+ return (x * torch.tanh(F.softplus(x)))
84
+
85
+
86
+ class ConvTemporalGraphical(nn.Module):
87
+ # Source : https://github.com/yysijie/st-gcn/blob/master/net/st_gcn.py
88
+ r"""The basic module for applying a graph convolution.
89
+ Args:
90
+ Shape:
91
+ - Input: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
92
+ - Output: Output graph sequence in :math:`(N, out_ch, T_{out}, V)` format
93
+ where
94
+ :math:`N` is a batch size,
95
+ :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
96
+ :math:`T_{in}/T_{out}` is a length of input/output sequence,
97
+ :math:`V` is the number of graph nodes.
98
+ """
99
+
100
+ def __init__(self, time_dim, joints_dim, domain, interpratable):
101
+ super(ConvTemporalGraphical, self).__init__()
102
+
103
+ if domain == "time":
104
+ # learnable, graph-agnostic 3-d adjacency matrix(or edge importance matrix)
105
+ size = joints_dim
106
+ if not interpratable:
107
+ self.A = nn.Parameter(torch.FloatTensor(time_dim, size, size))
108
+ self.domain = 'nctv,tvw->nctw'
109
+ else:
110
+ self.domain = 'nctv,ntvw->nctw'
111
+ elif domain == "space":
112
+ size = time_dim
113
+ if not interpratable:
114
+ self.A = nn.Parameter(torch.FloatTensor(joints_dim, size, size))
115
+ self.domain = 'nctv,vtq->ncqv'
116
+ else:
117
+ self.domain = 'nctv,nvtq->ncqv'
118
+ if not interpratable:
119
+ stdv = 1. / math.sqrt(self.A.size(1))
120
+ self.A.data.uniform_(-stdv, stdv)
121
+
122
+ def forward(self, x):
123
+ x = torch.einsum(self.domain, (x, self.A))
124
+ return x.contiguous()
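+ # The einsum contracts one axis of the sequence with the adjacency A:
+ # domain "time"  -> 'nctv,ntvw->nctw': a per-sample, per-frame (V, V)
+ #                   matrix mixes information across joints;
+ # domain "space" -> 'nctv,nvtq->ncqv': a per-sample, per-joint (T, T)
+ #                   matrix mixes information across frames.
+ # The non-interpretable variants drop the batch index and share a learned A.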
125
+
126
+
127
+ class Map2Adj(nn.Module):
128
+ def __init__(self,
129
+ in_ch,
130
+ time_dim,
131
+ joints_dim,
132
+ domain,
133
+ dropout,
134
+ ):
135
+ super(Map2Adj, self).__init__()
136
+ self.domain = domain
137
+ inter_ch = in_ch // 2
138
+ self.time_compress = nn.Sequential(nn.Conv2d(in_ch, inter_ch, kernel_size=1, bias=False),
139
+ nn.BatchNorm2d(inter_ch),
140
+ nn.PReLU(),
141
+ nn.Conv2d(inter_ch, inter_ch, kernel_size=(time_dim, 1), bias=False),
142
+ nn.BatchNorm2d(inter_ch),
143
+ nn.Dropout(dropout, inplace=True),
144
+ nn.Conv2d(inter_ch, time_dim, kernel_size=1, bias=False),
145
+ )
146
+ self.joint_compress = nn.Sequential(nn.Conv2d(in_ch, inter_ch, kernel_size=1, bias=False),
147
+ nn.BatchNorm2d(inter_ch),
148
+ nn.PReLU(),
149
+ nn.Conv2d(inter_ch, inter_ch, kernel_size=(1, joints_dim), bias=False),
150
+ nn.BatchNorm2d(inter_ch),
151
+ nn.Dropout(dropout, inplace=True),
152
+ nn.Conv2d(inter_ch, joints_dim, kernel_size=1, bias=False),
153
+ )
154
+
155
+ if self.domain == "space":
156
+ ch = joints_dim
157
+ self.perm1 = (0, 1, 2, 3)
158
+ self.perm2 = (0, 3, 2, 1)
159
+ if self.domain == "time":
160
+ ch = time_dim
161
+ self.perm1 = (0, 2, 1, 3)
162
+ self.perm2 = (0, 1, 2, 3)
163
+
164
+ inter_ch = ch # // 2
165
+ self.expansor = nn.Sequential(nn.Conv2d(ch, inter_ch, kernel_size=1, bias=False),
166
+ nn.BatchNorm2d(inter_ch),
167
+ nn.Dropout(dropout, inplace=True),
168
+ nn.PReLU(),
169
+ nn.Conv2d(inter_ch, ch, kernel_size=1, bias=False),
170
+ )
171
+ self.time_compress.apply(self._init_weights)
172
+ self.joint_compress.apply(self._init_weights)
173
+ self.expansor.apply(self._init_weights)
174
+
175
+ def _init_weights(self, m, gain=0.05):
176
+ if isinstance(m, nn.Linear):
177
+ torch.nn.init.xavier_uniform_(m.weight, gain=gain)
178
+ if isinstance(m, (nn.Conv2d, nn.Conv1d)):
179
+ torch.nn.init.xavier_normal_(m.weight, gain=gain)
180
+ if isinstance(m, nn.PReLU):
181
+ torch.nn.init.constant_(m.weight, 0.25)
182
+
183
+ def forward(self, x):
184
+ b, dims, seq, joints = x.shape
185
+ dim_seq = self.time_compress(x)
186
+ dim_space = self.joint_compress(x)
187
+ o = torch.matmul(dim_space.permute(self.perm1), dim_seq.permute(self.perm2))
188
+ Adj = self.expansor(o)
189
+ return Adj
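+ # Shape flow: time_compress collapses T and emits (N, T, 1, V), while
+ # joint_compress collapses V and emits (N, V, T, 1). In the "space" domain
+ # their matmul yields one (T, T) map per joint -> Adj of shape (N, V, T, T);
+ # in the "time" domain the permutations give (N, T, V, V) instead, i.e. a
+ # data-dependent adjacency per sample rather than a fixed one.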
190
+
191
+
192
+ class Domain_GCNN_layer(nn.Module):
193
+ """
194
+ Shape:
195
+ - Input[0]: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
196
+ - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
197
+ - Output[0]: Output graph sequence in :math:`(N, out_ch, T_{out}, V)` format
198
+ where
199
+ :math:`N` is a batch size,
200
+ :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
201
+ :math:`T_{in}/T_{out}` is a length of input/output sequence,
202
+ :math:`V` is the number of graph nodes.
203
+ :in_ch = dimension of coordinates
204
+ :out_ch = dimension of coordinates
206
+ """
207
+
208
+ def __init__(self,
209
+ in_ch,
210
+ out_ch,
211
+ kernel_size,
212
+ stride,
213
+ time_dim,
214
+ joints_dim,
215
+ domain,
216
+ interpratable,
217
+ dropout,
218
+ bias=True):
219
+
220
+ super(Domain_GCNN_layer, self).__init__()
221
+ self.kernel_size = kernel_size
222
+ assert self.kernel_size[0] % 2 == 1
223
+ assert self.kernel_size[1] % 2 == 1
224
+ padding = ((self.kernel_size[0] - 1) // 2, (self.kernel_size[1] - 1) // 2)
225
+ self.interpratable = interpratable
226
+ self.domain = domain
227
+
228
+ self.gcn = ConvTemporalGraphical(time_dim, joints_dim, domain, interpratable)
229
+ self.tcn = nn.Sequential(nn.Conv2d(in_ch,
230
+ out_ch,
231
+ (self.kernel_size[0], self.kernel_size[1]),
232
+ (stride, stride),
233
+ padding,
234
+ ),
235
+ nn.BatchNorm2d(out_ch),
236
+ nn.Dropout(dropout, inplace=True),
237
+ )
238
+
239
+ if stride != 1 or in_ch != out_ch:
240
+ self.residual = nn.Sequential(nn.Conv2d(in_ch,
241
+ out_ch,
242
+ kernel_size=1,
243
+ stride=(1, 1)),
244
+ nn.BatchNorm2d(out_ch),
245
+ )
246
+ else:
247
+ self.residual = nn.Identity()
248
+ if self.interpratable:
249
+ self.map_to_adj = Map2Adj(in_ch,
250
+ time_dim,
251
+ joints_dim,
252
+ domain,
253
+ dropout,
254
+ )
255
+ else:
256
+ self.map_to_adj = nn.Identity()
257
+ self.prelu = nn.PReLU()
258
+
259
+ def forward(self, x):
260
+ # assert A.shape[0] == self.kernel_size[1], print(A.shape[0],self.kernel_size)
261
+ res = self.residual(x)
262
+ self.Adj = self.map_to_adj(x)
263
+ if self.interpratable:
264
+ self.gcn.A = self.Adj
265
+ x1 = self.gcn(x)
266
+ x2 = self.tcn(x1)
267
+ x3 = x2 + res
268
+ x4 = self.prelu(x3)
269
+ return x4
270
+
271
+
272
+ # Dynamic SpatioTemporal Decompose Graph Convolutions (DSTD-GC)
273
+ class DSTD_GC(nn.Module):
274
+ """
275
+ Shape:
276
+ - Input[0]: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
277
+ - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
278
+ - Output[0]: Output graph sequence in :math:`(N, out_ch, T_{out}, V)` format
279
+ where
280
+ :math:`N` is a batch size,
281
+ :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
282
+ :math:`T_{in}/T_{out}` is a length of input/output sequence,
283
+ :math:`V` is the number of graph nodes.
284
+ :in_ch = dimension of coordinates
285
+ :out_ch = dimension of coordinates
287
+ """
288
+
289
+ def __init__(self,
290
+ in_ch,
291
+ out_ch,
292
+ interpratable,
293
+ kernel_size,
294
+ stride,
295
+ time_dim,
296
+ joints_dim,
297
+ reduction,
298
+ dropout):
299
+ super(DSTD_GC, self).__init__()
300
+ self.dsgn = Domain_GCNN_layer(in_ch, out_ch, kernel_size, stride,
301
+ time_dim, joints_dim, "space", interpratable, dropout)
302
+ self.tsgn = Domain_GCNN_layer(in_ch, out_ch, kernel_size, stride,
303
+ time_dim, joints_dim, "time", interpratable, dropout)
304
+
305
+ self.compressor = nn.Sequential(nn.Conv2d(out_ch * 2, out_ch, 1, bias=False),
306
+ nn.BatchNorm2d(out_ch),
307
+ nn.PReLU(),
308
+ SE.SELayer2d(out_ch, reduction=reduction),
309
+ )
310
+ if stride != 1 or in_ch != out_ch:
311
+ self.residual = nn.Sequential(nn.Conv2d(in_ch,
312
+ out_ch,
313
+ kernel_size=1,
314
+ stride=(1, 1)),
315
+ nn.BatchNorm2d(out_ch),
316
+ )
317
+ else:
318
+ self.residual = nn.Identity()
319
+
320
+ # Weighting features
321
+ out_ch_c = out_ch // 2 if out_ch // 2 > 1 else 1
322
+ self.global_norm = nn.BatchNorm2d(in_ch)
323
+ self.conv_s = nn.Sequential(nn.Conv2d(in_ch, out_ch_c, (time_dim, 1), bias=False),
324
+ nn.BatchNorm2d(out_ch_c),
325
+ nn.Dropout(dropout, inplace=True),
326
+ nn.PReLU(),
327
+ nn.Conv2d(out_ch_c, out_ch, (1, joints_dim), bias=False),
328
+ nn.BatchNorm2d(out_ch),
329
+ nn.Dropout(dropout, inplace=True),
330
+ nn.PReLU(),
331
+ )
332
+ self.conv_t = nn.Sequential(nn.Conv2d(in_ch, out_ch_c, (time_dim, 1), bias=False),
333
+ nn.BatchNorm2d(out_ch_c),
334
+ nn.Dropout(dropout, inplace=True),
335
+ nn.PReLU(),
336
+ nn.Conv2d(out_ch_c, out_ch, (1, joints_dim), bias=False),
337
+ nn.BatchNorm2d(out_ch),
338
+ nn.Dropout(dropout, inplace=True),
339
+ nn.PReLU(),
340
+ )
341
+ self.map_s = nn.Sequential(nn.Linear(out_ch + 2 + time_dim * 2, out_ch, bias=False),
342
+ nn.BatchNorm1d(out_ch),
343
+ nn.Dropout(dropout, inplace=True),
344
+ nn.PReLU(),
345
+ nn.Linear(out_ch, out_ch, bias=False),
346
+ )
347
+ self.map_t = nn.Sequential(nn.Linear(out_ch + 2 + time_dim * 2, out_ch, bias=False),
348
+ nn.BatchNorm1d(out_ch),
349
+ nn.Dropout(dropout, inplace=True),
350
+ nn.PReLU(),
351
+ nn.Linear(out_ch, out_ch, bias=False),
352
+ )
353
+ self.prelu1 = nn.Sequential(nn.BatchNorm2d(out_ch),
354
+ nn.PReLU(),
355
+ )
356
+ self.prelu2 = nn.Sequential(nn.BatchNorm2d(out_ch),
357
+ nn.PReLU(),
358
+ )
359
+
360
+ def _get_stats_(self, x):
361
+ global_avg_pool = x.mean((3, 2)).mean(1, keepdims=True)
362
+ global_avg_pool_features = x.mean(3).mean(1)
363
+ global_std_pool = x.std((3, 2)).std(1, keepdims=True)
364
+ global_std_pool_features = x.std(3).std(1)
365
+ return torch.cat((
366
+ global_avg_pool,
367
+ global_avg_pool_features,
368
+ global_std_pool,
369
+ global_std_pool_features,
370
+ ),
371
+ dim=1)
372
+
373
+ def forward(self, x):
374
+ b, dim, seq, joints = x.shape # 64, 3, 10, 22
375
+ xn = self.global_norm(x)
376
+
377
+ stats = self._get_stats_(xn)
378
+ w1 = torch.cat((self.conv_s(xn).view(b, -1), stats), dim=1)
379
+ stats = self._get_stats_(xn)
380
+ w2 = torch.cat((self.conv_t(xn).view(b, -1), stats), dim=1)
381
+ self.w1 = self.map_s(w1)
382
+ self.w2 = self.map_t(w2)
383
+ w1 = self.w1[..., None, None]
384
+ w2 = self.w2[..., None, None]
385
+
386
+ x1 = self.dsgn(xn)
387
+ x2 = self.tsgn(xn)
388
+ out = torch.cat((self.prelu1(w1 * x1), self.prelu2(w2 * x2)), dim=1)
389
+ out = self.compressor(out)
390
+ return out + self.residual(xn)
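+ # Branch gating: conv_s/conv_t squeeze the normalized input into per-channel
+ # summaries that are concatenated with global mean/std statistics
+ # (2 + 2*time_dim extra values) and mapped by map_s/map_t to the gates
+ # w1/w2 scaling the space- and time-domain streams before the 1x1 fusion.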
391
+
392
+
393
+ class ContextLayer(nn.Module):
394
+ def __init__(self,
395
+ in_ch,
396
+ hidden_ch,
397
+ output_seq,
398
+ input_seq,
399
+ joints,
400
+ dims=3,
401
+ reduction=8,
402
+ dropout=0.1,
403
+ ):
404
+ super(ContextLayer, self).__init__()
405
+ self.n_output = output_seq
406
+ self.n_joints = joints
407
+ self.n_input = input_seq
408
+ self.context_conv1 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, 1, bias=False),
409
+ nn.BatchNorm2d(hidden_ch),
410
+ nn.PReLU(),
411
+ )
412
+
413
+ self.context_conv2 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, (input_seq, 1), bias=False),
414
+ nn.BatchNorm2d(hidden_ch),
415
+ nn.PReLU(),
416
+ )
417
+ self.context_conv3 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, 1, bias=False),
418
+ nn.BatchNorm2d(hidden_ch),
419
+ nn.PReLU(),
420
+ )
421
+ self.map1 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
422
+ nn.Dropout(dropout, inplace=True),
423
+ nn.PReLU(),
424
+ )
425
+ self.map2 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
426
+ nn.Dropout(dropout, inplace=True),
427
+ nn.PReLU(),
428
+ )
429
+ self.map3 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
430
+ nn.Dropout(dropout, inplace=True),
431
+ nn.PReLU(),
432
+ )
433
+
434
+ self.fmap_s = nn.Sequential(nn.Linear(self.n_output * 3, self.n_joints, bias=False),
435
+ nn.BatchNorm1d(self.n_joints),
436
+ nn.Dropout(dropout, inplace=True), )
437
+
438
+ self.fmap_t = nn.Sequential(nn.Linear(self.n_output * 3, self.n_output, bias=False),
439
+ nn.BatchNorm1d(self.n_output),
440
+ nn.Dropout(dropout, inplace=True), )
441
+
442
+ # inter_ch = self.n_joints # // 2
443
+ self.norm_map = nn.Sequential(nn.Conv1d(self.n_output, self.n_output, 1, bias=False),
444
+ nn.BatchNorm1d(self.n_output),
445
+ nn.Dropout(dropout, inplace=True),
446
+ nn.PReLU(),
447
+ SE.SELayer1d(self.n_output, reduction=reduction),
448
+ nn.Conv1d(self.n_output, self.n_output, 1, bias=False),
449
+ nn.BatchNorm1d(self.n_output),
450
+ nn.Dropout(dropout, inplace=True),
451
+ nn.PReLU(),
452
+ )
453
+
454
+ self.fconv = nn.Sequential(nn.Conv2d(1, dims, 1, bias=False),
455
+ nn.BatchNorm2d(dims),
456
+ nn.PReLU(),
457
+ nn.Conv2d(dims, dims, 1, bias=False),
458
+ nn.BatchNorm2d(dims),
459
+ nn.PReLU(),
460
+ )
461
+ self.SE = SE.SELayer2d(self.n_output, reduction=reduction)
462
+
463
+ def forward(self, x):
464
+ b, _, seq, joint_dim = x.shape
465
+ y1 = self.context_conv1(x).max(-1)[0].max(-1)[0]
466
+ y2 = self.context_conv2(x).view(b, -1, joint_dim).max(-1)[0]
467
+ ym = self.context_conv3(x).mean((2, 3))
468
+ y = torch.cat((self.map1(y1), self.map2(y2), self.map3(ym)), dim=1)
469
+ self.joints = self.fmap_s(y)
470
+ self.displacements = self.fmap_t(y) # .cumsum(1)
471
+ self.seq_joints = torch.bmm(self.displacements.unsqueeze(2), self.joints.unsqueeze(1))
472
+ self.seq_joints_n = self.norm_map(self.seq_joints)
473
+ self.seq_joints_dims = self.fconv(self.seq_joints_n.view(b, 1, self.n_output, self.n_joints))
474
+ o = self.SE(self.seq_joints_dims.permute(0, 2, 3, 1))
475
+ return o
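+ # ContextLayer is effectively rank-1: fmap_t produces a per-frame profile,
+ # fmap_s a per-joint profile, and their outer product (bmm) forms a
+ # (T_out, V) map that is normalized, lifted to 3 coordinates by fconv,
+ # and finally SE-gated.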
476
+
477
+
478
+ class MlpMixer_ext(nn.Module):
479
+ """
480
+ Shape:
481
+ - Input[0]: Input sequence in :math:`(N, in_ch, T_{in}, V)` format
482
+ - Output[0]: Output sequence in :math:`(N, T_{out}, in_ch, V)` format
483
+ where
484
+ :math:`N` is a batch size,
485
+ :math:`T_{in}/T_{out}` is a length of input/output sequence,
486
+ :math:`V` is the number of graph nodes.
487
+ :in_ch = number of channels for the coordinates (default=3)
488
489
+ """
490
+
491
+ def __init__(self, arch, learn):
492
+ super(MlpMixer_ext, self).__init__()
493
+ self.clipping = arch.model_params.clipping
494
+
495
+ self.n_input = arch.model_params.input_n
496
+ self.n_output = arch.model_params.output_n
497
+ self.n_joints = arch.model_params.joints
498
+ self.n_txcnn_layers = arch.model_params.n_txcnn_layers
499
+ self.txc_kernel_size = [arch.model_params.txc_kernel_size] * 2
500
+ self.input_gcn = arch.model_params.input_gcn
501
+ self.output_gcn = arch.model_params.output_gcn
502
+ self.reduction = arch.model_params.reduction
503
+ self.hidden_dim = arch.model_params.hidden_dim
504
+
505
+ self.st_gcnns = nn.ModuleList()
506
+ self.txcnns = nn.ModuleList()
507
+ self.se = nn.ModuleList()
508
+
509
+ self.in_conv = nn.ModuleList()
510
+ self.context_layer = nn.ModuleList()
511
+ self.trans = nn.ModuleList()
512
+ self.in_ch = 10
513
+ self.model_tx = self.input_gcn.model_complexity.copy()
514
+ self.model_tx.insert(0, 1) # prepend 1 at position 0.
515
+
516
+ self.input_gcn.model_complexity.insert(0, self.in_ch)
517
+ self.input_gcn.model_complexity.append(self.in_ch)
518
+ # self.input_gcn.interpretable.insert(0, True)
519
+ # self.input_gcn.interpretable.append(False)
520
+ for i in range(len(self.input_gcn.model_complexity) - 1):
521
+ self.st_gcnns.append(DSTD_GC(self.input_gcn.model_complexity[i],
522
+ self.input_gcn.model_complexity[i + 1],
523
+ self.input_gcn.interpretable[i],
524
+ [1, 1], 1, self.n_input, self.n_joints, self.reduction, learn.dropout))
525
+
526
+ self.context_layer = ContextLayer(1, self.hidden_dim,
527
+ self.n_output, self.n_output, self.n_joints,
528
+ 3, self.reduction, learn.dropout
529
+ )
530
+
531
+ # at this point, we must permute the dimensions of the gcn network, from (N,C,T,V) into (N,T,C,V)
532
+ # with kernel_size[3,3] the dimensions of C,V will be maintained
533
+ self.txcnns.append(FPN(self.n_input, self.n_output, self.txc_kernel_size, 0., self.reduction))
534
+ for i in range(1, self.n_txcnn_layers):
535
+ self.txcnns.append(FPN(self.n_output, self.n_output, self.txc_kernel_size, 0., self.reduction))
536
+
537
+ self.prelus = nn.ModuleList()
538
+ for j in range(self.n_txcnn_layers):
539
+ self.prelus.append(nn.PReLU())
540
+
541
+ self.dim_conversor = nn.Sequential(nn.Conv2d(self.in_ch, 3, 1, bias=False),
542
+ nn.BatchNorm2d(3),
543
+ nn.PReLU(),
544
+ nn.Conv2d(3, 3, 1, bias=False),
545
+ nn.PReLU(3), )
546
+
547
+ self.st_gcnns_o = nn.ModuleList()
548
+ self.output_gcn.model_complexity.insert(0, 3)
549
+ for i in range(len(self.output_gcn.model_complexity) - 1):
550
+ self.st_gcnns_o.append(DSTD_GC(self.output_gcn.model_complexity[i],
551
+ self.output_gcn.model_complexity[i + 1],
552
+ self.output_gcn.interpretable[i],
553
+ [1, 1], 1, self.n_joints, self.n_output, self.reduction, learn.dropout))
554
+
555
+ self.st_gcnns_o.apply(self._init_weights)
556
+ self.st_gcnns.apply(self._init_weights)
557
+ self.txcnns.apply(self._init_weights)
558
+
559
+ def _init_weights(self, m, gain=0.1):
560
+ if isinstance(m, nn.Linear):
561
+ torch.nn.init.xavier_uniform_(m.weight, gain=gain)
562
+ # if isinstance(m, (nn.Conv2d, nn.Conv1d)):
563
+ # torch.nn.init.xavier_normal_(m.weight, gain=gain)
564
+ if isinstance(m, nn.PReLU):
565
+ torch.nn.init.constant_(m.weight, 0.25)
566
+
567
+ def forward(self, x):
568
+ b, seq, joints, dim = x.shape
569
+ vel = torch.zeros_like(x)
570
+ vel[:, :-1] = torch.diff(x, dim=1)
571
+ vel[:, -1] = x[:, -1]
572
+ acc = torch.zeros_like(x)
573
+ acc[:, :-1] = torch.diff(vel, dim=1)
574
+ acc[:, -1] = vel[:, -1]
575
+ x1 = torch.cat((x, acc, vel, torch.norm(vel, dim=-1, keepdim=True)), dim=-1)
576
+ x2 = x1.permute((0, 3, 1, 2)) # (N, C, T, V), e.g. torch.Size([64, 10, 10, 22])
577
+ x3 = x2
578
+
579
+ for i in range(len(self.st_gcnns)):
580
+ x3 = self.st_gcnns[i](x3)
581
+
582
+ x5 = x3.permute(0, 2, 1, 3) # prepare the input for the Time-Extrapolator-CNN (NCTV->NTCV)
583
+
584
+ x6 = self.prelus[0](self.txcnns[0](x5))
585
+ for i in range(1, self.n_txcnn_layers):
586
+ x6 = self.prelus[i](self.txcnns[i](x6)) + x6 # residual connection
587
+
588
+ x6 = self.dim_conversor(x6.permute(0, 2, 1, 3)).permute(0, 2, 3, 1)
589
+ x7 = x6.cumsum(1)
590
+
591
+ act = self.context_layer(x7.reshape(b, 1, self.n_output, joints * x7.shape[-1]))
592
+ x8 = x7.permute(0, 3, 2, 1)
593
+ for i in range(len(self.st_gcnns_o)):
594
+ x8 = self.st_gcnns_o[i](x8)
595
+ x9 = x8.permute(0, 3, 2, 1) + act
596
+
597
+ return x[:, -1:] + x9,
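With the config loaded as in the earlier sketch (cfg), the model can be built straight from the two namespaces. A hedged usage sketch: the import path is hypothetical, the config registers the class as MlpMixer_ext_1 while this file defines MlpMixer_ext, and the relative ..layers import (SE, deformable_conv) must resolve inside the repository package:

import torch
# from .model import MlpMixer_ext  # hypothetical import path

model = MlpMixer_ext(cfg.architecture_config, cfg.learning_config)
model.eval()
x = torch.randn(2, 10, 22, 3)      # (N, T_in=input_n, V=joints, C=3)
with torch.no_grad():
    pred, = model(x)               # forward returns a 1-tuple
print(pred.shape)                  # expected (2, 25, 22, 3): output_n poses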
h36m_detailed/64/metric_full_original_test.xlsx ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aefe76333ce12af037e4f29835216bc0db80886e20b960fd57e45d470109553
3
+ size 2048676
h36m_detailed/64/metric_original_test.xlsx ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da19096e8911209f49ac526dc4872e037a1b7d9f4eeee11b687767a6810692c5
3
+ size 2050608
h36m_detailed/64/metric_test.xlsx ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:997d02572ea23ab0ef99b70bcb2b9345333a705d5cd3689b2dab68359c1aecb1
3
+ size 2049626
h36m_detailed/64/metric_train.xlsx ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffcfdea4a175e5155f74f620170ade50139b455d338a51ffa64d84b0f923a1df
3
+ size 1844301
h36m_detailed/64/sample_original_test.xlsx ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a343ba69d0bea916b9e4825e1f2bf27621c008665699bd3d2645d01fbacf8826
3
+ size 29608760
h36m_detailed/8/files/CISTGCN-benchmark-best.pth.tar ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47b28248ab629ce18f5908f0c39c1d4700d12c5539f64828ffe4b73ee9c3c5af
3
+ size 5339339
h36m_detailed/8/files/CISTGCN-benchmark-last.pth.tar ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e275f51a3e51882421ab65244fe61a41109e9b60ab88df2aad79b4bbb676d75f
3
+ size 5343499
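Once the LFS payloads are materialized, these .pth.tar checkpoints should open with plain torch.load; their internal layout (bare state_dict vs. a dict carrying training metadata) is not visible from the pointers, so the sketch below only inspects it:

import torch

ckpt = torch.load("h36m_detailed/8/files/CISTGCN-benchmark-best.pth.tar",
                  map_location="cpu")
# Layout is an assumption: a wrapped checkpoint exposes keys such as
# "state_dict" or "epoch", while a bare state_dict lists parameter names.
if isinstance(ckpt, dict):
    print(list(ckpt)[:8])
else:
    print(type(ckpt))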
h36m_detailed/8/files/config-20221116_2202-id6444.yaml ADDED
@@ -0,0 +1,105 @@
1
+ architecture_config:
2
+ model: MlpMixer_ext_1
3
+ model_params:
4
+ input_n: 10
5
+ joints: 22
6
+ output_n: 25
7
+ n_txcnn_layers: 4
8
+ txc_kernel_size: 3
9
+ reduction: 8
10
+ hidden_dim: 64
11
+ input_gcn:
12
+ model_complexity:
13
+ - 8
14
+ - 8
15
+ - 8
16
+ - 8
17
+ interpretable:
18
+ - true
19
+ - true
20
+ - true
21
+ - true
22
+ - true
23
+ output_gcn:
24
+ model_complexity:
25
+ - 3
26
+ interpretable:
27
+ - true
28
+ clipping: 15
29
+ learning_config:
30
+ WarmUp: 100
31
+ normalize: false
32
+ dropout: 0.1
33
+ weight_decay: 1e-4
34
+ epochs: 50
35
+ lr: 0.01
36
+ # max_norm: 3
37
+ scheduler:
38
+ type: StepLR
39
+ params:
40
+ step_size: 3000
41
+ gamma: 0.8
42
+ loss:
43
+ weights: ""
44
+ type: "mpjpe"
45
+ augmentations:
46
+ random_scale:
47
+ x:
48
+ - 0.95
49
+ - 1.05
50
+ y:
51
+ - 0.90
52
+ - 1.10
53
+ z:
54
+ - 0.95
55
+ - 1.05
56
+ random_noise: ""
57
+ random_flip:
58
+ x: true
59
+ y: ""
60
+ z: true
61
+ random_rotation:
62
+ x:
63
+ - -5
64
+ - 5
65
+ y:
66
+ - -180
67
+ - 180
68
+ z:
69
+ - -5
70
+ - 5
71
+ random_translation:
72
+ x:
73
+ - -0.10
74
+ - 0.10
75
+ y:
76
+ - -0.10
77
+ - 0.10
78
+ z:
79
+ - -0.10
80
+ - 0.10
81
+ environment_config:
82
+ actions: all
83
+ evaluate_from: 0
84
+ is_norm: true
85
+ job: 16
86
+ sample_rate: 2
87
+ return_all_joints: true
88
+ save_grads: false
89
+ test_batch: 128
90
+ train_batch: 128
91
+ general_config:
92
+ data_dir: /ai-research/datasets/attention/ann_h3.6m/
93
+ experiment_name: STSGCN-tests
94
+ load_model_path: ''
95
+ log_path: /ai-research/notebooks/testing_repos/logdir/
96
+ model_name_rel_path: STSGCN-benchmark
97
+ save_all_intermediate_models: false
98
+ save_models: true
99
+ tensorboard:
100
+ num_mesh: 4
101
+ meta_config:
102
+ comment: Testing a new architecture based on STSGCN paper.
103
+ project: Attention
104
+ task: 3d keypoint prediction
105
+ version: 0.1.1
h36m_detailed/8/files/model.py ADDED
@@ -0,0 +1,597 @@
1
+ import math
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ from torch.nn import functional as F
6
+
7
+ from ..layers import deformable_conv, SE
8
+
9
+ torch.manual_seed(0)
10
+
11
+
12
+ # This is a simple CNN layer that performs a 2-D convolution while maintaining the dimensions of the input (except for the feature dimension)
13
+ class CNN_layer(nn.Module):
14
+ def __init__(self,
15
+ in_ch,
16
+ out_ch,
17
+ kernel_size,
18
+ dropout,
19
+ bias=True):
20
+ super(CNN_layer, self).__init__()
21
+ self.kernel_size = kernel_size
22
+ padding = (
23
+ (kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2) # padding so that both dimensions are maintained
24
+ assert kernel_size[0] % 2 == 1 and kernel_size[1] % 2 == 1
25
+
26
+ self.block1 = [nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=padding, dilation=(1, 1)),
27
+ nn.BatchNorm2d(out_ch),
28
+ nn.Dropout(dropout, inplace=True),
29
+ ]
30
+
31
+ self.block1 = nn.Sequential(*self.block1)
32
+
33
+ def forward(self, x):
34
+ output = self.block1(x)
35
+ return output
36
+
37
+
38
+ class FPN(nn.Module):
39
+ def __init__(self, in_ch,
40
+ out_ch,
41
+ kernel, # (3,1)
42
+ dropout,
43
+ reduction,
44
+ ):
45
+ super(FPN, self).__init__()
46
+ kernel_size = kernel if isinstance(kernel, (tuple, list)) else (kernel, kernel)
47
+ padding = ((kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2)
48
+ pad1 = (padding[0], padding[1])
49
+ pad2 = (padding[0] + pad1[0], padding[1] + pad1[1])
50
+ pad3 = (padding[0] + pad2[0], padding[1] + pad2[1])
51
+ dil1 = (1, 1)
52
+ dil2 = (1 + pad1[0], 1 + pad1[1])
53
+ dil3 = (1 + pad2[0], 1 + pad2[1])
54
+ self.block1 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad1, dilation=dil1),
55
+ nn.BatchNorm2d(out_ch),
56
+ nn.Dropout(dropout, inplace=True),
57
+ nn.PReLU(),
58
+ )
59
+ self.block2 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad2, dilation=dil2),
60
+ nn.BatchNorm2d(out_ch),
61
+ nn.Dropout(dropout, inplace=True),
62
+ nn.PReLU(),
63
+ )
64
+ self.block3 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad3, dilation=dil3),
65
+ nn.BatchNorm2d(out_ch),
66
+ nn.Dropout(dropout, inplace=True),
67
+ nn.PReLU(),
68
+ )
69
+ self.pooling = nn.AdaptiveAvgPool2d((1, 1)) # Action Context.
70
+ self.compress = nn.Conv2d(out_ch * 3 + in_ch,
71
+ out_ch,
72
+ kernel_size=(1, 1)) # PRELU is outside the loop, check at the end of the code.
73
+
74
+ def forward(self, x):
75
+ b, dim, joints, seq = x.shape
76
+ global_action = F.interpolate(self.pooling(x), (joints, seq))
77
+ out = torch.cat((self.block1(x), self.block2(x), self.block3(x), global_action), dim=1)
78
+ out = self.compress(out)
79
+ return out
80
+
81
+
82
+ def mish(x):
83
+ return (x * torch.tanh(F.softplus(x)))
84
+
85
+
86
+ class ConvTemporalGraphical(nn.Module):
87
+ # Source : https://github.com/yysijie/st-gcn/blob/master/net/st_gcn.py
88
+ r"""The basic module for applying a graph convolution.
89
+ Args:
90
+ Shape:
91
+ - Input: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
92
+ - Output: Output graph sequence in :math:`(N, out_ch, T_{out}, V)` format
93
+ where
94
+ :math:`N` is a batch size,
95
+ :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
96
+ :math:`T_{in}/T_{out}` is a length of input/output sequence,
97
+ :math:`V` is the number of graph nodes.
98
+ """
99
+
100
+ def __init__(self, time_dim, joints_dim, domain, interpratable):
101
+ super(ConvTemporalGraphical, self).__init__()
102
+
103
+ if domain == "time":
104
+ # learnable, graph-agnostic 3-d adjacency matrix(or edge importance matrix)
105
+ size = joints_dim
106
+ if not interpratable:
107
+ self.A = nn.Parameter(torch.FloatTensor(time_dim, size, size))
108
+ self.domain = 'nctv,tvw->nctw'
109
+ else:
110
+ self.domain = 'nctv,ntvw->nctw'
111
+ elif domain == "space":
112
+ size = time_dim
113
+ if not interpratable:
114
+ self.A = nn.Parameter(torch.FloatTensor(joints_dim, size, size))
115
+ self.domain = 'nctv,vtq->ncqv'
116
+ else:
117
+ self.domain = 'nctv,nvtq->ncqv'
118
+ if not interpratable:
119
+ stdv = 1. / math.sqrt(self.A.size(1))
120
+ self.A.data.uniform_(-stdv, stdv)
121
+
122
+ def forward(self, x):
123
+ x = torch.einsum(self.domain, (x, self.A))
124
+ return x.contiguous()
125
+
126
+
127
+ class Map2Adj(nn.Module):
128
+ def __init__(self,
129
+ in_ch,
130
+ time_dim,
131
+ joints_dim,
132
+ domain,
133
+ dropout,
134
+ ):
135
+ super(Map2Adj, self).__init__()
136
+ self.domain = domain
137
+ inter_ch = in_ch // 2
138
+ self.time_compress = nn.Sequential(nn.Conv2d(in_ch, inter_ch, kernel_size=1, bias=False),
139
+ nn.BatchNorm2d(inter_ch),
140
+ nn.PReLU(),
141
+ nn.Conv2d(inter_ch, inter_ch, kernel_size=(time_dim, 1), bias=False),
142
+ nn.BatchNorm2d(inter_ch),
143
+ nn.Dropout(dropout, inplace=True),
144
+ nn.Conv2d(inter_ch, time_dim, kernel_size=1, bias=False),
145
+ )
146
+ self.joint_compress = nn.Sequential(nn.Conv2d(in_ch, inter_ch, kernel_size=1, bias=False),
147
+ nn.BatchNorm2d(inter_ch),
148
+ nn.PReLU(),
149
+ nn.Conv2d(inter_ch, inter_ch, kernel_size=(1, joints_dim), bias=False),
150
+ nn.BatchNorm2d(inter_ch),
151
+ nn.Dropout(dropout, inplace=True),
152
+ nn.Conv2d(inter_ch, joints_dim, kernel_size=1, bias=False),
153
+ )
154
+
155
+ if self.domain == "space":
156
+ ch = joints_dim
157
+ self.perm1 = (0, 1, 2, 3)
158
+ self.perm2 = (0, 3, 2, 1)
159
+ if self.domain == "time":
160
+ ch = time_dim
161
+ self.perm1 = (0, 2, 1, 3)
162
+ self.perm2 = (0, 1, 2, 3)
163
+
164
+ inter_ch = ch # // 2
165
+ self.expansor = nn.Sequential(nn.Conv2d(ch, inter_ch, kernel_size=1, bias=False),
166
+ nn.BatchNorm2d(inter_ch),
167
+ nn.Dropout(dropout, inplace=True),
168
+ nn.PReLU(),
169
+ nn.Conv2d(inter_ch, ch, kernel_size=1, bias=False),
170
+ )
171
+ self.time_compress.apply(self._init_weights)
172
+ self.joint_compress.apply(self._init_weights)
173
+ self.expansor.apply(self._init_weights)
174
+
175
+ def _init_weights(self, m, gain=0.05):
176
+ if isinstance(m, nn.Linear):
177
+ torch.nn.init.xavier_uniform_(m.weight, gain=gain)
178
+ if isinstance(m, (nn.Conv2d, nn.Conv1d)):
179
+ torch.nn.init.xavier_normal_(m.weight, gain=gain)
180
+ if isinstance(m, nn.PReLU):
181
+ torch.nn.init.constant_(m.weight, 0.25)
182
+
183
+ def forward(self, x):
184
+ b, dims, seq, joints = x.shape
185
+ dim_seq = self.time_compress(x)
186
+ dim_space = self.joint_compress(x)
187
+ o = torch.matmul(dim_space.permute(self.perm1), dim_seq.permute(self.perm2))
188
+ Adj = self.expansor(o)
189
+ return Adj
190
+
191
+
192
+ class Domain_GCNN_layer(nn.Module):
193
+ """
194
+ Shape:
195
+ - Input[0]: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
196
+ - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
197
+ - Output[0]: Output graph sequence in :math:`(N, out_ch, T_{out}, V)` format
198
+ where
199
+ :math:`N` is a batch size,
200
+ :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
201
+ :math:`T_{in}/T_{out}` is a length of input/output sequence,
202
+ :math:`V` is the number of graph nodes.
203
+ :in_ch = dimension of coordinates
204
+ :out_ch = dimension of coordinates
206
+ """
207
+
208
+ def __init__(self,
209
+ in_ch,
210
+ out_ch,
211
+ kernel_size,
212
+ stride,
213
+ time_dim,
214
+ joints_dim,
215
+ domain,
216
+ interpratable,
217
+ dropout,
218
+ bias=True):
219
+
220
+ super(Domain_GCNN_layer, self).__init__()
221
+ self.kernel_size = kernel_size
222
+ assert self.kernel_size[0] % 2 == 1
223
+ assert self.kernel_size[1] % 2 == 1
224
+ padding = ((self.kernel_size[0] - 1) // 2, (self.kernel_size[1] - 1) // 2)
225
+ self.interpratable = interpratable
226
+ self.domain = domain
227
+
228
+ self.gcn = ConvTemporalGraphical(time_dim, joints_dim, domain, interpratable)
229
+ self.tcn = nn.Sequential(nn.Conv2d(in_ch,
230
+ out_ch,
231
+ (self.kernel_size[0], self.kernel_size[1]),
232
+ (stride, stride),
233
+ padding,
234
+ ),
235
+ nn.BatchNorm2d(out_ch),
236
+ nn.Dropout(dropout, inplace=True),
237
+ )
238
+
239
+ if stride != 1 or in_ch != out_ch:
240
+ self.residual = nn.Sequential(nn.Conv2d(in_ch,
241
+ out_ch,
242
+ kernel_size=1,
243
+ stride=(1, 1)),
244
+ nn.BatchNorm2d(out_ch),
245
+ )
246
+ else:
247
+ self.residual = nn.Identity()
248
+ if self.interpratable:
249
+ self.map_to_adj = Map2Adj(in_ch,
250
+ time_dim,
251
+ joints_dim,
252
+ domain,
253
+ dropout,
254
+ )
255
+ else:
256
+ self.map_to_adj = nn.Identity()
257
+ self.prelu = nn.PReLU()
258
+
259
+ def forward(self, x):
260
+ # assert A.shape[0] == self.kernel_size[1], print(A.shape[0],self.kernel_size)
261
+ res = self.residual(x)
262
+ self.Adj = self.map_to_adj(x)
263
+ if self.interpratable:
264
+ self.gcn.A = self.Adj
265
+ x1 = self.gcn(x)
266
+ x2 = self.tcn(x1)
267
+ x3 = x2 + res
268
+ x4 = self.prelu(x3)
269
+ return x4
270
+
271
+
272
+ # Dynamic SpatioTemporal Decompose Graph Convolutions (DSTD-GC)
273
+ class DSTD_GC(nn.Module):
274
+ """
275
+ Shape:
276
+ - Input[0]: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
277
+ - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
278
+ - Output[0]: Output graph sequence in :math:`(N, out_ch, T_{out}, V)` format
279
+ where
280
+ :math:`N` is a batch size,
281
+ :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
282
+ :math:`T_{in}/T_{out}` is a length of input/output sequence,
283
+ :math:`V` is the number of graph nodes.
284
+ :in_ch = dimension of coordinates
285
+ :out_ch = dimension of coordinates
287
+ """
288
+
289
+ def __init__(self,
290
+ in_ch,
291
+ out_ch,
292
+ interpratable,
293
+ kernel_size,
294
+ stride,
295
+ time_dim,
296
+ joints_dim,
297
+ reduction,
298
+ dropout):
299
+ super(DSTD_GC, self).__init__()
300
+ self.dsgn = Domain_GCNN_layer(in_ch, out_ch, kernel_size, stride,
301
+ time_dim, joints_dim, "space", interpratable, dropout)
302
+ self.tsgn = Domain_GCNN_layer(in_ch, out_ch, kernel_size, stride,
303
+ time_dim, joints_dim, "time", interpratable, dropout)
304
+
305
+ self.compressor = nn.Sequential(nn.Conv2d(out_ch * 2, out_ch, 1, bias=False),
306
+ nn.BatchNorm2d(out_ch),
307
+ nn.PReLU(),
308
+ SE.SELayer2d(out_ch, reduction=reduction),
309
+ )
310
+ if stride != 1 or in_ch != out_ch:
311
+ self.residual = nn.Sequential(nn.Conv2d(in_ch,
312
+ out_ch,
313
+ kernel_size=1,
314
+ stride=(1, 1)),
315
+ nn.BatchNorm2d(out_ch),
316
+ )
317
+ else:
318
+ self.residual = nn.Identity()
319
+
320
+ # Weighting features
321
+ out_ch_c = out_ch // 2 if out_ch // 2 > 1 else 1
322
+ self.global_norm = nn.BatchNorm2d(in_ch)
323
+ self.conv_s = nn.Sequential(nn.Conv2d(in_ch, out_ch_c, (time_dim, 1), bias=False),
324
+ nn.BatchNorm2d(out_ch_c),
325
+ nn.Dropout(dropout, inplace=True),
326
+ nn.PReLU(),
327
+ nn.Conv2d(out_ch_c, out_ch, (1, joints_dim), bias=False),
328
+ nn.BatchNorm2d(out_ch),
329
+ nn.Dropout(dropout, inplace=True),
330
+ nn.PReLU(),
331
+ )
332
+ self.conv_t = nn.Sequential(nn.Conv2d(in_ch, out_ch_c, (time_dim, 1), bias=False),
333
+ nn.BatchNorm2d(out_ch_c),
334
+ nn.Dropout(dropout, inplace=True),
335
+ nn.PReLU(),
336
+ nn.Conv2d(out_ch_c, out_ch, (1, joints_dim), bias=False),
337
+ nn.BatchNorm2d(out_ch),
338
+ nn.Dropout(dropout, inplace=True),
339
+ nn.PReLU(),
340
+ )
341
+ self.map_s = nn.Sequential(nn.Linear(out_ch + 2 + time_dim * 2, out_ch, bias=False),
342
+ nn.BatchNorm1d(out_ch),
343
+ nn.Dropout(dropout, inplace=True),
344
+ nn.PReLU(),
345
+ nn.Linear(out_ch, out_ch, bias=False),
346
+ )
347
+ self.map_t = nn.Sequential(nn.Linear(out_ch + 2 + time_dim * 2, out_ch, bias=False),
348
+ nn.BatchNorm1d(out_ch),
349
+ nn.Dropout(dropout, inplace=True),
350
+ nn.PReLU(),
351
+ nn.Linear(out_ch, out_ch, bias=False),
352
+ )
353
+ self.prelu1 = nn.Sequential(nn.BatchNorm2d(out_ch),
354
+ nn.PReLU(),
355
+ )
356
+ self.prelu2 = nn.Sequential(nn.BatchNorm2d(out_ch),
357
+ nn.PReLU(),
358
+ )
359
+
360
+ def _get_stats_(self, x):
361
+ global_avg_pool = x.mean((3, 2)).mean(1, keepdims=True)
362
+ global_avg_pool_features = x.mean(3).mean(1)
363
+ global_std_pool = x.std((3, 2)).std(1, keepdims=True)
364
+ global_std_pool_features = x.std(3).std(1)
365
+ return torch.cat((
366
+ global_avg_pool,
367
+ global_avg_pool_features,
368
+ global_std_pool,
369
+ global_std_pool_features,
370
+ ),
371
+ dim=1)
372
+
373
+ def forward(self, x):
374
+ b, dim, seq, joints = x.shape # 64, 3, 10, 22
375
+ xn = self.global_norm(x)
376
+
377
+ stats = self._get_stats_(xn)
378
+ w1 = torch.cat((self.conv_s(xn).view(b, -1), stats), dim=1)
379
+ stats = self._get_stats_(xn)
380
+ w2 = torch.cat((self.conv_t(xn).view(b, -1), stats), dim=1)
381
+ self.w1 = self.map_s(w1)
382
+ self.w2 = self.map_t(w2)
383
+ w1 = self.w1[..., None, None]
384
+ w2 = self.w2[..., None, None]
385
+
386
+ x1 = self.dsgn(xn)
387
+ x2 = self.tsgn(xn)
388
+ out = torch.cat((self.prelu1(w1 * x1), self.prelu2(w2 * x2)), dim=1)
389
+ out = self.compressor(out)
390
+ return torch.clip(out + self.residual(xn), -1e5, 1e5)
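+ # Note: unlike the otherwise identical copy under h36m_detailed/64, this
+ # DSTD_GC clamps the residual sum to [-1e5, 1e5], presumably to guard the
+ # narrower 8-channel configuration against activation blow-ups.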
391
+
392
+
393
+ class ContextLayer(nn.Module):
394
+ def __init__(self,
395
+ in_ch,
396
+ hidden_ch,
397
+ output_seq,
398
+ input_seq,
399
+ joints,
400
+ dims=3,
401
+ reduction=8,
402
+ dropout=0.1,
403
+ ):
404
+ super(ContextLayer, self).__init__()
405
+ self.n_output = output_seq
406
+ self.n_joints = joints
407
+ self.n_input = input_seq
408
+ self.context_conv1 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, 1, bias=False),
409
+ nn.BatchNorm2d(hidden_ch),
410
+ nn.PReLU(),
411
+ )
412
+
413
+ self.context_conv2 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, (input_seq, 1), bias=False),
414
+ nn.BatchNorm2d(hidden_ch),
415
+ nn.PReLU(),
416
+ )
417
+ self.context_conv3 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, 1, bias=False),
418
+ nn.BatchNorm2d(hidden_ch),
419
+ nn.PReLU(),
420
+ )
421
+ self.map1 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
422
+ nn.Dropout(dropout, inplace=True),
423
+ nn.PReLU(),
424
+ )
425
+ self.map2 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
426
+ nn.Dropout(dropout, inplace=True),
427
+ nn.PReLU(),
428
+ )
429
+ self.map3 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
430
+ nn.Dropout(dropout, inplace=True),
431
+ nn.PReLU(),
432
+ )
433
+
434
+ self.fmap_s = nn.Sequential(nn.Linear(self.n_output * 3, self.n_joints, bias=False),
435
+ nn.BatchNorm1d(self.n_joints),
436
+ nn.Dropout(dropout, inplace=True), )
437
+
438
+ self.fmap_t = nn.Sequential(nn.Linear(self.n_output * 3, self.n_output, bias=False),
439
+ nn.BatchNorm1d(self.n_output),
440
+ nn.Dropout(dropout, inplace=True), )
441
+
442
+ # inter_ch = self.n_joints # // 2
443
+ self.norm_map = nn.Sequential(nn.Conv1d(self.n_output, self.n_output, 1, bias=False),
444
+ nn.BatchNorm1d(self.n_output),
445
+ nn.Dropout(dropout, inplace=True),
446
+ nn.PReLU(),
447
+ SE.SELayer1d(self.n_output, reduction=reduction),
448
+ nn.Conv1d(self.n_output, self.n_output, 1, bias=False),
449
+ nn.BatchNorm1d(self.n_output),
450
+ nn.Dropout(dropout, inplace=True),
451
+ nn.PReLU(),
452
+ )
453
+
454
+ self.fconv = nn.Sequential(nn.Conv2d(1, dims, 1, bias=False),
455
+ nn.BatchNorm2d(dims),
456
+ nn.PReLU(),
457
+ nn.Conv2d(dims, dims, 1, bias=False),
458
+ nn.BatchNorm2d(dims),
459
+ nn.PReLU(),
460
+ )
461
+ self.SE = SE.SELayer2d(self.n_output, reduction=reduction)
462
+
463
+ def forward(self, x):
464
+ b, _, seq, joint_dim = x.shape
465
+ y1 = self.context_conv1(x).max(-1)[0].max(-1)[0]
466
+ y2 = self.context_conv2(x).view(b, -1, joint_dim).max(-1)[0]
467
+ ym = self.context_conv3(x).mean((2, 3))
468
+ y = torch.cat((self.map1(y1), self.map2(y2), self.map3(ym)), dim=1)
469
+ self.joints = self.fmap_s(y)
470
+ self.displacements = self.fmap_t(y) # .cumsum(1)
471
+ self.seq_joints = torch.bmm(self.displacements.unsqueeze(2), self.joints.unsqueeze(1))
472
+ self.seq_joints_n = self.norm_map(self.seq_joints)
473
+ self.seq_joints_dims = self.fconv(self.seq_joints_n.view(b, 1, self.n_output, self.n_joints))
474
+ o = self.SE(self.seq_joints_dims.permute(0, 2, 3, 1))
475
+ return o
476
+
477
+
478
+ class MlpMixer_ext(nn.Module):
479
+ """
480
+ Shape:
481
+ - Input[0]: Input sequence in :math:`(N, in_ch, T_{in}, V)` format
482
+ - Output[0]: Output sequence in :math:`(N, T_{out}, in_ch, V)` format
483
+ where
484
+ :math:`N` is a batch size,
485
+ :math:`T_{in}/T_{out}` is a length of input/output sequence,
486
+ :math:`V` is the number of graph nodes.
487
+ :in_ch = number of channels for the coordinates (default=3)
488
489
+ """
490
+
491
+ def __init__(self, arch, learn):
492
+ super(MlpMixer_ext, self).__init__()
493
+ self.clipping = arch.model_params.clipping
494
+
495
+ self.n_input = arch.model_params.input_n
496
+ self.n_output = arch.model_params.output_n
497
+ self.n_joints = arch.model_params.joints
498
+ self.n_txcnn_layers = arch.model_params.n_txcnn_layers
499
+ self.txc_kernel_size = [arch.model_params.txc_kernel_size] * 2
500
+ self.input_gcn = arch.model_params.input_gcn
501
+ self.output_gcn = arch.model_params.output_gcn
502
+ self.reduction = arch.model_params.reduction
503
+ self.hidden_dim = arch.model_params.hidden_dim
504
+
505
+ self.st_gcnns = nn.ModuleList()
506
+ self.txcnns = nn.ModuleList()
507
+ self.se = nn.ModuleList()
508
+
509
+ self.in_conv = nn.ModuleList()
510
+ self.context_layer = nn.ModuleList()
511
+ self.trans = nn.ModuleList()
512
+ self.in_ch = 10
513
+ self.model_tx = self.input_gcn.model_complexity.copy()
514
+ self.model_tx.insert(0, 1) # prepend 1 at position 0.
515
+
516
+ self.input_gcn.model_complexity.insert(0, self.in_ch)
517
+ self.input_gcn.model_complexity.append(self.in_ch)
518
+ # self.input_gcn.interpretable.insert(0, True)
519
+ # self.input_gcn.interpretable.append(False)
520
+ for i in range(len(self.input_gcn.model_complexity) - 1):
521
+ self.st_gcnns.append(DSTD_GC(self.input_gcn.model_complexity[i],
522
+ self.input_gcn.model_complexity[i + 1],
523
+ self.input_gcn.interpretable[i],
524
+ [1, 1], 1, self.n_input, self.n_joints, self.reduction, learn.dropout))
525
+
526
+ self.context_layer = ContextLayer(1, self.hidden_dim,
527
+ self.n_output, self.n_output, self.n_joints,
528
+ 3, self.reduction, learn.dropout
529
+ )
530
+
531
+ # at this point, we must permute the dimensions of the gcn network, from (N,C,T,V) into (N,T,C,V)
532
+ # with kernel_size[3,3] the dimensions of C,V will be maintained
533
+ self.txcnns.append(FPN(self.n_input, self.n_output, self.txc_kernel_size, 0., self.reduction))
534
+ for i in range(1, self.n_txcnn_layers):
535
+ self.txcnns.append(FPN(self.n_output, self.n_output, self.txc_kernel_size, 0., self.reduction))
536
+
537
+ self.prelus = nn.ModuleList()
538
+ for j in range(self.n_txcnn_layers):
539
+ self.prelus.append(nn.PReLU())
540
+
541
+ self.dim_conversor = nn.Sequential(nn.Conv2d(self.in_ch, 3, 1, bias=False),
542
+ nn.BatchNorm2d(3),
543
+ nn.PReLU(),
544
+ nn.Conv2d(3, 3, 1, bias=False),
545
+ nn.PReLU(3), )
546
+
547
+ self.st_gcnns_o = nn.ModuleList()
548
+ self.output_gcn.model_complexity.insert(0, 3)
549
+ for i in range(len(self.output_gcn.model_complexity) - 1):
550
+ self.st_gcnns_o.append(DSTD_GC(self.output_gcn.model_complexity[i],
551
+ self.output_gcn.model_complexity[i + 1],
552
+ self.output_gcn.interpretable[i],
553
+ [1, 1], 1, self.n_joints, self.n_output, self.reduction, learn.dropout))
554
+
555
+ self.st_gcnns_o.apply(self._init_weights)
556
+ self.st_gcnns.apply(self._init_weights)
557
+ self.txcnns.apply(self._init_weights)
558
+
559
+ def _init_weights(self, m, gain=0.1):
560
+ if isinstance(m, nn.Linear):
561
+ torch.nn.init.xavier_uniform_(m.weight, gain=gain)
562
+ # if isinstance(m, (nn.Conv2d, nn.Conv1d)):
563
+ # torch.nn.init.xavier_normal_(m.weight, gain=gain)
564
+ if isinstance(m, nn.PReLU):
565
+ torch.nn.init.constant_(m.weight, 0.25)
566
+
567
+ def forward(self, x):
568
+ b, seq, joints, dim = x.shape
569
+ vel = torch.zeros_like(x)
570
+ vel[:, :-1] = torch.diff(x, dim=1)
571
+ vel[:, -1] = x[:, -1]
572
+ acc = torch.zeros_like(x)
573
+ acc[:, :-1] = torch.diff(vel, dim=1)
574
+ acc[:, -1] = vel[:, -1]
575
+ x1 = torch.cat((x, acc, vel, torch.norm(vel, dim=-1, keepdim=True)), dim=-1)
576
+ x2 = x1.permute((0, 3, 1, 2)) # (N, C, T, V), e.g. torch.Size([64, 10, 10, 22])
577
+ x3 = x2
578
+
579
+ for i in range(len(self.st_gcnns)):
580
+ x3 = self.st_gcnns[i](x3)
581
+
582
+ x5 = x3.permute(0, 2, 1, 3) # prepare the input for the Time-Extrapolator-CNN (NCTV->NTCV)
583
+
584
+ x6 = self.prelus[0](self.txcnns[0](x5))
585
+ for i in range(1, self.n_txcnn_layers):
586
+ x6 = self.prelus[i](self.txcnns[i](x6)) + x6 # residual connection
587
+
588
+ x6 = self.dim_conversor(x6.permute(0, 2, 1, 3)).permute(0, 2, 3, 1)
589
+ x7 = x6.cumsum(1)
590
+
591
+ act = self.context_layer(x7.reshape(b, 1, self.n_output, joints * x7.shape[-1]))
592
+ x8 = x7.permute(0, 3, 2, 1)
593
+ for i in range(len(self.st_gcnns_o)):
594
+ x8 = self.st_gcnns_o[i](x8)
595
+ x9 = x8.permute(0, 3, 2, 1) + act
596
+
597
+ return x[:, -1:] + x9,
h36m_detailed/8/metric_full_original_test.xlsx ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16a38c585d516b280c90903d153360888df3095c405b65d0b9c08d9016d0cc64
3
+ size 2048156
h36m_detailed/8/metric_original_test.xlsx ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5945740c0478dbc8abcd9475bb8a345783130c1c222a64ae2a448f5929a8c626
3
+ size 2051725
h36m_detailed/8/metric_test.xlsx ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a707cc259f4fab0438ac0ab986cefde36430ecb99a2459800b9ed0eb74e4efc
3
+ size 2050259
h36m_detailed/8/metric_train.xlsx ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:888b454adf3d8d974f97db5eb2bba1964fd2a01899625a7569198654c4db73df
3
+ size 1899301
h36m_detailed/8/sample_original_test.xlsx ADDED
@@ -0,0 +1,3 @@
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca4d06df4567333f2d18034b39df732c3f0ea390663d9ef1fc6724b797fef964
3
+ size 29585393
h36m_detailed/short-400ms/16/files/config-20230104_1806-id2293.yaml ADDED
@@ -0,0 +1,106 @@
1
+ architecture_config:
2
+ model: CISTGCN_0
3
+ model_params:
4
+ input_n: 10
5
+ joints: 22
6
+ output_n: 10
7
+ n_txcnn_layers: 4
8
+ txc_kernel_size: 3
9
+ reduction: 8
10
+ hidden_dim: 64
11
+ input_gcn:
12
+ model_complexity:
13
+ - 16
14
+ - 16
15
+ - 16
16
+ - 16
17
+ interpretable:
18
+ - true
19
+ - true
20
+ - true
21
+ - true
22
+ - true
23
+ output_gcn:
24
+ model_complexity:
25
+ - 3
26
+ interpretable:
27
+ - true
28
+ clipping: 15
29
+ learning_config:
30
+ WarmUp: 100
31
+ normalize: false
32
+ dropout: 0.1
33
+ weight_decay: 1e-4
34
+ epochs: 50
35
+ lr: 0.01
36
+ # max_norm: 3
37
+ scheduler:
38
+ type: StepLR
39
+ params:
40
+ step_size: 3000
41
+ gamma: 0.8
42
+ loss:
43
+ weights: ""
44
+ type: "mpjpe"
45
+ augmentations:
46
+ random_scale:
47
+ x:
48
+ - 0.95
49
+ - 1.05
50
+ y:
51
+ - 0.90
52
+ - 1.10
53
+ z:
54
+ - 0.95
55
+ - 1.05
56
+ random_noise: ""
57
+ random_flip:
58
+ x: true
59
+ y: ""
60
+ z: true
61
+ random_rotation:
62
+ x:
63
+ - -5
64
+ - 5
65
+ y:
66
+ - -180
67
+ - 180
68
+ z:
69
+ - -5
70
+ - 5
71
+ random_translation:
72
+ x:
73
+ - -0.10
74
+ - 0.10
75
+ y:
76
+ - -0.10
77
+ - 0.10
78
+ z:
79
+ - -0.10
80
+ - 0.10
81
+ environment_config:
82
+ actions: all
83
+ protocol: "pro1" # only on ExPI 'pro1: common action split; 0-6: single action split; pro3: unseen action split'
84
+ evaluate_from: 0
85
+ is_norm: true
86
+ job: 16
87
+ sample_rate: 2
88
+ return_all_joints: true
89
+ save_grads: false
90
+ test_batch: 128
91
+ train_batch: 128
92
+ general_config:
93
+ data_dir: /ai-research/datasets/attention/ann_h3.6m/
94
+ experiment_name: short-STSGCN
95
+ load_model_path: ''
96
+ log_path: /ai-research/notebooks/testing_repos/logdir/
97
+ model_name_rel_path: short-STSGCN
98
+ save_all_intermediate_models: false
99
+ save_models: true
100
+ tensorboard:
101
+ num_mesh: 4
102
+ meta_config:
103
+ comment: Adding Benchmarking for H3.6M, AMASS, CMU and 3DPW, ExPI on our new architecture
104
+ project: Attention
105
+ task: 3d motion prediction on 18, 22 and 25 joints testing on 18 and 32 joints
106
+ version: 0.1.3
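Relative to the earlier configs, the key change is output_n: 10 instead of 25, which is where the short-400ms name comes from. A small sanity check of the horizon arithmetic, assuming H3.6M's native 50 Hz capture and that sample_rate: 2 keeps every other frame:

fps = 50 / 2                 # assumed effective frame rate after sampling
for output_n in (10, 25):
    print(output_n, "frames ->", 1000 * output_n / fps, "ms")
# 10 frames -> 400.0 ms (this config); 25 frames -> 1000.0 ms (the long one)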
h36m_detailed/short-400ms/16/files/model.py ADDED
@@ -0,0 +1,597 @@
1
+ import math
2
+
3
+ import torch
4
+ import torch.nn as nn
5
+ from torch.nn import functional as F
6
+
7
+ from ..layers import deformable_conv, SE
8
+
9
+ torch.manual_seed(0)
10
+
11
+
12
+ # This is the simple CNN layer,that performs a 2-D convolution while maintaining the dimensions of the input(except for the features dimension)
13
+ class CNN_layer(nn.Module):
14
+ def __init__(self,
15
+ in_ch,
16
+ out_ch,
17
+ kernel_size,
18
+ dropout,
19
+ bias=True):
20
+ super(CNN_layer, self).__init__()
21
+ self.kernel_size = kernel_size
22
+ padding = (
23
+ (kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2) # padding so that both dimensions are maintained
24
+ assert kernel_size[0] % 2 == 1 and kernel_size[1] % 2 == 1
25
+
26
+ self.block1 = [nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=padding, dilation=(1, 1)),
27
+ nn.BatchNorm2d(out_ch),
28
+ nn.Dropout(dropout, inplace=True),
29
+ ]
30
+
31
+ self.block1 = nn.Sequential(*self.block1)
32
+
33
+ def forward(self, x):
34
+ output = self.block1(x)
35
+ return output
36
+
37
+
38
+ class FPN(nn.Module):
39
+ def __init__(self, in_ch,
40
+ out_ch,
41
+ kernel, # (3,1)
42
+ dropout,
43
+ reduction,
44
+ ):
45
+ super(FPN, self).__init__()
46
+ kernel_size = kernel if isinstance(kernel, (tuple, list)) else (kernel, kernel)
47
+ padding = ((kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2)
48
+ pad1 = (padding[0], padding[1])
49
+ pad2 = (padding[0] + pad1[0], padding[1] + pad1[1])
50
+ pad3 = (padding[0] + pad2[0], padding[1] + pad2[1])
51
+ dil1 = (1, 1)
52
+ dil2 = (1 + pad1[0], 1 + pad1[1])
53
+ dil3 = (1 + pad2[0], 1 + pad2[1])
54
+ self.block1 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad1, dilation=dil1),
55
+ nn.BatchNorm2d(out_ch),
56
+ nn.Dropout(dropout, inplace=True),
57
+ nn.PReLU(),
58
+ )
59
+ self.block2 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad2, dilation=dil2),
60
+ nn.BatchNorm2d(out_ch),
61
+ nn.Dropout(dropout, inplace=True),
62
+ nn.PReLU(),
63
+ )
64
+ self.block3 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad3, dilation=dil3),
65
+ nn.BatchNorm2d(out_ch),
66
+ nn.Dropout(dropout, inplace=True),
67
+ nn.PReLU(),
68
+ )
69
+ self.pooling = nn.AdaptiveAvgPool2d((1, 1)) # Action Context.
70
+ self.compress = nn.Conv2d(out_ch * 3 + in_ch,
71
+ out_ch,
72
+ kernel_size=(1, 1)) # PRELU is outside the loop, check at the end of the code.
73
+
74
+ def forward(self, x):
75
+ b, dim, joints, seq = x.shape
76
+ global_action = F.interpolate(self.pooling(x), (joints, seq))
77
+ out = torch.cat((self.block1(x), self.block2(x), self.block3(x), global_action), dim=1)
78
+ out = self.compress(out)
79
+ return out
80
+
81
+
82
+ def mish(x):
83
+ return (x * torch.tanh(F.softplus(x)))
84
+
85
+
86
+ class ConvTemporalGraphical(nn.Module):
87
+ # Source : https://github.com/yysijie/st-gcn/blob/master/net/st_gcn.py
88
+ r"""The basic module for applying a graph convolution.
89
+ Args:
90
+ Shape:
91
+ - Input: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
92
+ - Output: Outpu graph sequence in :math:`(N, out_ch, T_{out}, V)` format
93
+ where
94
+ :math:`N` is a batch size,
95
+ :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
96
+ :math:`T_{in}/T_{out}` is a length of input/output sequence,
97
+ :math:`V` is the number of graph nodes.
98
+ """
99
+
100
+ def __init__(self, time_dim, joints_dim, domain, interpratable):
101
+ super(ConvTemporalGraphical, self).__init__()
102
+
103
+ if domain == "time":
104
+ # learnable, graph-agnostic 3-d adjacency matrix(or edge importance matrix)
105
+ size = joints_dim
106
+ if not interpratable:
107
+ self.A = nn.Parameter(torch.FloatTensor(time_dim, size, size))
108
+ self.domain = 'nctv,tvw->nctw'
109
+ else:
110
+ self.domain = 'nctv,ntvw->nctw'
111
+ elif domain == "space":
112
+ size = time_dim
113
+ if not interpratable:
114
+ self.A = nn.Parameter(torch.FloatTensor(joints_dim, size, size))
115
+ self.domain = 'nctv,vtq->ncqv'
116
+ else:
117
+ self.domain = 'nctv,nvtq->ncqv'
118
+ if not interpratable:
119
+ stdv = 1. / math.sqrt(self.A.size(1))
120
+ self.A.data.uniform_(-stdv, stdv)
121
+
122
+ def forward(self, x):
123
+ x = torch.einsum(self.domain, (x, self.A))
124
+ return x.contiguous()
125
+
126
+
127
+ class Map2Adj(nn.Module):
128
+ def __init__(self,
129
+ in_ch,
130
+ time_dim,
131
+ joints_dim,
132
+ domain,
133
+ dropout,
134
+ ):
135
+ super(Map2Adj, self).__init__()
136
+ self.domain = domain
137
+ inter_ch = in_ch // 2
138
+ self.time_compress = nn.Sequential(nn.Conv2d(in_ch, inter_ch, kernel_size=1, bias=False),
139
+ nn.BatchNorm2d(inter_ch),
140
+ nn.PReLU(),
141
+ nn.Conv2d(inter_ch, inter_ch, kernel_size=(time_dim, 1), bias=False),
142
+ nn.BatchNorm2d(inter_ch),
143
+ nn.Dropout(dropout, inplace=True),
144
+ nn.Conv2d(inter_ch, time_dim, kernel_size=1, bias=False),
145
+ )
146
+ self.joint_compress = nn.Sequential(nn.Conv2d(in_ch, inter_ch, kernel_size=1, bias=False),
147
+ nn.BatchNorm2d(inter_ch),
148
+ nn.PReLU(),
149
+ nn.Conv2d(inter_ch, inter_ch, kernel_size=(1, joints_dim), bias=False),
150
+ nn.BatchNorm2d(inter_ch),
151
+ nn.Dropout(dropout, inplace=True),
152
+ nn.Conv2d(inter_ch, joints_dim, kernel_size=1, bias=False),
153
+ )
154
+
155
+ if self.domain == "space":
156
+ ch = joints_dim
157
+ self.perm1 = (0, 1, 2, 3)
158
+ self.perm2 = (0, 3, 2, 1)
159
+ if self.domain == "time":
160
+ ch = time_dim
161
+ self.perm1 = (0, 2, 1, 3)
162
+ self.perm2 = (0, 1, 2, 3)
163
+
164
+ inter_ch = ch # // 2
165
+ self.expansor = nn.Sequential(nn.Conv2d(ch, inter_ch, kernel_size=1, bias=False),
166
+ nn.BatchNorm2d(inter_ch),
167
+ nn.Dropout(dropout, inplace=True),
168
+ nn.PReLU(),
169
+ nn.Conv2d(inter_ch, ch, kernel_size=1, bias=False),
170
+ )
171
+ self.time_compress.apply(self._init_weights)
172
+ self.joint_compress.apply(self._init_weights)
173
+ self.expansor.apply(self._init_weights)
174
+
175
+ def _init_weights(self, m, gain=0.05):
176
+ if isinstance(m, nn.Linear):
177
+ torch.nn.init.xavier_uniform_(m.weight, gain=gain)
178
+ if isinstance(m, (nn.Conv2d, nn.Conv1d)):
179
+ torch.nn.init.xavier_normal_(m.weight, gain=gain)
180
+ if isinstance(m, nn.PReLU):
181
+ torch.nn.init.constant_(m.weight, 0.25)
182
+
183
+ def forward(self, x):
184
+ b, dims, seq, joints = x.shape
185
+ dim_seq = self.time_compress(x)
186
+ dim_space = self.joint_compress(x)
187
+ o = torch.matmul(dim_space.permute(self.perm1), dim_seq.permute(self.perm2))
188
+ Adj = self.expansor(o)
189
+ return Adj
190
+
191
+
192
+ class Domain_GCNN_layer(nn.Module):
193
+ """
194
+ Shape:
195
+ - Input[0]: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
196
+ - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
197
+ - Output[0]: Outpu graph sequence in :math:`(N, out_ch, T_{out}, V)` format
198
+ where
199
+ :math:`N` is a batch size,
200
+ :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
201
+ :math:`T_{in}/T_{out}` is a length of input/output sequence,
202
+ :math:`V` is the number of graph nodes.
203
+ :in_ch= dimension of coordinates
204
+ : out_ch=dimension of coordinates
205
+ +
206
+ """
207
+
208
+ def __init__(self,
209
+ in_ch,
210
+ out_ch,
211
+ kernel_size,
212
+ stride,
213
+ time_dim,
214
+ joints_dim,
215
+ domain,
216
+ interpratable,
217
+ dropout,
218
+ bias=True):
219
+
220
+ super(Domain_GCNN_layer, self).__init__()
221
+ self.kernel_size = kernel_size
222
+ assert self.kernel_size[0] % 2 == 1
223
+ assert self.kernel_size[1] % 2 == 1
224
+ padding = ((self.kernel_size[0] - 1) // 2, (self.kernel_size[1] - 1) // 2)
225
+ self.interpratable = interpratable
226
+ self.domain = domain
227
+
228
+ self.gcn = ConvTemporalGraphical(time_dim, joints_dim, domain, interpratable)
229
+ self.tcn = nn.Sequential(nn.Conv2d(in_ch,
230
+ out_ch,
231
+ (self.kernel_size[0], self.kernel_size[1]),
232
+ (stride, stride),
233
+ padding,
234
+ ),
235
+ nn.BatchNorm2d(out_ch),
236
+ nn.Dropout(dropout, inplace=True),
237
+ )
238
+
239
+ if stride != 1 or in_ch != out_ch:
240
+ self.residual = nn.Sequential(nn.Conv2d(in_ch,
241
+ out_ch,
242
+ kernel_size=1,
243
+ stride=(1, 1)),
244
+ nn.BatchNorm2d(out_ch),
245
+ )
246
+ else:
247
+ self.residual = nn.Identity()
248
+ if self.interpratable:
249
+ self.map_to_adj = Map2Adj(in_ch,
250
+ time_dim,
251
+ joints_dim,
252
+ domain,
253
+ dropout,
254
+ )
255
+ else:
256
+ self.map_to_adj = nn.Identity()
257
+ self.prelu = nn.PReLU()
258
+
259
+ def forward(self, x):
260
+ # assert A.shape[0] == self.kernel_size[1], print(A.shape[0],self.kernel_size)
261
+ res = self.residual(x)
262
+ self.Adj = self.map_to_adj(x)
263
+ if self.interpratable:
264
+ self.gcn.A = self.Adj
265
+ x1 = self.gcn(x)
266
+ x2 = self.tcn(x1)
267
+ x3 = x2 + res
268
+ x4 = self.prelu(x3)
269
+ return x4
270
+
271
+
272
+ # Dynamic SpatioTemporal Decompose Graph Convolutions (DSTD-GC)
273
+ class DSTD_GC(nn.Module):
274
+ """
275
+ Shape:
276
+ - Input[0]: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
277
+ - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
278
+ - Output[0]: Outpu graph sequence in :math:`(N, out_ch, T_{out}, V)` format
279
+ where
280
+ :math:`N` is a batch size,
281
+ :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
282
+ :math:`T_{in}/T_{out}` is a length of input/output sequence,
283
+ :math:`V` is the number of graph nodes.
284
+ : in_ch= dimension of coordinates
285
+ : out_ch=dimension of coordinates
286
+ +
287
+ """
288
+
289
+ def __init__(self,
290
+ in_ch,
291
+ out_ch,
292
+ interpratable,
293
+ kernel_size,
294
+ stride,
295
+ time_dim,
296
+ joints_dim,
297
+ reduction,
298
+ dropout):
299
+ super(DSTD_GC, self).__init__()
300
+ self.dsgn = Domain_GCNN_layer(in_ch, out_ch, kernel_size, stride,
301
+ time_dim, joints_dim, "space", interpratable, dropout)
302
+ self.tsgn = Domain_GCNN_layer(in_ch, out_ch, kernel_size, stride,
303
+ time_dim, joints_dim, "time", interpratable, dropout)
304
+
305
+ self.compressor = nn.Sequential(nn.Conv2d(out_ch * 2, out_ch, 1, bias=False),
306
+ nn.BatchNorm2d(out_ch),
307
+ nn.PReLU(),
308
+ SE.SELayer2d(out_ch, reduction=reduction),
309
+ )
310
+ if stride != 1 or in_ch != out_ch:
311
+ self.residual = nn.Sequential(nn.Conv2d(in_ch,
312
+ out_ch,
313
+ kernel_size=1,
314
+ stride=(1, 1)),
315
+ nn.BatchNorm2d(out_ch),
316
+ )
317
+ else:
318
+ self.residual = nn.Identity()
319
+
320
+ # Weighting features
321
+ out_ch_c = out_ch // 2 if out_ch // 2 > 1 else 1
322
+ self.global_norm = nn.BatchNorm2d(in_ch)
323
+ self.conv_s = nn.Sequential(nn.Conv2d(in_ch, out_ch_c, (time_dim, 1), bias=False),
324
+ nn.BatchNorm2d(out_ch_c),
325
+ nn.Dropout(dropout, inplace=True),
326
+ nn.PReLU(),
327
+ nn.Conv2d(out_ch_c, out_ch, (1, joints_dim), bias=False),
328
+ nn.BatchNorm2d(out_ch),
329
+ nn.Dropout(dropout, inplace=True),
330
+ nn.PReLU(),
331
+ )
332
+ self.conv_t = nn.Sequential(nn.Conv2d(in_ch, out_ch_c, (time_dim, 1), bias=False),
333
+ nn.BatchNorm2d(out_ch_c),
334
+ nn.Dropout(dropout, inplace=True),
335
+ nn.PReLU(),
336
+ nn.Conv2d(out_ch_c, out_ch, (1, joints_dim), bias=False),
337
+ nn.BatchNorm2d(out_ch),
338
+ nn.Dropout(dropout, inplace=True),
339
+ nn.PReLU(),
340
+ )
341
+ self.map_s = nn.Sequential(nn.Linear(out_ch + 2 + time_dim * 2, out_ch, bias=False),
342
+ nn.BatchNorm1d(out_ch),
343
+ nn.Dropout(dropout, inplace=True),
344
+ nn.PReLU(),
345
+ nn.Linear(out_ch, out_ch, bias=False),
346
+ )
347
+ self.map_t = nn.Sequential(nn.Linear(out_ch + 2 + time_dim * 2, out_ch, bias=False),
348
+ nn.BatchNorm1d(out_ch),
349
+ nn.Dropout(dropout, inplace=True),
350
+ nn.PReLU(),
351
+ nn.Linear(out_ch, out_ch, bias=False),
352
+ )
353
+ self.prelu1 = nn.Sequential(nn.BatchNorm2d(out_ch),
354
+ nn.PReLU(),
355
+ )
356
+ self.prelu2 = nn.Sequential(nn.BatchNorm2d(out_ch),
357
+ nn.PReLU(),
358
+ )
359
+
360
+ def _get_stats_(self, x):
361
+ global_avg_pool = x.mean((3, 2)).mean(1, keepdims=True)
362
+ global_avg_pool_features = x.mean(3).mean(1)
363
+ global_std_pool = x.std((3, 2)).std(1, keepdims=True)
364
+ global_std_pool_features = x.std(3).std(1)
365
+ return torch.cat((
366
+ global_avg_pool,
367
+ global_avg_pool_features,
368
+ global_std_pool,
369
+ global_std_pool_features,
370
+ ),
371
+ dim=1)
372
+
373
+ def forward(self, x):
374
+ b, dim, seq, joints = x.shape # 64, 3, 10, 22
375
+ xn = self.global_norm(x)
376
+
377
+ stats = self._get_stats_(xn)
378
+ w1 = torch.cat((self.conv_s(xn).view(b, -1), stats), dim=1)
379
+ stats = self._get_stats_(xn)
380
+ w2 = torch.cat((self.conv_t(xn).view(b, -1), stats), dim=1)
381
+ self.w1 = self.map_s(w1)
382
+ self.w2 = self.map_t(w2)
383
+ w1 = self.w1[..., None, None]
384
+ w2 = self.w2[..., None, None]
385
+
386
+ x1 = self.dsgn(xn)
387
+ x2 = self.tsgn(xn)
388
+ out = torch.cat((self.prelu1(w1 * x1), self.prelu2(w2 * x2)), dim=1)
389
+ out = self.compressor(out)
390
+ return torch.clip(out + self.residual(xn), -1e5, 1e5)
391
+
392
+
393
+ class ContextLayer(nn.Module):
394
+ def __init__(self,
395
+ in_ch,
396
+ hidden_ch,
397
+ output_seq,
398
+ input_seq,
399
+ joints,
400
+ dims=3,
401
+ reduction=8,
402
+ dropout=0.1,
403
+ ):
404
+ super(ContextLayer, self).__init__()
405
+ self.n_output = output_seq
406
+ self.n_joints = joints
407
+ self.n_input = input_seq
408
+ self.context_conv1 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, 1, bias=False),
409
+ nn.BatchNorm2d(hidden_ch),
410
+ nn.PReLU(),
411
+ )
412
+
413
+ self.context_conv2 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, (input_seq, 1), bias=False),
414
+ nn.BatchNorm2d(hidden_ch),
415
+ nn.PReLU(),
416
+ )
417
+ self.context_conv3 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, 1, bias=False),
418
+ nn.BatchNorm2d(hidden_ch),
419
+ nn.PReLU(),
420
+ )
421
+ self.map1 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
422
+ nn.Dropout(dropout, inplace=True),
423
+ nn.PReLU(),
424
+ )
425
+ self.map2 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
426
+ nn.Dropout(dropout, inplace=True),
427
+ nn.PReLU(),
428
+ )
429
+ self.map3 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
430
+ nn.Dropout(dropout, inplace=True),
431
+ nn.PReLU(),
432
+ )
433
+
434
+ self.fmap_s = nn.Sequential(nn.Linear(self.n_output * 3, self.n_joints, bias=False),
435
+ nn.BatchNorm1d(self.n_joints),
436
+ nn.Dropout(dropout, inplace=True), )
437
+
438
+ self.fmap_t = nn.Sequential(nn.Linear(self.n_output * 3, self.n_output, bias=False),
439
+ nn.BatchNorm1d(self.n_output),
440
+ nn.Dropout(dropout, inplace=True), )
441
+
442
+ # inter_ch = self.n_joints # // 2
443
+ self.norm_map = nn.Sequential(nn.Conv1d(self.n_output, self.n_output, 1, bias=False),
444
+ nn.BatchNorm1d(self.n_output),
445
+ nn.Dropout(dropout, inplace=True),
446
+ nn.PReLU(),
447
+ SE.SELayer1d(self.n_output, reduction=reduction),
448
+ nn.Conv1d(self.n_output, self.n_output, 1, bias=False),
449
+ nn.BatchNorm1d(self.n_output),
450
+ nn.Dropout(dropout, inplace=True),
451
+ nn.PReLU(),
452
+ )
453
+
454
+ self.fconv = nn.Sequential(nn.Conv2d(1, dims, 1, bias=False),
455
+ nn.BatchNorm2d(dims),
456
+ nn.PReLU(),
457
+ nn.Conv2d(dims, dims, 1, bias=False),
458
+ nn.BatchNorm2d(dims),
459
+ nn.PReLU(),
460
+ )
461
+ self.SE = SE.SELayer2d(self.n_output, reduction=reduction)
462
+
463
+ def forward(self, x):
464
+ b, _, seq, joint_dim = x.shape
465
+ y1 = self.context_conv1(x).max(-1)[0].max(-1)[0]
466
+ y2 = self.context_conv2(x).view(b, -1, joint_dim).max(-1)[0]
467
+ ym = self.context_conv3(x).mean((2, 3))
468
+ y = torch.cat((self.map1(y1), self.map2(y2), self.map3(ym)), dim=1)
469
+ self.joints = self.fmap_s(y)
470
+ self.displacements = self.fmap_t(y) # .cumsum(1)
471
+ self.seq_joints = torch.bmm(self.displacements.unsqueeze(2), self.joints.unsqueeze(1))
472
+ self.seq_joints_n = self.norm_map(self.seq_joints)
473
+ self.seq_joints_dims = self.fconv(self.seq_joints_n.view(b, 1, self.n_output, self.n_joints))
474
+ o = self.SE(self.seq_joints_dims.permute(0, 2, 3, 1))
475
+ return o
476
+
477
+
478
+ class CISTGCN(nn.Module):
479
+ """
480
+ Shape:
481
+ - Input[0]: Input sequence in :math:`(N, in_ch,T_in, V)` format
482
+ - Output[0]: Output sequence in :math:`(N,T_out,in_ch, V)` format
483
+ where
484
+ :math:`N` is a batch size,
485
+ :math:`T_{in}/T_{out}` is a length of input/output sequence,
486
+ :math:`V` is the number of graph nodes.
487
+ :in_ch=number of channels for the coordiantes(default=3)
488
+ +
489
+ """
490
+
491
+ def __init__(self, arch, learn):
492
+ super(CISTGCN, self).__init__()
493
+ self.clipping = arch.model_params.clipping
494
+
495
+ self.n_input = arch.model_params.input_n
496
+ self.n_output = arch.model_params.output_n
497
+ self.n_joints = arch.model_params.joints
498
+ self.n_txcnn_layers = arch.model_params.n_txcnn_layers
499
+ self.txc_kernel_size = [arch.model_params.txc_kernel_size] * 2
500
+ self.input_gcn = arch.model_params.input_gcn
501
+ self.output_gcn = arch.model_params.output_gcn
502
+ self.reduction = arch.model_params.reduction
503
+ self.hidden_dim = arch.model_params.hidden_dim
504
+
505
+ self.st_gcnns = nn.ModuleList()
506
+ self.txcnns = nn.ModuleList()
507
+ self.se = nn.ModuleList()
508
+
509
+ self.in_conv = nn.ModuleList()
510
+ self.context_layer = nn.ModuleList()
511
+ self.trans = nn.ModuleList()
512
+ self.in_ch = 10
513
+ self.model_tx = self.input_gcn.model_complexity.copy()
514
+ self.model_tx.insert(0, 1) # add 1 in the position 0.
515
+
516
+ self.input_gcn.model_complexity.insert(0, self.in_ch)
517
+ self.input_gcn.model_complexity.append(self.in_ch)
518
+ # self.input_gcn.interpretable.insert(0, True)
519
+ # self.input_gcn.interpretable.append(False)
520
+ for i in range(len(self.input_gcn.model_complexity) - 1):
521
+ self.st_gcnns.append(DSTD_GC(self.input_gcn.model_complexity[i],
522
+ self.input_gcn.model_complexity[i + 1],
523
+ self.input_gcn.interpretable[i],
524
+ [1, 1], 1, self.n_input, self.n_joints, self.reduction, learn.dropout))
525
+
526
+ self.context_layer = ContextLayer(1, self.hidden_dim,
527
+ self.n_output, self.n_output, self.n_joints,
528
+ 3, self.reduction, learn.dropout
529
+ )
530
+
531
+ # at this point, we must permute the dimensions of the gcn network, from (N,C,T,V) into (N,T,C,V)
532
+ # with kernel_size[3,3] the dimensions of C,V will be maintained
533
+ self.txcnns.append(FPN(self.n_input, self.n_output, self.txc_kernel_size, 0., self.reduction))
534
+ for i in range(1, self.n_txcnn_layers):
535
+ self.txcnns.append(FPN(self.n_output, self.n_output, self.txc_kernel_size, 0., self.reduction))
536
+
537
+ self.prelus = nn.ModuleList()
538
+ for j in range(self.n_txcnn_layers):
539
+ self.prelus.append(nn.PReLU())
540
+
541
+ self.dim_conversor = nn.Sequential(nn.Conv2d(self.in_ch, 3, 1, bias=False),
542
+ nn.BatchNorm2d(3),
543
+ nn.PReLU(),
544
+ nn.Conv2d(3, 3, 1, bias=False),
545
+ nn.PReLU(3), )
546
+
547
+ self.st_gcnns_o = nn.ModuleList()
548
+ self.output_gcn.model_complexity.insert(0, 3)
549
+ for i in range(len(self.output_gcn.model_complexity) - 1):
550
+ self.st_gcnns_o.append(DSTD_GC(self.output_gcn.model_complexity[i],
551
+ self.output_gcn.model_complexity[i + 1],
552
+ self.output_gcn.interpretable[i],
553
+ [1, 1], 1, self.n_joints, self.n_output, self.reduction, learn.dropout))
554
+
555
+ self.st_gcnns_o.apply(self._init_weights)
556
+ self.st_gcnns.apply(self._init_weights)
557
+ self.txcnns.apply(self._init_weights)
558
+
559
+ def _init_weights(self, m, gain=0.1):
560
+ if isinstance(m, nn.Linear):
561
+ torch.nn.init.xavier_uniform_(m.weight, gain=gain)
562
+ # if isinstance(m, (nn.Conv2d, nn.Conv1d)):
563
+ # torch.nn.init.xavier_normal_(m.weight, gain=gain)
564
+ if isinstance(m, nn.PReLU):
565
+ torch.nn.init.constant_(m.weight, 0.25)
566
+
567
+ def forward(self, x):
568
+ b, seq, joints, dim = x.shape
569
+ vel = torch.zeros_like(x)
570
+ vel[:, :-1] = torch.diff(x, dim=1)
571
+ vel[:, -1] = x[:, -1]
572
+ acc = torch.zeros_like(x)
573
+ acc[:, :-1] = torch.diff(vel, dim=1)
574
+ acc[:, -1] = vel[:, -1]
575
+ x1 = torch.cat((x, acc, vel, torch.norm(vel, dim=-1, keepdim=True)), dim=-1)
576
+ x2 = x1.permute((0, 3, 1, 2)) # (torch.Size([64, 10, 22, 7])
577
+ x3 = x2
578
+
579
+ for i in range(len(self.st_gcnns)):
580
+ x3 = self.st_gcnns[i](x3)
581
+
582
+ x5 = x3.permute(0, 2, 1, 3) # prepare the input for the Time-Extrapolator-CNN (NCTV->NTCV)
583
+
584
+ x6 = self.prelus[0](self.txcnns[0](x5))
585
+ for i in range(1, self.n_txcnn_layers):
586
+ x6 = self.prelus[i](self.txcnns[i](x6)) + x6 # residual connection
587
+
588
+ x6 = self.dim_conversor(x6.permute(0, 2, 1, 3)).permute(0, 2, 3, 1)
589
+ x7 = x6.cumsum(1)
590
+
591
+ act = self.context_layer(x7.reshape(b, 1, self.n_output, joints * x7.shape[-1]))
592
+ x8 = x7.permute(0, 3, 2, 1)
593
+ for i in range(len(self.st_gcnns_o)):
594
+ x8 = self.st_gcnns_o[i](x8)
595
+ x9 = x8.permute(0, 3, 2, 1) + act
596
+
597
+ return x[:, -1:] + x9,
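
Editor's note: the first lines of `CISTGCN.forward` above stack position, acceleration, velocity and a per-joint speed norm, which is why the network hardcodes `self.in_ch = 10`. A standalone, runnable sketch of just that step (batch size 64 is illustrative; the 10-frame/22-joint shapes come from the config above):

import torch

x = torch.randn(64, 10, 22, 3)  # (batch, input_n, joints, xyz)
vel = torch.zeros_like(x)
vel[:, :-1] = torch.diff(x, dim=1)  # frame-to-frame displacement
vel[:, -1] = x[:, -1]  # last slot padded exactly as in the model
acc = torch.zeros_like(x)
acc[:, :-1] = torch.diff(vel, dim=1)
acc[:, -1] = vel[:, -1]
speed = torch.norm(vel, dim=-1, keepdim=True)  # per-joint speed magnitude
x1 = torch.cat((x, acc, vel, speed), dim=-1)
print(x1.shape)  # torch.Size([64, 10, 22, 10]): 3 pos + 3 acc + 3 vel + 1 norm = 10 channels
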
h36m_detailed/short-400ms/16/files/short-STSGCN-20230104_1806-id2293_best.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6c161bc7186d800db0d372133d13ac4bdf01ca89ca7d165e22386890088e64e6
+ size 3827665
h36m_detailed/short-400ms/16/files/short-STSGCN-20230104_1806-id2293_last.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6c161bc7186d800db0d372133d13ac4bdf01ca89ca7d165e22386890088e64e6
+ size 3827665
h36m_detailed/short-400ms/32/files/config-20230105_1400-id6760.yaml ADDED
@@ -0,0 +1,105 @@
+ architecture_config:
+   model: CISTGCN_0
+   model_params:
+     input_n: 10
+     joints: 22
+     output_n: 10
+     n_txcnn_layers: 4
+     txc_kernel_size: 3
+     reduction: 8
+     hidden_dim: 64
+     input_gcn:
+       model_complexity:
+         - 32
+         - 32
+         - 32
+         - 32
+       interpretable:
+         - true
+         - true
+         - true
+         - true
+         - true
+     output_gcn:
+       model_complexity:
+         - 3
+       interpretable:
+         - true
+     clipping: 15
+ learning_config:
+   WarmUp: 100
+   normalize: false
+   dropout: 0.1
+   weight_decay: 1e-4
+   epochs: 50
+   lr: 0.01
+   # max_norm: 3
+   scheduler:
+     type: StepLR
+     params:
+       step_size: 3000
+       gamma: 0.8
+   loss:
+     weights: ""
+     type: "mpjpe"
+   augmentations:
+     random_scale:
+       x:
+         - 0.95
+         - 1.05
+       y:
+         - 0.90
+         - 1.10
+       z:
+         - 0.95
+         - 1.05
+     random_noise: ""
+     random_flip:
+       x: true
+       y: ""
+       z: true
+     random_rotation:
+       x:
+         - -5
+         - 5
+       y:
+         - -180
+         - 180
+       z:
+         - -5
+         - 5
+     random_translation:
+       x:
+         - -0.10
+         - 0.10
+       y:
+         - -0.10
+         - 0.10
+       z:
+         - -0.10
+         - 0.10
+ environment_config:
+   actions: all
+   evaluate_from: 0
+   is_norm: true
+   job: 16
+   sample_rate: 2
+   return_all_joints: true
+   save_grads: false
+   test_batch: 128
+   train_batch: 128
+ general_config:
+   data_dir: /ai-research/datasets/attention/ann_h3.6m/
+   experiment_name: short-STSGCN
+   load_model_path: ''
+   log_path: /ai-research/notebooks/testing_repos/logdir/
+   model_name_rel_path: short-STSGCN
+   save_all_intermediate_models: false
+   save_models: true
+   tensorboard:
+     num_mesh: 4
+ meta_config:
+   comment: Adding Benchmarking for H3.6M, AMASS, CMU and 3DPW, ExPI on our new architecture
+   project: Attention
+   task: 3d motion prediction on 18, 22 and 25 joints testing on 18 and 32 joints
+   version: 0.1.3
h36m_detailed/short-400ms/32/files/model.py ADDED
@@ -0,0 +1,597 @@
+ import math
+
+ import torch
+ import torch.nn as nn
+ from torch.nn import functional as F
+
+ from ..layers import deformable_conv, SE
+
+ torch.manual_seed(0)
+
+
+ # This is a simple CNN layer that performs a 2-D convolution while maintaining the dimensions of the input (except for the features dimension)
+ class CNN_layer(nn.Module):
+     def __init__(self,
+                  in_ch,
+                  out_ch,
+                  kernel_size,
+                  dropout,
+                  bias=True):
+         super(CNN_layer, self).__init__()
+         self.kernel_size = kernel_size
+         padding = (
+             (kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2)  # padding so that both dimensions are maintained
+         assert kernel_size[0] % 2 == 1 and kernel_size[1] % 2 == 1
+
+         self.block1 = [nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=padding, dilation=(1, 1)),
+                        nn.BatchNorm2d(out_ch),
+                        nn.Dropout(dropout, inplace=True),
+                        ]
+
+         self.block1 = nn.Sequential(*self.block1)
+
+     def forward(self, x):
+         output = self.block1(x)
+         return output
+
+
+ class FPN(nn.Module):
+     def __init__(self, in_ch,
+                  out_ch,
+                  kernel,  # (3,1)
+                  dropout,
+                  reduction,
+                  ):
+         super(FPN, self).__init__()
+         kernel_size = kernel if isinstance(kernel, (tuple, list)) else (kernel, kernel)
+         padding = ((kernel_size[0] - 1) // 2, (kernel_size[1] - 1) // 2)
+         pad1 = (padding[0], padding[1])
+         pad2 = (padding[0] + pad1[0], padding[1] + pad1[1])
+         pad3 = (padding[0] + pad2[0], padding[1] + pad2[1])
+         dil1 = (1, 1)
+         dil2 = (1 + pad1[0], 1 + pad1[1])
+         dil3 = (1 + pad2[0], 1 + pad2[1])
+         self.block1 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad1, dilation=dil1),
+                                     nn.BatchNorm2d(out_ch),
+                                     nn.Dropout(dropout, inplace=True),
+                                     nn.PReLU(),
+                                     )
+         self.block2 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad2, dilation=dil2),
+                                     nn.BatchNorm2d(out_ch),
+                                     nn.Dropout(dropout, inplace=True),
+                                     nn.PReLU(),
+                                     )
+         self.block3 = nn.Sequential(nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, padding=pad3, dilation=dil3),
+                                     nn.BatchNorm2d(out_ch),
+                                     nn.Dropout(dropout, inplace=True),
+                                     nn.PReLU(),
+                                     )
+         self.pooling = nn.AdaptiveAvgPool2d((1, 1))  # Action Context.
+         self.compress = nn.Conv2d(out_ch * 3 + in_ch,
+                                   out_ch,
+                                   kernel_size=(1, 1))  # PReLU is outside the loop, check at the end of the code.
+
+     def forward(self, x):
+         b, dim, joints, seq = x.shape
+         global_action = F.interpolate(self.pooling(x), (joints, seq))
+         out = torch.cat((self.block1(x), self.block2(x), self.block3(x), global_action), dim=1)
+         out = self.compress(out)
+         return out
+
+
+ def mish(x):
+     return (x * torch.tanh(F.softplus(x)))
+
+
+ class ConvTemporalGraphical(nn.Module):
+     # Source: https://github.com/yysijie/st-gcn/blob/master/net/st_gcn.py
+     r"""The basic module for applying a graph convolution.
+     Args:
+     Shape:
+         - Input: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
+         - Output: Output graph sequence in :math:`(N, out_ch, T_{out}, V)` format
+         where
+             :math:`N` is a batch size,
+             :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
+             :math:`T_{in}/T_{out}` is a length of input/output sequence,
+             :math:`V` is the number of graph nodes.
+     """
+
+     def __init__(self, time_dim, joints_dim, domain, interpratable):
+         super(ConvTemporalGraphical, self).__init__()
+
+         if domain == "time":
+             # learnable, graph-agnostic 3-d adjacency matrix (or edge importance matrix)
+             size = joints_dim
+             if not interpratable:
+                 self.A = nn.Parameter(torch.FloatTensor(time_dim, size, size))
+                 self.domain = 'nctv,tvw->nctw'
+             else:
+                 self.domain = 'nctv,ntvw->nctw'
+         elif domain == "space":
+             size = time_dim
+             if not interpratable:
+                 self.A = nn.Parameter(torch.FloatTensor(joints_dim, size, size))
+                 self.domain = 'nctv,vtq->ncqv'
+             else:
+                 self.domain = 'nctv,nvtq->ncqv'
+         if not interpratable:
+             stdv = 1. / math.sqrt(self.A.size(1))
+             self.A.data.uniform_(-stdv, stdv)
+
+     def forward(self, x):
+         x = torch.einsum(self.domain, (x, self.A))
+         return x.contiguous()
+
+
+ class Map2Adj(nn.Module):
+     def __init__(self,
+                  in_ch,
+                  time_dim,
+                  joints_dim,
+                  domain,
+                  dropout,
+                  ):
+         super(Map2Adj, self).__init__()
+         self.domain = domain
+         inter_ch = in_ch // 2
+         self.time_compress = nn.Sequential(nn.Conv2d(in_ch, inter_ch, kernel_size=1, bias=False),
+                                            nn.BatchNorm2d(inter_ch),
+                                            nn.PReLU(),
+                                            nn.Conv2d(inter_ch, inter_ch, kernel_size=(time_dim, 1), bias=False),
+                                            nn.BatchNorm2d(inter_ch),
+                                            nn.Dropout(dropout, inplace=True),
+                                            nn.Conv2d(inter_ch, time_dim, kernel_size=1, bias=False),
+                                            )
+         self.joint_compress = nn.Sequential(nn.Conv2d(in_ch, inter_ch, kernel_size=1, bias=False),
+                                             nn.BatchNorm2d(inter_ch),
+                                             nn.PReLU(),
+                                             nn.Conv2d(inter_ch, inter_ch, kernel_size=(1, joints_dim), bias=False),
+                                             nn.BatchNorm2d(inter_ch),
+                                             nn.Dropout(dropout, inplace=True),
+                                             nn.Conv2d(inter_ch, joints_dim, kernel_size=1, bias=False),
+                                             )
+
+         if self.domain == "space":
+             ch = joints_dim
+             self.perm1 = (0, 1, 2, 3)
+             self.perm2 = (0, 3, 2, 1)
+         if self.domain == "time":
+             ch = time_dim
+             self.perm1 = (0, 2, 1, 3)
+             self.perm2 = (0, 1, 2, 3)
+
+         inter_ch = ch  # // 2
+         self.expansor = nn.Sequential(nn.Conv2d(ch, inter_ch, kernel_size=1, bias=False),
+                                       nn.BatchNorm2d(inter_ch),
+                                       nn.Dropout(dropout, inplace=True),
+                                       nn.PReLU(),
+                                       nn.Conv2d(inter_ch, ch, kernel_size=1, bias=False),
+                                       )
+         self.time_compress.apply(self._init_weights)
+         self.joint_compress.apply(self._init_weights)
+         self.expansor.apply(self._init_weights)
+
+     def _init_weights(self, m, gain=0.05):
+         if isinstance(m, nn.Linear):
+             torch.nn.init.xavier_uniform_(m.weight, gain=gain)
+         if isinstance(m, (nn.Conv2d, nn.Conv1d)):
+             torch.nn.init.xavier_normal_(m.weight, gain=gain)
+         if isinstance(m, nn.PReLU):
+             torch.nn.init.constant_(m.weight, 0.25)
+
+     def forward(self, x):
+         b, dims, seq, joints = x.shape
+         dim_seq = self.time_compress(x)
+         dim_space = self.joint_compress(x)
+         o = torch.matmul(dim_space.permute(self.perm1), dim_seq.permute(self.perm2))
+         Adj = self.expansor(o)
+         return Adj
+
+
+ class Domain_GCNN_layer(nn.Module):
+     """
+     Shape:
+         - Input[0]: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
+         - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
+         - Output[0]: Output graph sequence in :math:`(N, out_ch, T_{out}, V)` format
+         where
+             :math:`N` is a batch size,
+             :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
+             :math:`T_{in}/T_{out}` is a length of input/output sequence,
+             :math:`V` is the number of graph nodes.
+     :in_ch: dimension of coordinates
+     :out_ch: dimension of coordinates
+     +
+     """
+
+     def __init__(self,
+                  in_ch,
+                  out_ch,
+                  kernel_size,
+                  stride,
+                  time_dim,
+                  joints_dim,
+                  domain,
+                  interpratable,
+                  dropout,
+                  bias=True):
+
+         super(Domain_GCNN_layer, self).__init__()
+         self.kernel_size = kernel_size
+         assert self.kernel_size[0] % 2 == 1
+         assert self.kernel_size[1] % 2 == 1
+         padding = ((self.kernel_size[0] - 1) // 2, (self.kernel_size[1] - 1) // 2)
+         self.interpratable = interpratable
+         self.domain = domain
+
+         self.gcn = ConvTemporalGraphical(time_dim, joints_dim, domain, interpratable)
+         self.tcn = nn.Sequential(nn.Conv2d(in_ch,
+                                            out_ch,
+                                            (self.kernel_size[0], self.kernel_size[1]),
+                                            (stride, stride),
+                                            padding,
+                                            ),
+                                  nn.BatchNorm2d(out_ch),
+                                  nn.Dropout(dropout, inplace=True),
+                                  )
+
+         if stride != 1 or in_ch != out_ch:
+             self.residual = nn.Sequential(nn.Conv2d(in_ch,
+                                                     out_ch,
+                                                     kernel_size=1,
+                                                     stride=(1, 1)),
+                                           nn.BatchNorm2d(out_ch),
+                                           )
+         else:
+             self.residual = nn.Identity()
+         if self.interpratable:
+             self.map_to_adj = Map2Adj(in_ch,
+                                       time_dim,
+                                       joints_dim,
+                                       domain,
+                                       dropout,
+                                       )
+         else:
+             self.map_to_adj = nn.Identity()
+         self.prelu = nn.PReLU()
+
+     def forward(self, x):
+         # assert A.shape[0] == self.kernel_size[1], print(A.shape[0], self.kernel_size)
+         res = self.residual(x)
+         self.Adj = self.map_to_adj(x)
+         if self.interpratable:
+             self.gcn.A = self.Adj
+         x1 = self.gcn(x)
+         x2 = self.tcn(x1)
+         x3 = x2 + res
+         x4 = self.prelu(x3)
+         return x4
+
+
+ # Dynamic SpatioTemporal Decompose Graph Convolutions (DSTD-GC)
+ class DSTD_GC(nn.Module):
+     """
+     Shape:
+         - Input[0]: Input graph sequence in :math:`(N, in_ch, T_{in}, V)` format
+         - Input[1]: Input graph adjacency matrix in :math:`(K, V, V)` format
+         - Output[0]: Output graph sequence in :math:`(N, out_ch, T_{out}, V)` format
+         where
+             :math:`N` is a batch size,
+             :math:`K` is the spatial kernel size, as :math:`K == kernel_size[1]`,
+             :math:`T_{in}/T_{out}` is a length of input/output sequence,
+             :math:`V` is the number of graph nodes.
+     :in_ch: dimension of coordinates
+     :out_ch: dimension of coordinates
+     +
+     """
+
+     def __init__(self,
+                  in_ch,
+                  out_ch,
+                  interpratable,
+                  kernel_size,
+                  stride,
+                  time_dim,
+                  joints_dim,
+                  reduction,
+                  dropout):
+         super(DSTD_GC, self).__init__()
+         self.dsgn = Domain_GCNN_layer(in_ch, out_ch, kernel_size, stride,
+                                       time_dim, joints_dim, "space", interpratable, dropout)
+         self.tsgn = Domain_GCNN_layer(in_ch, out_ch, kernel_size, stride,
+                                       time_dim, joints_dim, "time", interpratable, dropout)
+
+         self.compressor = nn.Sequential(nn.Conv2d(out_ch * 2, out_ch, 1, bias=False),
+                                         nn.BatchNorm2d(out_ch),
+                                         nn.PReLU(),
+                                         SE.SELayer2d(out_ch, reduction=reduction),
+                                         )
+         if stride != 1 or in_ch != out_ch:
+             self.residual = nn.Sequential(nn.Conv2d(in_ch,
+                                                     out_ch,
+                                                     kernel_size=1,
+                                                     stride=(1, 1)),
+                                           nn.BatchNorm2d(out_ch),
+                                           )
+         else:
+             self.residual = nn.Identity()
+
+         # Weighting features
+         out_ch_c = out_ch // 2 if out_ch // 2 > 1 else 1
+         self.global_norm = nn.BatchNorm2d(in_ch)
+         self.conv_s = nn.Sequential(nn.Conv2d(in_ch, out_ch_c, (time_dim, 1), bias=False),
+                                     nn.BatchNorm2d(out_ch_c),
+                                     nn.Dropout(dropout, inplace=True),
+                                     nn.PReLU(),
+                                     nn.Conv2d(out_ch_c, out_ch, (1, joints_dim), bias=False),
+                                     nn.BatchNorm2d(out_ch),
+                                     nn.Dropout(dropout, inplace=True),
+                                     nn.PReLU(),
+                                     )
+         self.conv_t = nn.Sequential(nn.Conv2d(in_ch, out_ch_c, (time_dim, 1), bias=False),
+                                     nn.BatchNorm2d(out_ch_c),
+                                     nn.Dropout(dropout, inplace=True),
+                                     nn.PReLU(),
+                                     nn.Conv2d(out_ch_c, out_ch, (1, joints_dim), bias=False),
+                                     nn.BatchNorm2d(out_ch),
+                                     nn.Dropout(dropout, inplace=True),
+                                     nn.PReLU(),
+                                     )
+         self.map_s = nn.Sequential(nn.Linear(out_ch + 2 + time_dim * 2, out_ch, bias=False),
+                                    nn.BatchNorm1d(out_ch),
+                                    nn.Dropout(dropout, inplace=True),
+                                    nn.PReLU(),
+                                    nn.Linear(out_ch, out_ch, bias=False),
+                                    )
+         self.map_t = nn.Sequential(nn.Linear(out_ch + 2 + time_dim * 2, out_ch, bias=False),
+                                    nn.BatchNorm1d(out_ch),
+                                    nn.Dropout(dropout, inplace=True),
+                                    nn.PReLU(),
+                                    nn.Linear(out_ch, out_ch, bias=False),
+                                    )
+         self.prelu1 = nn.Sequential(nn.BatchNorm2d(out_ch),
+                                     nn.PReLU(),
+                                     )
+         self.prelu2 = nn.Sequential(nn.BatchNorm2d(out_ch),
+                                     nn.PReLU(),
+                                     )
+
+     def _get_stats_(self, x):
+         global_avg_pool = x.mean((3, 2)).mean(1, keepdims=True)
+         global_avg_pool_features = x.mean(3).mean(1)
+         global_std_pool = x.std((3, 2)).std(1, keepdims=True)
+         global_std_pool_features = x.std(3).std(1)
+         return torch.cat((
+             global_avg_pool,
+             global_avg_pool_features,
+             global_std_pool,
+             global_std_pool_features,
+         ),
+             dim=1)
+
+     def forward(self, x):
+         b, dim, seq, joints = x.shape  # 64, 3, 10, 22
+         xn = self.global_norm(x)
+
+         stats = self._get_stats_(xn)
+         w1 = torch.cat((self.conv_s(xn).view(b, -1), stats), dim=1)
+         stats = self._get_stats_(xn)
+         w2 = torch.cat((self.conv_t(xn).view(b, -1), stats), dim=1)
+         self.w1 = self.map_s(w1)
+         self.w2 = self.map_t(w2)
+         w1 = self.w1[..., None, None]
+         w2 = self.w2[..., None, None]
+
+         x1 = self.dsgn(xn)
+         x2 = self.tsgn(xn)
+         out = torch.cat((self.prelu1(w1 * x1), self.prelu2(w2 * x2)), dim=1)
+         out = self.compressor(out)
+         return torch.clip(out + self.residual(xn), -1e5, 1e5)
+
+
+ class ContextLayer(nn.Module):
+     def __init__(self,
+                  in_ch,
+                  hidden_ch,
+                  output_seq,
+                  input_seq,
+                  joints,
+                  dims=3,
+                  reduction=8,
+                  dropout=0.1,
+                  ):
+         super(ContextLayer, self).__init__()
+         self.n_output = output_seq
+         self.n_joints = joints
+         self.n_input = input_seq
+         self.context_conv1 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, 1, bias=False),
+                                            nn.BatchNorm2d(hidden_ch),
+                                            nn.PReLU(),
+                                            )
+
+         self.context_conv2 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, (input_seq, 1), bias=False),
+                                            nn.BatchNorm2d(hidden_ch),
+                                            nn.PReLU(),
+                                            )
+         self.context_conv3 = nn.Sequential(nn.Conv2d(in_ch, hidden_ch, 1, bias=False),
+                                            nn.BatchNorm2d(hidden_ch),
+                                            nn.PReLU(),
+                                            )
+         self.map1 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
+                                   nn.Dropout(dropout, inplace=True),
+                                   nn.PReLU(),
+                                   )
+         self.map2 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
+                                   nn.Dropout(dropout, inplace=True),
+                                   nn.PReLU(),
+                                   )
+         self.map3 = nn.Sequential(nn.Linear(hidden_ch, self.n_output, bias=False),
+                                   nn.Dropout(dropout, inplace=True),
+                                   nn.PReLU(),
+                                   )
+
+         self.fmap_s = nn.Sequential(nn.Linear(self.n_output * 3, self.n_joints, bias=False),
+                                     nn.BatchNorm1d(self.n_joints),
+                                     nn.Dropout(dropout, inplace=True), )
+
+         self.fmap_t = nn.Sequential(nn.Linear(self.n_output * 3, self.n_output, bias=False),
+                                     nn.BatchNorm1d(self.n_output),
+                                     nn.Dropout(dropout, inplace=True), )
+
+         # inter_ch = self.n_joints  # // 2
+         self.norm_map = nn.Sequential(nn.Conv1d(self.n_output, self.n_output, 1, bias=False),
+                                       nn.BatchNorm1d(self.n_output),
+                                       nn.Dropout(dropout, inplace=True),
+                                       nn.PReLU(),
+                                       SE.SELayer1d(self.n_output, reduction=reduction),
+                                       nn.Conv1d(self.n_output, self.n_output, 1, bias=False),
+                                       nn.BatchNorm1d(self.n_output),
+                                       nn.Dropout(dropout, inplace=True),
+                                       nn.PReLU(),
+                                       )
+
+         self.fconv = nn.Sequential(nn.Conv2d(1, dims, 1, bias=False),
+                                    nn.BatchNorm2d(dims),
+                                    nn.PReLU(),
+                                    nn.Conv2d(dims, dims, 1, bias=False),
+                                    nn.BatchNorm2d(dims),
+                                    nn.PReLU(),
+                                    )
+         self.SE = SE.SELayer2d(self.n_output, reduction=reduction)
+
+     def forward(self, x):
+         b, _, seq, joint_dim = x.shape
+         y1 = self.context_conv1(x).max(-1)[0].max(-1)[0]
+         y2 = self.context_conv2(x).view(b, -1, joint_dim).max(-1)[0]
+         ym = self.context_conv3(x).mean((2, 3))
+         y = torch.cat((self.map1(y1), self.map2(y2), self.map3(ym)), dim=1)
+         self.joints = self.fmap_s(y)
+         self.displacements = self.fmap_t(y)  # .cumsum(1)
+         self.seq_joints = torch.bmm(self.displacements.unsqueeze(2), self.joints.unsqueeze(1))
+         self.seq_joints_n = self.norm_map(self.seq_joints)
+         self.seq_joints_dims = self.fconv(self.seq_joints_n.view(b, 1, self.n_output, self.n_joints))
+         o = self.SE(self.seq_joints_dims.permute(0, 2, 3, 1))
+         return o
+
+
+ class CISTGCN(nn.Module):
+     """
+     Shape:
+         - Input[0]: Input sequence in :math:`(N, in_ch, T_in, V)` format
+         - Output[0]: Output sequence in :math:`(N, T_out, in_ch, V)` format
+         where
+             :math:`N` is a batch size,
+             :math:`T_{in}/T_{out}` is a length of input/output sequence,
+             :math:`V` is the number of graph nodes.
+     :in_ch: number of channels for the coordinates (default=3)
+     +
+     """
+
+     def __init__(self, arch, learn):
+         super(CISTGCN, self).__init__()
+         self.clipping = arch.model_params.clipping
+
+         self.n_input = arch.model_params.input_n
+         self.n_output = arch.model_params.output_n
+         self.n_joints = arch.model_params.joints
+         self.n_txcnn_layers = arch.model_params.n_txcnn_layers
+         self.txc_kernel_size = [arch.model_params.txc_kernel_size] * 2
+         self.input_gcn = arch.model_params.input_gcn
+         self.output_gcn = arch.model_params.output_gcn
+         self.reduction = arch.model_params.reduction
+         self.hidden_dim = arch.model_params.hidden_dim
+
+         self.st_gcnns = nn.ModuleList()
+         self.txcnns = nn.ModuleList()
+         self.se = nn.ModuleList()
+
+         self.in_conv = nn.ModuleList()
+         self.context_layer = nn.ModuleList()
+         self.trans = nn.ModuleList()
+         self.in_ch = 10
+         self.model_tx = self.input_gcn.model_complexity.copy()
+         self.model_tx.insert(0, 1)  # add 1 at position 0.
+
+         self.input_gcn.model_complexity.insert(0, self.in_ch)
+         self.input_gcn.model_complexity.append(self.in_ch)
+         # self.input_gcn.interpretable.insert(0, True)
+         # self.input_gcn.interpretable.append(False)
+         for i in range(len(self.input_gcn.model_complexity) - 1):
+             self.st_gcnns.append(DSTD_GC(self.input_gcn.model_complexity[i],
+                                          self.input_gcn.model_complexity[i + 1],
+                                          self.input_gcn.interpretable[i],
+                                          [1, 1], 1, self.n_input, self.n_joints, self.reduction, learn.dropout))
+
+         self.context_layer = ContextLayer(1, self.hidden_dim,
+                                           self.n_output, self.n_output, self.n_joints,
+                                           3, self.reduction, learn.dropout
+                                           )
+
+         # at this point, we must permute the dimensions of the gcn network, from (N,C,T,V) into (N,T,C,V)
+         # with kernel_size[3,3] the dimensions of C,V will be maintained
+         self.txcnns.append(FPN(self.n_input, self.n_output, self.txc_kernel_size, 0., self.reduction))
+         for i in range(1, self.n_txcnn_layers):
+             self.txcnns.append(FPN(self.n_output, self.n_output, self.txc_kernel_size, 0., self.reduction))
+
+         self.prelus = nn.ModuleList()
+         for j in range(self.n_txcnn_layers):
+             self.prelus.append(nn.PReLU())
+
+         self.dim_conversor = nn.Sequential(nn.Conv2d(self.in_ch, 3, 1, bias=False),
+                                            nn.BatchNorm2d(3),
+                                            nn.PReLU(),
+                                            nn.Conv2d(3, 3, 1, bias=False),
+                                            nn.PReLU(3), )
+
+         self.st_gcnns_o = nn.ModuleList()
+         self.output_gcn.model_complexity.insert(0, 3)
+         for i in range(len(self.output_gcn.model_complexity) - 1):
+             self.st_gcnns_o.append(DSTD_GC(self.output_gcn.model_complexity[i],
+                                            self.output_gcn.model_complexity[i + 1],
+                                            self.output_gcn.interpretable[i],
+                                            [1, 1], 1, self.n_joints, self.n_output, self.reduction, learn.dropout))
+
+         self.st_gcnns_o.apply(self._init_weights)
+         self.st_gcnns.apply(self._init_weights)
+         self.txcnns.apply(self._init_weights)
+
+     def _init_weights(self, m, gain=0.1):
+         if isinstance(m, nn.Linear):
+             torch.nn.init.xavier_uniform_(m.weight, gain=gain)
+         # if isinstance(m, (nn.Conv2d, nn.Conv1d)):
+         #     torch.nn.init.xavier_normal_(m.weight, gain=gain)
+         if isinstance(m, nn.PReLU):
+             torch.nn.init.constant_(m.weight, 0.25)
+
+     def forward(self, x):
+         b, seq, joints, dim = x.shape
+         vel = torch.zeros_like(x)
+         vel[:, :-1] = torch.diff(x, dim=1)
+         vel[:, -1] = x[:, -1]
+         acc = torch.zeros_like(x)
+         acc[:, :-1] = torch.diff(vel, dim=1)
+         acc[:, -1] = vel[:, -1]
+         x1 = torch.cat((x, acc, vel, torch.norm(vel, dim=-1, keepdim=True)), dim=-1)
+         x2 = x1.permute((0, 3, 1, 2))  # torch.Size([64, 10, 10, 22]): (batch, channels, frames, joints)
+         x3 = x2
+
+         for i in range(len(self.st_gcnns)):
+             x3 = self.st_gcnns[i](x3)
+
+         x5 = x3.permute(0, 2, 1, 3)  # prepare the input for the Time-Extrapolator-CNN (NCTV->NTCV)
+
+         x6 = self.prelus[0](self.txcnns[0](x5))
+         for i in range(1, self.n_txcnn_layers):
+             x6 = self.prelus[i](self.txcnns[i](x6)) + x6  # residual connection
+
+         x6 = self.dim_conversor(x6.permute(0, 2, 1, 3)).permute(0, 2, 3, 1)
+         x7 = x6.cumsum(1)
+
+         act = self.context_layer(x7.reshape(b, 1, self.n_output, joints * x7.shape[-1]))
+         x8 = x7.permute(0, 3, 2, 1)
+         for i in range(len(self.st_gcnns_o)):
+             x8 = self.st_gcnns_o[i](x8)
+         x9 = x8.permute(0, 3, 2, 1) + act
+
+         return x[:, -1:] + x9,
h36m_detailed/short-400ms/32/files/short-STSGCN-20230105_1400-id6760_best.pth.tar ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:565aa3f07715a52021a481065af53bf6b6f2e438a1fb8ea1cc5ea3ed0ccbd715
+ size 6026705
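
Editor's note: to run these checkpoints, the LFS payloads must first be materialized, e.g. with git-lfs or the `huggingface_hub` client. A sketch under stated assumptions: `REPO_ID` is a placeholder for this model repo's id, and since the commit does not document the checkpoint's internal layout, the code only inspects the loaded object rather than assuming a particular key:

import torch
from huggingface_hub import hf_hub_download

REPO_ID = "<user>/<this-repo>"  # placeholder: fill in the actual repo id
path = hf_hub_download(repo_id=REPO_ID,
                       filename="h36m_detailed/short-400ms/32/files/short-STSGCN-20230105_1400-id6760_best.pth.tar")
ckpt = torch.load(path, map_location="cpu")
print(type(ckpt), list(ckpt.keys()) if isinstance(ckpt, dict) else None)
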