Upload 8 files
Browse files- ViT-L-14.pt +3 -0
- diffusion_config.yaml +3 -0
- image_cond.pt +3 -0
- image_cond_config.yaml +13 -0
- text_cond.pt +3 -0
- text_cond_config.yaml +14 -0
- transmitter.pt +3 -0
- transmitter_config.yaml +80 -0
ViT-L-14.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8cca3fd41ae0c99ba7e8951adf17d267cdb84cd88be6f7c2e0eca1737a03836
|
3 |
+
size 932768134
|
diffusion_config.yaml
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
mean_type: x_start
|
2 |
+
schedule: exp
|
3 |
+
timesteps: 1024
|
image_cond.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cb8072c64bbbcf6910488814d212227de5db291780d4ea99c6152f9346cf12aa
|
3 |
+
size 1263925407
|
image_cond_config.yaml
ADDED
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
d_latent: 1048576
|
2 |
+
inner:
|
3 |
+
cond_drop_prob: 0.1
|
4 |
+
heads: 8
|
5 |
+
init_scale: 0.25
|
6 |
+
layers: 24
|
7 |
+
name: CLIPImageGridPointDiffusionTransformer
|
8 |
+
pos_emb_init_scale: 0.05
|
9 |
+
time_token_cond: true
|
10 |
+
use_pos_emb: true
|
11 |
+
width: 1024
|
12 |
+
latent_ctx: 1024
|
13 |
+
name: SplitVectorDiffusion
|
text_cond.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6b4fa599a7b3c3b16c222d5f5fe56f9db9289ff0b6575fbe5c11bc97106aad4
|
3 |
+
size 1262868003
|
text_cond_config.yaml
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
d_latent: 1048576
|
2 |
+
inner:
|
3 |
+
cond_drop_prob: 0.1
|
4 |
+
heads: 16
|
5 |
+
init_scale: 0.25
|
6 |
+
layers: 24
|
7 |
+
name: CLIPImagePointDiffusionTransformer
|
8 |
+
pos_emb_init_scale: 0.05
|
9 |
+
time_token_cond: true
|
10 |
+
token_cond: true
|
11 |
+
use_pos_emb: true
|
12 |
+
width: 1024
|
13 |
+
latent_ctx: 1024
|
14 |
+
name: SplitVectorDiffusion
|
transmitter.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:af02a0b85a8abdfb3919584b63c540ba175f6ad4790f574a7fef4617e5acdc3b
|
3 |
+
size 1776048343
|
transmitter_config.yaml
ADDED
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
encoder:
|
2 |
+
cross_attention_dataset: pcl_and_multiview_pcl
|
3 |
+
d_latent: 1024
|
4 |
+
data_ctx: 1024
|
5 |
+
fps_method: first
|
6 |
+
heads: 8
|
7 |
+
init_scale: 0.25
|
8 |
+
inner_batch_size:
|
9 |
+
- 16384
|
10 |
+
- 20
|
11 |
+
input_channels: 6
|
12 |
+
latent_bottleneck:
|
13 |
+
diffusion:
|
14 |
+
schedule: inv_parabola
|
15 |
+
schedule_args:
|
16 |
+
power: 5.0
|
17 |
+
timesteps: 1024
|
18 |
+
diffusion_prob: 0.1
|
19 |
+
name: clamp_diffusion_noise
|
20 |
+
layers: 12
|
21 |
+
max_depth: 9.0
|
22 |
+
max_unrolls: 1
|
23 |
+
min_unrolls: 1
|
24 |
+
name: PointCloudPerceiverChannelsEncoder
|
25 |
+
params_proj:
|
26 |
+
init_scale: 1.0
|
27 |
+
learned_scale: 0.0625
|
28 |
+
name: channels
|
29 |
+
use_ln: true
|
30 |
+
patch_size: 8
|
31 |
+
pointconv_hidden:
|
32 |
+
- 1024
|
33 |
+
- 1024
|
34 |
+
pointconv_padding_mode: circular
|
35 |
+
pointconv_patch_size: 8
|
36 |
+
pointconv_samples: 64
|
37 |
+
pointconv_stride: 4
|
38 |
+
pos_emb: nerf
|
39 |
+
use_depth: true
|
40 |
+
use_pointconv: true
|
41 |
+
width: 1024
|
42 |
+
name: Transmitter
|
43 |
+
renderer:
|
44 |
+
grid_size: 128
|
45 |
+
n_coarse_samples: 64
|
46 |
+
n_fine_samples: 128
|
47 |
+
name: NeRSTFRenderer
|
48 |
+
nerstf:
|
49 |
+
activation: swish
|
50 |
+
d_hidden: 256
|
51 |
+
density_activation: relu
|
52 |
+
init_scale: 0.25
|
53 |
+
initial_density_bias: 0.1
|
54 |
+
insert_direction_at: 4
|
55 |
+
meta_bias: false
|
56 |
+
meta_parameters: true
|
57 |
+
n_hidden_layers: 6
|
58 |
+
n_meta_layers: 4
|
59 |
+
name: MLPNeRSTFModel
|
60 |
+
posenc_version: nerf
|
61 |
+
separate_coarse_channels: true
|
62 |
+
separate_nerf_channels: true
|
63 |
+
trainable_meta: false
|
64 |
+
separate_shared_samples: true
|
65 |
+
void:
|
66 |
+
background:
|
67 |
+
- 0
|
68 |
+
- 0
|
69 |
+
- 0
|
70 |
+
name: VoidNeRFModel
|
71 |
+
volume:
|
72 |
+
bbox_max:
|
73 |
+
- 1.0
|
74 |
+
- 1.0
|
75 |
+
- 1.0
|
76 |
+
bbox_min:
|
77 |
+
- -1.0
|
78 |
+
- -1.0
|
79 |
+
- -1.0
|
80 |
+
name: BoundingBoxVolume
|