Spaces:
Build error
Build error
add: changes
Browse files
- .gitattributes +1 -0
- app.py +32 -1
.gitattributes
CHANGED
@@ -26,3 +26,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
28 |
monoscene_kitti.ckpt filter=lfs diff=lfs merge=lfs -text
|
|
|
|
26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
28 |
monoscene_kitti.ckpt filter=lfs diff=lfs merge=lfs -text
|
29 |
+
monoscene_nyu.ckpt filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
@@ -22,10 +22,41 @@ model = MonoScene.load_from_checkpoint(
|
|
22 |
full_scene_size=(60, 36, 60),
|
23 |
)
|
24 |
|
25 |
-
img_W, img_H = 640, 480
|
26 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
27 |
|
28 |
def predict(img):
|
|
|
29 |
img = np.array(img, dtype=np.float32, copy=False) / 255.0
|
30 |
|
31 |
normalize_rgb = transforms.Compose(
|
|
|
22 |
full_scene_size=(60, 36, 60),
|
23 |
)
|
24 |
|
|
|
25 |
|
26 |
+
def get_projections(img_W, img_H):
    """Build the voxel-to-pixel projection data for each 3D scale.

    For every scale in ``(1, 2)`` the voxel grid is projected into the image
    with ``vox2pix`` using a fixed scene size, voxel origin, camera intrinsics
    and camera pose, with the voxel size multiplied by the scale.

    Args:
        img_W: Image width, forwarded unchanged to ``vox2pix``.
        img_H: Image height, forwarded unchanged to ``vox2pix``.

    Returns:
        dict: For each scale ``s`` in ``(1, 2)``, the keys
        ``"projected_pix_s"``, ``"pix_z_s"`` and ``"fov_mask_s"`` holding the
        three values returned by ``vox2pix`` for that scale.
    """
    scale_3ds = [1, 2]

    # Everything below is independent of the scale, so it is built once
    # instead of on every loop iteration.
    # NOTE(review): this assumes vox2pix does not modify its array arguments
    # in place -- confirm against its implementation.
    scene_size = (4.8, 4.8, 2.88)  # presumably metres -- TODO confirm
    vox_origin = np.array([-1.54591799, 0.8907361, -0.05])
    voxel_size = 0.08  # base voxel edge length; scaled per level below

    # NOTE(review): the principal point (320, 240) looks like the centre of a
    # 640x480 image -- confirm that callers always pass that resolution.
    cam_k = np.array([[518.8579, 0, 320], [0, 518.8579, 240], [0, 0, 1]])
    cam_pose = np.asarray(
        [
            [9.6699458e-01, 4.2662762e-02, 2.5120059e-01, 0.0000000e+00],
            [-2.5147417e-01, 1.0867463e-03, 9.6786356e-01, 0.0000000e+00],
            [4.1018680e-02, -9.9908894e-01, 1.1779292e-02, 1.1794727e+00],
            [0.0000000e+00, 0.0000000e+00, 0.0000000e+00, 1.0000000e+00],
        ]
    )
    # vox2pix is given the inverse of the camera pose matrix.
    T_velo_2_cam = np.linalg.inv(cam_pose)

    data = {}
    for scale_3d in scale_3ds:
        # compute the 3D-2D mapping
        projected_pix, fov_mask, pix_z = vox2pix(
            T_velo_2_cam,
            cam_k,
            vox_origin,
            voxel_size * scale_3d,
            img_W,
            img_H,
            scene_size,
        )

        data["projected_pix_{}".format(scale_3d)] = projected_pix
        data["pix_z_{}".format(scale_3d)] = pix_z
        data["fov_mask_{}".format(scale_3d)] = fov_mask
    return data
|
57 |
|
58 |
def predict(img):
|
59 |
+
img_W, img_H = 640, 480
|
60 |
img = np.array(img, dtype=np.float32, copy=False) / 255.0
|
61 |
|
62 |
normalize_rgb = transforms.Compose(
|