Yang2001 committed on
Commit
06f810c
·
1 Parent(s): 82350f5

feat: support manual FOV input (deg/rad) with auto MoGe-2 fallback

Browse files
Files changed (5) hide show
  1. app.py +24 -5
  2. app_bak.py +5 -10
  3. app_local.py +18 -5
  4. index.html +59 -1
  5. index_bak.html +63 -3
app.py CHANGED
@@ -368,6 +368,8 @@ def generate_3d(
368
  tex_slat_guidance_rescale: float = 0.0,
369
  tex_slat_sampling_steps: int = 12,
370
  tex_slat_rescale_t: float = 3.0,
 
 
371
  session_id: str = "",
372
  ) -> Dict:
373
  init_models()
@@ -383,11 +385,28 @@ def generate_3d(
383
  temp_processed_path = os.path.join(TMP_DIR, f"temp_proc_{session_id[:8]}_{int(time.time()*1000)}.png")
384
  image_preprocessed.save(temp_processed_path)
385
 
386
- camera_params = get_camera_params_wild_moge(
387
- temp_processed_path, device="cuda",
388
- mesh_scale=WILD_MESH_SCALE, extend_pixel=WILD_EXTEND_PIXEL,
389
- image_resolution=WILD_IMAGE_RESOLUTION,
390
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
391
  _update_progress("Preprocessing & Camera Estimation", 1, 1)
392
 
393
  ss_sampler_override = {"steps": ss_sampling_steps, "guidance_strength": ss_guidance_strength,
 
368
  tex_slat_guidance_rescale: float = 0.0,
369
  tex_slat_sampling_steps: int = 12,
370
  tex_slat_rescale_t: float = 3.0,
371
+ manual_fov: float = -1.0,
372
+ fov_unit: str = "deg",
373
  session_id: str = "",
374
  ) -> Dict:
375
  init_models()
 
385
  temp_processed_path = os.path.join(TMP_DIR, f"temp_proc_{session_id[:8]}_{int(time.time()*1000)}.png")
386
  image_preprocessed.save(temp_processed_path)
387
 
388
+ if manual_fov > 0:
389
+ # Convert to radians based on unit
390
+ if fov_unit == "rad":
391
+ camera_angle_x = float(manual_fov)
392
+ fov_deg = math.degrees(manual_fov)
393
+ else:
394
+ camera_angle_x = math.radians(manual_fov)
395
+ fov_deg = float(manual_fov)
396
+ grid_point = torch.tensor([-1.0, 0.0, 0.0])
397
+ distance = distance_from_fov(
398
+ camera_angle_x, grid_point,
399
+ torch.tensor([0 - WILD_EXTEND_PIXEL, WILD_IMAGE_RESOLUTION - 1 + WILD_EXTEND_PIXEL]),
400
+ WILD_MESH_SCALE, WILD_IMAGE_RESOLUTION
401
+ )["distance_from_x"]
402
+ camera_params = {'camera_angle_x': camera_angle_x, 'distance': distance, 'mesh_scale': WILD_MESH_SCALE}
403
+ print(f"[Camera] Using manual FOV: {fov_deg:.2f}° ({camera_angle_x:.4f} rad), distance: {distance:.4f}")
404
+ else:
405
+ camera_params = get_camera_params_wild_moge(
406
+ temp_processed_path, device="cuda",
407
+ mesh_scale=WILD_MESH_SCALE, extend_pixel=WILD_EXTEND_PIXEL,
408
+ image_resolution=WILD_IMAGE_RESOLUTION,
409
+ )
410
  _update_progress("Preprocessing & Camera Estimation", 1, 1)
411
 
412
  ss_sampler_override = {"steps": ss_sampling_steps, "guidance_strength": ss_guidance_strength,
app_bak.py CHANGED
@@ -256,28 +256,23 @@ from fastapi import Request
256
  PROGRESS_DIR = os.path.join(TMP_DIR, '_progress')
257
  os.makedirs(PROGRESS_DIR, exist_ok=True)
258
 
259
- _active_session_id = ""
260
- _active_session_lock = threading.Lock()
261
 
262
  def _progress_file(session_id: str) -> str:
263
  """Return path to a session's progress JSON file."""
264
  return os.path.join(PROGRESS_DIR, f"{session_id}.json")
265
 
266
  def _reset_progress(session_id: str):
267
- global _active_session_id
268
- with _active_session_lock:
269
- _active_session_id = session_id
270
  _write_progress_file(session_id, {"stage": "Initializing...", "step": 0, "total": 0, "done": False})
271
 
272
  def _update_progress(stage: str, step: int, total: int):
273
- with _active_session_lock:
274
- session_id = _active_session_id
275
  if session_id:
276
  _write_progress_file(session_id, {"stage": stage, "step": step, "total": total, "done": False})
277
 
278
  def _finish_progress():
279
- with _active_session_lock:
280
- session_id = _active_session_id
281
  if session_id:
282
  _write_progress_file(session_id, {"done": True})
283
 
@@ -495,4 +490,4 @@ if __name__ == "__main__":
495
  # Pre-initialize models before launching the server
496
  init_models()
497
 
498
- app.launch(show_error=True, share=True,server_port=8123)
 
256
  PROGRESS_DIR = os.path.join(TMP_DIR, '_progress')
257
  os.makedirs(PROGRESS_DIR, exist_ok=True)
258
 
259
+ _thread_local = threading.local()
 
260
 
261
  def _progress_file(session_id: str) -> str:
262
  """Return path to a session's progress JSON file."""
263
  return os.path.join(PROGRESS_DIR, f"{session_id}.json")
264
 
265
  def _reset_progress(session_id: str):
266
+ _thread_local.active_session = session_id
 
 
267
  _write_progress_file(session_id, {"stage": "Initializing...", "step": 0, "total": 0, "done": False})
268
 
269
  def _update_progress(stage: str, step: int, total: int):
270
+ session_id = getattr(_thread_local, 'active_session', '')
 
271
  if session_id:
272
  _write_progress_file(session_id, {"stage": stage, "step": step, "total": total, "done": False})
273
 
274
  def _finish_progress():
275
+ session_id = getattr(_thread_local, 'active_session', '')
 
276
  if session_id:
277
  _write_progress_file(session_id, {"done": True})
278
 
 
490
  # Pre-initialize models before launching the server
491
  init_models()
492
 
493
+ app.launch(show_error=True, share=True)
app_local.py CHANGED
@@ -470,6 +470,7 @@ def generate_3d(
470
  tex_slat_guidance_rescale: float = 0.0,
471
  tex_slat_sampling_steps: int = 12,
472
  tex_slat_rescale_t: float = 3.0,
 
473
  session_id: str = "",
474
  ) -> Dict:
475
  with acquire_inference(session_id):
@@ -486,11 +487,23 @@ def generate_3d(
486
  temp_processed_path = os.path.join(TMP_DIR, f"temp_proc_{session_id[:8]}_{int(time.time()*1000)}.png")
487
  image_preprocessed.save(temp_processed_path)
488
 
489
- camera_params = get_camera_params_wild_moge(
490
- temp_processed_path, device="cuda",
491
- mesh_scale=WILD_MESH_SCALE, extend_pixel=WILD_EXTEND_PIXEL,
492
- image_resolution=WILD_IMAGE_RESOLUTION,
493
- )
 
 
 
 
 
 
 
 
 
 
 
 
494
  _update_progress("Preprocessing & Camera Estimation", 1, 1)
495
 
496
  ss_sampler_override = {"steps": ss_sampling_steps, "guidance_strength": ss_guidance_strength,
 
470
  tex_slat_guidance_rescale: float = 0.0,
471
  tex_slat_sampling_steps: int = 12,
472
  tex_slat_rescale_t: float = 3.0,
473
+ manual_fov: float = -1.0,
474
  session_id: str = "",
475
  ) -> Dict:
476
  with acquire_inference(session_id):
 
487
  temp_processed_path = os.path.join(TMP_DIR, f"temp_proc_{session_id[:8]}_{int(time.time()*1000)}.png")
488
  image_preprocessed.save(temp_processed_path)
489
 
490
+ if manual_fov > 0:
491
+ # Use manually specified FOV (in degrees), convert to radians
492
+ camera_angle_x = math.radians(manual_fov)
493
+ grid_point = torch.tensor([-1.0, 0.0, 0.0])
494
+ distance = distance_from_fov(
495
+ camera_angle_x, grid_point,
496
+ torch.tensor([0 - WILD_EXTEND_PIXEL, WILD_IMAGE_RESOLUTION - 1 + WILD_EXTEND_PIXEL]),
497
+ WILD_MESH_SCALE, WILD_IMAGE_RESOLUTION
498
+ )["distance_from_x"]
499
+ camera_params = {'camera_angle_x': camera_angle_x, 'distance': distance, 'mesh_scale': WILD_MESH_SCALE}
500
+ print(f"[Camera] Using manual FOV: {manual_fov}° ({camera_angle_x:.4f} rad), distance: {distance:.4f}")
501
+ else:
502
+ camera_params = get_camera_params_wild_moge(
503
+ temp_processed_path, device="cuda",
504
+ mesh_scale=WILD_MESH_SCALE, extend_pixel=WILD_EXTEND_PIXEL,
505
+ image_resolution=WILD_IMAGE_RESOLUTION,
506
+ )
507
  _update_progress("Preprocessing & Camera Estimation", 1, 1)
508
 
509
  ss_sampler_override = {"steps": ss_sampling_steps, "guidance_strength": ss_guidance_strength,
index.html CHANGED
@@ -665,7 +665,9 @@
665
  3. Download the generated GLB file.
666
  </p>
667
  <p style="font-size: 0.72rem; color: var(--text-dim); line-height: 1.5; margin-top: 0.5rem; opacity: 0.7;">
668
- Note: Camera estimated automatically via MoGe-2.
 
 
669
  </p>
670
  <a href="https://ldyang694.github.io/projects/pixal3d/" target="_blank" class="btn btn-outline" style="margin-top: 1rem; padding: 0.6rem 1rem; font-size: 0.85rem;">
671
  <i data-lucide="globe" style="width: 16px;"></i>
@@ -692,6 +694,25 @@
692
  </button>
693
  </div>
694
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
695
  </div>
696
  </div>
697
 
@@ -995,6 +1016,38 @@
995
  updateFrame();
996
  };
997
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
998
  // Mode Grid
999
  const grid = document.getElementById('mode-grid');
1000
  MODES.forEach(m => {
@@ -1090,6 +1143,11 @@
1090
  ss_guidance_strength: parseFloat(document.getElementById('ss_gs').value),
1091
  ss_sampling_steps: parseInt(document.getElementById('ss_steps').value),
1092
  shape_slat_guidance_strength: parseFloat(document.getElementById('shape_gs').value),
 
 
 
 
 
1093
  session_id: sessionId
1094
  };
1095
 
 
665
  3. Download the generated GLB file.
666
  </p>
667
  <p style="font-size: 0.72rem; color: var(--text-dim); line-height: 1.5; margin-top: 0.5rem; opacity: 0.7;">
668
+ Note: FOV is auto-estimated via MoGe-2.<br>
669
+ If distortion occurs, try manual FOV.<br>
670
+ Default <b style="color: var(--primary);">0.2 rad</b> generally works well.
671
  </p>
672
  <a href="https://ldyang694.github.io/projects/pixal3d/" target="_blank" class="btn btn-outline" style="margin-top: 1rem; padding: 0.6rem 1rem; font-size: 0.85rem;">
673
  <i data-lucide="globe" style="width: 16px;"></i>
 
694
  </button>
695
  </div>
696
  </div>
697
+ <div class="input-wrapper">
698
+ <label>
699
+ Camera FOV
700
+ <span style="display: flex; align-items: center; gap: 0.35rem;">
701
+ <input type="checkbox" id="fov-auto" checked style="accent-color: var(--primary); width: 14px; height: 14px; cursor: pointer;">
702
+ <span style="font-size: 0.75rem; color: var(--text-dim); cursor: pointer;" onclick="document.getElementById('fov-auto').click()">Auto</span>
703
+ </span>
704
+ </label>
705
+ <div style="display: flex; gap: 0.5rem;">
706
+ <input type="number" id="manual-fov" value="0.2" min="0.0175" max="2.9671" step="0.01" disabled style="opacity: 0.4; flex: 1;">
707
+ <select id="fov-unit" disabled style="opacity: 0.4; width: 70px; padding: 0.5rem;">
708
+ <option value="deg">deg</option>
709
+ <option value="rad" selected>rad</option>
710
+ </select>
711
+ </div>
712
+ <p id="fov-hint" style="font-size: 0.65rem; color: var(--text-dim); margin-top: 0.15rem;">
713
+ Manual FOV in radians (0.02–2.97 rad)
714
+ </p>
715
+ </div>
716
  </div>
717
  </div>
718
 
 
1016
  updateFrame();
1017
  };
1018
 
1019
+ // FOV auto toggle & unit switch
1020
+ const fovAutoCheck = document.getElementById('fov-auto');
1021
+ const fovInput = document.getElementById('manual-fov');
1022
+ const fovUnit = document.getElementById('fov-unit');
1023
+ const fovHint = document.getElementById('fov-hint');
1024
+
1025
+ function updateFovEnabled() {
1026
+ const manual = !fovAutoCheck.checked;
1027
+ fovInput.disabled = !manual;
1028
+ fovUnit.disabled = !manual;
1029
+ fovInput.style.opacity = manual ? '1' : '0.4';
1030
+ fovUnit.style.opacity = manual ? '1' : '0.4';
1031
+ }
1032
+
1033
+ fovAutoCheck.onchange = updateFovEnabled;
1034
+
1035
+ fovUnit.onchange = () => {
1036
+ const val = parseFloat(fovInput.value);
1037
+ if (isNaN(val)) return;
1038
+ if (fovUnit.value === 'rad') {
1039
+ // deg -> rad
1040
+ fovInput.value = (val * Math.PI / 180).toFixed(4);
1041
+ fovInput.min = '0.0175'; fovInput.max = '2.9671'; fovInput.step = '0.01';
1042
+ fovHint.textContent = 'Manual FOV in radians (0.02–2.97 rad)';
1043
+ } else {
1044
+ // rad -> deg
1045
+ fovInput.value = (val * 180 / Math.PI).toFixed(1);
1046
+ fovInput.min = '1'; fovInput.max = '170'; fovInput.step = '0.5';
1047
+ fovHint.textContent = 'Uncheck "Auto" to manually set FOV (1°–170°)';
1048
+ }
1049
+ };
1050
+
1051
  // Mode Grid
1052
  const grid = document.getElementById('mode-grid');
1053
  MODES.forEach(m => {
 
1143
  ss_guidance_strength: parseFloat(document.getElementById('ss_gs').value),
1144
  ss_sampling_steps: parseInt(document.getElementById('ss_steps').value),
1145
  shape_slat_guidance_strength: parseFloat(document.getElementById('shape_gs').value),
1146
+ manual_fov: (() => {
1147
+ if (document.getElementById('fov-auto').checked) return -1.0;
1148
+ const v = parseFloat(document.getElementById('manual-fov').value);
1149
+ return document.getElementById('fov-unit').value === 'rad' ? v * 180 / Math.PI : v;
1150
+ })(),
1151
  session_id: sessionId
1152
  };
1153
 
index_bak.html CHANGED
@@ -627,9 +627,6 @@
627
  2. Click Extract GLB to export.<br>
628
  3. Download the generated GLB file.
629
  </p>
630
- <p style="font-size: 0.72rem; color: var(--text-dim); line-height: 1.5; margin-top: 0.5rem; opacity: 0.7;">
631
- Note: Camera estimated automatically via MoGe-2.
632
- </p>
633
  <a href="https://ldyang694.github.io/projects/pixal3d/" target="_blank" class="btn btn-outline" style="margin-top: 1rem; padding: 0.6rem 1rem; font-size: 0.85rem;">
634
  <i data-lucide="globe" style="width: 16px;"></i>
635
  Project Page
@@ -665,6 +662,32 @@
665
  </button>
666
  </div>
667
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
668
  </div>
669
  </div>
670
 
@@ -944,6 +967,38 @@
944
  updateFrame();
945
  };
946
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
947
  // Mode Grid
948
  const grid = document.getElementById('mode-grid');
949
  MODES.forEach(m => {
@@ -1037,6 +1092,11 @@
1037
  ss_guidance_strength: parseFloat(document.getElementById('ss_gs').value),
1038
  ss_sampling_steps: parseInt(document.getElementById('ss_steps').value),
1039
  shape_slat_guidance_strength: parseFloat(document.getElementById('shape_gs').value),
 
 
 
 
 
1040
  session_id: sessionId
1041
  };
1042
 
 
627
  2. Click Extract GLB to export.<br>
628
  3. Download the generated GLB file.
629
  </p>
 
 
 
630
  <a href="https://ldyang694.github.io/projects/pixal3d/" target="_blank" class="btn btn-outline" style="margin-top: 1rem; padding: 0.6rem 1rem; font-size: 0.85rem;">
631
  <i data-lucide="globe" style="width: 16px;"></i>
632
  Project Page
 
662
  </button>
663
  </div>
664
  </div>
665
+ <div class="input-wrapper">
666
+ <p style="font-size: 0.72rem; color: var(--text-dim); line-height: 1.5; opacity: 0.7;">
667
+ Note: FOV is auto-estimated via MoGe-2.<br>
668
+ If distortion occurs, try manual FOV.<br>
669
+ Default <b style="color: var(--primary);">0.2 rad</b> generally works well.
670
+ </p>
671
+ </div>
672
+ <div class="input-wrapper">
673
+ <label>
674
+ Camera FOV
675
+ <span style="display: flex; align-items: center; gap: 0.35rem;">
676
+ <input type="checkbox" id="fov-auto" checked style="accent-color: var(--primary); width: 14px; height: 14px; cursor: pointer;">
677
+ <span style="font-size: 0.75rem; color: var(--text-dim); cursor: pointer;" onclick="document.getElementById('fov-auto').click()">Auto</span>
678
+ </span>
679
+ </label>
680
+ <div style="display: flex; gap: 0.5rem;">
681
+ <input type="number" id="manual-fov" value="0.2" min="0.0175" max="2.9671" step="0.01" disabled style="opacity: 0.4; flex: 1;">
682
+ <select id="fov-unit" disabled style="opacity: 0.4; width: 70px; padding: 0.5rem;">
683
+ <option value="deg">deg</option>
684
+ <option value="rad" selected>rad</option>
685
+ </select>
686
+ </div>
687
+ <p id="fov-hint" style="font-size: 0.65rem; color: var(--text-dim); margin-top: 0.15rem;">
688
+ Manual FOV in radians (0.02–2.97 rad)
689
+ </p>
690
+ </div>
691
  </div>
692
  </div>
693
 
 
967
  updateFrame();
968
  };
969
 
970
+ // FOV auto toggle & unit switch
971
+ const fovAutoCheck = document.getElementById('fov-auto');
972
+ const fovInput = document.getElementById('manual-fov');
973
+ const fovUnit = document.getElementById('fov-unit');
974
+ const fovHint = document.getElementById('fov-hint');
975
+
976
+ function updateFovEnabled() {
977
+ const manual = !fovAutoCheck.checked;
978
+ fovInput.disabled = !manual;
979
+ fovUnit.disabled = !manual;
980
+ fovInput.style.opacity = manual ? '1' : '0.4';
981
+ fovUnit.style.opacity = manual ? '1' : '0.4';
982
+ }
983
+
984
+ fovAutoCheck.onchange = updateFovEnabled;
985
+
986
+ fovUnit.onchange = () => {
987
+ const val = parseFloat(fovInput.value);
988
+ if (isNaN(val)) return;
989
+ if (fovUnit.value === 'rad') {
990
+ // deg -> rad
991
+ fovInput.value = (val * Math.PI / 180).toFixed(4);
992
+ fovInput.min = '0.0175'; fovInput.max = '2.9671'; fovInput.step = '0.01';
993
+ fovHint.textContent = 'Manual FOV in radians (0.02–2.97 rad)';
994
+ } else {
995
+ // rad -> deg
996
+ fovInput.value = (val * 180 / Math.PI).toFixed(1);
997
+ fovInput.min = '1'; fovInput.max = '170'; fovInput.step = '0.5';
998
+ fovHint.textContent = 'Uncheck "Auto" to manually set FOV (1°–170°)';
999
+ }
1000
+ };
1001
+
1002
  // Mode Grid
1003
  const grid = document.getElementById('mode-grid');
1004
  MODES.forEach(m => {
 
1092
  ss_guidance_strength: parseFloat(document.getElementById('ss_gs').value),
1093
  ss_sampling_steps: parseInt(document.getElementById('ss_steps').value),
1094
  shape_slat_guidance_strength: parseFloat(document.getElementById('shape_gs').value),
1095
+ manual_fov: (() => {
1096
+ if (document.getElementById('fov-auto').checked) return -1.0;
1097
+ const v = parseFloat(document.getElementById('manual-fov').value);
1098
+ return document.getElementById('fov-unit').value === 'rad' ? v * 180 / Math.PI : v;
1099
+ })(),
1100
  session_id: sessionId
1101
  };
1102