xizaoqu
committed on
Commit
·
0cb2a53
1
Parent(s):
3b61a0b
update
Browse files- app.py +18 -7
- configurations/huggingface.yaml +1 -1
app.py
CHANGED
@@ -241,7 +241,7 @@ def set_memory_length(memory_length, sampling_memory_length_state):
|
|
241 |
|
242 |
def generate(keys):
|
243 |
# print("algo frame:", len(worldmem.frames))
|
244 |
-
|
245 |
global input_history
|
246 |
global memory_frames
|
247 |
global memory_curr_frame
|
@@ -251,8 +251,19 @@ def generate(keys):
|
|
251 |
global self_memory_c2w
|
252 |
global self_frame_idx
|
253 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
254 |
new_frame, self_frames, self_actions, self_poses, self_memory_c2w, self_frame_idx = run_interactive(memory_frames[0],
|
255 |
-
|
256 |
None,
|
257 |
device=device,
|
258 |
self_frames=self_frames,
|
@@ -422,12 +433,12 @@ with gr.Blocks(css=css) as demo:
|
|
422 |
- D: turn right
|
423 |
- Q: move forward
|
424 |
- E: move backward
|
425 |
-
- N: no-op (do nothing)
|
426 |
-
- 1: switch to hotbar 1
|
427 |
- U: use item
|
428 |
-
5. Higher denoising steps produce more detailed results but take longer.
|
429 |
-
6.
|
430 |
-
7.
|
|
|
431 |
"""
|
432 |
)
|
433 |
# input_box.submit(update_image_and_log, inputs=[input_box], outputs=[image_display, video_display, log_output])
|
|
|
241 |
|
242 |
def generate(keys):
|
243 |
# print("algo frame:", len(worldmem.frames))
|
244 |
+
input_actions = parse_input_to_tensor(keys)
|
245 |
global input_history
|
246 |
global memory_frames
|
247 |
global memory_curr_frame
|
|
|
251 |
global self_memory_c2w
|
252 |
global self_frame_idx
|
253 |
|
254 |
+
if self_frames is None:
|
255 |
+
new_frame, self_frames, self_actions, self_poses, self_memory_c2w, self_frame_idx = run_interactive(memory_frames[0],
|
256 |
+
actions[0],
|
257 |
+
poses[0],
|
258 |
+
device=device,
|
259 |
+
self_frames=self_frames,
|
260 |
+
self_actions=self_actions,
|
261 |
+
self_poses=self_poses,
|
262 |
+
self_memory_c2w=self_memory_c2w,
|
263 |
+
self_frame_idx=self_frame_idx)
|
264 |
+
|
265 |
new_frame, self_frames, self_actions, self_poses, self_memory_c2w, self_frame_idx = run_interactive(memory_frames[0],
|
266 |
+
input_actions,
|
267 |
None,
|
268 |
device=device,
|
269 |
self_frames=self_frames,
|
|
|
433 |
- D: turn right
|
434 |
- Q: move forward
|
435 |
- E: move backward
|
436 |
+
- N: no-op (do nothing)
|
|
|
437 |
- U: use item
|
438 |
+
5. Higher denoising steps produce more detailed results but take longer. 20 steps is a good balance between quality and speed. The same applies to context and memory length.
|
439 |
+
6. For faster performance, we recommend running the demo locally (~1s/frame on H100 vs ~5s on Spaces).
|
440 |
+
7. If you find this project interesting or useful, please consider giving it a ⭐️ on [GitHub]()!
|
441 |
+
8. For feedback or suggestions, feel free to open a GitHub issue or contact me directly at **zeqixiao1@gmail.com**.
|
442 |
"""
|
443 |
)
|
444 |
# input_box.submit(update_image_and_log, inputs=[input_box], outputs=[image_display, video_display, log_output])
|
configurations/huggingface.yaml
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
n_tokens:
|
2 |
pose_cond_dim: 5
|
3 |
use_plucker: true
|
4 |
focal_length: 0.35
|
|
|
1 |
+
n_tokens: 3
|
2 |
pose_cond_dim: 5
|
3 |
use_plucker: true
|
4 |
focal_length: 0.35
|