Space: svjack (Runtime error)

yuandong513 committed
Commit 17cd746 · Parent: 13fa4fd

feat: init

This view is limited to 50 files because the commit contains too many changes.

Files changed (50)
  1. README.md +7 -5
  2. app.py +568 -0
  3. app_lam.py +433 -0
  4. app_preprocess.py +387 -0
  5. configs/inference/lam-20k-8gpu.yaml +130 -0
  6. configs/stylematte_config.json +2311 -0
  7. external/human_matting/__init__.py +1 -0
  8. external/human_matting/matting_engine.py +66 -0
  9. external/human_matting/stylematte.py +272 -0
  10. external/landmark_detection/FaceBoxesV2/__init__.py +2 -0
  11. external/landmark_detection/FaceBoxesV2/detector.py +39 -0
  12. external/landmark_detection/FaceBoxesV2/faceboxes_detector.py +97 -0
  13. external/landmark_detection/FaceBoxesV2/utils/__init__.py +0 -0
  14. external/landmark_detection/FaceBoxesV2/utils/box_utils.py +276 -0
  15. external/landmark_detection/FaceBoxesV2/utils/build.py +57 -0
  16. external/landmark_detection/FaceBoxesV2/utils/config.py +14 -0
  17. external/landmark_detection/FaceBoxesV2/utils/faceboxes.py +239 -0
  18. external/landmark_detection/FaceBoxesV2/utils/make.sh +3 -0
  19. external/landmark_detection/FaceBoxesV2/utils/nms/__init__.py +0 -0
  20. external/landmark_detection/FaceBoxesV2/utils/nms/cpu_nms.c +0 -0
  21. external/landmark_detection/FaceBoxesV2/utils/nms/cpu_nms.py +0 -0
  22. external/landmark_detection/FaceBoxesV2/utils/nms/cpu_nms.pyx +163 -0
  23. external/landmark_detection/FaceBoxesV2/utils/nms/gpu_nms.hpp +2 -0
  24. external/landmark_detection/FaceBoxesV2/utils/nms/gpu_nms.pyx +31 -0
  25. external/landmark_detection/FaceBoxesV2/utils/nms/nms_kernel.cu +144 -0
  26. external/landmark_detection/FaceBoxesV2/utils/nms/py_cpu_nms.py +38 -0
  27. external/landmark_detection/FaceBoxesV2/utils/nms_wrapper.py +15 -0
  28. external/landmark_detection/FaceBoxesV2/utils/prior_box.py +43 -0
  29. external/landmark_detection/FaceBoxesV2/utils/timer.py +40 -0
  30. external/landmark_detection/README.md +110 -0
  31. external/landmark_detection/conf/__init__.py +1 -0
  32. external/landmark_detection/conf/alignment.py +239 -0
  33. external/landmark_detection/conf/base.py +94 -0
  34. external/landmark_detection/config.json +15 -0
  35. external/landmark_detection/data_processor/CheckFaceKeyPoint.py +147 -0
  36. external/landmark_detection/data_processor/align.py +193 -0
  37. external/landmark_detection/data_processor/process_pcd.py +250 -0
  38. external/landmark_detection/evaluate.py +258 -0
  39. external/landmark_detection/infer_folder.py +253 -0
  40. external/landmark_detection/infer_image.py +251 -0
  41. external/landmark_detection/infer_video.py +287 -0
  42. external/landmark_detection/lib/__init__.py +9 -0
  43. external/landmark_detection/lib/backbone/__init__.py +5 -0
  44. external/landmark_detection/lib/backbone/core/coord_conv.py +157 -0
  45. external/landmark_detection/lib/backbone/stackedHGNetV1.py +307 -0
  46. external/landmark_detection/lib/dataset/__init__.py +11 -0
  47. external/landmark_detection/lib/dataset/alignmentDataset.py +316 -0
  48. external/landmark_detection/lib/dataset/augmentation.py +355 -0
  49. external/landmark_detection/lib/dataset/decoder/__init__.py +8 -0
  50. external/landmark_detection/lib/dataset/decoder/decoder_default.py +38 -0
README.md CHANGED
@@ -1,12 +1,14 @@
---
- title: LAM
- emoji: 🌍
- colorFrom: green
- colorTo: pink
+ title: LAM_test
+ emoji:
+ colorFrom: red
+ colorTo: indigo
sdk: gradio
- sdk_version: 5.23.3
+ sdk_version: 5.20.1
app_file: app.py
pinned: false
+ license: apache-2.0
+ short_description: Large Avatar Model for One-shot Animatable Gaussian Head
---

Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,568 @@
1
+ # Copyright (c) 2024-2025, Yisheng He, Yuan Dong
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+
17
+ os.system("rm -rf /data-nvme/zerogpu-offload/")
18
+ os.system("pip install chumpy")
19
+ # os.system("pip uninstall -y basicsr")
20
+ os.system("pip install Cython")
21
+ os.system("pip install ./wheels/diff_gaussian_rasterization-0.0.0-cp310-cp310-linux_x86_64.whl")
22
+ os.system("pip install ./wheels/simple_knn-0.0.0-cp310-cp310-linux_x86_64.whl")
23
+ os.system("pip install ./wheels/nvdiffrast-0.3.3-cp310-cp310-linux_x86_64.whl --force-reinstall")
24
+ os.system(
25
+ "pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt240/download.html")
26
+ os.system("pip install numpy==1.23.0")
27
+
28
+ import cv2
29
+ import sys
30
+ import base64
31
+ import subprocess
32
+
33
+ import argparse
34
+ from glob import glob
35
+ import gradio as gr
36
+ import numpy as np
37
+ from PIL import Image
38
+ from omegaconf import OmegaConf
39
+
40
+ import torch
41
+ import moviepy.editor as mpy
42
+ from lam.runners.infer.head_utils import prepare_motion_seqs, preprocess_image
43
+ from lam.utils.ffmpeg_utils import images_to_video
44
+
45
+ import spaces
46
+
47
+
48
+ def compile_module(subfolder, script):
49
+ try:
50
+ # Save the current working directory
51
+ current_dir = os.getcwd()
52
+ # Change directory to the subfolder
53
+ os.chdir(os.path.join(current_dir, subfolder))
54
+ # Run the compilation command
55
+ result = subprocess.run(
56
+ ["sh", script],
57
+ capture_output=True,
58
+ text=True,
59
+ check=True
60
+ )
61
+ # Print the compilation output
62
+ print("Compilation output:", result.stdout)
63
+
64
+ except Exception as e:
65
+ # Print any error that occurred
66
+ print(f"An error occurred: {e}")
67
+ finally:
68
+ # Ensure returning to the original directory
69
+ os.chdir(current_dir)
70
+ print("Returned to the original directory.")
71
+
72
+
73
+ # compile flame_tracking dependence submodule
74
+ compile_module("external/landmark_detection/FaceBoxesV2/utils/", "make.sh")
75
+ from flame_tracking_single_image import FlameTrackingSingleImage
76
+
77
+
78
+ def launch_pretrained():
79
+ from huggingface_hub import snapshot_download, hf_hub_download
80
+ # launch pretrained for flame tracking.
81
+ hf_hub_download(repo_id='yuandong513/flametracking_model',
82
+ repo_type='model',
83
+ filename='pretrain_model.tar',
84
+ local_dir='./')
85
+ os.system('tar -xf pretrain_model.tar && rm pretrain_model.tar')
86
+ # launch human model files
87
+ hf_hub_download(repo_id='3DAIGC/LAM-assets',
88
+ repo_type='model',
89
+ filename='LAM_human_model.tar',
90
+ local_dir='./')
91
+ os.system('tar -xf LAM_human_model.tar && rm LAM_human_model.tar')
92
+ # launch pretrained for LAM
93
+ model_dir = hf_hub_download(repo_id="3DAIGC/LAM-20K", repo_type="model", local_dir="./exps/releases/lam/lam-20k/step_045500/", filename="config.json")
94
+ print(model_dir)
95
+ model_dir = hf_hub_download(repo_id="3DAIGC/LAM-20K", repo_type="model", local_dir="./exps/releases/lam/lam-20k/step_045500/", filename="model.safetensors")
96
+ print(model_dir)
97
+ model_dir = hf_hub_download(repo_id="3DAIGC/LAM-20K", repo_type="model", local_dir="./exps/releases/lam/lam-20k/step_045500/", filename="README.md")
98
+ print(model_dir)
99
+ # launch example for LAM
100
+ hf_hub_download(repo_id='3DAIGC/LAM-assets',
101
+ repo_type='model',
102
+ filename='LAM_assets.tar',
103
+ local_dir='./')
104
+ os.system('tar -xf LAM_assets.tar && rm LAM_assets.tar')
105
+ hf_hub_download(repo_id='3DAIGC/LAM-assets',
106
+ repo_type='model',
107
+ filename='config.json',
108
+ local_dir='./tmp/')
109
+
110
+
111
+ def launch_env_not_compile_with_cuda():
112
+ os.system('pip install chumpy')
113
+ os.system('pip install numpy==1.23.0')
114
+ os.system(
115
+ 'pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt251/download.html'
116
+ )
117
+
118
+
119
+ def assert_input_image(input_image):
120
+ if input_image is None:
121
+ raise gr.Error('No image selected or uploaded!')
122
+
123
+
124
+ def prepare_working_dir():
125
+ import tempfile
126
+ working_dir = tempfile.TemporaryDirectory()
127
+ return working_dir
128
+
129
+
130
+ def init_preprocessor():
131
+ from lam.utils.preprocess import Preprocessor
132
+ global preprocessor
133
+ preprocessor = Preprocessor()
134
+
135
+
136
+ def preprocess_fn(image_in: np.ndarray, remove_bg: bool, recenter: bool,
137
+ working_dir):
138
+ image_raw = os.path.join(working_dir.name, 'raw.png')
139
+ with Image.fromarray(image_in) as img:
140
+ img.save(image_raw)
141
+ image_out = os.path.join(working_dir.name, 'rembg.png')
142
+ success = preprocessor.preprocess(image_path=image_raw,
143
+ save_path=image_out,
144
+ rmbg=remove_bg,
145
+ recenter=recenter)
146
+ assert success, f'Failed under preprocess_fn!'
147
+ return image_out
148
+
149
+
150
+ def get_image_base64(path):
151
+ with open(path, 'rb') as image_file:
152
+ encoded_string = base64.b64encode(image_file.read()).decode()
153
+ return f'data:image/png;base64,{encoded_string}'
154
+
155
+
156
+ def save_imgs_2_video(imgs, v_pth, fps=30):
157
+ # moviepy example
158
+ from moviepy.editor import ImageSequenceClip, VideoFileClip
159
+ images = [image.astype(np.uint8) for image in imgs]
160
+ clip = ImageSequenceClip(images, fps=fps)
161
+ # final_duration = len(images) / fps
162
+ # clip = clip.subclip(0, final_duration)
163
+ clip = clip.subclip(0, len(images) / fps)
164
+ clip.write_videofile(v_pth, codec='libx264')
165
+
166
+ import cv2
167
+ cap = cv2.VideoCapture(v_pth)
168
+ nf = cap.get(cv2.CAP_PROP_FRAME_COUNT)
169
+ if nf != len(images):
170
+ print("="*100+f"\n{v_pth} moviepy saved video frame error."+"\n"+"="*100)
171
+ print(f"Video saved successfully at {v_pth}")
172
+
173
+
174
+ def add_audio_to_video(video_path, out_path, audio_path, fps=30):
175
+ # Import necessary modules from moviepy
176
+ from moviepy.editor import VideoFileClip, AudioFileClip
177
+
178
+ # Load video file into VideoFileClip object
179
+ video_clip = VideoFileClip(video_path)
180
+
181
+ # Load audio file into AudioFileClip object
182
+ audio_clip = AudioFileClip(audio_path)
183
+
184
+ # Hard code clip audio
185
+ if audio_clip.duration > 10:
186
+ audio_clip = audio_clip.subclip(0, 10)
187
+
188
+ # Attach audio clip to video clip (replaces existing audio)
189
+ video_clip_with_audio = video_clip.set_audio(audio_clip)
190
+
191
+ # Export final video with audio using standard codecs
192
+ video_clip_with_audio.write_videofile(out_path, codec='libx264', audio_codec='aac', fps=fps)
193
+
194
+ print(f"Audio added successfully at {out_path}")
195
+
196
+
197
+ def parse_configs():
198
+ parser = argparse.ArgumentParser()
199
+ parser.add_argument("--config", type=str)
200
+ parser.add_argument("--infer", type=str)
201
+ args, unknown = parser.parse_known_args()
202
+
203
+ cfg = OmegaConf.create()
204
+ cli_cfg = OmegaConf.from_cli(unknown)
205
+
206
+ # parse from ENV
207
+ if os.environ.get("APP_INFER") is not None:
208
+ args.infer = os.environ.get("APP_INFER")
209
+ if os.environ.get("APP_MODEL_NAME") is not None:
210
+ cli_cfg.model_name = os.environ.get("APP_MODEL_NAME")
211
+
212
+ args.config = args.infer if args.config is None else args.config
213
+
214
+ if args.config is not None:
215
+ cfg_train = OmegaConf.load(args.config)
216
+ cfg.source_size = cfg_train.dataset.source_image_res
217
+ try:
218
+ cfg.src_head_size = cfg_train.dataset.src_head_size
219
+ except:
220
+ cfg.src_head_size = 112
221
+ cfg.render_size = cfg_train.dataset.render_image.high
222
+ _relative_path = os.path.join(
223
+ cfg_train.experiment.parent,
224
+ cfg_train.experiment.child,
225
+ os.path.basename(cli_cfg.model_name).split("_")[-1],
226
+ )
227
+
228
+ cfg.save_tmp_dump = os.path.join("exps", "save_tmp", _relative_path)
229
+ cfg.image_dump = os.path.join("exps", "images", _relative_path)
230
+ cfg.video_dump = os.path.join("exps", "videos", _relative_path) # output path
231
+
232
+ if args.infer is not None:
233
+ cfg_infer = OmegaConf.load(args.infer)
234
+ cfg.merge_with(cfg_infer)
235
+ cfg.setdefault(
236
+ "save_tmp_dump", os.path.join("exps", cli_cfg.model_name, "save_tmp")
237
+ )
238
+ cfg.setdefault("image_dump", os.path.join("exps", cli_cfg.model_name, "images"))
239
+ cfg.setdefault(
240
+ "video_dump", os.path.join("dumps", cli_cfg.model_name, "videos")
241
+ )
242
+ cfg.setdefault("mesh_dump", os.path.join("dumps", cli_cfg.model_name, "meshes"))
243
+
244
+ cfg.motion_video_read_fps = 30
245
+ cfg.merge_with(cli_cfg)
246
+
247
+ cfg.setdefault("logger", "INFO")
248
+
249
+ assert cfg.model_name is not None, "model_name is required"
250
+
251
+ return cfg, cfg_train
252
+
253
+
254
+ def demo_lam(flametracking, lam, cfg):
255
+ @spaces.GPU(duration=80)
256
+ def core_fn(image_path: str, video_params, working_dir):
257
+ image_raw = os.path.join(working_dir.name, "raw.png")
258
+ with Image.open(image_path).convert('RGB') as img:
259
+ img.save(image_raw)
260
+
261
+ base_vid = os.path.basename(video_params).split(".")[0]
262
+ flame_params_dir = os.path.join("./assets/sample_motion/export", base_vid, "flame_param")
263
+ base_iid = os.path.basename(image_path).split('.')[0]
264
+ image_path = os.path.join("./assets/sample_input", base_iid, "images/00000_00.png")
265
+
266
+ dump_video_path = os.path.join(working_dir.name, "output.mp4")
267
+ dump_image_path = os.path.join(working_dir.name, "output.png")
268
+
269
+ # prepare dump paths
270
+ omit_prefix = os.path.dirname(image_raw)
271
+ image_name = os.path.basename(image_raw)
272
+ uid = image_name.split(".")[0]
273
+ subdir_path = os.path.dirname(image_raw).replace(omit_prefix, "")
274
+ subdir_path = (
275
+ subdir_path[1:] if subdir_path.startswith("/") else subdir_path
276
+ )
277
+ print("subdir_path and uid:", subdir_path, uid)
278
+
279
+ motion_seqs_dir = flame_params_dir
280
+
281
+ dump_image_dir = os.path.dirname(dump_image_path)
282
+ os.makedirs(dump_image_dir, exist_ok=True)
283
+
284
+ print(image_raw, motion_seqs_dir, dump_image_dir, dump_video_path)
285
+
286
+ dump_tmp_dir = dump_image_dir
287
+
288
+ if os.path.exists(dump_video_path):
289
+ return dump_image_path, dump_video_path
290
+
291
+ motion_img_need_mask = cfg.get("motion_img_need_mask", False) # False
292
+ vis_motion = cfg.get("vis_motion", False) # False
293
+
294
+ # preprocess input image: segmentation, flame params estimation
295
+ # """
296
+ return_code = flametracking.preprocess(image_raw)
297
+ assert (return_code == 0), "flametracking preprocess failed!"
298
+ return_code = flametracking.optimize()
299
+ assert (return_code == 0), "flametracking optimize failed!"
300
+ return_code, output_dir = flametracking.export()
301
+ assert (return_code == 0), "flametracking export failed!"
302
+ image_path = os.path.join(output_dir, "images/00000_00.png")
303
+ # """
304
+
305
+ mask_path = image_path.replace("/images/", "/fg_masks/").replace(".jpg", ".png")
306
+ print(image_path, mask_path)
307
+
308
+ aspect_standard = 1.0 / 1.0
309
+ source_size = cfg.source_size
310
+ render_size = cfg.render_size
311
+ render_fps = 30
312
+ # prepare reference image
313
+ image, _, _, shape_param = preprocess_image(image_path, mask_path=mask_path, intr=None, pad_ratio=0,
314
+ bg_color=1.,
315
+ max_tgt_size=None, aspect_standard=aspect_standard,
316
+ enlarge_ratio=[1.0, 1.0],
317
+ render_tgt_size=source_size, multiply=14, need_mask=True,
318
+ get_shape_param=True)
319
+
320
+ # save masked image for vis
321
+ save_ref_img_path = os.path.join(dump_tmp_dir, "output.png")
322
+ vis_ref_img = (image[0].permute(1, 2, 0).cpu().detach().numpy() * 255).astype(np.uint8)
323
+ Image.fromarray(vis_ref_img).save(save_ref_img_path)
324
+
325
+ # prepare motion seq
326
+ src = image_path.split('/')[-3]
327
+ driven = motion_seqs_dir.split('/')[-2]
328
+ src_driven = [src, driven]
329
+ motion_seq = prepare_motion_seqs(motion_seqs_dir, None, save_root=dump_tmp_dir, fps=render_fps,
330
+ bg_color=1., aspect_standard=aspect_standard, enlarge_ratio=[1.0, 1, 0],
331
+ render_image_res=render_size, multiply=16,
332
+ need_mask=motion_img_need_mask, vis_motion=vis_motion,
333
+ shape_param=shape_param, test_sample=False, cross_id=False,
334
+ src_driven=src_driven, max_squen_length=300)
335
+
336
+ # start inference
337
+ motion_seq["flame_params"]["betas"] = shape_param.unsqueeze(0)
338
+ device, dtype = "cuda", torch.float32
339
+ print("start to inference...................")
340
+ with torch.no_grad():
341
+ # TODO check device and dtype
342
+ res = lam.infer_single_view(image.unsqueeze(0).to(device, dtype), None, None,
343
+ render_c2ws=motion_seq["render_c2ws"].to(device),
344
+ render_intrs=motion_seq["render_intrs"].to(device),
345
+ render_bg_colors=motion_seq["render_bg_colors"].to(device),
346
+ flame_params={k: v.to(device) for k, v in motion_seq["flame_params"].items()})
347
+
348
+ rgb = res["comp_rgb"].detach().cpu().numpy() # [Nv, H, W, 3], 0-1
349
+ mask = res["comp_mask"].detach().cpu().numpy() # [Nv, H, W, 3], 0-1
350
+ mask[mask < 0.5] = 0.0
351
+ rgb = rgb * mask + (1 - mask) * 1
352
+ rgb = (np.clip(rgb, 0, 1.0) * 255).astype(np.uint8)
353
+ if vis_motion:
354
+ vis_ref_img = np.tile(
355
+ cv2.resize(vis_ref_img, (rgb[0].shape[1], rgb[0].shape[0]), interpolation=cv2.INTER_AREA)[None, :, :,
356
+ :],
357
+ (rgb.shape[0], 1, 1, 1),
358
+ )
359
+ rgb = np.concatenate([vis_ref_img, rgb, motion_seq["vis_motion_render"]], axis=2)
360
+
361
+ os.makedirs(os.path.dirname(dump_video_path), exist_ok=True)
362
+
363
+ print("==="*36, "\nrgb length:", rgb.shape, render_fps, "==="*36)
364
+ save_imgs_2_video(rgb, dump_video_path, render_fps)
365
+ # images_to_video(rgb, output_path=dump_video_path, fps=30, gradio_codec=False, verbose=True)
366
+ audio_path = os.path.join("./assets/sample_motion/export", base_vid, base_vid + ".wav")
367
+ dump_video_path_wa = dump_video_path.replace(".mp4", "_audio.mp4")
368
+ add_audio_to_video(dump_video_path, dump_video_path_wa, audio_path)
369
+
370
+ return dump_image_path, dump_video_path_wa
371
+
372
+ def core_fn_space(image_path: str, video_params, working_dir):
373
+ return core_fn(image_path, video_params, working_dir)
374
+
375
+ with gr.Blocks(analytics_enabled=False) as demo:
376
+
377
+ logo_url = './assets/images/logo.jpeg'
378
+ logo_base64 = get_image_base64(logo_url)
379
+ gr.HTML(f"""
380
+ <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
381
+ <div>
382
+ <h1> <img src="{logo_base64}" style='height:35px; display:inline-block;'/> Large Avatar Model for One-shot Animatable Gaussian Head</h1>
383
+ </div>
384
+ </div>
385
+ """)
386
+
387
+ gr.HTML(
388
+ """
389
+ <div style="display: flex; justify-content: center; align-items: center; text-align: center; margin: 20px; gap: 10px;">
390
+ <a class="flex-item" href="https://arxiv.org/abs/2502.17796" target="_blank">
391
+ <img src="https://img.shields.io/badge/Paper-arXiv-darkred.svg" alt="arXiv Paper">
392
+ </a>
393
+ <a class="flex-item" href="https://aigc3d.github.io/projects/LAM/" target="_blank">
394
+ <img src="https://img.shields.io/badge/Project-LAM-blue" alt="Project Page">
395
+ </a>
396
+ <a class="flex-item" href="https://github.com/aigc3d/LAM" target="_blank">
397
+ <img src="https://img.shields.io/github/stars/aigc3d/LAM?label=Github%20★&logo=github&color=C8C" alt="badge-github-stars">
398
+ </a>
399
+ <a class="flex-item" href="https://youtu.be/FrfE3RYSKhk" target="_blank">
400
+ <img src="https://img.shields.io/badge/Youtube-Video-red.svg" alt="Video">
401
+ </a>
402
+ </div>
403
+ """
404
+ )
405
+
406
+
407
+ gr.HTML("""<div style="margin-top: -10px">
408
+ <p style="margin: 4px 0; line-height: 1.2"><h4 style="color: red; margin: 2px 0">Notes1: Inputing front-face images or face orientation close to the driven signal gets better results.</h4></p>
409
+ <p style="margin: 4px 0; line-height: 1.2"><h4 style="color: red; margin: 2px 0">Notes2: Due to computational constraints with Hugging Face's ZeroGPU infrastructure, video generation requires ~1 minute per instance.</h4></p>
410
+ <p style="margin: 4px 0; line-height: 1.2"><h4 style="color: red; margin: 2px 0">Notes3: Using LAM-20K model (lower quality than premium LAM-80K) to mitigate processing latency.</h4></p>
411
+ </div>""")
412
+
413
+
414
+
415
+
416
+ # DISPLAY
417
+ with gr.Row():
418
+ with gr.Column(variant='panel', scale=1):
419
+ with gr.Tabs(elem_id='lam_input_image'):
420
+ with gr.TabItem('Input Image'):
421
+ with gr.Row():
422
+ input_image = gr.Image(label='Input Image',
423
+ image_mode='RGB',
424
+ height=480,
425
+ width=270,
426
+ sources='upload',
427
+ type='filepath',
428
+ elem_id='content_image')
429
+ # EXAMPLES
430
+ with gr.Row():
431
+ examples = [
432
+ ['assets/sample_input/messi.png'],
433
+ ['assets/sample_input/status.png'],
434
+ ['assets/sample_input/james.png'],
435
+ ['assets/sample_input/cluo.jpg'],
436
+ ['assets/sample_input/dufu.jpg'],
437
+ ['assets/sample_input/libai.jpg'],
438
+ ['assets/sample_input/barbara.jpg'],
439
+ ['assets/sample_input/pop.png'],
440
+ ['assets/sample_input/musk.jpg'],
441
+ ['assets/sample_input/speed.jpg'],
442
+ ['assets/sample_input/zhouxingchi.jpg'],
443
+ ]
444
+ gr.Examples(
445
+ examples=examples,
446
+ inputs=[input_image],
447
+ examples_per_page=20
448
+ )
449
+
450
+
451
+ with gr.Column():
452
+ with gr.Tabs(elem_id='lam_input_video'):
453
+ with gr.TabItem('Input Video'):
454
+ with gr.Row():
455
+ video_input = gr.Video(label='Input Video',
456
+ height=480,
457
+ width=270,
458
+ interactive=False)
459
+
460
+ examples = ['./assets/sample_motion/export/Speeding_Scandal/Speeding_Scandal.mp4',
461
+ './assets/sample_motion/export/Look_In_My_Eyes/Look_In_My_Eyes.mp4',
462
+ './assets/sample_motion/export/D_ANgelo_Dinero/D_ANgelo_Dinero.mp4',
463
+ './assets/sample_motion/export/Michael_Wayne_Rosen/Michael_Wayne_Rosen.mp4',
464
+ './assets/sample_motion/export/I_Am_Iron_Man/I_Am_Iron_Man.mp4',
465
+ './assets/sample_motion/export/Anti_Drugs/Anti_Drugs.mp4',
466
+ './assets/sample_motion/export/Pen_Pineapple_Apple_Pen/Pen_Pineapple_Apple_Pen.mp4',
467
+ './assets/sample_motion/export/Joe_Biden/Joe_Biden.mp4',
468
+ './assets/sample_motion/export/Donald_Trump/Donald_Trump.mp4',
469
+ './assets/sample_motion/export/Taylor_Swift/Taylor_Swift.mp4',
470
+ './assets/sample_motion/export/GEM/GEM.mp4',
471
+ './assets/sample_motion/export/The_Shawshank_Redemption/The_Shawshank_Redemption.mp4'
472
+ ]
473
+ print("Video example list {}".format(examples))
474
+
475
+ gr.Examples(
476
+ examples=examples,
477
+ inputs=[video_input],
478
+ examples_per_page=20,
479
+ )
480
+ with gr.Column(variant='panel', scale=1):
481
+ with gr.Tabs(elem_id='lam_processed_image'):
482
+ with gr.TabItem('Processed Image'):
483
+ with gr.Row():
484
+ processed_image = gr.Image(
485
+ label='Processed Image',
486
+ image_mode='RGBA',
487
+ type='filepath',
488
+ elem_id='processed_image',
489
+ height=480,
490
+ width=270,
491
+ interactive=False)
492
+
493
+ with gr.Column(variant='panel', scale=1):
494
+ with gr.Tabs(elem_id='lam_render_video'):
495
+ with gr.TabItem('Rendered Video'):
496
+ with gr.Row():
497
+ output_video = gr.Video(label='Rendered Video',
498
+ format='mp4',
499
+ height=480,
500
+ width=270,
501
+ autoplay=True)
502
+
503
+ # SETTING
504
+ with gr.Row():
505
+ with gr.Column(variant='panel', scale=1):
506
+ submit = gr.Button('Generate',
507
+ elem_id='lam_generate',
508
+ variant='primary')
509
+
510
+ main_fn = core_fn
511
+
512
+ working_dir = gr.State()
513
+ submit.click(
514
+ fn=assert_input_image,
515
+ inputs=[input_image],
516
+ queue=False,
517
+ ).success(
518
+ fn=prepare_working_dir,
519
+ outputs=[working_dir],
520
+ queue=False,
521
+ ).success(
522
+ fn=main_fn,
523
+ inputs=[input_image, video_input,
524
+ working_dir], # video_params refer to smpl dir
525
+ outputs=[processed_image, output_video],
526
+ )
527
+
528
+ demo.queue()
529
+ demo.launch()
530
+
531
+
532
+ def _build_model(cfg):
533
+ from lam.models import model_dict
534
+ from lam.utils.hf_hub import wrap_model_hub
535
+
536
+ hf_model_cls = wrap_model_hub(model_dict["lam"])
537
+ model = hf_model_cls.from_pretrained(cfg.model_name)
538
+
539
+ return model
540
+
541
+
542
+ def launch_gradio_app():
543
+ os.environ.update({
544
+ 'APP_ENABLED': '1',
545
+ 'APP_MODEL_NAME':
546
+ './exps/releases/lam/lam-20k/step_045500/',
547
+ 'APP_INFER': './configs/inference/lam-20k-8gpu.yaml',
548
+ 'APP_TYPE': 'infer.lam',
549
+ 'NUMBA_THREADING_LAYER': 'omp',
550
+ })
551
+
552
+ cfg, _ = parse_configs()
553
+ lam = _build_model(cfg)
554
+ lam.to('cuda')
555
+
556
+ flametracking = FlameTrackingSingleImage(output_dir='tracking_output',
557
+ alignment_model_path='./pretrain_model/68_keypoints_model.pkl',
558
+ vgghead_model_path='./pretrain_model/vgghead/vgg_heads_l.trcd',
559
+ human_matting_path='./pretrain_model/matting/stylematte_synth.pt',
560
+ facebox_model_path='./pretrain_model/FaceBoxesV2.pth',
561
+ detect_iris_landmarks=False)
562
+
563
+ demo_lam(flametracking, lam, cfg)
564
+
565
+
566
+ if __name__ == '__main__':
567
+ launch_pretrained()
568
+ launch_gradio_app()
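The Gradio handler core_fn() above strings together four stages: FLAME tracking on the input portrait, reference-image preprocessing, preparation of the driving FLAME motion sequence, and single-view LAM inference followed by video export. Below is a minimal, hedged sketch of that same flow outside the UI. run_once(), portrait_path and motion_dir are hypothetical names introduced here; lam, tracker and cfg are assumed to be constructed exactly as in launch_gradio_app(), and save_imgs_2_video() is the helper defined earlier in this file. Error handling and the audio-muxing step are omitted.

# Hypothetical helper; assumes the repository layout and pretrained assets downloaded by launch_pretrained().
import os
import torch
from lam.runners.infer.head_utils import prepare_motion_seqs, preprocess_image

def run_once(lam, tracker, cfg, portrait_path, motion_dir, out_path="output.mp4"):
    # 1) segmentation + FLAME parameter estimation for the input portrait
    assert tracker.preprocess(portrait_path) == 0
    assert tracker.optimize() == 0
    return_code, export_dir = tracker.export()
    assert return_code == 0

    image_path = os.path.join(export_dir, "images/00000_00.png")
    mask_path = image_path.replace("/images/", "/fg_masks/").replace(".jpg", ".png")

    # 2) reference image + FLAME shape parameters at the model's source resolution
    image, _, _, shape_param = preprocess_image(
        image_path, mask_path=mask_path, intr=None, pad_ratio=0, bg_color=1.,
        max_tgt_size=None, aspect_standard=1.0, enlarge_ratio=[1.0, 1.0],
        render_tgt_size=cfg.source_size, multiply=14, need_mask=True, get_shape_param=True)

    # 3) driving FLAME sequence (the flame_param folder exported next to each sample video)
    motion_seq = prepare_motion_seqs(
        motion_dir, None, save_root="./tmp", fps=30, bg_color=1., aspect_standard=1.0,
        enlarge_ratio=[1.0, 1.0], render_image_res=cfg.render_size, multiply=16,
        need_mask=False, vis_motion=False, shape_param=shape_param,
        test_sample=False, cross_id=False, src_driven=["src", "driven"])
    motion_seq["flame_params"]["betas"] = shape_param.unsqueeze(0)

    # 4) single-view LAM inference, then composite the head onto a white background
    with torch.no_grad():
        res = lam.infer_single_view(
            image.unsqueeze(0).to("cuda", torch.float32), None, None,
            render_c2ws=motion_seq["render_c2ws"].to("cuda"),
            render_intrs=motion_seq["render_intrs"].to("cuda"),
            render_bg_colors=motion_seq["render_bg_colors"].to("cuda"),
            flame_params={k: v.to("cuda") for k, v in motion_seq["flame_params"].items()})
    rgb = res["comp_rgb"].detach().cpu().numpy()    # [Nv, H, W, 3], 0-1
    mask = res["comp_mask"].detach().cpu().numpy()
    mask[mask < 0.5] = 0.0
    frames = ((rgb * mask + (1 - mask)) * 255).clip(0, 255).astype("uint8")
    save_imgs_2_video(frames, out_path, 30)         # helper defined above in app.py
    return out_path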
app_lam.py ADDED
@@ -0,0 +1,433 @@
1
+ # Copyright (c) 2024-2025, Yisheng He, Yuan Dong
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ import cv2
17
+ import base64
18
+ import subprocess
19
+
20
+ import gradio as gr
21
+ import numpy as np
22
+ from PIL import Image
23
+ import argparse
24
+ from omegaconf import OmegaConf
25
+
26
+ import torch
27
+ from lam.runners.infer.head_utils import prepare_motion_seqs, preprocess_image
28
+ import moviepy.editor as mpy
29
+ from lam.utils.ffmpeg_utils import images_to_video
30
+ import sys
31
+ from flame_tracking_single_image import FlameTrackingSingleImage
32
+
33
+ try:
34
+ import spaces
35
+ except:
36
+ pass
37
+
38
+
39
+ def launch_pretrained():
40
+ from huggingface_hub import snapshot_download, hf_hub_download
41
+ hf_hub_download(repo_id='DyrusQZ/LHM_Runtime',
42
+ repo_type='model',
43
+ filename='assets.tar',
44
+ local_dir='./')
45
+ os.system('tar -xvf assets.tar && rm assets.tar')
46
+ hf_hub_download(repo_id='DyrusQZ/LHM_Runtime',
47
+ repo_type='model',
48
+ filename='LHM-0.5B.tar',
49
+ local_dir='./')
50
+ os.system('tar -xvf LHM-0.5B.tar && rm LHM-0.5B.tar')
51
+ hf_hub_download(repo_id='DyrusQZ/LHM_Runtime',
52
+ repo_type='model',
53
+ filename='LHM_prior_model.tar',
54
+ local_dir='./')
55
+ os.system('tar -xvf LHM_prior_model.tar && rm LHM_prior_model.tar')
56
+
57
+
58
+ def launch_env_not_compile_with_cuda():
59
+ os.system('pip install chumpy')
60
+ os.system('pip uninstall -y basicsr')
61
+ os.system('pip install git+https://github.com/hitsz-zuoqi/BasicSR/')
62
+ os.system('pip install numpy==1.23.0')
63
+ os.system(
64
+ 'pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py310_cu121_pyt251/download.html'
65
+ )
66
+
67
+
68
+ def assert_input_image(input_image):
69
+ if input_image is None:
70
+ raise gr.Error('No image selected or uploaded!')
71
+
72
+
73
+ def prepare_working_dir():
74
+ import tempfile
75
+ working_dir = tempfile.TemporaryDirectory()
76
+ return working_dir
77
+
78
+
79
+ def init_preprocessor():
80
+ from lam.utils.preprocess import Preprocessor
81
+ global preprocessor
82
+ preprocessor = Preprocessor()
83
+
84
+
85
+ def preprocess_fn(image_in: np.ndarray, remove_bg: bool, recenter: bool,
86
+ working_dir):
87
+ image_raw = os.path.join(working_dir.name, 'raw.png')
88
+ with Image.fromarray(image_in) as img:
89
+ img.save(image_raw)
90
+ image_out = os.path.join(working_dir.name, 'rembg.png')
91
+ success = preprocessor.preprocess(image_path=image_raw,
92
+ save_path=image_out,
93
+ rmbg=remove_bg,
94
+ recenter=recenter)
95
+ assert success, f'Failed under preprocess_fn!'
96
+ return image_out
97
+
98
+
99
+ def get_image_base64(path):
100
+ with open(path, 'rb') as image_file:
101
+ encoded_string = base64.b64encode(image_file.read()).decode()
102
+ return f'data:image/png;base64,{encoded_string}'
103
+
104
+
105
+ def save_imgs_2_video(imgs, v_pth, fps):
106
+ img_lst = [imgs[i] for i in range(imgs.shape[0])]
107
+ # Convert the list of NumPy arrays to a list of ImageClip objects
108
+ clips = [mpy.ImageClip(img).set_duration(0.1) for img in img_lst] # 0.1 seconds per frame
109
+
110
+ # Concatenate the ImageClips into a single VideoClip
111
+ video = mpy.concatenate_videoclips(clips, method="compose")
112
+
113
+ # Write the VideoClip to a file
114
+ video.write_videofile(v_pth, fps=fps) # setting fps to 10 as example
115
+
116
+
117
+ def parse_configs():
118
+
119
+ parser = argparse.ArgumentParser()
120
+ parser.add_argument("--config", type=str)
121
+ parser.add_argument("--infer", type=str)
122
+ args, unknown = parser.parse_known_args()
123
+
124
+ cfg = OmegaConf.create()
125
+ cli_cfg = OmegaConf.from_cli(unknown)
126
+
127
+ # parse from ENV
128
+ if os.environ.get("APP_INFER") is not None:
129
+ args.infer = os.environ.get("APP_INFER")
130
+ if os.environ.get("APP_MODEL_NAME") is not None:
131
+ cli_cfg.model_name = os.environ.get("APP_MODEL_NAME")
132
+
133
+ args.config = args.infer if args.config is None else args.config
134
+
135
+ if args.config is not None:
136
+ cfg_train = OmegaConf.load(args.config)
137
+ cfg.source_size = cfg_train.dataset.source_image_res
138
+ try:
139
+ cfg.src_head_size = cfg_train.dataset.src_head_size
140
+ except:
141
+ cfg.src_head_size = 112
142
+ cfg.render_size = cfg_train.dataset.render_image.high
143
+ _relative_path = os.path.join(
144
+ cfg_train.experiment.parent,
145
+ cfg_train.experiment.child,
146
+ os.path.basename(cli_cfg.model_name).split("_")[-1],
147
+ )
148
+
149
+ cfg.save_tmp_dump = os.path.join("exps", "save_tmp", _relative_path)
150
+ cfg.image_dump = os.path.join("exps", "images", _relative_path)
151
+ cfg.video_dump = os.path.join("exps", "videos", _relative_path) # output path
152
+
153
+ if args.infer is not None:
154
+ cfg_infer = OmegaConf.load(args.infer)
155
+ cfg.merge_with(cfg_infer)
156
+ cfg.setdefault(
157
+ "save_tmp_dump", os.path.join("exps", cli_cfg.model_name, "save_tmp")
158
+ )
159
+ cfg.setdefault("image_dump", os.path.join("exps", cli_cfg.model_name, "images"))
160
+ cfg.setdefault(
161
+ "video_dump", os.path.join("dumps", cli_cfg.model_name, "videos")
162
+ )
163
+ cfg.setdefault("mesh_dump", os.path.join("dumps", cli_cfg.model_name, "meshes"))
164
+
165
+ cfg.motion_video_read_fps = 6
166
+ cfg.merge_with(cli_cfg)
167
+
168
+ cfg.setdefault("logger", "INFO")
169
+
170
+ assert cfg.model_name is not None, "model_name is required"
171
+
172
+ return cfg, cfg_train
173
+
174
+
175
+ def demo_lam(flametracking, lam, cfg):
176
+
177
+ # @spaces.GPU(duration=80)
178
+ def core_fn(image_path: str, video_params, working_dir):
179
+ image_raw = os.path.join(working_dir.name, "raw.png")
180
+ with Image.open(image_path).convert('RGB') as img:
181
+ img.save(image_raw)
182
+
183
+ base_vid = os.path.basename(video_params).split(".")[0]
184
+ flame_params_dir = os.path.join("./assets/sample_motion/export", base_vid, "flame_param")
185
+ base_iid = os.path.basename(image_path).split('.')[0]
186
+ image_path = os.path.join("./assets/sample_input", base_iid, "images/00000_00.png")
187
+
188
+ dump_video_path = os.path.join(working_dir.name, "output.mp4")
189
+ dump_image_path = os.path.join(working_dir.name, "output.png")
190
+
191
+ # prepare dump paths
192
+ omit_prefix = os.path.dirname(image_raw)
193
+ image_name = os.path.basename(image_raw)
194
+ uid = image_name.split(".")[0]
195
+ subdir_path = os.path.dirname(image_raw).replace(omit_prefix, "")
196
+ subdir_path = (
197
+ subdir_path[1:] if subdir_path.startswith("/") else subdir_path
198
+ )
199
+ print("subdir_path and uid:", subdir_path, uid)
200
+
201
+ motion_seqs_dir = flame_params_dir
202
+
203
+ dump_image_dir = os.path.dirname(dump_image_path)
204
+ os.makedirs(dump_image_dir, exist_ok=True)
205
+
206
+ print(image_raw, motion_seqs_dir, dump_image_dir, dump_video_path)
207
+
208
+ dump_tmp_dir = dump_image_dir
209
+
210
+ if os.path.exists(dump_video_path):
211
+ return dump_image_path, dump_video_path
212
+
213
+ motion_img_need_mask = cfg.get("motion_img_need_mask", False) # False
214
+ vis_motion = cfg.get("vis_motion", False) # False
215
+
216
+ # preprocess input image: segmentation, flame params estimation
217
+ return_code = flametracking.preprocess(image_raw)
218
+ assert (return_code == 0), "flametracking preprocess failed!"
219
+ return_code = flametracking.optimize()
220
+ assert (return_code == 0), "flametracking optimize failed!"
221
+ return_code, output_dir = flametracking.export()
222
+ assert (return_code == 0), "flametracking export failed!"
223
+
224
+ image_path = os.path.join(output_dir, "images/00000_00.png")
225
+ mask_path = image_path.replace("/images/", "/fg_masks/").replace(".jpg", ".png")
226
+ print(image_path, mask_path)
227
+
228
+ aspect_standard = 1.0/1.0
229
+ source_size = cfg.source_size
230
+ render_size = cfg.render_size
231
+ render_fps = 30
232
+ # prepare reference image
233
+ image, _, _, shape_param = preprocess_image(image_path, mask_path=mask_path, intr=None, pad_ratio=0, bg_color=1.,
234
+ max_tgt_size=None, aspect_standard=aspect_standard, enlarge_ratio=[1.0, 1.0],
235
+ render_tgt_size=source_size, multiply=14, need_mask=True, get_shape_param=True)
236
+
237
+ # save masked image for vis
238
+ save_ref_img_path = os.path.join(dump_tmp_dir, "output.png")
239
+ vis_ref_img = (image[0].permute(1, 2, 0).cpu().detach().numpy() * 255).astype(np.uint8)
240
+ Image.fromarray(vis_ref_img).save(save_ref_img_path)
241
+
242
+ # prepare motion seq
243
+ src = image_path.split('/')[-3]
244
+ driven = motion_seqs_dir.split('/')[-2]
245
+ src_driven = [src, driven]
246
+ motion_seq = prepare_motion_seqs(motion_seqs_dir, None, save_root=dump_tmp_dir, fps=render_fps,
247
+ bg_color=1., aspect_standard=aspect_standard, enlarge_ratio=[1.0, 1,0],
248
+ render_image_res=render_size, multiply=16,
249
+ need_mask=motion_img_need_mask, vis_motion=vis_motion,
250
+ shape_param=shape_param, test_sample=False, cross_id=False, src_driven=src_driven)
251
+
252
+ # start inference
253
+ motion_seq["flame_params"]["betas"] = shape_param.unsqueeze(0)
254
+ device, dtype = "cuda", torch.float32
255
+ print("start to inference...................")
256
+ with torch.no_grad():
257
+ # TODO check device and dtype
258
+ res = lam.infer_single_view(image.unsqueeze(0).to(device, dtype), None, None,
259
+ render_c2ws=motion_seq["render_c2ws"].to(device),
260
+ render_intrs=motion_seq["render_intrs"].to(device),
261
+ render_bg_colors=motion_seq["render_bg_colors"].to(device),
262
+ flame_params={k:v.to(device) for k, v in motion_seq["flame_params"].items()})
263
+
264
+ rgb = res["comp_rgb"].detach().cpu().numpy() # [Nv, H, W, 3], 0-1
265
+ mask = res["comp_mask"].detach().cpu().numpy() # [Nv, H, W, 3], 0-1
266
+ mask[mask < 0.5] = 0.0
267
+ rgb = rgb * mask + (1 - mask) * 1
268
+ rgb = (np.clip(rgb, 0, 1.0) * 255).astype(np.uint8)
269
+ if vis_motion:
270
+ vis_ref_img = np.tile(
271
+ cv2.resize(vis_ref_img, (rgb[0].shape[1], rgb[0].shape[0]), interpolation=cv2.INTER_AREA)[None, :, :, :],
272
+ (rgb.shape[0], 1, 1, 1),
273
+ )
274
+ rgb = np.concatenate([vis_ref_img, rgb, motion_seq["vis_motion_render"]], axis=2)
275
+
276
+ os.makedirs(os.path.dirname(dump_video_path), exist_ok=True)
277
+
278
+ save_imgs_2_video(rgb, dump_video_path, render_fps)
279
+ # images_to_video(rgb, output_path=dump_video_path, fps=30, gradio_codec=False, verbose=True)
280
+
281
+ return dump_image_path, dump_video_path
282
+
283
+ with gr.Blocks(analytics_enabled=False) as demo:
284
+
285
+ logo_url = './assets/images/logo.png'
286
+ logo_base64 = get_image_base64(logo_url)
287
+ gr.HTML(f"""
288
+ <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
289
+ <div>
290
+ <h1> <img src="{logo_base64}" style='height:35px; display:inline-block;'/> LAM: Large Avatar Model for One-shot Animatable Gaussian Head</h1>
291
+ </div>
292
+ </div>
293
+ """)
294
+ gr.HTML(
295
+ """<p><h4 style="color: red;"> Notes: Inputing front-face images or face orientation close to the driven signal gets better results.</h4></p>"""
296
+ )
297
+
298
+ # DISPLAY
299
+ with gr.Row():
300
+
301
+ with gr.Column(variant='panel', scale=1):
302
+ with gr.Tabs(elem_id='lam_input_image'):
303
+ with gr.TabItem('Input Image'):
304
+ with gr.Row():
305
+ input_image = gr.Image(label='Input Image',
306
+ image_mode='RGB',
307
+ height=480,
308
+ width=270,
309
+ sources='upload',
310
+ type='filepath', # 'numpy',
311
+ elem_id='content_image')
312
+ # EXAMPLES
313
+ with gr.Row():
314
+ examples = [
315
+ ['assets/sample_input/2w01/images/2w01.png'],
316
+ ['assets/sample_input/2w02/images/2w02.png'],
317
+ ['assets/sample_input/2w03/images/2w03.png'],
318
+ ['assets/sample_input/2w04/images/2w04.png'],
319
+ ]
320
+ gr.Examples(
321
+ examples=examples,
322
+ inputs=[input_image],
323
+ examples_per_page=20,
324
+ )
325
+
326
+ with gr.Column():
327
+ with gr.Tabs(elem_id='lam_input_video'):
328
+ with gr.TabItem('Input Video'):
329
+ with gr.Row():
330
+ video_input = gr.Video(label='Input Video',
331
+ height=480,
332
+ width=270,
333
+ interactive=False)
334
+
335
+ examples = [
336
+ './assets/sample_motion/export/clip1/clip1.mp4',
337
+ './assets/sample_motion/export/clip2/clip2.mp4',
338
+ './assets/sample_motion/export/clip3/clip3.mp4',
339
+ ]
340
+
341
+ gr.Examples(
342
+ examples=examples,
343
+ inputs=[video_input],
344
+ examples_per_page=20,
345
+ )
346
+ with gr.Column(variant='panel', scale=1):
347
+ with gr.Tabs(elem_id='lam_processed_image'):
348
+ with gr.TabItem('Processed Image'):
349
+ with gr.Row():
350
+ processed_image = gr.Image(
351
+ label='Processed Image',
352
+ image_mode='RGBA',
353
+ type='filepath',
354
+ elem_id='processed_image',
355
+ height=480,
356
+ width=270,
357
+ interactive=False)
358
+
359
+ with gr.Column(variant='panel', scale=1):
360
+ with gr.Tabs(elem_id='lam_render_video'):
361
+ with gr.TabItem('Rendered Video'):
362
+ with gr.Row():
363
+ output_video = gr.Video(label='Rendered Video',
364
+ format='mp4',
365
+ height=480,
366
+ width=270,
367
+ autoplay=True)
368
+
369
+ # SETTING
370
+ with gr.Row():
371
+ with gr.Column(variant='panel', scale=1):
372
+ submit = gr.Button('Generate',
373
+ elem_id='lam_generate',
374
+ variant='primary')
375
+
376
+ working_dir = gr.State()
377
+ submit.click(
378
+ fn=assert_input_image,
379
+ inputs=[input_image],
380
+ queue=False,
381
+ ).success(
382
+ fn=prepare_working_dir,
383
+ outputs=[working_dir],
384
+ queue=False,
385
+ ).success(
386
+ fn=core_fn,
387
+ inputs=[input_image, video_input,
388
+ working_dir], # video_params refer to smpl dir
389
+ outputs=[processed_image, output_video],
390
+ )
391
+
392
+ demo.queue()
393
+ demo.launch()
394
+
395
+
396
+ def _build_model(cfg):
397
+ from lam.models import model_dict
398
+ from lam.utils.hf_hub import wrap_model_hub
399
+
400
+ hf_model_cls = wrap_model_hub(model_dict["lam"])
401
+ model = hf_model_cls.from_pretrained(cfg.model_name)
402
+
403
+ return model
404
+
405
+ def launch_gradio_app():
406
+
407
+ os.environ.update({
408
+ 'APP_ENABLED': '1',
409
+ 'APP_MODEL_NAME':
410
+ './exps/releases/lam/lam-20k/step_045500/',
411
+ 'APP_INFER': './configs/inference/lam-20k-8gpu.yaml',
412
+ 'APP_TYPE': 'infer.lam',
413
+ 'NUMBA_THREADING_LAYER': 'omp',
414
+ })
415
+
416
+ cfg, _ = parse_configs()
417
+ lam = _build_model(cfg)
418
+ lam.to('cuda')
419
+
420
+ flametracking = FlameTrackingSingleImage(output_dir='tracking_output',
421
+ alignment_model_path='./pretrain_model/68_keypoints_model.pkl',
422
+ vgghead_model_path='./pretrain_model/vgghead/vgg_heads_l.trcd',
423
+ human_matting_path='./pretrain_model/matting/stylematte_synth.pt',
424
+ facebox_model_path='./pretrain_model/FaceBoxesV2.pth',
425
+ detect_iris_landmarks=True)
426
+
427
+ demo_lam(flametracking, lam, cfg)
428
+
429
+
430
+ if __name__ == '__main__':
431
+ # launch_pretrained()
432
+ # launch_env_not_compile_with_cuda()
433
+ launch_gradio_app()
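One behavioural difference from app.py: the save_imgs_2_video() defined here gives every frame a fixed 0.1 s duration via ImageClip.set_duration() while also taking an fps argument, so the written video only plays at the intended speed when fps happens to be 10. A minimal sketch of a variant that keeps the same signature but derives frame timing from fps, using the same moviepy calls as the app.py version:

import numpy as np
import moviepy.editor as mpy

def save_imgs_2_video(imgs: np.ndarray, v_pth: str, fps: int) -> None:
    # Build the clip straight from the frame list so each frame lasts exactly 1/fps seconds.
    frames = [imgs[i].astype(np.uint8) for i in range(imgs.shape[0])]
    clip = mpy.ImageSequenceClip(frames, fps=fps)
    clip.write_videofile(v_pth, fps=fps, codec="libx264")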
app_preprocess.py ADDED
@@ -0,0 +1,387 @@
1
+ # Copyright (c) 2023-2024, Qi Zuo
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # https://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ os.system('rm -rf /data-nvme/zerogpu-offload/')
17
+ os.system('pip install numpy==1.23.0')
18
+ os.system('pip install ./wheels/pytorch3d-0.7.3-cp310-cp310-linux_x86_64.whl')
19
+
20
+ import argparse
21
+ import base64
22
+ import time
23
+
24
+ import cv2
25
+ import numpy as np
26
+ import torch
27
+ from omegaconf import OmegaConf
28
+ from PIL import Image
29
+
30
+ import gradio as gr
31
+ import spaces
32
+ from flame_tracking_single_image import FlameTrackingSingleImage
33
+ from ffmpeg_utils import images_to_video
34
+
35
+ # torch._dynamo.config.disable = True
36
+
37
+
38
+ def parse_configs():
39
+
40
+ parser = argparse.ArgumentParser()
41
+ parser.add_argument('--config', type=str)
42
+ parser.add_argument('--infer', type=str)
43
+ args, unknown = parser.parse_known_args()
44
+
45
+ cfg = OmegaConf.create()
46
+ cli_cfg = OmegaConf.from_cli(unknown)
47
+
48
+ # parse from ENV
49
+ if os.environ.get('APP_INFER') is not None:
50
+ args.infer = os.environ.get('APP_INFER')
51
+ if os.environ.get('APP_MODEL_NAME') is not None:
52
+ cli_cfg.model_name = os.environ.get('APP_MODEL_NAME')
53
+
54
+ args.config = args.infer if args.config is None else args.config
55
+
56
+ if args.config is not None:
57
+ cfg_train = OmegaConf.load(args.config)
58
+ cfg.source_size = cfg_train.dataset.source_image_res
59
+ try:
60
+ cfg.src_head_size = cfg_train.dataset.src_head_size
61
+ except:
62
+ cfg.src_head_size = 112
63
+ cfg.render_size = cfg_train.dataset.render_image.high
64
+ _relative_path = os.path.join(
65
+ cfg_train.experiment.parent,
66
+ cfg_train.experiment.child,
67
+ os.path.basename(cli_cfg.model_name).split('_')[-1],
68
+ )
69
+
70
+ cfg.save_tmp_dump = os.path.join('exps', 'save_tmp', _relative_path)
71
+ cfg.image_dump = os.path.join('exps', 'images', _relative_path)
72
+ cfg.video_dump = os.path.join('exps', 'videos',
73
+ _relative_path) # output path
74
+
75
+ if args.infer is not None:
76
+ cfg_infer = OmegaConf.load(args.infer)
77
+ cfg.merge_with(cfg_infer)
78
+ cfg.setdefault('save_tmp_dump',
79
+ os.path.join('exps', cli_cfg.model_name, 'save_tmp'))
80
+ cfg.setdefault('image_dump',
81
+ os.path.join('exps', cli_cfg.model_name, 'images'))
82
+ cfg.setdefault('video_dump',
83
+ os.path.join('dumps', cli_cfg.model_name, 'videos'))
84
+ cfg.setdefault('mesh_dump',
85
+ os.path.join('dumps', cli_cfg.model_name, 'meshes'))
86
+
87
+ cfg.motion_video_read_fps = 6
88
+ cfg.merge_with(cli_cfg)
89
+
90
+ cfg.setdefault('logger', 'INFO')
91
+
92
+ assert cfg.model_name is not None, 'model_name is required'
93
+
94
+ return cfg, cfg_train
95
+
96
+
97
+
98
+ def launch_pretrained():
99
+ from huggingface_hub import snapshot_download, hf_hub_download
100
+ hf_hub_download(repo_id='yuandong513/flametracking_model',
101
+ repo_type='model',
102
+ filename='pretrain_model.tar',
103
+ local_dir='./')
104
+ os.system('tar -xf pretrain_model.tar && rm pretrain_model.tar')
105
+
106
+ def animation_infer(renderer, gs_model_list, query_points, smplx_params,
107
+ render_c2ws, render_intrs, render_bg_colors):
108
+ '''Inference code avoid repeat forward.
109
+ '''
110
+ render_h, render_w = int(render_intrs[0, 0, 1, 2] * 2), int(
111
+ render_intrs[0, 0, 0, 2] * 2)
112
+ # render target views
113
+ render_res_list = []
114
+ num_views = render_c2ws.shape[1]
115
+ start_time = time.time()
116
+
117
+ # render target views
118
+ render_res_list = []
119
+
120
+ for view_idx in range(num_views):
121
+ render_res = renderer.forward_animate_gs(
122
+ gs_model_list,
123
+ query_points,
124
+ renderer.get_single_view_smpl_data(smplx_params, view_idx),
125
+ render_c2ws[:, view_idx:view_idx + 1],
126
+ render_intrs[:, view_idx:view_idx + 1],
127
+ render_h,
128
+ render_w,
129
+ render_bg_colors[:, view_idx:view_idx + 1],
130
+ )
131
+ render_res_list.append(render_res)
132
+ print(
133
+ f'time elpased(animate gs model per frame):{(time.time() - start_time)/num_views}'
134
+ )
135
+
136
+ out = defaultdict(list)
137
+ for res in render_res_list:
138
+ for k, v in res.items():
139
+ if isinstance(v[0], torch.Tensor):
140
+ out[k].append(v.detach().cpu())
141
+ else:
142
+ out[k].append(v)
143
+ for k, v in out.items():
144
+ # print(f"out key:{k}")
145
+ if isinstance(v[0], torch.Tensor):
146
+ out[k] = torch.concat(v, dim=1)
147
+ if k in ['comp_rgb', 'comp_mask', 'comp_depth']:
148
+ out[k] = out[k][0].permute(
149
+ 0, 2, 3,
150
+ 1) # [1, Nv, 3, H, W] -> [Nv, 3, H, W] - > [Nv, H, W, 3]
151
+ else:
152
+ out[k] = v
153
+ return out
154
+
155
+
156
+ def assert_input_image(input_image):
157
+ if input_image is None:
158
+ raise gr.Error('No image selected or uploaded!')
159
+
160
+
161
+ def prepare_working_dir():
162
+ import tempfile
163
+ working_dir = tempfile.TemporaryDirectory()
164
+ return working_dir
165
+
166
+ def get_image_base64(path):
167
+ with open(path, 'rb') as image_file:
168
+ encoded_string = base64.b64encode(image_file.read()).decode()
169
+ return f'data:image/png;base64,{encoded_string}'
170
+
171
+
172
+ def demo_lhm(flametracking):
173
+ @spaces.GPU(duration=80)
174
+ def core_fn(image: str, video_params, working_dir):
175
+ image_raw = os.path.join(working_dir.name, 'raw.png')
176
+ with Image.fromarray(image) as img:
177
+ img.save(image_raw)
178
+
179
+ base_vid = os.path.basename(video_params).split('_')[0]
180
+
181
+ dump_video_path = os.path.join(working_dir.name, 'output.mp4')
182
+ dump_image_path = os.path.join(working_dir.name, 'output.png')
183
+
184
+ # prepare dump paths
185
+ omit_prefix = os.path.dirname(image_raw)
186
+ image_name = os.path.basename(image_raw)
187
+ uid = image_name.split('.')[0]
188
+ subdir_path = os.path.dirname(image_raw).replace(omit_prefix, '')
189
+ subdir_path = (subdir_path[1:]
190
+ if subdir_path.startswith('/') else subdir_path)
191
+ print('==> subdir_path and uid:', subdir_path, uid)
192
+
193
+ dump_image_dir = os.path.dirname(dump_image_path)
194
+ os.makedirs(dump_image_dir, exist_ok=True)
195
+
196
+ print('==> path:', image_raw, dump_image_dir, dump_video_path)
197
+
198
+ dump_tmp_dir = dump_image_dir
199
+
200
+ return_code = flametracking.preprocess(image_raw)
201
+ return_code = flametracking.optimize()
202
+ return_code, output_dir = flametracking.export()
203
+
204
+ print("==> output_dir:", output_dir)
205
+
206
+
207
+ save_ref_img_path = os.path.join(dump_tmp_dir, 'output.png')
208
+ vis_ref_img = (image[0].permute(1, 2, 0).cpu().detach().numpy() *
209
+ 255).astype(np.uint8)
210
+ Image.fromarray(vis_ref_img).save(save_ref_img_path)
211
+
212
+ # rendering !!!!
213
+ start_time = time.time()
214
+ batch_dict = dict()
215
+
216
+ rgb = cv2.imread(os.path.join(output_dir,'images/00000_00.png'))
217
+
218
+ for i in range(30):
219
+ images_to_video(
220
+ rgb,
221
+ output_path=dump_video_path,
222
+ fps=30,
223
+ gradio_codec=False,
224
+ verbose=True,
225
+ )
226
+
227
+ return dump_image_path, dump_video_path
228
+
229
+ _TITLE = '''LHM: Large Animatable Human Model'''
230
+
231
+ _DESCRIPTION = '''
232
+ <strong>Reconstruct a human avatar in 0.2 seconds with A100!</strong>
233
+ '''
234
+
235
+ with gr.Blocks(analytics_enabled=False, delete_cache=[3600, 3600]) as demo:
236
+
237
+ # </div>
238
+ logo_url = './asset/logo.jpeg'
239
+ logo_base64 = get_image_base64(logo_url)
240
+ gr.HTML(f"""
241
+ <div style="display: flex; justify-content: center; align-items: center; text-align: center;">
242
+ <div>
243
+ <h1> <img src="{logo_base64}" style='height:35px; display:inline-block;'/> Large Animatable Human Model </h1>
244
+ </div>
245
+ </div>
246
+ """)
247
+
248
+ gr.HTML("""
249
+ <div style="display: flex; justify-content: center; align-items: center; text-align: center; margin: 20px; gap: 10px;">
250
+ <a class="flex-item" href="https://arxiv.org/abs/2503.10625" target="_blank">
251
+ <img src="https://img.shields.io/badge/Paper-arXiv-darkred.svg" alt="arXiv Paper">
252
+ </a>
253
+ <a class="flex-item" href="https://lingtengqiu.github.io/LHM/" target="_blank">
254
+ <img src="https://img.shields.io/badge/Project-LHM-blue" alt="Project Page">
255
+ </a>
256
+ <a class="flex-item" href="https://github.com/aigc3d/LHM" target="_blank">
257
+ <img src="https://img.shields.io/github/stars/aigc3d/LHM?label=Github%20%E2%98%85&logo=github&color=C8C" alt="badge-github-stars">
258
+ </a>
259
+ <a class="flex-item" href="https://www.youtube.com/watch?v=tivEpz_yiEo" target="_blank">
260
+ <img src="https://img.shields.io/badge/Youtube-Video-red.svg" alt="Video">
261
+ </a>
262
+ </div>
263
+ """)
264
+
265
+ gr.HTML(
266
+ """<p><h4 style="color: red;"> Notes: Please input full-body image in case of detection errors. We simplify the pipeline in spaces: 1) using Rembg instead of SAM2; 2) limit the output video length to 10s; For best visual quality, try the inference code on Github instead.</h4></p>"""
267
+ )
268
+
269
+ # DISPLAY
270
+ with gr.Row():
271
+
272
+ with gr.Column(variant='panel', scale=1):
273
+ with gr.Tabs(elem_id='openlrm_input_image'):
274
+ with gr.TabItem('Input Image'):
275
+ with gr.Row():
276
+ input_image = gr.Image(label='Input Image',
277
+ image_mode='RGB',
278
+ height=480,
279
+ width=270,
280
+ sources='upload',
281
+ type='numpy',
282
+ elem_id='content_image')
283
+ # EXAMPLES
284
+ with gr.Row():
285
+ examples = [
286
+ ['asset/sample_input/00000.png'],
287
+ ]
288
+ gr.Examples(
289
+ examples=examples,
290
+ inputs=[input_image],
291
+ examples_per_page=10,
292
+ )
293
+
294
+ with gr.Column():
295
+ with gr.Tabs(elem_id='openlrm_input_video'):
296
+ with gr.TabItem('Input Video'):
297
+ with gr.Row():
298
+ video_input = gr.Video(label='Input Video',
299
+ height=480,
300
+ width=270,
301
+ interactive=False)
302
+
303
+ examples = [
304
+ './asset/sample_input/demo.mp4',
305
+ ]
306
+
307
+ gr.Examples(
308
+ examples=examples,
309
+ inputs=[video_input],
310
+ examples_per_page=20,
311
+ )
312
+ with gr.Column(variant='panel', scale=1):
313
+ with gr.Tabs(elem_id='openlrm_processed_image'):
314
+ with gr.TabItem('Processed Image'):
315
+ with gr.Row():
316
+ processed_image = gr.Image(
317
+ label='Processed Image',
318
+ image_mode='RGB',
319
+ type='filepath',
320
+ elem_id='processed_image',
321
+ height=480,
322
+ width=270,
323
+ interactive=False)
324
+
325
+ with gr.Column(variant='panel', scale=1):
326
+ with gr.Tabs(elem_id='openlrm_render_video'):
327
+ with gr.TabItem('Rendered Video'):
328
+ with gr.Row():
329
+ output_video = gr.Video(label='Rendered Video',
330
+ format='mp4',
331
+ height=480,
332
+ width=270,
333
+ autoplay=True)
334
+
335
+ # SETTING
336
+ with gr.Row():
337
+ with gr.Column(variant='panel', scale=1):
338
+ submit = gr.Button('Generate',
339
+ elem_id='openlrm_generate',
340
+ variant='primary')
341
+
342
+ working_dir = gr.State()
343
+ submit.click(
344
+ fn=assert_input_image,
345
+ inputs=[input_image],
346
+ queue=False,
347
+ ).success(
348
+ fn=prepare_working_dir,
349
+ outputs=[working_dir],
350
+ queue=False,
351
+ ).success(
352
+ fn=core_fn,
353
+ inputs=[input_image, video_input,
354
+ working_dir], # video_params refer to smpl dir
355
+ outputs=[processed_image, output_video],
356
+ )
357
+
358
+ demo.queue(max_size=1)
359
+ demo.launch()
360
+
361
+
362
+ def launch_gradio_app():
363
+
364
+ os.environ.update({
365
+ 'APP_ENABLED': '1',
366
+ 'APP_MODEL_NAME':
367
+ './exps/releases/video_human_benchmark/human-lrm-500M/step_060000/',
368
+ 'APP_INFER': './configs/inference/human-lrm-500M.yaml',
369
+ 'APP_TYPE': 'infer.human_lrm',
370
+ 'NUMBA_THREADING_LAYER': 'omp',
371
+ })
372
+
373
+ flametracking = FlameTrackingSingleImage(output_dir='tracking_output',
374
+ alignment_model_path='./pretrain_model/68_keypoints_model.pkl',
375
+ vgghead_model_path='./pretrain_model/vgghead/vgg_heads_l.trcd',
376
+ human_matting_path='./pretrain_model/matting/stylematte_synth.pt',
377
+ facebox_model_path='./pretrain_model/FaceBoxesV2.pth',
378
+ detect_iris_landmarks=True)
379
+
380
+
381
+ demo_lhm(flametracking)
382
+
383
+
384
+ if __name__ == '__main__':
385
+ launch_pretrained()
386
+ launch_gradio_app()
387
+
configs/inference/lam-20k-8gpu.yaml ADDED
@@ -0,0 +1,130 @@
1
+
2
+ experiment:
3
+ type: lam
4
+ seed: 42
5
+ parent: lam
6
+ child: lam_20k
7
+ model:
8
+ # image encoder
9
+ encoder_type: "dinov2_fusion"
10
+ encoder_model_name: "dinov2_vitl14_reg"
11
+ encoder_feat_dim: 1024
12
+ encoder_freeze: false
13
+
14
+ # points embeddings
15
+ latent_query_points_type: "e2e_flame"
16
+ pcl_dim: 1024
17
+
18
+ # transformer
19
+ transformer_type: "sd3_cond"
20
+ transformer_heads: 16
21
+ transformer_dim: 1024
22
+ transformer_layers: 10
23
+ tf_grad_ckpt: true
24
+ encoder_grad_ckpt: true
25
+
26
+ # for gs renderer
27
+ human_model_path: "./pretrained_models/human_model_files"
28
+ flame_subdivide_num: 1
29
+ flame_type: "flame"
30
+ gs_query_dim: 1024
31
+ gs_use_rgb: True
32
+ gs_sh: 3
33
+ gs_mlp_network_config:
34
+ n_neurons: 512
35
+ n_hidden_layers: 2
36
+ activation: silu
37
+ gs_xyz_offset_max_step: 0.2
38
+ gs_clip_scaling: 0.01
39
+ scale_sphere: false
40
+
41
+ expr_param_dim: 10
42
+ shape_param_dim: 10
43
+ add_teeth: false
44
+
45
+ fix_opacity: false
46
+ fix_rotation: false
47
+
48
+ has_disc: false
49
+
50
+ teeth_bs_flag: false
51
+ oral_mesh_flag: false
52
+
53
+ dataset:
54
+ subsets:
55
+ - name: video_head
56
+ root_dirs: "./train_data/vfhq_vhap_nooffset/export"
57
+ meta_path:
58
+ train: "./train_data/vfhq_vhap_nooffset/label/valid_id_train_list.json"
59
+ val: "./train_data/vfhq_vhap_nooffset/label/valid_id_val_list.json"
60
+ sample_rate: 1.0
61
+ sample_side_views: 7
62
+ sample_aug_views: 0
63
+ source_image_res: 512
64
+ render_image:
65
+ low: 512
66
+ high: 512
67
+ region: null
68
+ num_train_workers: 4
69
+ num_val_workers: 2
70
+ pin_mem: true
71
+ repeat_num: 1
72
+ gaga_track_type: "vfhq"
73
+
74
+ train:
75
+ mixed_precision: bf16 # REPLACE THIS BASED ON GPU TYPE
76
+ find_unused_parameters: false
77
+ loss:
78
+ pixel_weight: 0.0
79
+ pixel_loss_fn: "mse"
80
+ crop_face_weight: 0.
81
+ crop_mouth_weight: 0.
82
+ crop_eye_weight: 0.
83
+ masked_pixel_weight: 1.0
84
+ perceptual_weight: 1.0
85
+ tv_weight: -1
86
+ mask_weight: 0:1.0:0.5:10000
87
+ offset_reg_weight: 0.1
88
+ optim:
89
+ lr: 4e-4
90
+ weight_decay: 0.05
91
+ beta1: 0.9
92
+ beta2: 0.95
93
+ clip_grad_norm: 1.0
94
+ scheduler:
95
+ type: cosine
96
+ warmup_real_iters: 3000
97
+ batch_size: 4 # REPLACE THIS (PER GPU)
98
+ accum_steps: 1 # REPLACE THIS
99
+ epochs: 100 # REPLACE THIS
100
+ debug_global_steps: null
101
+ resume: ""
102
+
103
+ val:
104
+ batch_size: 2
105
+ global_step_period: 500
106
+ debug_batches: 10
107
+
108
+ saver:
109
+ auto_resume: true
110
+ load_model: null
111
+ checkpoint_root: ./exps/checkpoints
112
+ checkpoint_global_steps: 500
113
+ checkpoint_keep_level: 5
114
+
115
+ logger:
116
+ stream_level: WARNING
117
+ log_level: INFO
118
+ log_root: ./exps/logs
119
+ tracker_root: ./exps/trackers
120
+ enable_profiler: false
121
+ trackers:
122
+ - tensorboard
123
+ image_monitor:
124
+ train_global_steps: 500
125
+ samples_per_log: 4
126
+
127
+ compile:
128
+ suppress_errors: true
129
+ print_specializations: true
130
+ disable: true
configs/stylematte_config.json ADDED
@@ -0,0 +1,2311 @@
1
+ {
2
+ "_commit_hash": null,
3
+ "activation_function": "relu",
4
+ "architectures": [
5
+ "Mask2FormerForUniversalSegmentation"
6
+ ],
7
+ "backbone_config": {
8
+ "_name_or_path": "",
9
+ "add_cross_attention": false,
10
+ "architectures": [
11
+ "SwinForImageClassification"
12
+ ],
13
+ "attention_probs_dropout_prob": 0.0,
14
+ "bad_words_ids": null,
15
+ "begin_suppress_tokens": null,
16
+ "bos_token_id": null,
17
+ "chunk_size_feed_forward": 0,
18
+ "cross_attention_hidden_size": null,
19
+ "decoder_start_token_id": null,
20
+ "depths": [
21
+ 2,
22
+ 2,
23
+ 6,
24
+ 2
25
+ ],
26
+ "diversity_penalty": 0.0,
27
+ "do_sample": false,
28
+ "drop_path_rate": 0.3,
29
+ "early_stopping": false,
30
+ "embed_dim": 96,
31
+ "encoder_no_repeat_ngram_size": 0,
32
+ "encoder_stride": 32,
33
+ "eos_token_id": null,
34
+ "exponential_decay_length_penalty": null,
35
+ "finetuning_task": null,
36
+ "forced_bos_token_id": null,
37
+ "forced_eos_token_id": null,
38
+ "hidden_act": "gelu",
39
+ "hidden_dropout_prob": 0.0,
40
+ "hidden_size": 768,
41
+ "id2label": {
42
+ "0": "tench, Tinca tinca",
43
+ "1": "goldfish, Carassius auratus",
44
+ "2": "great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias",
45
+ "3": "tiger shark, Galeocerdo cuvieri",
46
+ "4": "hammerhead, hammerhead shark",
47
+ "5": "electric ray, crampfish, numbfish, torpedo",
48
+ "6": "stingray",
49
+ "7": "cock",
50
+ "8": "hen",
51
+ "9": "ostrich, Struthio camelus",
52
+ "10": "brambling, Fringilla montifringilla",
53
+ "11": "goldfinch, Carduelis carduelis",
54
+ "12": "house finch, linnet, Carpodacus mexicanus",
55
+ "13": "junco, snowbird",
56
+ "14": "indigo bunting, indigo finch, indigo bird, Passerina cyanea",
57
+ "15": "robin, American robin, Turdus migratorius",
58
+ "16": "bulbul",
59
+ "17": "jay",
60
+ "18": "magpie",
61
+ "19": "chickadee",
62
+ "20": "water ouzel, dipper",
63
+ "21": "kite",
64
+ "22": "bald eagle, American eagle, Haliaeetus leucocephalus",
65
+ "23": "vulture",
66
+ "24": "great grey owl, great gray owl, Strix nebulosa",
67
+ "25": "European fire salamander, Salamandra salamandra",
68
+ "26": "common newt, Triturus vulgaris",
69
+ "27": "eft",
70
+ "28": "spotted salamander, Ambystoma maculatum",
71
+ "29": "axolotl, mud puppy, Ambystoma mexicanum",
72
+ "30": "bullfrog, Rana catesbeiana",
73
+ "31": "tree frog, tree-frog",
74
+ "32": "tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui",
75
+ "33": "loggerhead, loggerhead turtle, Caretta caretta",
76
+ "34": "leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea",
77
+ "35": "mud turtle",
78
+ "36": "terrapin",
79
+ "37": "box turtle, box tortoise",
80
+ "38": "banded gecko",
81
+ "39": "common iguana, iguana, Iguana iguana",
82
+ "40": "American chameleon, anole, Anolis carolinensis",
83
+ "41": "whiptail, whiptail lizard",
84
+ "42": "agama",
85
+ "43": "frilled lizard, Chlamydosaurus kingi",
86
+ "44": "alligator lizard",
87
+ "45": "Gila monster, Heloderma suspectum",
88
+ "46": "green lizard, Lacerta viridis",
89
+ "47": "African chameleon, Chamaeleo chamaeleon",
90
+ "48": "Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis",
91
+ "49": "African crocodile, Nile crocodile, Crocodylus niloticus",
92
+ "50": "American alligator, Alligator mississipiensis",
93
+ "51": "triceratops",
94
+ "52": "thunder snake, worm snake, Carphophis amoenus",
95
+ "53": "ringneck snake, ring-necked snake, ring snake",
96
+ "54": "hognose snake, puff adder, sand viper",
97
+ "55": "green snake, grass snake",
98
+ "56": "king snake, kingsnake",
99
+ "57": "garter snake, grass snake",
100
+ "58": "water snake",
101
+ "59": "vine snake",
102
+ "60": "night snake, Hypsiglena torquata",
103
+ "61": "boa constrictor, Constrictor constrictor",
104
+ "62": "rock python, rock snake, Python sebae",
105
+ "63": "Indian cobra, Naja naja",
106
+ "64": "green mamba",
107
+ "65": "sea snake",
108
+ "66": "horned viper, cerastes, sand viper, horned asp, Cerastes cornutus",
109
+ "67": "diamondback, diamondback rattlesnake, Crotalus adamanteus",
110
+ "68": "sidewinder, horned rattlesnake, Crotalus cerastes",
111
+ "69": "trilobite",
112
+ "70": "harvestman, daddy longlegs, Phalangium opilio",
113
+ "71": "scorpion",
114
+ "72": "black and gold garden spider, Argiope aurantia",
115
+ "73": "barn spider, Araneus cavaticus",
116
+ "74": "garden spider, Aranea diademata",
117
+ "75": "black widow, Latrodectus mactans",
118
+ "76": "tarantula",
119
+ "77": "wolf spider, hunting spider",
120
+ "78": "tick",
121
+ "79": "centipede",
122
+ "80": "black grouse",
123
+ "81": "ptarmigan",
124
+ "82": "ruffed grouse, partridge, Bonasa umbellus",
125
+ "83": "prairie chicken, prairie grouse, prairie fowl",
126
+ "84": "peacock",
127
+ "85": "quail",
128
+ "86": "partridge",
129
+ "87": "African grey, African gray, Psittacus erithacus",
130
+ "88": "macaw",
131
+ "89": "sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita",
132
+ "90": "lorikeet",
133
+ "91": "coucal",
134
+ "92": "bee eater",
135
+ "93": "hornbill",
136
+ "94": "hummingbird",
137
+ "95": "jacamar",
138
+ "96": "toucan",
139
+ "97": "drake",
140
+ "98": "red-breasted merganser, Mergus serrator",
141
+ "99": "goose",
142
+ "100": "black swan, Cygnus atratus",
143
+ "101": "tusker",
144
+ "102": "echidna, spiny anteater, anteater",
145
+ "103": "platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus",
146
+ "104": "wallaby, brush kangaroo",
147
+ "105": "koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus",
148
+ "106": "wombat",
149
+ "107": "jellyfish",
150
+ "108": "sea anemone, anemone",
151
+ "109": "brain coral",
152
+ "110": "flatworm, platyhelminth",
153
+ "111": "nematode, nematode worm, roundworm",
154
+ "112": "conch",
155
+ "113": "snail",
156
+ "114": "slug",
157
+ "115": "sea slug, nudibranch",
158
+ "116": "chiton, coat-of-mail shell, sea cradle, polyplacophore",
159
+ "117": "chambered nautilus, pearly nautilus, nautilus",
160
+ "118": "Dungeness crab, Cancer magister",
161
+ "119": "rock crab, Cancer irroratus",
162
+ "120": "fiddler crab",
163
+ "121": "king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica",
164
+ "122": "American lobster, Northern lobster, Maine lobster, Homarus americanus",
165
+ "123": "spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish",
166
+ "124": "crayfish, crawfish, crawdad, crawdaddy",
167
+ "125": "hermit crab",
168
+ "126": "isopod",
169
+ "127": "white stork, Ciconia ciconia",
170
+ "128": "black stork, Ciconia nigra",
171
+ "129": "spoonbill",
172
+ "130": "flamingo",
173
+ "131": "little blue heron, Egretta caerulea",
174
+ "132": "American egret, great white heron, Egretta albus",
175
+ "133": "bittern",
176
+ "134": "crane",
177
+ "135": "limpkin, Aramus pictus",
178
+ "136": "European gallinule, Porphyrio porphyrio",
179
+ "137": "American coot, marsh hen, mud hen, water hen, Fulica americana",
180
+ "138": "bustard",
181
+ "139": "ruddy turnstone, Arenaria interpres",
182
+ "140": "red-backed sandpiper, dunlin, Erolia alpina",
183
+ "141": "redshank, Tringa totanus",
184
+ "142": "dowitcher",
185
+ "143": "oystercatcher, oyster catcher",
186
+ "144": "pelican",
187
+ "145": "king penguin, Aptenodytes patagonica",
188
+ "146": "albatross, mollymawk",
189
+ "147": "grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus",
190
+ "148": "killer whale, killer, orca, grampus, sea wolf, Orcinus orca",
191
+ "149": "dugong, Dugong dugon",
192
+ "150": "sea lion",
193
+ "151": "Chihuahua",
194
+ "152": "Japanese spaniel",
195
+ "153": "Maltese dog, Maltese terrier, Maltese",
196
+ "154": "Pekinese, Pekingese, Peke",
197
+ "155": "Shih-Tzu",
198
+ "156": "Blenheim spaniel",
199
+ "157": "papillon",
200
+ "158": "toy terrier",
201
+ "159": "Rhodesian ridgeback",
202
+ "160": "Afghan hound, Afghan",
203
+ "161": "basset, basset hound",
204
+ "162": "beagle",
205
+ "163": "bloodhound, sleuthhound",
206
+ "164": "bluetick",
207
+ "165": "black-and-tan coonhound",
208
+ "166": "Walker hound, Walker foxhound",
209
+ "167": "English foxhound",
210
+ "168": "redbone",
211
+ "169": "borzoi, Russian wolfhound",
212
+ "170": "Irish wolfhound",
213
+ "171": "Italian greyhound",
214
+ "172": "whippet",
215
+ "173": "Ibizan hound, Ibizan Podenco",
216
+ "174": "Norwegian elkhound, elkhound",
217
+ "175": "otterhound, otter hound",
218
+ "176": "Saluki, gazelle hound",
219
+ "177": "Scottish deerhound, deerhound",
220
+ "178": "Weimaraner",
221
+ "179": "Staffordshire bullterrier, Staffordshire bull terrier",
222
+ "180": "American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier",
223
+ "181": "Bedlington terrier",
224
+ "182": "Border terrier",
225
+ "183": "Kerry blue terrier",
226
+ "184": "Irish terrier",
227
+ "185": "Norfolk terrier",
228
+ "186": "Norwich terrier",
229
+ "187": "Yorkshire terrier",
230
+ "188": "wire-haired fox terrier",
231
+ "189": "Lakeland terrier",
232
+ "190": "Sealyham terrier, Sealyham",
233
+ "191": "Airedale, Airedale terrier",
234
+ "192": "cairn, cairn terrier",
235
+ "193": "Australian terrier",
236
+ "194": "Dandie Dinmont, Dandie Dinmont terrier",
237
+ "195": "Boston bull, Boston terrier",
238
+ "196": "miniature schnauzer",
239
+ "197": "giant schnauzer",
240
+ "198": "standard schnauzer",
241
+ "199": "Scotch terrier, Scottish terrier, Scottie",
242
+ "200": "Tibetan terrier, chrysanthemum dog",
243
+ "201": "silky terrier, Sydney silky",
244
+ "202": "soft-coated wheaten terrier",
245
+ "203": "West Highland white terrier",
246
+ "204": "Lhasa, Lhasa apso",
247
+ "205": "flat-coated retriever",
248
+ "206": "curly-coated retriever",
249
+ "207": "golden retriever",
250
+ "208": "Labrador retriever",
251
+ "209": "Chesapeake Bay retriever",
252
+ "210": "German short-haired pointer",
253
+ "211": "vizsla, Hungarian pointer",
254
+ "212": "English setter",
255
+ "213": "Irish setter, red setter",
256
+ "214": "Gordon setter",
257
+ "215": "Brittany spaniel",
258
+ "216": "clumber, clumber spaniel",
259
+ "217": "English springer, English springer spaniel",
260
+ "218": "Welsh springer spaniel",
261
+ "219": "cocker spaniel, English cocker spaniel, cocker",
262
+ "220": "Sussex spaniel",
263
+ "221": "Irish water spaniel",
264
+ "222": "kuvasz",
265
+ "223": "schipperke",
266
+ "224": "groenendael",
267
+ "225": "malinois",
268
+ "226": "briard",
269
+ "227": "kelpie",
270
+ "228": "komondor",
271
+ "229": "Old English sheepdog, bobtail",
272
+ "230": "Shetland sheepdog, Shetland sheep dog, Shetland",
273
+ "231": "collie",
274
+ "232": "Border collie",
275
+ "233": "Bouvier des Flandres, Bouviers des Flandres",
276
+ "234": "Rottweiler",
277
+ "235": "German shepherd, German shepherd dog, German police dog, alsatian",
278
+ "236": "Doberman, Doberman pinscher",
279
+ "237": "miniature pinscher",
280
+ "238": "Greater Swiss Mountain dog",
281
+ "239": "Bernese mountain dog",
282
+ "240": "Appenzeller",
283
+ "241": "EntleBucher",
284
+ "242": "boxer",
285
+ "243": "bull mastiff",
286
+ "244": "Tibetan mastiff",
287
+ "245": "French bulldog",
288
+ "246": "Great Dane",
289
+ "247": "Saint Bernard, St Bernard",
290
+ "248": "Eskimo dog, husky",
291
+ "249": "malamute, malemute, Alaskan malamute",
292
+ "250": "Siberian husky",
293
+ "251": "dalmatian, coach dog, carriage dog",
294
+ "252": "affenpinscher, monkey pinscher, monkey dog",
295
+ "253": "basenji",
296
+ "254": "pug, pug-dog",
297
+ "255": "Leonberg",
298
+ "256": "Newfoundland, Newfoundland dog",
299
+ "257": "Great Pyrenees",
300
+ "258": "Samoyed, Samoyede",
301
+ "259": "Pomeranian",
302
+ "260": "chow, chow chow",
303
+ "261": "keeshond",
304
+ "262": "Brabancon griffon",
305
+ "263": "Pembroke, Pembroke Welsh corgi",
306
+ "264": "Cardigan, Cardigan Welsh corgi",
307
+ "265": "toy poodle",
308
+ "266": "miniature poodle",
309
+ "267": "standard poodle",
310
+ "268": "Mexican hairless",
311
+ "269": "timber wolf, grey wolf, gray wolf, Canis lupus",
312
+ "270": "white wolf, Arctic wolf, Canis lupus tundrarum",
313
+ "271": "red wolf, maned wolf, Canis rufus, Canis niger",
314
+ "272": "coyote, prairie wolf, brush wolf, Canis latrans",
315
+ "273": "dingo, warrigal, warragal, Canis dingo",
316
+ "274": "dhole, Cuon alpinus",
317
+ "275": "African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus",
318
+ "276": "hyena, hyaena",
319
+ "277": "red fox, Vulpes vulpes",
320
+ "278": "kit fox, Vulpes macrotis",
321
+ "279": "Arctic fox, white fox, Alopex lagopus",
322
+ "280": "grey fox, gray fox, Urocyon cinereoargenteus",
323
+ "281": "tabby, tabby cat",
324
+ "282": "tiger cat",
325
+ "283": "Persian cat",
326
+ "284": "Siamese cat, Siamese",
327
+ "285": "Egyptian cat",
328
+ "286": "cougar, puma, catamount, mountain lion, painter, panther, Felis concolor",
329
+ "287": "lynx, catamount",
330
+ "288": "leopard, Panthera pardus",
331
+ "289": "snow leopard, ounce, Panthera uncia",
332
+ "290": "jaguar, panther, Panthera onca, Felis onca",
333
+ "291": "lion, king of beasts, Panthera leo",
334
+ "292": "tiger, Panthera tigris",
335
+ "293": "cheetah, chetah, Acinonyx jubatus",
336
+ "294": "brown bear, bruin, Ursus arctos",
337
+ "295": "American black bear, black bear, Ursus americanus, Euarctos americanus",
338
+ "296": "ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus",
339
+ "297": "sloth bear, Melursus ursinus, Ursus ursinus",
340
+ "298": "mongoose",
341
+ "299": "meerkat, mierkat",
342
+ "300": "tiger beetle",
343
+ "301": "ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle",
344
+ "302": "ground beetle, carabid beetle",
345
+ "303": "long-horned beetle, longicorn, longicorn beetle",
346
+ "304": "leaf beetle, chrysomelid",
347
+ "305": "dung beetle",
348
+ "306": "rhinoceros beetle",
349
+ "307": "weevil",
350
+ "308": "fly",
351
+ "309": "bee",
352
+ "310": "ant, emmet, pismire",
353
+ "311": "grasshopper, hopper",
354
+ "312": "cricket",
355
+ "313": "walking stick, walkingstick, stick insect",
356
+ "314": "cockroach, roach",
357
+ "315": "mantis, mantid",
358
+ "316": "cicada, cicala",
359
+ "317": "leafhopper",
360
+ "318": "lacewing, lacewing fly",
361
+ "319": "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk",
362
+ "320": "damselfly",
363
+ "321": "admiral",
364
+ "322": "ringlet, ringlet butterfly",
365
+ "323": "monarch, monarch butterfly, milkweed butterfly, Danaus plexippus",
366
+ "324": "cabbage butterfly",
367
+ "325": "sulphur butterfly, sulfur butterfly",
368
+ "326": "lycaenid, lycaenid butterfly",
369
+ "327": "starfish, sea star",
370
+ "328": "sea urchin",
371
+ "329": "sea cucumber, holothurian",
372
+ "330": "wood rabbit, cottontail, cottontail rabbit",
373
+ "331": "hare",
374
+ "332": "Angora, Angora rabbit",
375
+ "333": "hamster",
376
+ "334": "porcupine, hedgehog",
377
+ "335": "fox squirrel, eastern fox squirrel, Sciurus niger",
378
+ "336": "marmot",
379
+ "337": "beaver",
380
+ "338": "guinea pig, Cavia cobaya",
381
+ "339": "sorrel",
382
+ "340": "zebra",
383
+ "341": "hog, pig, grunter, squealer, Sus scrofa",
384
+ "342": "wild boar, boar, Sus scrofa",
385
+ "343": "warthog",
386
+ "344": "hippopotamus, hippo, river horse, Hippopotamus amphibius",
387
+ "345": "ox",
388
+ "346": "water buffalo, water ox, Asiatic buffalo, Bubalus bubalis",
389
+ "347": "bison",
390
+ "348": "ram, tup",
391
+ "349": "bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis",
392
+ "350": "ibex, Capra ibex",
393
+ "351": "hartebeest",
394
+ "352": "impala, Aepyceros melampus",
395
+ "353": "gazelle",
396
+ "354": "Arabian camel, dromedary, Camelus dromedarius",
397
+ "355": "llama",
398
+ "356": "weasel",
399
+ "357": "mink",
400
+ "358": "polecat, fitch, foulmart, foumart, Mustela putorius",
401
+ "359": "black-footed ferret, ferret, Mustela nigripes",
402
+ "360": "otter",
403
+ "361": "skunk, polecat, wood pussy",
404
+ "362": "badger",
405
+ "363": "armadillo",
406
+ "364": "three-toed sloth, ai, Bradypus tridactylus",
407
+ "365": "orangutan, orang, orangutang, Pongo pygmaeus",
408
+ "366": "gorilla, Gorilla gorilla",
409
+ "367": "chimpanzee, chimp, Pan troglodytes",
410
+ "368": "gibbon, Hylobates lar",
411
+ "369": "siamang, Hylobates syndactylus, Symphalangus syndactylus",
412
+ "370": "guenon, guenon monkey",
413
+ "371": "patas, hussar monkey, Erythrocebus patas",
414
+ "372": "baboon",
415
+ "373": "macaque",
416
+ "374": "langur",
417
+ "375": "colobus, colobus monkey",
418
+ "376": "proboscis monkey, Nasalis larvatus",
419
+ "377": "marmoset",
420
+ "378": "capuchin, ringtail, Cebus capucinus",
421
+ "379": "howler monkey, howler",
422
+ "380": "titi, titi monkey",
423
+ "381": "spider monkey, Ateles geoffroyi",
424
+ "382": "squirrel monkey, Saimiri sciureus",
425
+ "383": "Madagascar cat, ring-tailed lemur, Lemur catta",
426
+ "384": "indri, indris, Indri indri, Indri brevicaudatus",
427
+ "385": "Indian elephant, Elephas maximus",
428
+ "386": "African elephant, Loxodonta africana",
429
+ "387": "lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens",
430
+ "388": "giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca",
431
+ "389": "barracouta, snoek",
432
+ "390": "eel",
433
+ "391": "coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch",
434
+ "392": "rock beauty, Holocanthus tricolor",
435
+ "393": "anemone fish",
436
+ "394": "sturgeon",
437
+ "395": "gar, garfish, garpike, billfish, Lepisosteus osseus",
438
+ "396": "lionfish",
439
+ "397": "puffer, pufferfish, blowfish, globefish",
440
+ "398": "abacus",
441
+ "399": "abaya",
442
+ "400": "academic gown, academic robe, judge's robe",
443
+ "401": "accordion, piano accordion, squeeze box",
444
+ "402": "acoustic guitar",
445
+ "403": "aircraft carrier, carrier, flattop, attack aircraft carrier",
446
+ "404": "airliner",
447
+ "405": "airship, dirigible",
448
+ "406": "altar",
449
+ "407": "ambulance",
450
+ "408": "amphibian, amphibious vehicle",
451
+ "409": "analog clock",
452
+ "410": "apiary, bee house",
453
+ "411": "apron",
454
+ "412": "ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin",
455
+ "413": "assault rifle, assault gun",
456
+ "414": "backpack, back pack, knapsack, packsack, rucksack, haversack",
457
+ "415": "bakery, bakeshop, bakehouse",
458
+ "416": "balance beam, beam",
459
+ "417": "balloon",
460
+ "418": "ballpoint, ballpoint pen, ballpen, Biro",
461
+ "419": "Band Aid",
462
+ "420": "banjo",
463
+ "421": "bannister, banister, balustrade, balusters, handrail",
464
+ "422": "barbell",
465
+ "423": "barber chair",
466
+ "424": "barbershop",
467
+ "425": "barn",
468
+ "426": "barometer",
469
+ "427": "barrel, cask",
470
+ "428": "barrow, garden cart, lawn cart, wheelbarrow",
471
+ "429": "baseball",
472
+ "430": "basketball",
473
+ "431": "bassinet",
474
+ "432": "bassoon",
475
+ "433": "bathing cap, swimming cap",
476
+ "434": "bath towel",
477
+ "435": "bathtub, bathing tub, bath, tub",
478
+ "436": "beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon",
479
+ "437": "beacon, lighthouse, beacon light, pharos",
480
+ "438": "beaker",
481
+ "439": "bearskin, busby, shako",
482
+ "440": "beer bottle",
483
+ "441": "beer glass",
484
+ "442": "bell cote, bell cot",
485
+ "443": "bib",
486
+ "444": "bicycle-built-for-two, tandem bicycle, tandem",
487
+ "445": "bikini, two-piece",
488
+ "446": "binder, ring-binder",
489
+ "447": "binoculars, field glasses, opera glasses",
490
+ "448": "birdhouse",
491
+ "449": "boathouse",
492
+ "450": "bobsled, bobsleigh, bob",
493
+ "451": "bolo tie, bolo, bola tie, bola",
494
+ "452": "bonnet, poke bonnet",
495
+ "453": "bookcase",
496
+ "454": "bookshop, bookstore, bookstall",
497
+ "455": "bottlecap",
498
+ "456": "bow",
499
+ "457": "bow tie, bow-tie, bowtie",
500
+ "458": "brass, memorial tablet, plaque",
501
+ "459": "brassiere, bra, bandeau",
502
+ "460": "breakwater, groin, groyne, mole, bulwark, seawall, jetty",
503
+ "461": "breastplate, aegis, egis",
504
+ "462": "broom",
505
+ "463": "bucket, pail",
506
+ "464": "buckle",
507
+ "465": "bulletproof vest",
508
+ "466": "bullet train, bullet",
509
+ "467": "butcher shop, meat market",
510
+ "468": "cab, hack, taxi, taxicab",
511
+ "469": "caldron, cauldron",
512
+ "470": "candle, taper, wax light",
513
+ "471": "cannon",
514
+ "472": "canoe",
515
+ "473": "can opener, tin opener",
516
+ "474": "cardigan",
517
+ "475": "car mirror",
518
+ "476": "carousel, carrousel, merry-go-round, roundabout, whirligig",
519
+ "477": "carpenter's kit, tool kit",
520
+ "478": "carton",
521
+ "479": "car wheel",
522
+ "480": "cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM",
523
+ "481": "cassette",
524
+ "482": "cassette player",
525
+ "483": "castle",
526
+ "484": "catamaran",
527
+ "485": "CD player",
528
+ "486": "cello, violoncello",
529
+ "487": "cellular telephone, cellular phone, cellphone, cell, mobile phone",
530
+ "488": "chain",
531
+ "489": "chainlink fence",
532
+ "490": "chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour",
533
+ "491": "chain saw, chainsaw",
534
+ "492": "chest",
535
+ "493": "chiffonier, commode",
536
+ "494": "chime, bell, gong",
537
+ "495": "china cabinet, china closet",
538
+ "496": "Christmas stocking",
539
+ "497": "church, church building",
540
+ "498": "cinema, movie theater, movie theatre, movie house, picture palace",
541
+ "499": "cleaver, meat cleaver, chopper",
542
+ "500": "cliff dwelling",
543
+ "501": "cloak",
544
+ "502": "clog, geta, patten, sabot",
545
+ "503": "cocktail shaker",
546
+ "504": "coffee mug",
547
+ "505": "coffeepot",
548
+ "506": "coil, spiral, volute, whorl, helix",
549
+ "507": "combination lock",
550
+ "508": "computer keyboard, keypad",
551
+ "509": "confectionery, confectionary, candy store",
552
+ "510": "container ship, containership, container vessel",
553
+ "511": "convertible",
554
+ "512": "corkscrew, bottle screw",
555
+ "513": "cornet, horn, trumpet, trump",
556
+ "514": "cowboy boot",
557
+ "515": "cowboy hat, ten-gallon hat",
558
+ "516": "cradle",
559
+ "517": "crane",
560
+ "518": "crash helmet",
561
+ "519": "crate",
562
+ "520": "crib, cot",
563
+ "521": "Crock Pot",
564
+ "522": "croquet ball",
565
+ "523": "crutch",
566
+ "524": "cuirass",
567
+ "525": "dam, dike, dyke",
568
+ "526": "desk",
569
+ "527": "desktop computer",
570
+ "528": "dial telephone, dial phone",
571
+ "529": "diaper, nappy, napkin",
572
+ "530": "digital clock",
573
+ "531": "digital watch",
574
+ "532": "dining table, board",
575
+ "533": "dishrag, dishcloth",
576
+ "534": "dishwasher, dish washer, dishwashing machine",
577
+ "535": "disk brake, disc brake",
578
+ "536": "dock, dockage, docking facility",
579
+ "537": "dogsled, dog sled, dog sleigh",
580
+ "538": "dome",
581
+ "539": "doormat, welcome mat",
582
+ "540": "drilling platform, offshore rig",
583
+ "541": "drum, membranophone, tympan",
584
+ "542": "drumstick",
585
+ "543": "dumbbell",
586
+ "544": "Dutch oven",
587
+ "545": "electric fan, blower",
588
+ "546": "electric guitar",
589
+ "547": "electric locomotive",
590
+ "548": "entertainment center",
591
+ "549": "envelope",
592
+ "550": "espresso maker",
593
+ "551": "face powder",
594
+ "552": "feather boa, boa",
595
+ "553": "file, file cabinet, filing cabinet",
596
+ "554": "fireboat",
597
+ "555": "fire engine, fire truck",
598
+ "556": "fire screen, fireguard",
599
+ "557": "flagpole, flagstaff",
600
+ "558": "flute, transverse flute",
601
+ "559": "folding chair",
602
+ "560": "football helmet",
603
+ "561": "forklift",
604
+ "562": "fountain",
605
+ "563": "fountain pen",
606
+ "564": "four-poster",
607
+ "565": "freight car",
608
+ "566": "French horn, horn",
609
+ "567": "frying pan, frypan, skillet",
610
+ "568": "fur coat",
611
+ "569": "garbage truck, dustcart",
612
+ "570": "gasmask, respirator, gas helmet",
613
+ "571": "gas pump, gasoline pump, petrol pump, island dispenser",
614
+ "572": "goblet",
615
+ "573": "go-kart",
616
+ "574": "golf ball",
617
+ "575": "golfcart, golf cart",
618
+ "576": "gondola",
619
+ "577": "gong, tam-tam",
620
+ "578": "gown",
621
+ "579": "grand piano, grand",
622
+ "580": "greenhouse, nursery, glasshouse",
623
+ "581": "grille, radiator grille",
624
+ "582": "grocery store, grocery, food market, market",
625
+ "583": "guillotine",
626
+ "584": "hair slide",
627
+ "585": "hair spray",
628
+ "586": "half track",
629
+ "587": "hammer",
630
+ "588": "hamper",
631
+ "589": "hand blower, blow dryer, blow drier, hair dryer, hair drier",
632
+ "590": "hand-held computer, hand-held microcomputer",
633
+ "591": "handkerchief, hankie, hanky, hankey",
634
+ "592": "hard disc, hard disk, fixed disk",
635
+ "593": "harmonica, mouth organ, harp, mouth harp",
636
+ "594": "harp",
637
+ "595": "harvester, reaper",
638
+ "596": "hatchet",
639
+ "597": "holster",
640
+ "598": "home theater, home theatre",
641
+ "599": "honeycomb",
642
+ "600": "hook, claw",
643
+ "601": "hoopskirt, crinoline",
644
+ "602": "horizontal bar, high bar",
645
+ "603": "horse cart, horse-cart",
646
+ "604": "hourglass",
647
+ "605": "iPod",
648
+ "606": "iron, smoothing iron",
649
+ "607": "jack-o'-lantern",
650
+ "608": "jean, blue jean, denim",
651
+ "609": "jeep, landrover",
652
+ "610": "jersey, T-shirt, tee shirt",
653
+ "611": "jigsaw puzzle",
654
+ "612": "jinrikisha, ricksha, rickshaw",
655
+ "613": "joystick",
656
+ "614": "kimono",
657
+ "615": "knee pad",
658
+ "616": "knot",
659
+ "617": "lab coat, laboratory coat",
660
+ "618": "ladle",
661
+ "619": "lampshade, lamp shade",
662
+ "620": "laptop, laptop computer",
663
+ "621": "lawn mower, mower",
664
+ "622": "lens cap, lens cover",
665
+ "623": "letter opener, paper knife, paperknife",
666
+ "624": "library",
667
+ "625": "lifeboat",
668
+ "626": "lighter, light, igniter, ignitor",
669
+ "627": "limousine, limo",
670
+ "628": "liner, ocean liner",
671
+ "629": "lipstick, lip rouge",
672
+ "630": "Loafer",
673
+ "631": "lotion",
674
+ "632": "loudspeaker, speaker, speaker unit, loudspeaker system, speaker system",
675
+ "633": "loupe, jeweler's loupe",
676
+ "634": "lumbermill, sawmill",
677
+ "635": "magnetic compass",
678
+ "636": "mailbag, postbag",
679
+ "637": "mailbox, letter box",
680
+ "638": "maillot",
681
+ "639": "maillot, tank suit",
682
+ "640": "manhole cover",
683
+ "641": "maraca",
684
+ "642": "marimba, xylophone",
685
+ "643": "mask",
686
+ "644": "matchstick",
687
+ "645": "maypole",
688
+ "646": "maze, labyrinth",
689
+ "647": "measuring cup",
690
+ "648": "medicine chest, medicine cabinet",
691
+ "649": "megalith, megalithic structure",
692
+ "650": "microphone, mike",
693
+ "651": "microwave, microwave oven",
694
+ "652": "military uniform",
695
+ "653": "milk can",
696
+ "654": "minibus",
697
+ "655": "miniskirt, mini",
698
+ "656": "minivan",
699
+ "657": "missile",
700
+ "658": "mitten",
701
+ "659": "mixing bowl",
702
+ "660": "mobile home, manufactured home",
703
+ "661": "Model T",
704
+ "662": "modem",
705
+ "663": "monastery",
706
+ "664": "monitor",
707
+ "665": "moped",
708
+ "666": "mortar",
709
+ "667": "mortarboard",
710
+ "668": "mosque",
711
+ "669": "mosquito net",
712
+ "670": "motor scooter, scooter",
713
+ "671": "mountain bike, all-terrain bike, off-roader",
714
+ "672": "mountain tent",
715
+ "673": "mouse, computer mouse",
716
+ "674": "mousetrap",
717
+ "675": "moving van",
718
+ "676": "muzzle",
719
+ "677": "nail",
720
+ "678": "neck brace",
721
+ "679": "necklace",
722
+ "680": "nipple",
723
+ "681": "notebook, notebook computer",
724
+ "682": "obelisk",
725
+ "683": "oboe, hautboy, hautbois",
726
+ "684": "ocarina, sweet potato",
727
+ "685": "odometer, hodometer, mileometer, milometer",
728
+ "686": "oil filter",
729
+ "687": "organ, pipe organ",
730
+ "688": "oscilloscope, scope, cathode-ray oscilloscope, CRO",
731
+ "689": "overskirt",
732
+ "690": "oxcart",
733
+ "691": "oxygen mask",
734
+ "692": "packet",
735
+ "693": "paddle, boat paddle",
736
+ "694": "paddlewheel, paddle wheel",
737
+ "695": "padlock",
738
+ "696": "paintbrush",
739
+ "697": "pajama, pyjama, pj's, jammies",
740
+ "698": "palace",
741
+ "699": "panpipe, pandean pipe, syrinx",
742
+ "700": "paper towel",
743
+ "701": "parachute, chute",
744
+ "702": "parallel bars, bars",
745
+ "703": "park bench",
746
+ "704": "parking meter",
747
+ "705": "passenger car, coach, carriage",
748
+ "706": "patio, terrace",
749
+ "707": "pay-phone, pay-station",
750
+ "708": "pedestal, plinth, footstall",
751
+ "709": "pencil box, pencil case",
752
+ "710": "pencil sharpener",
753
+ "711": "perfume, essence",
754
+ "712": "Petri dish",
755
+ "713": "photocopier",
756
+ "714": "pick, plectrum, plectron",
757
+ "715": "pickelhaube",
758
+ "716": "picket fence, paling",
759
+ "717": "pickup, pickup truck",
760
+ "718": "pier",
761
+ "719": "piggy bank, penny bank",
762
+ "720": "pill bottle",
763
+ "721": "pillow",
764
+ "722": "ping-pong ball",
765
+ "723": "pinwheel",
766
+ "724": "pirate, pirate ship",
767
+ "725": "pitcher, ewer",
768
+ "726": "plane, carpenter's plane, woodworking plane",
769
+ "727": "planetarium",
770
+ "728": "plastic bag",
771
+ "729": "plate rack",
772
+ "730": "plow, plough",
773
+ "731": "plunger, plumber's helper",
774
+ "732": "Polaroid camera, Polaroid Land camera",
775
+ "733": "pole",
776
+ "734": "police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria",
777
+ "735": "poncho",
778
+ "736": "pool table, billiard table, snooker table",
779
+ "737": "pop bottle, soda bottle",
780
+ "738": "pot, flowerpot",
781
+ "739": "potter's wheel",
782
+ "740": "power drill",
783
+ "741": "prayer rug, prayer mat",
784
+ "742": "printer",
785
+ "743": "prison, prison house",
786
+ "744": "projectile, missile",
787
+ "745": "projector",
788
+ "746": "puck, hockey puck",
789
+ "747": "punching bag, punch bag, punching ball, punchball",
790
+ "748": "purse",
791
+ "749": "quill, quill pen",
792
+ "750": "quilt, comforter, comfort, puff",
793
+ "751": "racer, race car, racing car",
794
+ "752": "racket, racquet",
795
+ "753": "radiator",
796
+ "754": "radio, wireless",
797
+ "755": "radio telescope, radio reflector",
798
+ "756": "rain barrel",
799
+ "757": "recreational vehicle, RV, R.V.",
800
+ "758": "reel",
801
+ "759": "reflex camera",
802
+ "760": "refrigerator, icebox",
803
+ "761": "remote control, remote",
804
+ "762": "restaurant, eating house, eating place, eatery",
805
+ "763": "revolver, six-gun, six-shooter",
806
+ "764": "rifle",
807
+ "765": "rocking chair, rocker",
808
+ "766": "rotisserie",
809
+ "767": "rubber eraser, rubber, pencil eraser",
810
+ "768": "rugby ball",
811
+ "769": "rule, ruler",
812
+ "770": "running shoe",
813
+ "771": "safe",
814
+ "772": "safety pin",
815
+ "773": "saltshaker, salt shaker",
816
+ "774": "sandal",
817
+ "775": "sarong",
818
+ "776": "sax, saxophone",
819
+ "777": "scabbard",
820
+ "778": "scale, weighing machine",
821
+ "779": "school bus",
822
+ "780": "schooner",
823
+ "781": "scoreboard",
824
+ "782": "screen, CRT screen",
825
+ "783": "screw",
826
+ "784": "screwdriver",
827
+ "785": "seat belt, seatbelt",
828
+ "786": "sewing machine",
829
+ "787": "shield, buckler",
830
+ "788": "shoe shop, shoe-shop, shoe store",
831
+ "789": "shoji",
832
+ "790": "shopping basket",
833
+ "791": "shopping cart",
834
+ "792": "shovel",
835
+ "793": "shower cap",
836
+ "794": "shower curtain",
837
+ "795": "ski",
838
+ "796": "ski mask",
839
+ "797": "sleeping bag",
840
+ "798": "slide rule, slipstick",
841
+ "799": "sliding door",
842
+ "800": "slot, one-armed bandit",
843
+ "801": "snorkel",
844
+ "802": "snowmobile",
845
+ "803": "snowplow, snowplough",
846
+ "804": "soap dispenser",
847
+ "805": "soccer ball",
848
+ "806": "sock",
849
+ "807": "solar dish, solar collector, solar furnace",
850
+ "808": "sombrero",
851
+ "809": "soup bowl",
852
+ "810": "space bar",
853
+ "811": "space heater",
854
+ "812": "space shuttle",
855
+ "813": "spatula",
856
+ "814": "speedboat",
857
+ "815": "spider web, spider's web",
858
+ "816": "spindle",
859
+ "817": "sports car, sport car",
860
+ "818": "spotlight, spot",
861
+ "819": "stage",
862
+ "820": "steam locomotive",
863
+ "821": "steel arch bridge",
864
+ "822": "steel drum",
865
+ "823": "stethoscope",
866
+ "824": "stole",
867
+ "825": "stone wall",
868
+ "826": "stopwatch, stop watch",
869
+ "827": "stove",
870
+ "828": "strainer",
871
+ "829": "streetcar, tram, tramcar, trolley, trolley car",
872
+ "830": "stretcher",
873
+ "831": "studio couch, day bed",
874
+ "832": "stupa, tope",
875
+ "833": "submarine, pigboat, sub, U-boat",
876
+ "834": "suit, suit of clothes",
877
+ "835": "sundial",
878
+ "836": "sunglass",
879
+ "837": "sunglasses, dark glasses, shades",
880
+ "838": "sunscreen, sunblock, sun blocker",
881
+ "839": "suspension bridge",
882
+ "840": "swab, swob, mop",
883
+ "841": "sweatshirt",
884
+ "842": "swimming trunks, bathing trunks",
885
+ "843": "swing",
886
+ "844": "switch, electric switch, electrical switch",
887
+ "845": "syringe",
888
+ "846": "table lamp",
889
+ "847": "tank, army tank, armored combat vehicle, armoured combat vehicle",
890
+ "848": "tape player",
891
+ "849": "teapot",
892
+ "850": "teddy, teddy bear",
893
+ "851": "television, television system",
894
+ "852": "tennis ball",
895
+ "853": "thatch, thatched roof",
896
+ "854": "theater curtain, theatre curtain",
897
+ "855": "thimble",
898
+ "856": "thresher, thrasher, threshing machine",
899
+ "857": "throne",
900
+ "858": "tile roof",
901
+ "859": "toaster",
902
+ "860": "tobacco shop, tobacconist shop, tobacconist",
903
+ "861": "toilet seat",
904
+ "862": "torch",
905
+ "863": "totem pole",
906
+ "864": "tow truck, tow car, wrecker",
907
+ "865": "toyshop",
908
+ "866": "tractor",
909
+ "867": "trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi",
910
+ "868": "tray",
911
+ "869": "trench coat",
912
+ "870": "tricycle, trike, velocipede",
913
+ "871": "trimaran",
914
+ "872": "tripod",
915
+ "873": "triumphal arch",
916
+ "874": "trolleybus, trolley coach, trackless trolley",
917
+ "875": "trombone",
918
+ "876": "tub, vat",
919
+ "877": "turnstile",
920
+ "878": "typewriter keyboard",
921
+ "879": "umbrella",
922
+ "880": "unicycle, monocycle",
923
+ "881": "upright, upright piano",
924
+ "882": "vacuum, vacuum cleaner",
925
+ "883": "vase",
926
+ "884": "vault",
927
+ "885": "velvet",
928
+ "886": "vending machine",
929
+ "887": "vestment",
930
+ "888": "viaduct",
931
+ "889": "violin, fiddle",
932
+ "890": "volleyball",
933
+ "891": "waffle iron",
934
+ "892": "wall clock",
935
+ "893": "wallet, billfold, notecase, pocketbook",
936
+ "894": "wardrobe, closet, press",
937
+ "895": "warplane, military plane",
938
+ "896": "washbasin, handbasin, washbowl, lavabo, wash-hand basin",
939
+ "897": "washer, automatic washer, washing machine",
940
+ "898": "water bottle",
941
+ "899": "water jug",
942
+ "900": "water tower",
943
+ "901": "whiskey jug",
944
+ "902": "whistle",
945
+ "903": "wig",
946
+ "904": "window screen",
947
+ "905": "window shade",
948
+ "906": "Windsor tie",
949
+ "907": "wine bottle",
950
+ "908": "wing",
951
+ "909": "wok",
952
+ "910": "wooden spoon",
953
+ "911": "wool, woolen, woollen",
954
+ "912": "worm fence, snake fence, snake-rail fence, Virginia fence",
955
+ "913": "wreck",
956
+ "914": "yawl",
957
+ "915": "yurt",
958
+ "916": "web site, website, internet site, site",
959
+ "917": "comic book",
960
+ "918": "crossword puzzle, crossword",
961
+ "919": "street sign",
962
+ "920": "traffic light, traffic signal, stoplight",
963
+ "921": "book jacket, dust cover, dust jacket, dust wrapper",
964
+ "922": "menu",
965
+ "923": "plate",
966
+ "924": "guacamole",
967
+ "925": "consomme",
968
+ "926": "hot pot, hotpot",
969
+ "927": "trifle",
970
+ "928": "ice cream, icecream",
971
+ "929": "ice lolly, lolly, lollipop, popsicle",
972
+ "930": "French loaf",
973
+ "931": "bagel, beigel",
974
+ "932": "pretzel",
975
+ "933": "cheeseburger",
976
+ "934": "hotdog, hot dog, red hot",
977
+ "935": "mashed potato",
978
+ "936": "head cabbage",
979
+ "937": "broccoli",
980
+ "938": "cauliflower",
981
+ "939": "zucchini, courgette",
982
+ "940": "spaghetti squash",
983
+ "941": "acorn squash",
984
+ "942": "butternut squash",
985
+ "943": "cucumber, cuke",
986
+ "944": "artichoke, globe artichoke",
987
+ "945": "bell pepper",
988
+ "946": "cardoon",
989
+ "947": "mushroom",
990
+ "948": "Granny Smith",
991
+ "949": "strawberry",
992
+ "950": "orange",
993
+ "951": "lemon",
994
+ "952": "fig",
995
+ "953": "pineapple, ananas",
996
+ "954": "banana",
997
+ "955": "jackfruit, jak, jack",
998
+ "956": "custard apple",
999
+ "957": "pomegranate",
1000
+ "958": "hay",
1001
+ "959": "carbonara",
1002
+ "960": "chocolate sauce, chocolate syrup",
1003
+ "961": "dough",
1004
+ "962": "meat loaf, meatloaf",
1005
+ "963": "pizza, pizza pie",
1006
+ "964": "potpie",
1007
+ "965": "burrito",
1008
+ "966": "red wine",
1009
+ "967": "espresso",
1010
+ "968": "cup",
1011
+ "969": "eggnog",
1012
+ "970": "alp",
1013
+ "971": "bubble",
1014
+ "972": "cliff, drop, drop-off",
1015
+ "973": "coral reef",
1016
+ "974": "geyser",
1017
+ "975": "lakeside, lakeshore",
1018
+ "976": "promontory, headland, head, foreland",
1019
+ "977": "sandbar, sand bar",
1020
+ "978": "seashore, coast, seacoast, sea-coast",
1021
+ "979": "valley, vale",
1022
+ "980": "volcano",
1023
+ "981": "ballplayer, baseball player",
1024
+ "982": "groom, bridegroom",
1025
+ "983": "scuba diver",
1026
+ "984": "rapeseed",
1027
+ "985": "daisy",
1028
+ "986": "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum",
1029
+ "987": "corn",
1030
+ "988": "acorn",
1031
+ "989": "hip, rose hip, rosehip",
1032
+ "990": "buckeye, horse chestnut, conker",
1033
+ "991": "coral fungus",
1034
+ "992": "agaric",
1035
+ "993": "gyromitra",
1036
+ "994": "stinkhorn, carrion fungus",
1037
+ "995": "earthstar",
1038
+ "996": "hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa",
1039
+ "997": "bolete",
1040
+ "998": "ear, spike, capitulum",
1041
+ "999": "toilet tissue, toilet paper, bathroom tissue"
1042
+ },
1043
+ "image_size": 224,
1044
+ "initializer_range": 0.02,
1045
+ "is_decoder": false,
1046
+ "is_encoder_decoder": false,
1047
+ "label2id": {
1048
+ "Afghan hound, Afghan": 160,
1049
+ "African chameleon, Chamaeleo chamaeleon": 47,
1050
+ "African crocodile, Nile crocodile, Crocodylus niloticus": 49,
1051
+ "African elephant, Loxodonta africana": 386,
1052
+ "African grey, African gray, Psittacus erithacus": 87,
1053
+ "African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus": 275,
1054
+ "Airedale, Airedale terrier": 191,
1055
+ "American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier": 180,
1056
+ "American alligator, Alligator mississipiensis": 50,
1057
+ "American black bear, black bear, Ursus americanus, Euarctos americanus": 295,
1058
+ "American chameleon, anole, Anolis carolinensis": 40,
1059
+ "American coot, marsh hen, mud hen, water hen, Fulica americana": 137,
1060
+ "American egret, great white heron, Egretta albus": 132,
1061
+ "American lobster, Northern lobster, Maine lobster, Homarus americanus": 122,
1062
+ "Angora, Angora rabbit": 332,
1063
+ "Appenzeller": 240,
1064
+ "Arabian camel, dromedary, Camelus dromedarius": 354,
1065
+ "Arctic fox, white fox, Alopex lagopus": 279,
1066
+ "Australian terrier": 193,
1067
+ "Band Aid": 419,
1068
+ "Bedlington terrier": 181,
1069
+ "Bernese mountain dog": 239,
1070
+ "Blenheim spaniel": 156,
1071
+ "Border collie": 232,
1072
+ "Border terrier": 182,
1073
+ "Boston bull, Boston terrier": 195,
1074
+ "Bouvier des Flandres, Bouviers des Flandres": 233,
1075
+ "Brabancon griffon": 262,
1076
+ "Brittany spaniel": 215,
1077
+ "CD player": 485,
1078
+ "Cardigan, Cardigan Welsh corgi": 264,
1079
+ "Chesapeake Bay retriever": 209,
1080
+ "Chihuahua": 151,
1081
+ "Christmas stocking": 496,
1082
+ "Crock Pot": 521,
1083
+ "Dandie Dinmont, Dandie Dinmont terrier": 194,
1084
+ "Doberman, Doberman pinscher": 236,
1085
+ "Dungeness crab, Cancer magister": 118,
1086
+ "Dutch oven": 544,
1087
+ "Egyptian cat": 285,
1088
+ "English foxhound": 167,
1089
+ "English setter": 212,
1090
+ "English springer, English springer spaniel": 217,
1091
+ "EntleBucher": 241,
1092
+ "Eskimo dog, husky": 248,
1093
+ "European fire salamander, Salamandra salamandra": 25,
1094
+ "European gallinule, Porphyrio porphyrio": 136,
1095
+ "French bulldog": 245,
1096
+ "French horn, horn": 566,
1097
+ "French loaf": 930,
1098
+ "German shepherd, German shepherd dog, German police dog, alsatian": 235,
1099
+ "German short-haired pointer": 210,
1100
+ "Gila monster, Heloderma suspectum": 45,
1101
+ "Gordon setter": 214,
1102
+ "Granny Smith": 948,
1103
+ "Great Dane": 246,
1104
+ "Great Pyrenees": 257,
1105
+ "Greater Swiss Mountain dog": 238,
1106
+ "Ibizan hound, Ibizan Podenco": 173,
1107
+ "Indian cobra, Naja naja": 63,
1108
+ "Indian elephant, Elephas maximus": 385,
1109
+ "Irish setter, red setter": 213,
1110
+ "Irish terrier": 184,
1111
+ "Irish water spaniel": 221,
1112
+ "Irish wolfhound": 170,
1113
+ "Italian greyhound": 171,
1114
+ "Japanese spaniel": 152,
1115
+ "Kerry blue terrier": 183,
1116
+ "Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis": 48,
1117
+ "Labrador retriever": 208,
1118
+ "Lakeland terrier": 189,
1119
+ "Leonberg": 255,
1120
+ "Lhasa, Lhasa apso": 204,
1121
+ "Loafer": 630,
1122
+ "Madagascar cat, ring-tailed lemur, Lemur catta": 383,
1123
+ "Maltese dog, Maltese terrier, Maltese": 153,
1124
+ "Mexican hairless": 268,
1125
+ "Model T": 661,
1126
+ "Newfoundland, Newfoundland dog": 256,
1127
+ "Norfolk terrier": 185,
1128
+ "Norwegian elkhound, elkhound": 174,
1129
+ "Norwich terrier": 186,
1130
+ "Old English sheepdog, bobtail": 229,
1131
+ "Pekinese, Pekingese, Peke": 154,
1132
+ "Pembroke, Pembroke Welsh corgi": 263,
1133
+ "Persian cat": 283,
1134
+ "Petri dish": 712,
1135
+ "Polaroid camera, Polaroid Land camera": 732,
1136
+ "Pomeranian": 259,
1137
+ "Rhodesian ridgeback": 159,
1138
+ "Rottweiler": 234,
1139
+ "Saint Bernard, St Bernard": 247,
1140
+ "Saluki, gazelle hound": 176,
1141
+ "Samoyed, Samoyede": 258,
1142
+ "Scotch terrier, Scottish terrier, Scottie": 199,
1143
+ "Scottish deerhound, deerhound": 177,
1144
+ "Sealyham terrier, Sealyham": 190,
1145
+ "Shetland sheepdog, Shetland sheep dog, Shetland": 230,
1146
+ "Shih-Tzu": 155,
1147
+ "Siamese cat, Siamese": 284,
1148
+ "Siberian husky": 250,
1149
+ "Staffordshire bullterrier, Staffordshire bull terrier": 179,
1150
+ "Sussex spaniel": 220,
1151
+ "Tibetan mastiff": 244,
1152
+ "Tibetan terrier, chrysanthemum dog": 200,
1153
+ "Walker hound, Walker foxhound": 166,
1154
+ "Weimaraner": 178,
1155
+ "Welsh springer spaniel": 218,
1156
+ "West Highland white terrier": 203,
1157
+ "Windsor tie": 906,
1158
+ "Yorkshire terrier": 187,
1159
+ "abacus": 398,
1160
+ "abaya": 399,
1161
+ "academic gown, academic robe, judge's robe": 400,
1162
+ "accordion, piano accordion, squeeze box": 401,
1163
+ "acorn": 988,
1164
+ "acorn squash": 941,
1165
+ "acoustic guitar": 402,
1166
+ "admiral": 321,
1167
+ "affenpinscher, monkey pinscher, monkey dog": 252,
1168
+ "agama": 42,
1169
+ "agaric": 992,
1170
+ "aircraft carrier, carrier, flattop, attack aircraft carrier": 403,
1171
+ "airliner": 404,
1172
+ "airship, dirigible": 405,
1173
+ "albatross, mollymawk": 146,
1174
+ "alligator lizard": 44,
1175
+ "alp": 970,
1176
+ "altar": 406,
1177
+ "ambulance": 407,
1178
+ "amphibian, amphibious vehicle": 408,
1179
+ "analog clock": 409,
1180
+ "anemone fish": 393,
1181
+ "ant, emmet, pismire": 310,
1182
+ "apiary, bee house": 410,
1183
+ "apron": 411,
1184
+ "armadillo": 363,
1185
+ "artichoke, globe artichoke": 944,
1186
+ "ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin": 412,
1187
+ "assault rifle, assault gun": 413,
1188
+ "axolotl, mud puppy, Ambystoma mexicanum": 29,
1189
+ "baboon": 372,
1190
+ "backpack, back pack, knapsack, packsack, rucksack, haversack": 414,
1191
+ "badger": 362,
1192
+ "bagel, beigel": 931,
1193
+ "bakery, bakeshop, bakehouse": 415,
1194
+ "balance beam, beam": 416,
1195
+ "bald eagle, American eagle, Haliaeetus leucocephalus": 22,
1196
+ "balloon": 417,
1197
+ "ballplayer, baseball player": 981,
1198
+ "ballpoint, ballpoint pen, ballpen, Biro": 418,
1199
+ "banana": 954,
1200
+ "banded gecko": 38,
1201
+ "banjo": 420,
1202
+ "bannister, banister, balustrade, balusters, handrail": 421,
1203
+ "barbell": 422,
1204
+ "barber chair": 423,
1205
+ "barbershop": 424,
1206
+ "barn": 425,
1207
+ "barn spider, Araneus cavaticus": 73,
1208
+ "barometer": 426,
1209
+ "barracouta, snoek": 389,
1210
+ "barrel, cask": 427,
1211
+ "barrow, garden cart, lawn cart, wheelbarrow": 428,
1212
+ "baseball": 429,
1213
+ "basenji": 253,
1214
+ "basketball": 430,
1215
+ "basset, basset hound": 161,
1216
+ "bassinet": 431,
1217
+ "bassoon": 432,
1218
+ "bath towel": 434,
1219
+ "bathing cap, swimming cap": 433,
1220
+ "bathtub, bathing tub, bath, tub": 435,
1221
+ "beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon": 436,
1222
+ "beacon, lighthouse, beacon light, pharos": 437,
1223
+ "beagle": 162,
1224
+ "beaker": 438,
1225
+ "bearskin, busby, shako": 439,
1226
+ "beaver": 337,
1227
+ "bee": 309,
1228
+ "bee eater": 92,
1229
+ "beer bottle": 440,
1230
+ "beer glass": 441,
1231
+ "bell cote, bell cot": 442,
1232
+ "bell pepper": 945,
1233
+ "bib": 443,
1234
+ "bicycle-built-for-two, tandem bicycle, tandem": 444,
1235
+ "bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis": 349,
1236
+ "bikini, two-piece": 445,
1237
+ "binder, ring-binder": 446,
1238
+ "binoculars, field glasses, opera glasses": 447,
1239
+ "birdhouse": 448,
1240
+ "bison": 347,
1241
+ "bittern": 133,
1242
+ "black and gold garden spider, Argiope aurantia": 72,
1243
+ "black grouse": 80,
1244
+ "black stork, Ciconia nigra": 128,
1245
+ "black swan, Cygnus atratus": 100,
1246
+ "black widow, Latrodectus mactans": 75,
1247
+ "black-and-tan coonhound": 165,
1248
+ "black-footed ferret, ferret, Mustela nigripes": 359,
1249
+ "bloodhound, sleuthhound": 163,
1250
+ "bluetick": 164,
1251
+ "boa constrictor, Constrictor constrictor": 61,
1252
+ "boathouse": 449,
1253
+ "bobsled, bobsleigh, bob": 450,
1254
+ "bolete": 997,
1255
+ "bolo tie, bolo, bola tie, bola": 451,
1256
+ "bonnet, poke bonnet": 452,
1257
+ "book jacket, dust cover, dust jacket, dust wrapper": 921,
1258
+ "bookcase": 453,
1259
+ "bookshop, bookstore, bookstall": 454,
1260
+ "borzoi, Russian wolfhound": 169,
1261
+ "bottlecap": 455,
1262
+ "bow": 456,
1263
+ "bow tie, bow-tie, bowtie": 457,
1264
+ "box turtle, box tortoise": 37,
1265
+ "boxer": 242,
1266
+ "brain coral": 109,
1267
+ "brambling, Fringilla montifringilla": 10,
1268
+ "brass, memorial tablet, plaque": 458,
1269
+ "brassiere, bra, bandeau": 459,
1270
+ "breakwater, groin, groyne, mole, bulwark, seawall, jetty": 460,
1271
+ "breastplate, aegis, egis": 461,
1272
+ "briard": 226,
1273
+ "broccoli": 937,
1274
+ "broom": 462,
1275
+ "brown bear, bruin, Ursus arctos": 294,
1276
+ "bubble": 971,
1277
+ "bucket, pail": 463,
1278
+ "buckeye, horse chestnut, conker": 990,
1279
+ "buckle": 464,
1280
+ "bulbul": 16,
1281
+ "bull mastiff": 243,
1282
+ "bullet train, bullet": 466,
1283
+ "bulletproof vest": 465,
1284
+ "bullfrog, Rana catesbeiana": 30,
1285
+ "burrito": 965,
1286
+ "bustard": 138,
1287
+ "butcher shop, meat market": 467,
1288
+ "butternut squash": 942,
1289
+ "cab, hack, taxi, taxicab": 468,
1290
+ "cabbage butterfly": 324,
1291
+ "cairn, cairn terrier": 192,
1292
+ "caldron, cauldron": 469,
1293
+ "can opener, tin opener": 473,
1294
+ "candle, taper, wax light": 470,
1295
+ "cannon": 471,
1296
+ "canoe": 472,
1297
+ "capuchin, ringtail, Cebus capucinus": 378,
1298
+ "car mirror": 475,
1299
+ "car wheel": 479,
1300
+ "carbonara": 959,
1301
+ "cardigan": 474,
1302
+ "cardoon": 946,
1303
+ "carousel, carrousel, merry-go-round, roundabout, whirligig": 476,
1304
+ "carpenter's kit, tool kit": 477,
1305
+ "carton": 478,
1306
+ "cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM": 480,
1307
+ "cassette": 481,
1308
+ "cassette player": 482,
1309
+ "castle": 483,
1310
+ "catamaran": 484,
1311
+ "cauliflower": 938,
1312
+ "cello, violoncello": 486,
1313
+ "cellular telephone, cellular phone, cellphone, cell, mobile phone": 487,
1314
+ "centipede": 79,
1315
+ "chain": 488,
1316
+ "chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour": 490,
1317
+ "chain saw, chainsaw": 491,
1318
+ "chainlink fence": 489,
1319
+ "chambered nautilus, pearly nautilus, nautilus": 117,
1320
+ "cheeseburger": 933,
1321
+ "cheetah, chetah, Acinonyx jubatus": 293,
1322
+ "chest": 492,
1323
+ "chickadee": 19,
1324
+ "chiffonier, commode": 493,
1325
+ "chime, bell, gong": 494,
1326
+ "chimpanzee, chimp, Pan troglodytes": 367,
1327
+ "china cabinet, china closet": 495,
1328
+ "chiton, coat-of-mail shell, sea cradle, polyplacophore": 116,
1329
+ "chocolate sauce, chocolate syrup": 960,
1330
+ "chow, chow chow": 260,
1331
+ "church, church building": 497,
1332
+ "cicada, cicala": 316,
1333
+ "cinema, movie theater, movie theatre, movie house, picture palace": 498,
1334
+ "cleaver, meat cleaver, chopper": 499,
1335
+ "cliff dwelling": 500,
1336
+ "cliff, drop, drop-off": 972,
1337
+ "cloak": 501,
1338
+ "clog, geta, patten, sabot": 502,
1339
+ "clumber, clumber spaniel": 216,
1340
+ "cock": 7,
1341
+ "cocker spaniel, English cocker spaniel, cocker": 219,
1342
+ "cockroach, roach": 314,
1343
+ "cocktail shaker": 503,
1344
+ "coffee mug": 504,
1345
+ "coffeepot": 505,
1346
+ "coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch": 391,
1347
+ "coil, spiral, volute, whorl, helix": 506,
1348
+ "collie": 231,
1349
+ "colobus, colobus monkey": 375,
1350
+ "combination lock": 507,
1351
+ "comic book": 917,
1352
+ "common iguana, iguana, Iguana iguana": 39,
1353
+ "common newt, Triturus vulgaris": 26,
1354
+ "computer keyboard, keypad": 508,
1355
+ "conch": 112,
1356
+ "confectionery, confectionary, candy store": 509,
1357
+ "consomme": 925,
1358
+ "container ship, containership, container vessel": 510,
1359
+ "convertible": 511,
1360
+ "coral fungus": 991,
1361
+ "coral reef": 973,
1362
+ "corkscrew, bottle screw": 512,
1363
+ "corn": 987,
1364
+ "cornet, horn, trumpet, trump": 513,
1365
+ "coucal": 91,
1366
+ "cougar, puma, catamount, mountain lion, painter, panther, Felis concolor": 286,
1367
+ "cowboy boot": 514,
1368
+ "cowboy hat, ten-gallon hat": 515,
1369
+ "coyote, prairie wolf, brush wolf, Canis latrans": 272,
1370
+ "cradle": 516,
1371
+ "crane": 517,
1372
+ "crash helmet": 518,
1373
+ "crate": 519,
1374
+ "crayfish, crawfish, crawdad, crawdaddy": 124,
1375
+ "crib, cot": 520,
1376
+ "cricket": 312,
1377
+ "croquet ball": 522,
1378
+ "crossword puzzle, crossword": 918,
1379
+ "crutch": 523,
1380
+ "cucumber, cuke": 943,
1381
+ "cuirass": 524,
1382
+ "cup": 968,
1383
+ "curly-coated retriever": 206,
1384
+ "custard apple": 956,
1385
+ "daisy": 985,
1386
+ "dalmatian, coach dog, carriage dog": 251,
1387
+ "dam, dike, dyke": 525,
1388
+ "damselfly": 320,
1389
+ "desk": 526,
1390
+ "desktop computer": 527,
1391
+ "dhole, Cuon alpinus": 274,
1392
+ "dial telephone, dial phone": 528,
1393
+ "diamondback, diamondback rattlesnake, Crotalus adamanteus": 67,
1394
+ "diaper, nappy, napkin": 529,
1395
+ "digital clock": 530,
1396
+ "digital watch": 531,
1397
+ "dingo, warrigal, warragal, Canis dingo": 273,
1398
+ "dining table, board": 532,
1399
+ "dishrag, dishcloth": 533,
1400
+ "dishwasher, dish washer, dishwashing machine": 534,
1401
+ "disk brake, disc brake": 535,
1402
+ "dock, dockage, docking facility": 536,
1403
+ "dogsled, dog sled, dog sleigh": 537,
1404
+ "dome": 538,
1405
+ "doormat, welcome mat": 539,
1406
+ "dough": 961,
1407
+ "dowitcher": 142,
1408
+ "dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk": 319,
1409
+ "drake": 97,
1410
+ "drilling platform, offshore rig": 540,
1411
+ "drum, membranophone, tympan": 541,
1412
+ "drumstick": 542,
1413
+ "dugong, Dugong dugon": 149,
1414
+ "dumbbell": 543,
1415
+ "dung beetle": 305,
1416
+ "ear, spike, capitulum": 998,
1417
+ "earthstar": 995,
1418
+ "echidna, spiny anteater, anteater": 102,
1419
+ "eel": 390,
1420
+ "eft": 27,
1421
+ "eggnog": 969,
1422
+ "electric fan, blower": 545,
1423
+ "electric guitar": 546,
1424
+ "electric locomotive": 547,
1425
+ "electric ray, crampfish, numbfish, torpedo": 5,
1426
+ "entertainment center": 548,
1427
+ "envelope": 549,
1428
+ "espresso": 967,
1429
+ "espresso maker": 550,
1430
+ "face powder": 551,
1431
+ "feather boa, boa": 552,
1432
+ "fiddler crab": 120,
1433
+ "fig": 952,
1434
+ "file, file cabinet, filing cabinet": 553,
1435
+ "fire engine, fire truck": 555,
1436
+ "fire screen, fireguard": 556,
1437
+ "fireboat": 554,
1438
+ "flagpole, flagstaff": 557,
1439
+ "flamingo": 130,
1440
+ "flat-coated retriever": 205,
1441
+ "flatworm, platyhelminth": 110,
1442
+ "flute, transverse flute": 558,
1443
+ "fly": 308,
1444
+ "folding chair": 559,
1445
+ "football helmet": 560,
1446
+ "forklift": 561,
1447
+ "fountain": 562,
1448
+ "fountain pen": 563,
1449
+ "four-poster": 564,
1450
+ "fox squirrel, eastern fox squirrel, Sciurus niger": 335,
1451
+ "freight car": 565,
1452
+ "frilled lizard, Chlamydosaurus kingi": 43,
1453
+ "frying pan, frypan, skillet": 567,
1454
+ "fur coat": 568,
1455
+ "gar, garfish, garpike, billfish, Lepisosteus osseus": 395,
1456
+ "garbage truck, dustcart": 569,
1457
+ "garden spider, Aranea diademata": 74,
1458
+ "garter snake, grass snake": 57,
1459
+ "gas pump, gasoline pump, petrol pump, island dispenser": 571,
1460
+ "gasmask, respirator, gas helmet": 570,
1461
+ "gazelle": 353,
1462
+ "geyser": 974,
1463
+ "giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca": 388,
1464
+ "giant schnauzer": 197,
1465
+ "gibbon, Hylobates lar": 368,
1466
+ "go-kart": 573,
1467
+ "goblet": 572,
1468
+ "golden retriever": 207,
1469
+ "goldfinch, Carduelis carduelis": 11,
1470
+ "goldfish, Carassius auratus": 1,
1471
+ "golf ball": 574,
1472
+ "golfcart, golf cart": 575,
1473
+ "gondola": 576,
1474
+ "gong, tam-tam": 577,
1475
+ "goose": 99,
1476
+ "gorilla, Gorilla gorilla": 366,
1477
+ "gown": 578,
1478
+ "grand piano, grand": 579,
1479
+ "grasshopper, hopper": 311,
1480
+ "great grey owl, great gray owl, Strix nebulosa": 24,
1481
+ "great white shark, white shark, man-eater, man-eating shark, Carcharodon carcharias": 2,
1482
+ "green lizard, Lacerta viridis": 46,
1483
+ "green mamba": 64,
1484
+ "green snake, grass snake": 55,
1485
+ "greenhouse, nursery, glasshouse": 580,
1486
+ "grey fox, gray fox, Urocyon cinereoargenteus": 280,
1487
+ "grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus": 147,
1488
+ "grille, radiator grille": 581,
1489
+ "grocery store, grocery, food market, market": 582,
1490
+ "groenendael": 224,
1491
+ "groom, bridegroom": 982,
1492
+ "ground beetle, carabid beetle": 302,
1493
+ "guacamole": 924,
1494
+ "guenon, guenon monkey": 370,
1495
+ "guillotine": 583,
1496
+ "guinea pig, Cavia cobaya": 338,
1497
+ "gyromitra": 993,
1498
+ "hair slide": 584,
1499
+ "hair spray": 585,
1500
+ "half track": 586,
1501
+ "hammer": 587,
1502
+ "hammerhead, hammerhead shark": 4,
1503
+ "hamper": 588,
1504
+ "hamster": 333,
1505
+ "hand blower, blow dryer, blow drier, hair dryer, hair drier": 589,
1506
+ "hand-held computer, hand-held microcomputer": 590,
1507
+ "handkerchief, hankie, hanky, hankey": 591,
1508
+ "hard disc, hard disk, fixed disk": 592,
1509
+ "hare": 331,
1510
+ "harmonica, mouth organ, harp, mouth harp": 593,
1511
+ "harp": 594,
1512
+ "hartebeest": 351,
1513
+ "harvester, reaper": 595,
1514
+ "harvestman, daddy longlegs, Phalangium opilio": 70,
1515
+ "hatchet": 596,
1516
+ "hay": 958,
1517
+ "head cabbage": 936,
1518
+ "hen": 8,
1519
+ "hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa": 996,
1520
+ "hermit crab": 125,
1521
+ "hip, rose hip, rosehip": 989,
1522
+ "hippopotamus, hippo, river horse, Hippopotamus amphibius": 344,
1523
+ "hog, pig, grunter, squealer, Sus scrofa": 341,
1524
+ "hognose snake, puff adder, sand viper": 54,
1525
+ "holster": 597,
1526
+ "home theater, home theatre": 598,
1527
+ "honeycomb": 599,
1528
+ "hook, claw": 600,
1529
+ "hoopskirt, crinoline": 601,
1530
+ "horizontal bar, high bar": 602,
1531
+ "hornbill": 93,
1532
+ "horned viper, cerastes, sand viper, horned asp, Cerastes cornutus": 66,
1533
+ "horse cart, horse-cart": 603,
1534
+ "hot pot, hotpot": 926,
1535
+ "hotdog, hot dog, red hot": 934,
1536
+ "hourglass": 604,
1537
+ "house finch, linnet, Carpodacus mexicanus": 12,
1538
+ "howler monkey, howler": 379,
1539
+ "hummingbird": 94,
1540
+ "hyena, hyaena": 276,
1541
+ "iPod": 605,
1542
+ "ibex, Capra ibex": 350,
1543
+ "ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus": 296,
1544
+ "ice cream, icecream": 928,
1545
+ "ice lolly, lolly, lollipop, popsicle": 929,
1546
+ "impala, Aepyceros melampus": 352,
1547
+ "indigo bunting, indigo finch, indigo bird, Passerina cyanea": 14,
1548
+ "indri, indris, Indri indri, Indri brevicaudatus": 384,
1549
+ "iron, smoothing iron": 606,
1550
+ "isopod": 126,
1551
+ "jacamar": 95,
1552
+ "jack-o'-lantern": 607,
1553
+ "jackfruit, jak, jack": 955,
1554
+ "jaguar, panther, Panthera onca, Felis onca": 290,
1555
+ "jay": 17,
1556
+ "jean, blue jean, denim": 608,
1557
+ "jeep, landrover": 609,
1558
+ "jellyfish": 107,
1559
+ "jersey, T-shirt, tee shirt": 610,
1560
+ "jigsaw puzzle": 611,
1561
+ "jinrikisha, ricksha, rickshaw": 612,
1562
+ "joystick": 613,
1563
+ "junco, snowbird": 13,
1564
+ "keeshond": 261,
1565
+ "kelpie": 227,
1566
+ "killer whale, killer, orca, grampus, sea wolf, Orcinus orca": 148,
1567
+ "kimono": 614,
1568
+ "king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica": 121,
1569
+ "king penguin, Aptenodytes patagonica": 145,
1570
+ "king snake, kingsnake": 56,
1571
+ "kit fox, Vulpes macrotis": 278,
1572
+ "kite": 21,
1573
+ "knee pad": 615,
1574
+ "knot": 616,
1575
+ "koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus": 105,
1576
+ "komondor": 228,
1577
+ "kuvasz": 222,
1578
+ "lab coat, laboratory coat": 617,
1579
+ "lacewing, lacewing fly": 318,
1580
+ "ladle": 618,
1581
+ "ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle": 301,
1582
+ "lakeside, lakeshore": 975,
1583
+ "lampshade, lamp shade": 619,
1584
+ "langur": 374,
1585
+ "laptop, laptop computer": 620,
1586
+ "lawn mower, mower": 621,
1587
+ "leaf beetle, chrysomelid": 304,
1588
+ "leafhopper": 317,
1589
+ "leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea": 34,
1590
+ "lemon": 951,
1591
+ "lens cap, lens cover": 622,
1592
+ "leopard, Panthera pardus": 288,
1593
+ "lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens": 387,
1594
+ "letter opener, paper knife, paperknife": 623,
1595
+ "library": 624,
1596
+ "lifeboat": 625,
1597
+ "lighter, light, igniter, ignitor": 626,
1598
+ "limousine, limo": 627,
1599
+ "limpkin, Aramus pictus": 135,
1600
+ "liner, ocean liner": 628,
1601
+ "lion, king of beasts, Panthera leo": 291,
1602
+ "lionfish": 396,
1603
+ "lipstick, lip rouge": 629,
1604
+ "little blue heron, Egretta caerulea": 131,
1605
+ "llama": 355,
1606
+ "loggerhead, loggerhead turtle, Caretta caretta": 33,
1607
+ "long-horned beetle, longicorn, longicorn beetle": 303,
1608
+ "lorikeet": 90,
1609
+ "lotion": 631,
1610
+ "loudspeaker, speaker, speaker unit, loudspeaker system, speaker system": 632,
1611
+ "loupe, jeweler's loupe": 633,
1612
+ "lumbermill, sawmill": 634,
1613
+ "lycaenid, lycaenid butterfly": 326,
1614
+ "lynx, catamount": 287,
1615
+ "macaque": 373,
1616
+ "macaw": 88,
1617
+ "magnetic compass": 635,
1618
+ "magpie": 18,
1619
+ "mailbag, postbag": 636,
1620
+ "mailbox, letter box": 637,
1621
+ "maillot": 638,
1622
+ "maillot, tank suit": 639,
1623
+ "malamute, malemute, Alaskan malamute": 249,
1624
+ "malinois": 225,
1625
+ "manhole cover": 640,
1626
+ "mantis, mantid": 315,
1627
+ "maraca": 641,
1628
+ "marimba, xylophone": 642,
1629
+ "marmoset": 377,
1630
+ "marmot": 336,
1631
+ "mashed potato": 935,
1632
+ "mask": 643,
1633
+ "matchstick": 644,
1634
+ "maypole": 645,
1635
+ "maze, labyrinth": 646,
1636
+ "measuring cup": 647,
1637
+ "meat loaf, meatloaf": 962,
1638
+ "medicine chest, medicine cabinet": 648,
1639
+ "meerkat, mierkat": 299,
1640
+ "megalith, megalithic structure": 649,
1641
+ "menu": 922,
1642
+ "microphone, mike": 650,
1643
+ "microwave, microwave oven": 651,
1644
+ "military uniform": 652,
1645
+ "milk can": 653,
1646
+ "miniature pinscher": 237,
1647
+ "miniature poodle": 266,
1648
+ "miniature schnauzer": 196,
1649
+ "minibus": 654,
1650
+ "miniskirt, mini": 655,
1651
+ "minivan": 656,
1652
+ "mink": 357,
1653
+ "missile": 657,
1654
+ "mitten": 658,
1655
+ "mixing bowl": 659,
1656
+ "mobile home, manufactured home": 660,
1657
+ "modem": 662,
1658
+ "monarch, monarch butterfly, milkweed butterfly, Danaus plexippus": 323,
1659
+ "monastery": 663,
1660
+ "mongoose": 298,
1661
+ "monitor": 664,
1662
+ "moped": 665,
1663
+ "mortar": 666,
1664
+ "mortarboard": 667,
1665
+ "mosque": 668,
1666
+ "mosquito net": 669,
1667
+ "motor scooter, scooter": 670,
1668
+ "mountain bike, all-terrain bike, off-roader": 671,
1669
+ "mountain tent": 672,
1670
+ "mouse, computer mouse": 673,
1671
+ "mousetrap": 674,
1672
+ "moving van": 675,
1673
+ "mud turtle": 35,
1674
+ "mushroom": 947,
1675
+ "muzzle": 676,
1676
+ "nail": 677,
1677
+ "neck brace": 678,
1678
+ "necklace": 679,
1679
+ "nematode, nematode worm, roundworm": 111,
1680
+ "night snake, Hypsiglena torquata": 60,
1681
+ "nipple": 680,
1682
+ "notebook, notebook computer": 681,
1683
+ "obelisk": 682,
1684
+ "oboe, hautboy, hautbois": 683,
1685
+ "ocarina, sweet potato": 684,
1686
+ "odometer, hodometer, mileometer, milometer": 685,
1687
+ "oil filter": 686,
1688
+ "orange": 950,
1689
+ "orangutan, orang, orangutang, Pongo pygmaeus": 365,
1690
+ "organ, pipe organ": 687,
1691
+ "oscilloscope, scope, cathode-ray oscilloscope, CRO": 688,
1692
+ "ostrich, Struthio camelus": 9,
1693
+ "otter": 360,
1694
+ "otterhound, otter hound": 175,
1695
+ "overskirt": 689,
1696
+ "ox": 345,
1697
+ "oxcart": 690,
1698
+ "oxygen mask": 691,
1699
+ "oystercatcher, oyster catcher": 143,
1700
+ "packet": 692,
1701
+ "paddle, boat paddle": 693,
1702
+ "paddlewheel, paddle wheel": 694,
1703
+ "padlock": 695,
1704
+ "paintbrush": 696,
1705
+ "pajama, pyjama, pj's, jammies": 697,
1706
+ "palace": 698,
1707
+ "panpipe, pandean pipe, syrinx": 699,
1708
+ "paper towel": 700,
1709
+ "papillon": 157,
1710
+ "parachute, chute": 701,
1711
+ "parallel bars, bars": 702,
1712
+ "park bench": 703,
1713
+ "parking meter": 704,
1714
+ "partridge": 86,
1715
+ "passenger car, coach, carriage": 705,
1716
+ "patas, hussar monkey, Erythrocebus patas": 371,
1717
+ "patio, terrace": 706,
1718
+ "pay-phone, pay-station": 707,
1719
+ "peacock": 84,
1720
+ "pedestal, plinth, footstall": 708,
1721
+ "pelican": 144,
1722
+ "pencil box, pencil case": 709,
1723
+ "pencil sharpener": 710,
1724
+ "perfume, essence": 711,
1725
+ "photocopier": 713,
1726
+ "pick, plectrum, plectron": 714,
1727
+ "pickelhaube": 715,
1728
+ "picket fence, paling": 716,
1729
+ "pickup, pickup truck": 717,
1730
+ "pier": 718,
1731
+ "piggy bank, penny bank": 719,
1732
+ "pill bottle": 720,
1733
+ "pillow": 721,
1734
+ "pineapple, ananas": 953,
1735
+ "ping-pong ball": 722,
1736
+ "pinwheel": 723,
1737
+ "pirate, pirate ship": 724,
1738
+ "pitcher, ewer": 725,
1739
+ "pizza, pizza pie": 963,
1740
+ "plane, carpenter's plane, woodworking plane": 726,
1741
+ "planetarium": 727,
1742
+ "plastic bag": 728,
1743
+ "plate": 923,
1744
+ "plate rack": 729,
1745
+ "platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus": 103,
1746
+ "plow, plough": 730,
1747
+ "plunger, plumber's helper": 731,
1748
+ "pole": 733,
1749
+ "polecat, fitch, foulmart, foumart, Mustela putorius": 358,
1750
+ "police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria": 734,
1751
+ "pomegranate": 957,
1752
+ "poncho": 735,
1753
+ "pool table, billiard table, snooker table": 736,
1754
+ "pop bottle, soda bottle": 737,
1755
+ "porcupine, hedgehog": 334,
1756
+ "pot, flowerpot": 738,
1757
+ "potpie": 964,
1758
+ "potter's wheel": 739,
1759
+ "power drill": 740,
1760
+ "prairie chicken, prairie grouse, prairie fowl": 83,
1761
+ "prayer rug, prayer mat": 741,
1762
+ "pretzel": 932,
1763
+ "printer": 742,
1764
+ "prison, prison house": 743,
1765
+ "proboscis monkey, Nasalis larvatus": 376,
1766
+ "projectile, missile": 744,
1767
+ "projector": 745,
1768
+ "promontory, headland, head, foreland": 976,
1769
+ "ptarmigan": 81,
1770
+ "puck, hockey puck": 746,
1771
+ "puffer, pufferfish, blowfish, globefish": 397,
1772
+ "pug, pug-dog": 254,
1773
+ "punching bag, punch bag, punching ball, punchball": 747,
1774
+ "purse": 748,
1775
+ "quail": 85,
1776
+ "quill, quill pen": 749,
1777
+ "quilt, comforter, comfort, puff": 750,
1778
+ "racer, race car, racing car": 751,
1779
+ "racket, racquet": 752,
1780
+ "radiator": 753,
1781
+ "radio telescope, radio reflector": 755,
1782
+ "radio, wireless": 754,
1783
+ "rain barrel": 756,
1784
+ "ram, tup": 348,
1785
+ "rapeseed": 984,
1786
+ "recreational vehicle, RV, R.V.": 757,
1787
+ "red fox, Vulpes vulpes": 277,
1788
+ "red wine": 966,
1789
+ "red wolf, maned wolf, Canis rufus, Canis niger": 271,
1790
+ "red-backed sandpiper, dunlin, Erolia alpina": 140,
1791
+ "red-breasted merganser, Mergus serrator": 98,
1792
+ "redbone": 168,
1793
+ "redshank, Tringa totanus": 141,
1794
+ "reel": 758,
1795
+ "reflex camera": 759,
1796
+ "refrigerator, icebox": 760,
1797
+ "remote control, remote": 761,
1798
+ "restaurant, eating house, eating place, eatery": 762,
1799
+ "revolver, six-gun, six-shooter": 763,
1800
+ "rhinoceros beetle": 306,
1801
+ "rifle": 764,
1802
+ "ringlet, ringlet butterfly": 322,
1803
+ "ringneck snake, ring-necked snake, ring snake": 53,
1804
+ "robin, American robin, Turdus migratorius": 15,
1805
+ "rock beauty, Holocanthus tricolor": 392,
1806
+ "rock crab, Cancer irroratus": 119,
1807
+ "rock python, rock snake, Python sebae": 62,
1808
+ "rocking chair, rocker": 765,
1809
+ "rotisserie": 766,
1810
+ "rubber eraser, rubber, pencil eraser": 767,
1811
+ "ruddy turnstone, Arenaria interpres": 139,
1812
+ "ruffed grouse, partridge, Bonasa umbellus": 82,
1813
+ "rugby ball": 768,
1814
+ "rule, ruler": 769,
1815
+ "running shoe": 770,
1816
+ "safe": 771,
1817
+ "safety pin": 772,
1818
+ "saltshaker, salt shaker": 773,
1819
+ "sandal": 774,
1820
+ "sandbar, sand bar": 977,
1821
+ "sarong": 775,
1822
+ "sax, saxophone": 776,
1823
+ "scabbard": 777,
1824
+ "scale, weighing machine": 778,
1825
+ "schipperke": 223,
1826
+ "school bus": 779,
1827
+ "schooner": 780,
1828
+ "scoreboard": 781,
1829
+ "scorpion": 71,
1830
+ "screen, CRT screen": 782,
1831
+ "screw": 783,
1832
+ "screwdriver": 784,
1833
+ "scuba diver": 983,
1834
+ "sea anemone, anemone": 108,
1835
+ "sea cucumber, holothurian": 329,
1836
+ "sea lion": 150,
1837
+ "sea slug, nudibranch": 115,
1838
+ "sea snake": 65,
1839
+ "sea urchin": 328,
1840
+ "seashore, coast, seacoast, sea-coast": 978,
1841
+ "seat belt, seatbelt": 785,
1842
+ "sewing machine": 786,
1843
+ "shield, buckler": 787,
1844
+ "shoe shop, shoe-shop, shoe store": 788,
1845
+ "shoji": 789,
1846
+ "shopping basket": 790,
1847
+ "shopping cart": 791,
1848
+ "shovel": 792,
1849
+ "shower cap": 793,
1850
+ "shower curtain": 794,
1851
+ "siamang, Hylobates syndactylus, Symphalangus syndactylus": 369,
1852
+ "sidewinder, horned rattlesnake, Crotalus cerastes": 68,
1853
+ "silky terrier, Sydney silky": 201,
1854
+ "ski": 795,
1855
+ "ski mask": 796,
1856
+ "skunk, polecat, wood pussy": 361,
1857
+ "sleeping bag": 797,
1858
+ "slide rule, slipstick": 798,
1859
+ "sliding door": 799,
1860
+ "slot, one-armed bandit": 800,
1861
+ "sloth bear, Melursus ursinus, Ursus ursinus": 297,
1862
+ "slug": 114,
1863
+ "snail": 113,
1864
+ "snorkel": 801,
1865
+ "snow leopard, ounce, Panthera uncia": 289,
1866
+ "snowmobile": 802,
1867
+ "snowplow, snowplough": 803,
1868
+ "soap dispenser": 804,
1869
+ "soccer ball": 805,
1870
+ "sock": 806,
1871
+ "soft-coated wheaten terrier": 202,
1872
+ "solar dish, solar collector, solar furnace": 807,
1873
+ "sombrero": 808,
1874
+ "sorrel": 339,
1875
+ "soup bowl": 809,
1876
+ "space bar": 810,
1877
+ "space heater": 811,
1878
+ "space shuttle": 812,
1879
+ "spaghetti squash": 940,
1880
+ "spatula": 813,
1881
+ "speedboat": 814,
1882
+ "spider monkey, Ateles geoffroyi": 381,
1883
+ "spider web, spider's web": 815,
1884
+ "spindle": 816,
1885
+ "spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish": 123,
1886
+ "spoonbill": 129,
1887
+ "sports car, sport car": 817,
1888
+ "spotlight, spot": 818,
1889
+ "spotted salamander, Ambystoma maculatum": 28,
1890
+ "squirrel monkey, Saimiri sciureus": 382,
1891
+ "stage": 819,
1892
+ "standard poodle": 267,
1893
+ "standard schnauzer": 198,
1894
+ "starfish, sea star": 327,
1895
+ "steam locomotive": 820,
1896
+ "steel arch bridge": 821,
1897
+ "steel drum": 822,
1898
+ "stethoscope": 823,
1899
+ "stingray": 6,
1900
+ "stinkhorn, carrion fungus": 994,
1901
+ "stole": 824,
1902
+ "stone wall": 825,
1903
+ "stopwatch, stop watch": 826,
1904
+ "stove": 827,
1905
+ "strainer": 828,
1906
+ "strawberry": 949,
1907
+ "street sign": 919,
1908
+ "streetcar, tram, tramcar, trolley, trolley car": 829,
1909
+ "stretcher": 830,
1910
+ "studio couch, day bed": 831,
1911
+ "stupa, tope": 832,
1912
+ "sturgeon": 394,
1913
+ "submarine, pigboat, sub, U-boat": 833,
1914
+ "suit, suit of clothes": 834,
1915
+ "sulphur butterfly, sulfur butterfly": 325,
1916
+ "sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita": 89,
1917
+ "sundial": 835,
1918
+ "sunglass": 836,
1919
+ "sunglasses, dark glasses, shades": 837,
1920
+ "sunscreen, sunblock, sun blocker": 838,
1921
+ "suspension bridge": 839,
1922
+ "swab, swob, mop": 840,
1923
+ "sweatshirt": 841,
1924
+ "swimming trunks, bathing trunks": 842,
1925
+ "swing": 843,
1926
+ "switch, electric switch, electrical switch": 844,
1927
+ "syringe": 845,
1928
+ "tabby, tabby cat": 281,
1929
+ "table lamp": 846,
1930
+ "tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui": 32,
1931
+ "tank, army tank, armored combat vehicle, armoured combat vehicle": 847,
1932
+ "tape player": 848,
1933
+ "tarantula": 76,
1934
+ "teapot": 849,
1935
+ "teddy, teddy bear": 850,
1936
+ "television, television system": 851,
1937
+ "tench, Tinca tinca": 0,
1938
+ "tennis ball": 852,
1939
+ "terrapin": 36,
1940
+ "thatch, thatched roof": 853,
1941
+ "theater curtain, theatre curtain": 854,
1942
+ "thimble": 855,
1943
+ "three-toed sloth, ai, Bradypus tridactylus": 364,
1944
+ "thresher, thrasher, threshing machine": 856,
1945
+ "throne": 857,
1946
+ "thunder snake, worm snake, Carphophis amoenus": 52,
1947
+ "tick": 78,
1948
+ "tiger beetle": 300,
1949
+ "tiger cat": 282,
1950
+ "tiger shark, Galeocerdo cuvieri": 3,
1951
+ "tiger, Panthera tigris": 292,
1952
+ "tile roof": 858,
1953
+ "timber wolf, grey wolf, gray wolf, Canis lupus": 269,
1954
+ "titi, titi monkey": 380,
1955
+ "toaster": 859,
1956
+ "tobacco shop, tobacconist shop, tobacconist": 860,
1957
+ "toilet seat": 861,
1958
+ "toilet tissue, toilet paper, bathroom tissue": 999,
1959
+ "torch": 862,
1960
+ "totem pole": 863,
1961
+ "toucan": 96,
1962
+ "tow truck, tow car, wrecker": 864,
1963
+ "toy poodle": 265,
1964
+ "toy terrier": 158,
1965
+ "toyshop": 865,
1966
+ "tractor": 866,
1967
+ "traffic light, traffic signal, stoplight": 920,
1968
+ "trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi": 867,
1969
+ "tray": 868,
1970
+ "tree frog, tree-frog": 31,
1971
+ "trench coat": 869,
1972
+ "triceratops": 51,
1973
+ "tricycle, trike, velocipede": 870,
1974
+ "trifle": 927,
1975
+ "trilobite": 69,
1976
+ "trimaran": 871,
1977
+ "tripod": 872,
1978
+ "triumphal arch": 873,
1979
+ "trolleybus, trolley coach, trackless trolley": 874,
1980
+ "trombone": 875,
1981
+ "tub, vat": 876,
1982
+ "turnstile": 877,
1983
+ "tusker": 101,
1984
+ "typewriter keyboard": 878,
1985
+ "umbrella": 879,
1986
+ "unicycle, monocycle": 880,
1987
+ "upright, upright piano": 881,
1988
+ "vacuum, vacuum cleaner": 882,
1989
+ "valley, vale": 979,
1990
+ "vase": 883,
1991
+ "vault": 884,
1992
+ "velvet": 885,
1993
+ "vending machine": 886,
1994
+ "vestment": 887,
1995
+ "viaduct": 888,
1996
+ "vine snake": 59,
1997
+ "violin, fiddle": 889,
1998
+ "vizsla, Hungarian pointer": 211,
1999
+ "volcano": 980,
2000
+ "volleyball": 890,
2001
+ "vulture": 23,
2002
+ "waffle iron": 891,
2003
+ "walking stick, walkingstick, stick insect": 313,
2004
+ "wall clock": 892,
2005
+ "wallaby, brush kangaroo": 104,
2006
+ "wallet, billfold, notecase, pocketbook": 893,
2007
+ "wardrobe, closet, press": 894,
2008
+ "warplane, military plane": 895,
2009
+ "warthog": 343,
2010
+ "washbasin, handbasin, washbowl, lavabo, wash-hand basin": 896,
2011
+ "washer, automatic washer, washing machine": 897,
2012
+ "water bottle": 898,
2013
+ "water buffalo, water ox, Asiatic buffalo, Bubalus bubalis": 346,
2014
+ "water jug": 899,
2015
+ "water ouzel, dipper": 20,
2016
+ "water snake": 58,
2017
+ "water tower": 900,
2018
+ "weasel": 356,
2019
+ "web site, website, internet site, site": 916,
2020
+ "weevil": 307,
2021
+ "whippet": 172,
2022
+ "whiptail, whiptail lizard": 41,
2023
+ "whiskey jug": 901,
2024
+ "whistle": 902,
2025
+ "white stork, Ciconia ciconia": 127,
2026
+ "white wolf, Arctic wolf, Canis lupus tundrarum": 270,
2027
+ "wig": 903,
2028
+ "wild boar, boar, Sus scrofa": 342,
2029
+ "window screen": 904,
2030
+ "window shade": 905,
2031
+ "wine bottle": 907,
2032
+ "wing": 908,
2033
+ "wire-haired fox terrier": 188,
2034
+ "wok": 909,
2035
+ "wolf spider, hunting spider": 77,
2036
+ "wombat": 106,
2037
+ "wood rabbit, cottontail, cottontail rabbit": 330,
2038
+ "wooden spoon": 910,
2039
+ "wool, woolen, woollen": 911,
2040
+ "worm fence, snake fence, snake-rail fence, Virginia fence": 912,
2041
+ "wreck": 913,
2042
+ "yawl": 914,
2043
+ "yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum": 986,
2044
+ "yurt": 915,
2045
+ "zebra": 340,
2046
+ "zucchini, courgette": 939
2047
+ },
2048
+ "layer_norm_eps": 1e-05,
2049
+ "length_penalty": 1.0,
2050
+ "max_length": 20,
2051
+ "min_length": 0,
2052
+ "mlp_ratio": 4.0,
2053
+ "model_type": "swin",
2054
+ "no_repeat_ngram_size": 0,
2055
+ "num_beam_groups": 1,
2056
+ "num_beams": 1,
2057
+ "num_channels": 3,
2058
+ "num_heads": [
2059
+ 3,
2060
+ 6,
2061
+ 12,
2062
+ 24
2063
+ ],
2064
+ "num_layers": 4,
2065
+ "num_return_sequences": 1,
2066
+ "out_features": [
2067
+ "stage1",
2068
+ "stage2",
2069
+ "stage3",
2070
+ "stage4"
2071
+ ],
2072
+ "output_attentions": false,
2073
+ "output_hidden_states": false,
2074
+ "output_scores": false,
2075
+ "pad_token_id": null,
2076
+ "patch_size": 4,
2077
+ "path_norm": true,
2078
+ "prefix": null,
2079
+ "problem_type": null,
2080
+ "pruned_heads": {},
2081
+ "qkv_bias": true,
2082
+ "remove_invalid_values": false,
2083
+ "repetition_penalty": 1.0,
2084
+ "return_dict": true,
2085
+ "return_dict_in_generate": false,
2086
+ "sep_token_id": null,
2087
+ "stage_names": [
2088
+ "stem",
2089
+ "stage1",
2090
+ "stage2",
2091
+ "stage3",
2092
+ "stage4"
2093
+ ],
2094
+ "suppress_tokens": null,
2095
+ "task_specific_params": null,
2096
+ "temperature": 1.0,
2097
+ "tf_legacy_loss": false,
2098
+ "tie_encoder_decoder": false,
2099
+ "tie_word_embeddings": true,
2100
+ "tokenizer_class": null,
2101
+ "top_k": 50,
2102
+ "top_p": 1.0,
2103
+ "torch_dtype": "float32",
2104
+ "torchscript": false,
2105
+ "transformers_version": "4.26.0.dev0",
2106
+ "typical_p": 1.0,
2107
+ "use_absolute_embeddings": false,
2108
+ "use_bfloat16": false,
2109
+ "window_size": 7
2110
+ },
2111
+ "class_weight": 2.0,
2112
+ "common_stride": 4,
2113
+ "decoder_layers": 10,
2114
+ "dice_weight": 5.0,
2115
+ "dim_feedforward": 2048,
2116
+ "dropout": 0.0,
2117
+ "encoder_feedforward_dim": 1024,
2118
+ "encoder_layers": 6,
2119
+ "enforce_input_proj": false,
2120
+ "enforce_input_projection": false,
2121
+ "feature_size": 256,
2122
+ "feature_strides": [
2123
+ 4,
2124
+ 8,
2125
+ 16,
2126
+ 32
2127
+ ],
2128
+ "hidden_dim": 256,
2129
+ "id2label": {
2130
+ "0": "person",
2131
+ "1": "bicycle",
2132
+ "2": "car",
2133
+ "3": "motorbike",
2134
+ "4": "aeroplane",
2135
+ "5": "bus",
2136
+ "6": "train",
2137
+ "7": "truck",
2138
+ "8": "boat",
2139
+ "9": "traffic light",
2140
+ "10": "fire hydrant",
2141
+ "11": "stop sign",
2142
+ "12": "parking meter",
2143
+ "13": "bench",
2144
+ "14": "bird",
2145
+ "15": "cat",
2146
+ "16": "dog",
2147
+ "17": "horse",
2148
+ "18": "sheep",
2149
+ "19": "cow",
2150
+ "20": "elephant",
2151
+ "21": "bear",
2152
+ "22": "zebra",
2153
+ "23": "giraffe",
2154
+ "24": "backpack",
2155
+ "25": "umbrella",
2156
+ "26": "handbag",
2157
+ "27": "tie",
2158
+ "28": "suitcase",
2159
+ "29": "frisbee",
2160
+ "30": "skis",
2161
+ "31": "snowboard",
2162
+ "32": "sports ball",
2163
+ "33": "kite",
2164
+ "34": "baseball bat",
2165
+ "35": "baseball glove",
2166
+ "36": "skateboard",
2167
+ "37": "surfboard",
2168
+ "38": "tennis racket",
2169
+ "39": "bottle",
2170
+ "40": "wine glass",
2171
+ "41": "cup",
2172
+ "42": "fork",
2173
+ "43": "knife",
2174
+ "44": "spoon",
2175
+ "45": "bowl",
2176
+ "46": "banana",
2177
+ "47": "apple",
2178
+ "48": "sandwich",
2179
+ "49": "orange",
2180
+ "50": "broccoli",
2181
+ "51": "carrot",
2182
+ "52": "hot dog",
2183
+ "53": "pizza",
2184
+ "54": "donut",
2185
+ "55": "cake",
2186
+ "56": "chair",
2187
+ "57": "sofa",
2188
+ "58": "pottedplant",
2189
+ "59": "bed",
2190
+ "60": "diningtable",
2191
+ "61": "toilet",
2192
+ "62": "tvmonitor",
2193
+ "63": "laptop",
2194
+ "64": "mouse",
2195
+ "65": "remote",
2196
+ "66": "keyboard",
2197
+ "67": "cell phone",
2198
+ "68": "microwave",
2199
+ "69": "oven",
2200
+ "70": "toaster",
2201
+ "71": "sink",
2202
+ "72": "refrigerator",
2203
+ "73": "book",
2204
+ "74": "clock",
2205
+ "75": "vase",
2206
+ "76": "scissors",
2207
+ "77": "teddy bear",
2208
+ "78": "hair drier",
2209
+ "79": "toothbrush"
2210
+ },
2211
+ "ignore_value": 255,
2212
+ "importance_sample_ratio": 0.75,
2213
+ "init_std": 0.02,
2214
+ "init_xavier_std": 1.0,
2215
+ "label2id": {
2216
+ "aeroplane": 4,
2217
+ "apple": 47,
2218
+ "backpack": 24,
2219
+ "banana": 46,
2220
+ "baseball bat": 34,
2221
+ "baseball glove": 35,
2222
+ "bear": 21,
2223
+ "bed": 59,
2224
+ "bench": 13,
2225
+ "bicycle": 1,
2226
+ "bird": 14,
2227
+ "boat": 8,
2228
+ "book": 73,
2229
+ "bottle": 39,
2230
+ "bowl": 45,
2231
+ "broccoli": 50,
2232
+ "bus": 5,
2233
+ "cake": 55,
2234
+ "car": 2,
2235
+ "carrot": 51,
2236
+ "cat": 15,
2237
+ "cell phone": 67,
2238
+ "chair": 56,
2239
+ "clock": 74,
2240
+ "cow": 19,
2241
+ "cup": 41,
2242
+ "diningtable": 60,
2243
+ "dog": 16,
2244
+ "donut": 54,
2245
+ "elephant": 20,
2246
+ "fire hydrant": 10,
2247
+ "fork": 42,
2248
+ "frisbee": 29,
2249
+ "giraffe": 23,
2250
+ "hair drier": 78,
2251
+ "handbag": 26,
2252
+ "horse": 17,
2253
+ "hot dog": 52,
2254
+ "keyboard": 66,
2255
+ "kite": 33,
2256
+ "knife": 43,
2257
+ "laptop": 63,
2258
+ "microwave": 68,
2259
+ "motorbike": 3,
2260
+ "mouse": 64,
2261
+ "orange": 49,
2262
+ "oven": 69,
2263
+ "parking meter": 12,
2264
+ "person": 0,
2265
+ "pizza": 53,
2266
+ "pottedplant": 58,
2267
+ "refrigerator": 72,
2268
+ "remote": 65,
2269
+ "sandwich": 48,
2270
+ "scissors": 76,
2271
+ "sheep": 18,
2272
+ "sink": 71,
2273
+ "skateboard": 36,
2274
+ "skis": 30,
2275
+ "snowboard": 31,
2276
+ "sofa": 57,
2277
+ "spoon": 44,
2278
+ "sports ball": 32,
2279
+ "stop sign": 11,
2280
+ "suitcase": 28,
2281
+ "surfboard": 37,
2282
+ "teddy bear": 77,
2283
+ "tennis racket": 38,
2284
+ "tie": 27,
2285
+ "toaster": 70,
2286
+ "toilet": 61,
2287
+ "toothbrush": 79,
2288
+ "traffic light": 9,
2289
+ "train": 6,
2290
+ "truck": 7,
2291
+ "tvmonitor": 62,
2292
+ "umbrella": 25,
2293
+ "vase": 75,
2294
+ "wine glass": 40,
2295
+ "zebra": 22
2296
+ },
2297
+ "mask_feature_size": 256,
2298
+ "mask_weight": 5.0,
2299
+ "model_type": "mask2former",
2300
+ "no_object_weight": 0.1,
2301
+ "num_attention_heads": 8,
2302
+ "num_hidden_layers": 10,
2303
+ "num_queries": 100,
2304
+ "output_auxiliary_logits": null,
2305
+ "oversample_ratio": 3.0,
2306
+ "pre_norm": false,
2307
+ "torch_dtype": "float32",
2308
+ "train_num_points": 12544,
2309
+ "transformers_version": null,
2310
+ "use_auxiliary_loss": true
2311
+ }
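The config above packs a Swin backbone (carrying the full ImageNet-1k label map) inside a Mask2Former segmentation config (COCO-style 80-class `id2label`/`label2id`). It is consumed by `external/human_matting/stylematte.py` below, which keeps only the pixel-level module. A minimal loading sketch, assuming `transformers` is installed and the file lives at `configs/stylematte_config.json` as added in this commit:

```python
# Sketch: load the JSON config added above and build the pixel-level module
# that StyleMatte relies on. Mirrors the call in external/human_matting/stylematte.py;
# weights stay randomly initialized until the matting checkpoint is loaded.
from transformers import Mask2FormerForUniversalSegmentation
from transformers.models.mask2former.configuration_mask2former import Mask2FormerConfig

config = Mask2FormerConfig.from_json_file('./configs/stylematte_config.json')
model = Mask2FormerForUniversalSegmentation(config)
pixel_level_module = model.base_model.pixel_level_module   # Swin encoder + pixel decoder
print(config.num_queries, len(config.id2label))             # 100 queries, 80 COCO classes
```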
external/human_matting/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .matting_engine import StyleMatteEngine
external/human_matting/matting_engine.py ADDED
@@ -0,0 +1,66 @@
1
+ import os
2
+ import torch
3
+ import inspect
4
+ import warnings
5
+ import torchvision
6
+ from .stylematte import StyleMatte
7
+
8
+ class StyleMatteEngine(torch.nn.Module):
9
+ def __init__(self, device='cpu', human_matting_path='./pretrain_model/matting/stylematte_synth.pt'):
10
+ super().__init__()
11
+ self._device = device
12
+ self.normalize = torchvision.transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
13
+ self._init_models(human_matting_path)
14
+
15
+ def _init_models(self, _ckpt_path):
16
+ # load dict
17
+ state_dict = torch.load(_ckpt_path, map_location='cpu')
18
+ # build model
19
+ model = StyleMatte()
20
+ model.load_state_dict(state_dict)
21
+ self.model = model.to(self._device).eval()
22
+
23
+ @torch.no_grad()
24
+ def forward(self, input_image, return_type='matting', background_rgb=1.0):
25
+ if not hasattr(self, 'model'):
26
+ self._init_models()
27
+ if input_image.max() > 2.0:
28
+ warnings.warn('Image should be normalized to [0, 1].')
29
+ _, ori_h, ori_w = input_image.shape
30
+ input_image = input_image.to(self._device).float()
31
+ image = input_image.clone()
32
+ # resize
33
+ if max(ori_h, ori_w) > 1024:
34
+ scale = 1024.0 / max(ori_h, ori_w)
35
+ resized_h, resized_w = int(ori_h * scale), int(ori_w * scale)
36
+ image = torchvision.transforms.functional.resize(image, (resized_h, resized_w), antialias=True)
37
+ else:
38
+ resized_h, resized_w = ori_h, ori_w
39
+ # padding
40
+ if resized_h % 8 != 0 or resized_w % 8 != 0:
41
+ image = torchvision.transforms.functional.pad(image, ((8-resized_w % 8)%8, (8-resized_h % 8)%8, 0, 0, ), padding_mode='reflect')
42
+ # normalize and forwarding
43
+ image = self.normalize(image)[None]
44
+ predict = self.model(image)[0]
45
+ # undo padding
46
+ predict = predict[:, -resized_h:, -resized_w:]
47
+ # undo resize
48
+ if resized_h != ori_h or resized_w != ori_w:
49
+ predict = torchvision.transforms.functional.resize(predict, (ori_h, ori_w), antialias=True)
50
+
51
+ if return_type == 'alpha':
52
+ return predict[0]
53
+ elif return_type == 'matting':
54
+ predict = predict.expand(3, -1, -1)
55
+ matting_image = input_image.clone()
56
+ background_rgb = matting_image.new_ones(matting_image.shape) * background_rgb
57
+ matting_image = matting_image * predict + (1-predict) * background_rgb
58
+ return matting_image, predict[0]
59
+ elif return_type == 'all':
60
+ predict = predict.expand(3, -1, -1)
61
+ background_rgb = input_image.new_ones(input_image.shape) * background_rgb
62
+ foreground_image = input_image * predict + (1-predict) * background_rgb
63
+ background_image = input_image * (1-predict) + predict * background_rgb
64
+ return foreground_image, background_image
65
+ else:
66
+ raise NotImplementedError
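`StyleMatteEngine` above resizes the input so its long side is at most 1024 px, reflection-pads to a multiple of 8, runs `StyleMatte`, and undoes both steps before compositing against `background_rgb`. A usage sketch; the image path is a placeholder and the checkpoint path is the constructor default, which must exist locally:

```python
# Usage sketch for StyleMatteEngine (image path is illustrative; the checkpoint
# path defaults to ./pretrain_model/matting/stylematte_synth.pt).
import torch
import torchvision
from external.human_matting import StyleMatteEngine

device = 'cuda' if torch.cuda.is_available() else 'cpu'
engine = StyleMatteEngine(device=device)

# (3, H, W) float tensor in [0, 1], as the forward pass expects
image = torchvision.io.read_image('input.png', mode=torchvision.io.ImageReadMode.RGB).float() / 255.0
matted, alpha = engine(image, return_type='matting', background_rgb=1.0)   # composite on white
torchvision.utils.save_image(matted, 'foreground_on_white.png')
torchvision.utils.save_image(alpha.unsqueeze(0), 'alpha.png')
```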
external/human_matting/stylematte.py ADDED
@@ -0,0 +1,272 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+ from transformers import Mask2FormerForUniversalSegmentation
6
+ from transformers.models.mask2former.configuration_mask2former import Mask2FormerConfig
7
+
8
+ class StyleMatte(nn.Module):
9
+ def __init__(self):
10
+ super(StyleMatte, self).__init__()
11
+ self.fpn = FPN_fuse(feature_channels=[256, 256, 256, 256], fpn_out=256)
12
+ config = Mask2FormerConfig.from_json_file('./configs/stylematte_config.json')
13
+ self.pixel_decoder = Mask2FormerForUniversalSegmentation(config).base_model.pixel_level_module
14
+ self.fgf = FastGuidedFilter(eps=1e-4)
15
+ self.conv = nn.Conv2d(256, 1, kernel_size=3, padding=1)
16
+
17
+ def forward(self, image, normalize=False):
18
+ decoder_out = self.pixel_decoder(image)
19
+ decoder_states = list(decoder_out.decoder_hidden_states)
20
+ decoder_states.append(decoder_out.decoder_last_hidden_state)
21
+ out_pure = self.fpn(decoder_states)
22
+
23
+ image_lr = nn.functional.interpolate(image.mean(1, keepdim=True),
24
+ scale_factor=0.25,
25
+ mode='bicubic',
26
+ align_corners=True
27
+ )
28
+ out = self.conv(out_pure)
29
+ out = self.fgf(image_lr, out, image.mean(1, keepdim=True))
30
+
31
+ return torch.sigmoid(out)
32
+
33
+ def get_training_params(self):
34
+ return list(self.fpn.parameters())+list(self.conv.parameters())
35
+
36
+
37
+ def conv2d_relu(input_filters, output_filters, kernel_size=3, bias=True):
38
+ return nn.Sequential(
39
+ nn.Conv2d(input_filters, output_filters,
40
+ kernel_size=kernel_size, padding=kernel_size//2, bias=bias),
41
+ nn.LeakyReLU(0.2, inplace=True),
42
+ nn.BatchNorm2d(output_filters)
43
+ )
44
+
45
+
46
+ def up_and_add(x, y):
47
+ return F.interpolate(x, size=(y.size(2), y.size(3)), mode='bilinear', align_corners=True) + y
48
+
49
+
50
+ class FPN_fuse(nn.Module):
51
+ def __init__(self, feature_channels=[256, 512, 1024, 2048], fpn_out=256):
52
+ super(FPN_fuse, self).__init__()
53
+ assert feature_channels[0] == fpn_out
54
+ self.conv1x1 = nn.ModuleList([nn.Conv2d(ft_size, fpn_out, kernel_size=1)
55
+ for ft_size in feature_channels[1:]])
56
+ self.smooth_conv = nn.ModuleList([nn.Conv2d(fpn_out, fpn_out, kernel_size=3, padding=1)]
57
+ * (len(feature_channels)-1))
58
+ self.conv_fusion = nn.Sequential(
59
+ nn.Conv2d(2*fpn_out, fpn_out, kernel_size=3,
60
+ padding=1, bias=False),
61
+ nn.BatchNorm2d(fpn_out),
62
+ nn.ReLU(inplace=True),
63
+ )
64
+
65
+ def forward(self, features):
66
+
67
+ features[:-1] = [conv1x1(feature) for feature,
68
+ conv1x1 in zip(features[:-1], self.conv1x1)]
69
+ feature = up_and_add(self.smooth_conv[0](features[0]), features[1])
70
+ feature = up_and_add(self.smooth_conv[1](feature), features[2])
71
+ feature = up_and_add(self.smooth_conv[2](feature), features[3])
72
+
73
+ H, W = features[-1].size(2), features[-1].size(3)
74
+ x = [feature, features[-1]]
75
+ x = [F.interpolate(x_el, size=(H, W), mode='bilinear',
76
+ align_corners=True) for x_el in x]
77
+
78
+ x = self.conv_fusion(torch.cat(x, dim=1))
79
+
80
+ return x
81
+
82
+
83
+ class PSPModule(nn.Module):
84
+ # In the original implementation they use precise RoI pooling
85
+ # Instead of using adaptive average pooling
86
+ def __init__(self, in_channels, bin_sizes=[1, 2, 4, 6]):
87
+ super(PSPModule, self).__init__()
88
+ out_channels = in_channels // len(bin_sizes)
89
+ self.stages = nn.ModuleList([self._make_stages(in_channels, out_channels, b_s)
90
+ for b_s in bin_sizes])
91
+ self.bottleneck = nn.Sequential(
92
+ nn.Conv2d(in_channels+(out_channels * len(bin_sizes)), in_channels,
93
+ kernel_size=3, padding=1, bias=False),
94
+ nn.BatchNorm2d(in_channels),
95
+ nn.ReLU(inplace=True),
96
+ nn.Dropout2d(0.1)
97
+ )
98
+
99
+ def _make_stages(self, in_channels, out_channels, bin_sz):
100
+ prior = nn.AdaptiveAvgPool2d(output_size=bin_sz)
101
+ conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, bias=False)
102
+ bn = nn.BatchNorm2d(out_channels)
103
+ relu = nn.ReLU(inplace=True)
104
+ return nn.Sequential(prior, conv, bn, relu)
105
+
106
+ def forward(self, features):
107
+ h, w = features.size()[2], features.size()[3]
108
+ pyramids = [features]
109
+ pyramids.extend([F.interpolate(stage(features), size=(h, w), mode='bilinear',
110
+ align_corners=True) for stage in self.stages])
111
+ output = self.bottleneck(torch.cat(pyramids, dim=1))
112
+ return output
113
+
114
+
115
+ class GuidedFilter(nn.Module):
116
+ def __init__(self, r, eps=1e-8):
117
+ super(GuidedFilter, self).__init__()
118
+
119
+ self.r = r
120
+ self.eps = eps
121
+ self.boxfilter = BoxFilter(r)
122
+
123
+ def forward(self, x, y):
124
+ n_x, c_x, h_x, w_x = x.size()
125
+ n_y, c_y, h_y, w_y = y.size()
126
+
127
+ assert n_x == n_y
128
+ assert c_x == 1 or c_x == c_y
129
+ assert h_x == h_y and w_x == w_y
130
+ assert h_x > 2 * self.r + 1 and w_x > 2 * self.r + 1
131
+
132
+ # N
133
+ N = self.boxfilter((x.data.new().resize_((1, 1, h_x, w_x)).fill_(1.0)))
134
+
135
+ # mean_x
136
+ mean_x = self.boxfilter(x) / N
137
+ # mean_y
138
+ mean_y = self.boxfilter(y) / N
139
+ # cov_xy
140
+ cov_xy = self.boxfilter(x * y) / N - mean_x * mean_y
141
+ # var_x
142
+ var_x = self.boxfilter(x * x) / N - mean_x * mean_x
143
+
144
+ # A
145
+ A = cov_xy / (var_x + self.eps)
146
+ # b
147
+ b = mean_y - A * mean_x
148
+
149
+ # mean_A; mean_b
150
+ mean_A = self.boxfilter(A) / N
151
+ mean_b = self.boxfilter(b) / N
152
+
153
+ return mean_A * x + mean_b
154
+
155
+
156
+ class FastGuidedFilter(nn.Module):
157
+ def __init__(self, r=1, eps=1e-8):
158
+ super(FastGuidedFilter, self).__init__()
159
+
160
+ self.r = r
161
+ self.eps = eps
162
+ self.boxfilter = BoxFilter(r)
163
+
164
+ def forward(self, lr_x, lr_y, hr_x):
165
+ n_lrx, c_lrx, h_lrx, w_lrx = lr_x.size()
166
+ n_lry, c_lry, h_lry, w_lry = lr_y.size()
167
+ n_hrx, c_hrx, h_hrx, w_hrx = hr_x.size()
168
+
169
+ assert n_lrx == n_lry and n_lry == n_hrx
170
+ assert c_lrx == c_hrx and (c_lrx == 1 or c_lrx == c_lry)
171
+ assert h_lrx == h_lry and w_lrx == w_lry
172
+ assert h_lrx > 2*self.r+1 and w_lrx > 2*self.r+1
173
+
174
+ # N
175
+ N = self.boxfilter(lr_x.new().resize_((1, 1, h_lrx, w_lrx)).fill_(1.0))
176
+
177
+ # mean_x
178
+ mean_x = self.boxfilter(lr_x) / N
179
+ # mean_y
180
+ mean_y = self.boxfilter(lr_y) / N
181
+ # cov_xy
182
+ cov_xy = self.boxfilter(lr_x * lr_y) / N - mean_x * mean_y
183
+ # var_x
184
+ var_x = self.boxfilter(lr_x * lr_x) / N - mean_x * mean_x
185
+
186
+ # A
187
+ A = cov_xy / (var_x + self.eps)
188
+ # b
189
+ b = mean_y - A * mean_x
190
+
191
+ # mean_A; mean_b
192
+ mean_A = F.interpolate(
193
+ A, (h_hrx, w_hrx), mode='bilinear', align_corners=True)
194
+ mean_b = F.interpolate(
195
+ b, (h_hrx, w_hrx), mode='bilinear', align_corners=True)
196
+
197
+ return mean_A*hr_x+mean_b
198
+
199
+
200
+ class DeepGuidedFilterRefiner(nn.Module):
201
+ def __init__(self, hid_channels=16):
202
+ super().__init__()
203
+ self.box_filter = nn.Conv2d(
204
+ 4, 4, kernel_size=3, padding=1, bias=False, groups=4)
205
+ self.box_filter.weight.data[...] = 1 / 9
206
+ self.conv = nn.Sequential(
207
+ nn.Conv2d(4 * 2 + hid_channels, hid_channels,
208
+ kernel_size=1, bias=False),
209
+ nn.BatchNorm2d(hid_channels),
210
+ nn.ReLU(True),
211
+ nn.Conv2d(hid_channels, hid_channels, kernel_size=1, bias=False),
212
+ nn.BatchNorm2d(hid_channels),
213
+ nn.ReLU(True),
214
+ nn.Conv2d(hid_channels, 4, kernel_size=1, bias=True)
215
+ )
216
+
217
+ def forward(self, fine_src, base_src, base_fgr, base_pha, base_hid):
218
+ fine_x = torch.cat([fine_src, fine_src.mean(1, keepdim=True)], dim=1)
219
+ base_x = torch.cat([base_src, base_src.mean(1, keepdim=True)], dim=1)
220
+ base_y = torch.cat([base_fgr, base_pha], dim=1)
221
+
222
+ mean_x = self.box_filter(base_x)
223
+ mean_y = self.box_filter(base_y)
224
+ cov_xy = self.box_filter(base_x * base_y) - mean_x * mean_y
225
+ var_x = self.box_filter(base_x * base_x) - mean_x * mean_x
226
+
227
+ A = self.conv(torch.cat([cov_xy, var_x, base_hid], dim=1))
228
+ b = mean_y - A * mean_x
229
+
230
+ H, W = fine_src.shape[2:]
231
+ A = F.interpolate(A, (H, W), mode='bilinear', align_corners=False)
232
+ b = F.interpolate(b, (H, W), mode='bilinear', align_corners=False)
233
+
234
+ out = A * fine_x + b
235
+ fgr, pha = out.split([3, 1], dim=1)
236
+ return fgr, pha
237
+
238
+
239
+ def diff_x(input, r):
240
+ assert input.dim() == 4
241
+
242
+ left = input[:, :, r:2 * r + 1]
243
+ middle = input[:, :, 2 * r + 1:] - input[:, :, :-2 * r - 1]
244
+ right = input[:, :, -1:] - input[:, :, -2 * r - 1: -r - 1]
245
+
246
+ output = torch.cat([left, middle, right], dim=2)
247
+
248
+ return output
249
+
250
+
251
+ def diff_y(input, r):
252
+ assert input.dim() == 4
253
+
254
+ left = input[:, :, :, r:2 * r + 1]
255
+ middle = input[:, :, :, 2 * r + 1:] - input[:, :, :, :-2 * r - 1]
256
+ right = input[:, :, :, -1:] - input[:, :, :, -2 * r - 1: -r - 1]
257
+
258
+ output = torch.cat([left, middle, right], dim=3)
259
+
260
+ return output
261
+
262
+
263
+ class BoxFilter(nn.Module):
264
+ def __init__(self, r):
265
+ super(BoxFilter, self).__init__()
266
+
267
+ self.r = r
268
+
269
+ def forward(self, x):
270
+ assert x.dim() == 4
271
+
272
+ return diff_y(diff_x(x.cumsum(dim=2), self.r).cumsum(dim=3), self.r)
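`diff_x`/`diff_y` above convert per-axis cumulative sums into sliding-window sums, so `BoxFilter` computes an O(1)-per-pixel box sum with windows truncated at the image borders. A small self-check, not part of this commit, comparing it against an equivalent zero-padded convolution with a ones kernel:

```python
# Self-check sketch: the cumsum-based BoxFilter above should agree with a
# brute-force (2r+1) x (2r+1) box *sum* computed by zero-padded convolution.
import torch
import torch.nn.functional as F
from external.human_matting.stylematte import BoxFilter

r = 3
x = torch.rand(2, 1, 32, 32)
fast = BoxFilter(r)(x)
kernel = torch.ones(1, 1, 2 * r + 1, 2 * r + 1)
naive = F.conv2d(x, kernel, padding=r)
print(torch.allclose(fast, naive, atol=1e-4))   # expected: True
```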
external/landmark_detection/FaceBoxesV2/__init__.py ADDED
@@ -0,0 +1,2 @@
1
+ from . import detector
2
+ from . import faceboxes_detector
external/landmark_detection/FaceBoxesV2/detector.py ADDED
@@ -0,0 +1,39 @@
1
+ import cv2
2
+
3
+ class Detector(object):
4
+ def __init__(self, model_arch, model_weights):
5
+ self.model_arch = model_arch
6
+ self.model_weights = model_weights
7
+
8
+ def detect(self, image, thresh):
9
+ raise NotImplementedError
10
+
11
+ def crop(self, image, detections):
12
+ crops = []
13
+ for det in detections:
14
+ xmin = max(det[2], 0)
15
+ ymin = max(det[3], 0)
16
+ width = det[4]
17
+ height = det[5]
18
+ xmax = min(xmin+width, image.shape[1])
19
+ ymax = min(ymin+height, image.shape[0])
20
+ cut = image[ymin:ymax, xmin:xmax,:]
21
+ crops.append(cut)
22
+
23
+ return crops
24
+
25
+ def draw(self, image, detections, im_scale=None):
26
+ if im_scale is not None:
27
+ image = cv2.resize(image, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
28
+ detections = [[det[0],det[1],int(det[2]*im_scale),int(det[3]*im_scale),int(det[4]*im_scale),int(det[5]*im_scale)] for det in detections]
29
+
30
+ for det in detections:
31
+ xmin = det[2]
32
+ ymin = det[3]
33
+ width = det[4]
34
+ height = det[5]
35
+ xmax = xmin + width
36
+ ymax = ymin + height
37
+ cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)
38
+
39
+ return image
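`crop` and `draw` above both expect each detection as `[label, score, xmin, ymin, width, height]` in pixel coordinates of the original image; `draw` optionally rescales the image and the boxes together. A tiny illustration with a hand-made detection (not part of the commit); note that importing the package runs `FaceBoxesV2/__init__.py`, which pulls in the full detector and therefore needs the Cython NMS extension built via `utils/make.sh`:

```python
# Illustration of the detection-list contract used by crop()/draw().
import numpy as np
from external.landmark_detection.FaceBoxesV2.detector import Detector

image = np.zeros((240, 320, 3), dtype=np.uint8)
detections = [['face', 0.99, 50, 40, 100, 120]]            # label, score, xmin, ymin, w, h

base = Detector(model_arch=None, model_weights=None)        # detect() is left to subclasses
print(base.crop(image, detections)[0].shape)                # (120, 100, 3)
print(base.draw(image.copy(), detections, im_scale=0.5).shape)   # (120, 160, 3)
```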
external/landmark_detection/FaceBoxesV2/faceboxes_detector.py ADDED
@@ -0,0 +1,97 @@
1
+ from .detector import Detector
2
+ import cv2, os
3
+ import numpy as np
4
+ import torch
5
+ import torch.nn as nn
6
+ from .utils.config import cfg
7
+ from .utils.prior_box import PriorBox
8
+ from .utils.nms_wrapper import nms
9
+ from .utils.faceboxes import FaceBoxesV2
10
+ from .utils.box_utils import decode
11
+ import time
12
+
13
+ class FaceBoxesDetector(Detector):
14
+ def __init__(self, model_arch, model_weights, use_gpu, device):
15
+ super().__init__(model_arch, model_weights)
16
+ self.name = 'FaceBoxesDetector'
17
+ self.net = FaceBoxesV2(phase='test', size=None, num_classes=2) # initialize detector
18
+ self.use_gpu = use_gpu
19
+ self.device = device
20
+
21
+ state_dict = torch.load(self.model_weights, map_location=self.device)
22
+ # create new OrderedDict that does not contain `module.`
23
+ from collections import OrderedDict
24
+ new_state_dict = OrderedDict()
25
+ for k, v in state_dict.items():
26
+ name = k[7:] # remove `module.`
27
+ new_state_dict[name] = v
28
+ # load params
29
+ self.net.load_state_dict(new_state_dict)
30
+ self.net = self.net.to(self.device)
31
+ self.net.eval()
32
+
33
+
34
+ def detect(self, image, thresh=0.6, im_scale=None):
35
+ # auto resize for large images
36
+ if im_scale is None:
37
+ height, width, _ = image.shape
38
+ if min(height, width) > 600:
39
+ im_scale = 600. / min(height, width)
40
+ else:
41
+ im_scale = 1
42
+ image_scale = cv2.resize(image, None, None, fx=im_scale, fy=im_scale, interpolation=cv2.INTER_LINEAR)
43
+
44
+ scale = torch.Tensor([image_scale.shape[1], image_scale.shape[0], image_scale.shape[1], image_scale.shape[0]])
45
+ image_scale = torch.from_numpy(image_scale.transpose(2,0,1)).to(self.device).int()
46
+ mean_tmp = torch.IntTensor([104, 117, 123]).to(self.device)
47
+ mean_tmp = mean_tmp.unsqueeze(1).unsqueeze(2)
48
+ image_scale -= mean_tmp
49
+ image_scale = image_scale.float().unsqueeze(0)
50
+ scale = scale.to(self.device)
51
+
52
+ with torch.no_grad():
53
+ out = self.net(image_scale)
54
+ #priorbox = PriorBox(cfg, out[2], (image_scale.size()[2], image_scale.size()[3]), phase='test')
55
+ priorbox = PriorBox(cfg, image_size=(image_scale.size()[2], image_scale.size()[3]))
56
+ priors = priorbox.forward()
57
+ priors = priors.to(self.device)
58
+ loc, conf = out
59
+ prior_data = priors.data
60
+ boxes = decode(loc.data.squeeze(0), prior_data, cfg['variance'])
61
+ boxes = boxes * scale
62
+ boxes = boxes.cpu().numpy()
63
+ scores = conf.data.cpu().numpy()[:, 1]
64
+
65
+ # ignore low scores
66
+ inds = np.where(scores > thresh)[0]
67
+ boxes = boxes[inds]
68
+ scores = scores[inds]
69
+
70
+ # keep top-K before NMS
71
+ order = scores.argsort()[::-1][:5000]
72
+ boxes = boxes[order]
73
+ scores = scores[order]
74
+
75
+ # do NMS
76
+ dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32, copy=False)
77
+ keep = nms(dets, 0.3)
78
+ dets = dets[keep, :]
79
+
80
+ dets = dets[:750, :]
81
+ detections_scale = []
82
+ for i in range(dets.shape[0]):
83
+ xmin = int(dets[i][0])
84
+ ymin = int(dets[i][1])
85
+ xmax = int(dets[i][2])
86
+ ymax = int(dets[i][3])
87
+ score = dets[i][4]
88
+ width = xmax - xmin
89
+ height = ymax - ymin
90
+ detections_scale.append(['face', score, xmin, ymin, width, height])
91
+
92
+ # adapt bboxes to the original image size
93
+ if len(detections_scale) > 0:
94
+ detections_scale = [[det[0],det[1],int(det[2]/im_scale),int(det[3]/im_scale),int(det[4]/im_scale),int(det[5]/im_scale)] for det in detections_scale]
95
+
96
+ return detections_scale, im_scale
97
+
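`detect` above returns `(detections, im_scale)`, with each detection as `['face', score, xmin, ymin, width, height]` mapped back to original-image pixels (inputs whose short side exceeds 600 px are downscaled internally). A usage sketch; the weights path is a placeholder, and the Cython NMS extension under `utils/` must be compiled first (see `utils/make.sh`):

```python
# Usage sketch (weights path is a placeholder; requires the compiled NMS extension).
import cv2
import torch
from external.landmark_detection.FaceBoxesV2.faceboxes_detector import FaceBoxesDetector

use_gpu = torch.cuda.is_available()
device = torch.device('cuda:0' if use_gpu else 'cpu')
detector = FaceBoxesDetector('FaceBoxes', './pretrain_model/faceboxesv2.pth', use_gpu, device)

image = cv2.imread('input.png')                    # BGR, matching the [104, 117, 123] mean
detections, im_scale = detector.detect(image, thresh=0.6)
for label, score, xmin, ymin, w, h in detections:
    print(f'{label} {score:.2f} at ({xmin}, {ymin}, {w}, {h})')
cv2.imwrite('vis.png', detector.draw(image, detections))
```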
external/landmark_detection/FaceBoxesV2/utils/__init__.py ADDED
File without changes
external/landmark_detection/FaceBoxesV2/utils/box_utils.py ADDED
@@ -0,0 +1,276 @@
1
+ import torch
2
+ import numpy as np
3
+
4
+
5
+ def point_form(boxes):
6
+ """ Convert prior_boxes to (xmin, ymin, xmax, ymax)
7
+ representation for comparison to point form ground truth data.
8
+ Args:
9
+ boxes: (tensor) center-size default boxes from priorbox layers.
10
+ Return:
11
+ boxes: (tensor) Converted xmin, ymin, xmax, ymax form of boxes.
12
+ """
13
+ return torch.cat((boxes[:, :2] - boxes[:, 2:]/2, # xmin, ymin
14
+ boxes[:, :2] + boxes[:, 2:]/2), 1) # xmax, ymax
15
+
16
+
17
+ def center_size(boxes):
18
+ """ Convert prior_boxes to (cx, cy, w, h)
19
+ representation for comparison to center-size form ground truth data.
20
+ Args:
21
+ boxes: (tensor) point_form boxes
22
+ Return:
23
+ boxes: (tensor) Converted cx, cy, w, h form of boxes.
24
+ """
25
+ return torch.cat(((boxes[:, 2:] + boxes[:, :2])/2, # cx, cy
26
+ boxes[:, 2:] - boxes[:, :2]), 1) # w, h
27
+
28
+
29
+ def intersect(box_a, box_b):
30
+ """ We resize both tensors to [A,B,2] without new malloc:
31
+ [A,2] -> [A,1,2] -> [A,B,2]
32
+ [B,2] -> [1,B,2] -> [A,B,2]
33
+ Then we compute the area of intersect between box_a and box_b.
34
+ Args:
35
+ box_a: (tensor) bounding boxes, Shape: [A,4].
36
+ box_b: (tensor) bounding boxes, Shape: [B,4].
37
+ Return:
38
+ (tensor) intersection area, Shape: [A,B].
39
+ """
40
+ A = box_a.size(0)
41
+ B = box_b.size(0)
42
+ max_xy = torch.min(box_a[:, 2:].unsqueeze(1).expand(A, B, 2),
43
+ box_b[:, 2:].unsqueeze(0).expand(A, B, 2))
44
+ min_xy = torch.max(box_a[:, :2].unsqueeze(1).expand(A, B, 2),
45
+ box_b[:, :2].unsqueeze(0).expand(A, B, 2))
46
+ inter = torch.clamp((max_xy - min_xy), min=0)
47
+ return inter[:, :, 0] * inter[:, :, 1]
48
+
49
+
50
+ def jaccard(box_a, box_b):
51
+ """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
52
+ is simply the intersection over union of two boxes. Here we operate on
53
+ ground truth boxes and default boxes.
54
+ E.g.:
55
+ A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
56
+ Args:
57
+ box_a: (tensor) Ground truth bounding boxes, Shape: [num_objects,4]
58
+ box_b: (tensor) Prior boxes from priorbox layers, Shape: [num_priors,4]
59
+ Return:
60
+ jaccard overlap: (tensor) Shape: [box_a.size(0), box_b.size(0)]
61
+ """
62
+ inter = intersect(box_a, box_b)
63
+ area_a = ((box_a[:, 2]-box_a[:, 0]) *
64
+ (box_a[:, 3]-box_a[:, 1])).unsqueeze(1).expand_as(inter) # [A,B]
65
+ area_b = ((box_b[:, 2]-box_b[:, 0]) *
66
+ (box_b[:, 3]-box_b[:, 1])).unsqueeze(0).expand_as(inter) # [A,B]
67
+ union = area_a + area_b - inter
68
+ return inter / union # [A,B]
69
+
70
+
71
+ def matrix_iou(a, b):
72
+ """
73
+ return iou of a and b, numpy version for data augmentation
74
+ """
75
+ lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
76
+ rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
77
+
78
+ area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
79
+ area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
80
+ area_b = np.prod(b[:, 2:] - b[:, :2], axis=1)
81
+ return area_i / (area_a[:, np.newaxis] + area_b - area_i)
82
+
83
+
84
+ def matrix_iof(a, b):
85
+ """
86
+ return iof of a and b, numpy version for data augmentation
87
+ """
88
+ lt = np.maximum(a[:, np.newaxis, :2], b[:, :2])
89
+ rb = np.minimum(a[:, np.newaxis, 2:], b[:, 2:])
90
+
91
+ area_i = np.prod(rb - lt, axis=2) * (lt < rb).all(axis=2)
92
+ area_a = np.prod(a[:, 2:] - a[:, :2], axis=1)
93
+ return area_i / np.maximum(area_a[:, np.newaxis], 1)
94
+
95
+
96
+ def match(threshold, truths, priors, variances, labels, loc_t, conf_t, idx):
97
+ """Match each prior box with the ground truth box of the highest jaccard
98
+ overlap, encode the bounding boxes, then return the matched indices
99
+ corresponding to both confidence and location preds.
100
+ Args:
101
+ threshold: (float) The overlap threshold used when matching boxes.
102
+ truths: (tensor) Ground truth boxes, Shape: [num_obj, num_priors].
103
+ priors: (tensor) Prior boxes from priorbox layers, Shape: [n_priors,4].
104
+ variances: (tensor) Variances corresponding to each prior coord,
105
+ Shape: [num_priors, 4].
106
+ labels: (tensor) All the class labels for the image, Shape: [num_obj].
107
+ loc_t: (tensor) Tensor to be filled w/ encoded location targets.
108
+ conf_t: (tensor) Tensor to be filled w/ matched indices for conf preds.
109
+ idx: (int) current batch index
110
+ Return:
111
+ The matched indices corresponding to 1)location and 2)confidence preds.
112
+ """
113
+ # jaccard index
114
+ overlaps = jaccard(
115
+ truths,
116
+ point_form(priors)
117
+ )
118
+ # (Bipartite Matching)
119
+ # [1,num_objects] best prior for each ground truth
120
+ best_prior_overlap, best_prior_idx = overlaps.max(1, keepdim=True)
121
+
122
+ # ignore hard gt
123
+ valid_gt_idx = best_prior_overlap[:, 0] >= 0.2
124
+ best_prior_idx_filter = best_prior_idx[valid_gt_idx, :]
125
+ if best_prior_idx_filter.shape[0] <= 0:
126
+ loc_t[idx] = 0
127
+ conf_t[idx] = 0
128
+ return
129
+
130
+ # [1,num_priors] best ground truth for each prior
131
+ best_truth_overlap, best_truth_idx = overlaps.max(0, keepdim=True)
132
+ best_truth_idx.squeeze_(0)
133
+ best_truth_overlap.squeeze_(0)
134
+ best_prior_idx.squeeze_(1)
135
+ best_prior_idx_filter.squeeze_(1)
136
+ best_prior_overlap.squeeze_(1)
137
+ best_truth_overlap.index_fill_(0, best_prior_idx_filter, 2) # ensure best prior
138
+ # TODO refactor: index best_prior_idx with long tensor
139
+ # ensure every gt matches with its prior of max overlap
140
+ for j in range(best_prior_idx.size(0)):
141
+ best_truth_idx[best_prior_idx[j]] = j
142
+ matches = truths[best_truth_idx] # Shape: [num_priors,4]
143
+ conf = labels[best_truth_idx] # Shape: [num_priors]
144
+ conf[best_truth_overlap < threshold] = 0 # label as background
145
+ loc = encode(matches, priors, variances)
146
+ loc_t[idx] = loc # [num_priors,4] encoded offsets to learn
147
+ conf_t[idx] = conf # [num_priors] top class label for each prior
148
+
149
+
150
+ def encode(matched, priors, variances):
151
+ """Encode the variances from the priorbox layers into the ground truth boxes
152
+ we have matched (based on jaccard overlap) with the prior boxes.
153
+ Args:
154
+ matched: (tensor) Coords of ground truth for each prior in point-form
155
+ Shape: [num_priors, 4].
156
+ priors: (tensor) Prior boxes in center-offset form
157
+ Shape: [num_priors,4].
158
+ variances: (list[float]) Variances of priorboxes
159
+ Return:
160
+ encoded boxes (tensor), Shape: [num_priors, 4]
161
+ """
162
+
163
+ # dist b/t match center and prior's center
164
+ g_cxcy = (matched[:, :2] + matched[:, 2:])/2 - priors[:, :2]
165
+ # encode variance
166
+ g_cxcy /= (variances[0] * priors[:, 2:])
167
+ # match wh / prior wh
168
+ g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
169
+ g_wh = torch.log(g_wh) / variances[1]
170
+ # return target for smooth_l1_loss
171
+ return torch.cat([g_cxcy, g_wh], 1) # [num_priors,4]
172
+
173
+
174
+ # Adapted from https://github.com/Hakuyume/chainer-ssd
175
+ def decode(loc, priors, variances):
176
+ """Decode locations from predictions using priors to undo
177
+ the encoding we did for offset regression at train time.
178
+ Args:
179
+ loc (tensor): location predictions for loc layers,
180
+ Shape: [num_priors,4]
181
+ priors (tensor): Prior boxes in center-offset form.
182
+ Shape: [num_priors,4].
183
+ variances: (list[float]) Variances of priorboxes
184
+ Return:
185
+ decoded bounding box predictions
186
+ """
187
+
188
+ boxes = torch.cat((
189
+ priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
190
+ priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
191
+ boxes[:, :2] -= boxes[:, 2:] / 2
192
+ boxes[:, 2:] += boxes[:, :2]
193
+ return boxes
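+
+ # Round-trip sketch (added for clarity, not in the original file): decode() inverts
+ # encode(), so for matched ground-truth boxes in point form and priors in
+ # center-offset form, decode(encode(matched, priors, variances), priors, variances)
+ # reproduces `matched` up to floating-point error.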
194
+
195
+
196
+ def log_sum_exp(x):
197
+ """Utility function for computing log_sum_exp while determining
198
+ This will be used to determine unaveraged confidence loss across
199
+ all examples in a batch.
200
+ Args:
201
+ x (Variable(tensor)): conf_preds from conf layers
202
+ """
203
+ x_max = x.data.max()
204
+ return torch.log(torch.sum(torch.exp(x-x_max), 1, keepdim=True)) + x_max
205
+
206
+
207
+ # Original author: Francisco Massa:
208
+ # https://github.com/fmassa/object-detection.torch
209
+ # Ported to PyTorch by Max deGroot (02/01/2017)
210
+ def nms(boxes, scores, overlap=0.5, top_k=200):
211
+ """Apply non-maximum suppression at test time to avoid detecting too many
212
+ overlapping bounding boxes for a given object.
213
+ Args:
214
+ boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
215
+ scores: (tensor) The class prediction scores for the img, Shape: [num_priors].
216
+ overlap: (float) The overlap thresh for suppressing unnecessary boxes.
217
+ top_k: (int) The maximum number of box preds to consider.
218
+ Return:
219
+ The indices of the kept boxes with respect to num_priors.
220
+ """
221
+
222
+ keep = torch.Tensor(scores.size(0)).fill_(0).long()
223
+ if boxes.numel() == 0:
224
+ return keep
225
+ x1 = boxes[:, 0]
226
+ y1 = boxes[:, 1]
227
+ x2 = boxes[:, 2]
228
+ y2 = boxes[:, 3]
229
+ area = torch.mul(x2 - x1, y2 - y1)
230
+ v, idx = scores.sort(0) # sort in ascending order
231
+ # I = I[v >= 0.01]
232
+ idx = idx[-top_k:] # indices of the top-k largest vals
233
+ xx1 = boxes.new()
234
+ yy1 = boxes.new()
235
+ xx2 = boxes.new()
236
+ yy2 = boxes.new()
237
+ w = boxes.new()
238
+ h = boxes.new()
239
+
240
+ # keep = torch.Tensor()
241
+ count = 0
242
+ while idx.numel() > 0:
243
+ i = idx[-1] # index of current largest val
244
+ # keep.append(i)
245
+ keep[count] = i
246
+ count += 1
247
+ if idx.size(0) == 1:
248
+ break
249
+ idx = idx[:-1] # remove kept element from view
250
+ # load bboxes of next highest vals
251
+ torch.index_select(x1, 0, idx, out=xx1)
252
+ torch.index_select(y1, 0, idx, out=yy1)
253
+ torch.index_select(x2, 0, idx, out=xx2)
254
+ torch.index_select(y2, 0, idx, out=yy2)
255
+ # store element-wise max with next highest score
256
+ xx1 = torch.clamp(xx1, min=x1[i])
257
+ yy1 = torch.clamp(yy1, min=y1[i])
258
+ xx2 = torch.clamp(xx2, max=x2[i])
259
+ yy2 = torch.clamp(yy2, max=y2[i])
260
+ w.resize_as_(xx2)
261
+ h.resize_as_(yy2)
262
+ w = xx2 - xx1
263
+ h = yy2 - yy1
264
+ # check sizes of xx1 and xx2.. after each iteration
265
+ w = torch.clamp(w, min=0.0)
266
+ h = torch.clamp(h, min=0.0)
267
+ inter = w*h
268
+ # IoU = i / (area(a) + area(b) - i)
269
+ rem_areas = torch.index_select(area, 0, idx) # load remaining areas)
270
+ union = (rem_areas - inter) + area[i]
271
+ IoU = inter/union # store result in iou
272
+ # keep only elements with an IoU <= overlap
273
+ idx = idx[IoU.le(overlap)]
274
+ return keep, count
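+
+ # Usage sketch (added comment, hypothetical variable names): after decoding predictions,
+ #   ids, count = nms(decoded_boxes, scores, overlap=0.3, top_k=400)
+ #   dets = torch.cat((decoded_boxes[ids[:count]], scores[ids[:count]].unsqueeze(1)), 1)
+ # keeps the highest-scoring, mutually non-overlapping boxes.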
275
+
276
+
external/landmark_detection/FaceBoxesV2/utils/build.py ADDED
@@ -0,0 +1,57 @@
1
+ # coding: utf-8
2
+
3
+ # --------------------------------------------------------
4
+ # Fast R-CNN
5
+ # Copyright (c) 2015 Microsoft
6
+ # Licensed under The MIT License [see LICENSE for details]
7
+ # Written by Ross Girshick
8
+ # --------------------------------------------------------
9
+
10
+ import os
11
+ from os.path import join as pjoin
12
+ import numpy as np
13
+ from distutils.core import setup
14
+ from distutils.extension import Extension
15
+ from Cython.Distutils import build_ext
16
+
17
+
18
+ def find_in_path(name, path):
19
+ "Find a file in a search path"
20
+ # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
21
+ for dir in path.split(os.pathsep):
22
+ binpath = pjoin(dir, name)
23
+ if os.path.exists(binpath):
24
+ return os.path.abspath(binpath)
25
+ return None
26
+
27
+
28
+ # Obtain the numpy include directory. This logic works across numpy versions.
29
+ try:
30
+ numpy_include = np.get_include()
31
+ except AttributeError:
32
+ numpy_include = np.get_numpy_include()
33
+
34
+
35
+ # run the customize_compiler
36
+ class custom_build_ext(build_ext):
37
+ def build_extensions(self):
38
+ # customize_compiler_for_nvcc(self.compiler)
39
+ build_ext.build_extensions(self)
40
+
41
+
42
+ ext_modules = [
43
+ Extension(
44
+ "nms.cpu_nms",
45
+ ["nms/cpu_nms.pyx"],
46
+ # extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
47
+ extra_compile_args=["-Wno-cpp", "-Wno-unused-function"],
48
+ include_dirs=[numpy_include]
49
+ )
50
+ ]
51
+
52
+ setup(
53
+ name='mot_utils',
54
+ ext_modules=ext_modules,
55
+ # inject our custom trigger
56
+ cmdclass={'build_ext': custom_build_ext},
57
+ )
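+
+ # To compile the Cython NMS extension in place (see make.sh), one would run:
+ #   python3 build.py build_ext --inplace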
external/landmark_detection/FaceBoxesV2/utils/config.py ADDED
@@ -0,0 +1,14 @@
1
+ # config.py
2
+
3
+ cfg = {
4
+ 'name': 'FaceBoxes',
5
+ #'min_dim': 1024,
6
+ #'feature_maps': [[32, 32], [16, 16], [8, 8]],
7
+ # 'aspect_ratios': [[1], [1], [1]],
8
+ 'min_sizes': [[32, 64, 128], [256], [512]],
9
+ 'steps': [32, 64, 128],
10
+ 'variance': [0.1, 0.2],
11
+ 'clip': False,
12
+ 'loc_weight': 2.0,
13
+ 'gpu_train': True
14
+ }
external/landmark_detection/FaceBoxesV2/utils/faceboxes.py ADDED
@@ -0,0 +1,239 @@
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+
6
+ class BasicConv2d(nn.Module):
7
+
8
+ def __init__(self, in_channels, out_channels, **kwargs):
9
+ super(BasicConv2d, self).__init__()
10
+ self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
11
+ self.bn = nn.BatchNorm2d(out_channels, eps=1e-5)
12
+
13
+ def forward(self, x):
14
+ x = self.conv(x)
15
+ x = self.bn(x)
16
+ return F.relu(x, inplace=True)
17
+
18
+
19
+ class Inception(nn.Module):
20
+
21
+ def __init__(self):
22
+ super(Inception, self).__init__()
23
+ self.branch1x1 = BasicConv2d(128, 32, kernel_size=1, padding=0)
24
+ self.branch1x1_2 = BasicConv2d(128, 32, kernel_size=1, padding=0)
25
+ self.branch3x3_reduce = BasicConv2d(128, 24, kernel_size=1, padding=0)
26
+ self.branch3x3 = BasicConv2d(24, 32, kernel_size=3, padding=1)
27
+ self.branch3x3_reduce_2 = BasicConv2d(128, 24, kernel_size=1, padding=0)
28
+ self.branch3x3_2 = BasicConv2d(24, 32, kernel_size=3, padding=1)
29
+ self.branch3x3_3 = BasicConv2d(32, 32, kernel_size=3, padding=1)
30
+
31
+ def forward(self, x):
32
+ branch1x1 = self.branch1x1(x)
33
+
34
+ branch1x1_pool = F.avg_pool2d(x, kernel_size=3, stride=1, padding=1)
35
+ branch1x1_2 = self.branch1x1_2(branch1x1_pool)
36
+
37
+ branch3x3_reduce = self.branch3x3_reduce(x)
38
+ branch3x3 = self.branch3x3(branch3x3_reduce)
39
+
40
+ branch3x3_reduce_2 = self.branch3x3_reduce_2(x)
41
+ branch3x3_2 = self.branch3x3_2(branch3x3_reduce_2)
42
+ branch3x3_3 = self.branch3x3_3(branch3x3_2)
43
+
44
+ outputs = [branch1x1, branch1x1_2, branch3x3, branch3x3_3]
45
+ return torch.cat(outputs, 1)
46
+
47
+
48
+ class CRelu(nn.Module):
49
+
50
+ def __init__(self, in_channels, out_channels, **kwargs):
51
+ super(CRelu, self).__init__()
52
+ self.conv = nn.Conv2d(in_channels, out_channels, bias=False, **kwargs)
53
+ self.bn = nn.BatchNorm2d(out_channels, eps=1e-5)
54
+
55
+ def forward(self, x):
56
+ x = self.conv(x)
57
+ x = self.bn(x)
58
+ x = torch.cat([x, -x], 1)
59
+ x = F.relu(x, inplace=True)
60
+ return x
61
+
62
+
63
+ class FaceBoxes(nn.Module):
64
+
65
+ def __init__(self, phase, size, num_classes):
66
+ super(FaceBoxes, self).__init__()
67
+ self.phase = phase
68
+ self.num_classes = num_classes
69
+ self.size = size
70
+
71
+ self.conv1 = CRelu(3, 24, kernel_size=7, stride=4, padding=3)
72
+ self.conv2 = CRelu(48, 64, kernel_size=5, stride=2, padding=2)
73
+
74
+ self.inception1 = Inception()
75
+ self.inception2 = Inception()
76
+ self.inception3 = Inception()
77
+
78
+ self.conv3_1 = BasicConv2d(128, 128, kernel_size=1, stride=1, padding=0)
79
+ self.conv3_2 = BasicConv2d(128, 256, kernel_size=3, stride=2, padding=1)
80
+
81
+ self.conv4_1 = BasicConv2d(256, 128, kernel_size=1, stride=1, padding=0)
82
+ self.conv4_2 = BasicConv2d(128, 256, kernel_size=3, stride=2, padding=1)
83
+
84
+ self.loc, self.conf = self.multibox(self.num_classes)
85
+
86
+ if self.phase == 'test':
87
+ self.softmax = nn.Softmax(dim=-1)
88
+
89
+ if self.phase == 'train':
90
+ for m in self.modules():
91
+ if isinstance(m, nn.Conv2d):
92
+ if m.bias is not None:
93
+ nn.init.xavier_normal_(m.weight.data)
94
+ m.bias.data.fill_(0.02)
95
+ else:
96
+ m.weight.data.normal_(0, 0.01)
97
+ elif isinstance(m, nn.BatchNorm2d):
98
+ m.weight.data.fill_(1)
99
+ m.bias.data.zero_()
100
+
101
+ def multibox(self, num_classes):
102
+ loc_layers = []
103
+ conf_layers = []
104
+ loc_layers += [nn.Conv2d(128, 21 * 4, kernel_size=3, padding=1)]
105
+ conf_layers += [nn.Conv2d(128, 21 * num_classes, kernel_size=3, padding=1)]
106
+ loc_layers += [nn.Conv2d(256, 1 * 4, kernel_size=3, padding=1)]
107
+ conf_layers += [nn.Conv2d(256, 1 * num_classes, kernel_size=3, padding=1)]
108
+ loc_layers += [nn.Conv2d(256, 1 * 4, kernel_size=3, padding=1)]
109
+ conf_layers += [nn.Conv2d(256, 1 * num_classes, kernel_size=3, padding=1)]
110
+ return nn.Sequential(*loc_layers), nn.Sequential(*conf_layers)
111
+
112
+ def forward(self, x):
113
+
114
+ detection_sources = list()
115
+ loc = list()
116
+ conf = list()
117
+
118
+ x = self.conv1(x)
119
+ x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1)
120
+ x = self.conv2(x)
121
+ x = F.max_pool2d(x, kernel_size=3, stride=2, padding=1)
122
+ x = self.inception1(x)
123
+ x = self.inception2(x)
124
+ x = self.inception3(x)
125
+ detection_sources.append(x)
126
+
127
+ x = self.conv3_1(x)
128
+ x = self.conv3_2(x)
129
+ detection_sources.append(x)
130
+
131
+ x = self.conv4_1(x)
132
+ x = self.conv4_2(x)
133
+ detection_sources.append(x)
134
+
135
+ for (x, l, c) in zip(detection_sources, self.loc, self.conf):
136
+ loc.append(l(x).permute(0, 2, 3, 1).contiguous())
137
+ conf.append(c(x).permute(0, 2, 3, 1).contiguous())
138
+
139
+ loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
140
+ conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
141
+
142
+ if self.phase == "test":
143
+ output = (loc.view(loc.size(0), -1, 4),
144
+ self.softmax(conf.view(conf.size(0), -1, self.num_classes)))
145
+ else:
146
+ output = (loc.view(loc.size(0), -1, 4),
147
+ conf.view(conf.size(0), -1, self.num_classes))
148
+
149
+ return output
150
+
151
+ class FaceBoxesV2(nn.Module):
152
+
153
+ def __init__(self, phase, size, num_classes):
154
+ super(FaceBoxesV2, self).__init__()
155
+ self.phase = phase
156
+ self.num_classes = num_classes
157
+ self.size = size
158
+
159
+ self.conv1 = BasicConv2d(3, 8, kernel_size=3, stride=2, padding=1)
160
+ self.conv2 = BasicConv2d(8, 16, kernel_size=3, stride=2, padding=1)
161
+ self.conv3 = BasicConv2d(16, 32, kernel_size=3, stride=2, padding=1)
162
+ self.conv4 = BasicConv2d(32, 64, kernel_size=3, stride=2, padding=1)
163
+ self.conv5 = BasicConv2d(64, 128, kernel_size=3, stride=2, padding=1)
164
+
165
+ self.inception1 = Inception()
166
+ self.inception2 = Inception()
167
+ self.inception3 = Inception()
168
+
169
+ self.conv6_1 = BasicConv2d(128, 128, kernel_size=1, stride=1, padding=0)
170
+ self.conv6_2 = BasicConv2d(128, 256, kernel_size=3, stride=2, padding=1)
171
+
172
+ self.conv7_1 = BasicConv2d(256, 128, kernel_size=1, stride=1, padding=0)
173
+ self.conv7_2 = BasicConv2d(128, 256, kernel_size=3, stride=2, padding=1)
174
+
175
+ self.loc, self.conf = self.multibox(self.num_classes)
176
+
177
+ if self.phase == 'test':
178
+ self.softmax = nn.Softmax(dim=-1)
179
+
180
+ if self.phase == 'train':
181
+ for m in self.modules():
182
+ if isinstance(m, nn.Conv2d):
183
+ if m.bias is not None:
184
+ nn.init.xavier_normal_(m.weight.data)
185
+ m.bias.data.fill_(0.02)
186
+ else:
187
+ m.weight.data.normal_(0, 0.01)
188
+ elif isinstance(m, nn.BatchNorm2d):
189
+ m.weight.data.fill_(1)
190
+ m.bias.data.zero_()
191
+
192
+ def multibox(self, num_classes):
193
+ loc_layers = []
194
+ conf_layers = []
195
+ loc_layers += [nn.Conv2d(128, 21 * 4, kernel_size=3, padding=1)]
196
+ conf_layers += [nn.Conv2d(128, 21 * num_classes, kernel_size=3, padding=1)]
197
+ loc_layers += [nn.Conv2d(256, 1 * 4, kernel_size=3, padding=1)]
198
+ conf_layers += [nn.Conv2d(256, 1 * num_classes, kernel_size=3, padding=1)]
199
+ loc_layers += [nn.Conv2d(256, 1 * 4, kernel_size=3, padding=1)]
200
+ conf_layers += [nn.Conv2d(256, 1 * num_classes, kernel_size=3, padding=1)]
201
+ return nn.Sequential(*loc_layers), nn.Sequential(*conf_layers)
202
+
203
+ def forward(self, x):
204
+
205
+ sources = list()
206
+ loc = list()
207
+ conf = list()
208
+
209
+ x = self.conv1(x)
210
+ x = self.conv2(x)
211
+ x = self.conv3(x)
212
+ x = self.conv4(x)
213
+ x = self.conv5(x)
214
+ x = self.inception1(x)
215
+ x = self.inception2(x)
216
+ x = self.inception3(x)
217
+ sources.append(x)
218
+ x = self.conv6_1(x)
219
+ x = self.conv6_2(x)
220
+ sources.append(x)
221
+ x = self.conv7_1(x)
222
+ x = self.conv7_2(x)
223
+ sources.append(x)
224
+
225
+ for (x, l, c) in zip(sources, self.loc, self.conf):
226
+ loc.append(l(x).permute(0, 2, 3, 1).contiguous())
227
+ conf.append(c(x).permute(0, 2, 3, 1).contiguous())
228
+
229
+ loc = torch.cat([o.view(o.size(0), -1) for o in loc], 1)
230
+ conf = torch.cat([o.view(o.size(0), -1) for o in conf], 1)
231
+
232
+ if self.phase == "test":
233
+ output = (loc.view(loc.size(0), -1, 4),
234
+ self.softmax(conf.view(-1, self.num_classes)))
235
+ else:
236
+ output = (loc.view(loc.size(0), -1, 4),
237
+ conf.view(conf.size(0), -1, self.num_classes))
238
+
239
+ return output
external/landmark_detection/FaceBoxesV2/utils/make.sh ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env bash
2
+ python3 build.py build_ext --inplace
3
+
external/landmark_detection/FaceBoxesV2/utils/nms/__init__.py ADDED
File without changes
external/landmark_detection/FaceBoxesV2/utils/nms/cpu_nms.c ADDED
The diff for this file is too large to render. See raw diff
 
external/landmark_detection/FaceBoxesV2/utils/nms/cpu_nms.py ADDED
File without changes
external/landmark_detection/FaceBoxesV2/utils/nms/cpu_nms.pyx ADDED
@@ -0,0 +1,163 @@
1
+ # --------------------------------------------------------
2
+ # Fast R-CNN
3
+ # Copyright (c) 2015 Microsoft
4
+ # Licensed under The MIT License [see LICENSE for details]
5
+ # Written by Ross Girshick
6
+ # --------------------------------------------------------
7
+
8
+ import numpy as np
9
+ cimport numpy as np
10
+
11
+ cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12
+ return a if a >= b else b
13
+
14
+ cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15
+ return a if a <= b else b
16
+
17
+ def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, float thresh):
18
+ cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
19
+ cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
20
+ cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
21
+ cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
22
+ cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
23
+
24
+ cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
25
+ cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
26
+
27
+ cdef int ndets = dets.shape[0]
28
+ cdef np.ndarray[np.int_t, ndim=1] suppressed = \
29
+ np.zeros((ndets), dtype=np.int_)
30
+
31
+ # nominal indices
32
+ cdef int _i, _j
33
+ # sorted indices
34
+ cdef int i, j
35
+ # temp variables for box i's (the box currently under consideration)
36
+ cdef np.float32_t ix1, iy1, ix2, iy2, iarea
37
+ # variables for computing overlap with box j (lower scoring box)
38
+ cdef np.float32_t xx1, yy1, xx2, yy2
39
+ cdef np.float32_t w, h
40
+ cdef np.float32_t inter, ovr
41
+
42
+ keep = []
43
+ for _i in range(ndets):
44
+ i = order[_i]
45
+ if suppressed[i] == 1:
46
+ continue
47
+ keep.append(i)
48
+ ix1 = x1[i]
49
+ iy1 = y1[i]
50
+ ix2 = x2[i]
51
+ iy2 = y2[i]
52
+ iarea = areas[i]
53
+ for _j in range(_i + 1, ndets):
54
+ j = order[_j]
55
+ if suppressed[j] == 1:
56
+ continue
57
+ xx1 = max(ix1, x1[j])
58
+ yy1 = max(iy1, y1[j])
59
+ xx2 = min(ix2, x2[j])
60
+ yy2 = min(iy2, y2[j])
61
+ w = max(0.0, xx2 - xx1 + 1)
62
+ h = max(0.0, yy2 - yy1 + 1)
63
+ inter = w * h
64
+ ovr = inter / (iarea + areas[j] - inter)
65
+ if ovr >= thresh:
66
+ suppressed[j] = 1
67
+
68
+ return keep
69
+
70
+ def cpu_soft_nms(np.ndarray[float, ndim=2] boxes, float sigma=0.5, float Nt=0.3, float threshold=0.001, unsigned int method=0):
71
+ cdef unsigned int N = boxes.shape[0]
72
+ cdef float iw, ih, box_area
73
+ cdef float ua
74
+ cdef int pos = 0
75
+ cdef float maxscore = 0
76
+ cdef int maxpos = 0
77
+ cdef float x1,x2,y1,y2,tx1,tx2,ty1,ty2,ts,area,weight,ov
78
+
79
+ for i in range(N):
80
+ maxscore = boxes[i, 4]
81
+ maxpos = i
82
+
83
+ tx1 = boxes[i,0]
84
+ ty1 = boxes[i,1]
85
+ tx2 = boxes[i,2]
86
+ ty2 = boxes[i,3]
87
+ ts = boxes[i,4]
88
+
89
+ pos = i + 1
90
+ # get max box
91
+ while pos < N:
92
+ if maxscore < boxes[pos, 4]:
93
+ maxscore = boxes[pos, 4]
94
+ maxpos = pos
95
+ pos = pos + 1
96
+
97
+ # add max box as a detection
98
+ boxes[i,0] = boxes[maxpos,0]
99
+ boxes[i,1] = boxes[maxpos,1]
100
+ boxes[i,2] = boxes[maxpos,2]
101
+ boxes[i,3] = boxes[maxpos,3]
102
+ boxes[i,4] = boxes[maxpos,4]
103
+
104
+ # swap ith box with position of max box
105
+ boxes[maxpos,0] = tx1
106
+ boxes[maxpos,1] = ty1
107
+ boxes[maxpos,2] = tx2
108
+ boxes[maxpos,3] = ty2
109
+ boxes[maxpos,4] = ts
110
+
111
+ tx1 = boxes[i,0]
112
+ ty1 = boxes[i,1]
113
+ tx2 = boxes[i,2]
114
+ ty2 = boxes[i,3]
115
+ ts = boxes[i,4]
116
+
117
+ pos = i + 1
118
+ # NMS iterations, note that N changes if detection boxes fall below threshold
119
+ while pos < N:
120
+ x1 = boxes[pos, 0]
121
+ y1 = boxes[pos, 1]
122
+ x2 = boxes[pos, 2]
123
+ y2 = boxes[pos, 3]
124
+ s = boxes[pos, 4]
125
+
126
+ area = (x2 - x1 + 1) * (y2 - y1 + 1)
127
+ iw = (min(tx2, x2) - max(tx1, x1) + 1)
128
+ if iw > 0:
129
+ ih = (min(ty2, y2) - max(ty1, y1) + 1)
130
+ if ih > 0:
131
+ ua = float((tx2 - tx1 + 1) * (ty2 - ty1 + 1) + area - iw * ih)
132
+ ov = iw * ih / ua #iou between max box and detection box
133
+
134
+ if method == 1: # linear
135
+ if ov > Nt:
136
+ weight = 1 - ov
137
+ else:
138
+ weight = 1
139
+ elif method == 2: # gaussian
140
+ weight = np.exp(-(ov * ov)/sigma)
141
+ else: # original NMS
142
+ if ov > Nt:
143
+ weight = 0
144
+ else:
145
+ weight = 1
146
+
147
+ boxes[pos, 4] = weight*boxes[pos, 4]
148
+
149
+ # if box score falls below threshold, discard the box by swapping with last box
150
+ # update N
151
+ if boxes[pos, 4] < threshold:
152
+ boxes[pos,0] = boxes[N-1, 0]
153
+ boxes[pos,1] = boxes[N-1, 1]
154
+ boxes[pos,2] = boxes[N-1, 2]
155
+ boxes[pos,3] = boxes[N-1, 3]
156
+ boxes[pos,4] = boxes[N-1, 4]
157
+ N = N - 1
158
+ pos = pos - 1
159
+
160
+ pos = pos + 1
161
+
162
+ keep = [i for i in range(N)]
163
+ return keep
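+
+ # Summary (added comment): `method` selects the re-weighting rule applied to
+ # overlapping boxes: 1 -> linear (weight = 1 - IoU when IoU > Nt),
+ # 2 -> Gaussian (weight = exp(-IoU^2 / sigma)), anything else -> hard NMS
+ # (weight = 0 when IoU > Nt). Boxes whose re-weighted score drops below
+ # `threshold` are discarded.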
external/landmark_detection/FaceBoxesV2/utils/nms/gpu_nms.hpp ADDED
@@ -0,0 +1,2 @@
1
+ void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
2
+ int boxes_dim, float nms_overlap_thresh, int device_id);
external/landmark_detection/FaceBoxesV2/utils/nms/gpu_nms.pyx ADDED
@@ -0,0 +1,31 @@
1
+ # --------------------------------------------------------
2
+ # Faster R-CNN
3
+ # Copyright (c) 2015 Microsoft
4
+ # Licensed under The MIT License [see LICENSE for details]
5
+ # Written by Ross Girshick
6
+ # --------------------------------------------------------
7
+
8
+ import numpy as np
9
+ cimport numpy as np
10
+
11
+ assert sizeof(int) == sizeof(np.int32_t)
12
+
13
+ cdef extern from "gpu_nms.hpp":
14
+ void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
15
+
16
+ def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
17
+ np.int32_t device_id=0):
18
+ cdef int boxes_num = dets.shape[0]
19
+ cdef int boxes_dim = dets.shape[1]
20
+ cdef int num_out
21
+ cdef np.ndarray[np.int32_t, ndim=1] \
22
+ keep = np.zeros(boxes_num, dtype=np.int32)
23
+ cdef np.ndarray[np.float32_t, ndim=1] \
24
+ scores = dets[:, 4]
25
+ cdef np.ndarray[np.int_t, ndim=1] \
26
+ order = scores.argsort()[::-1]
27
+ cdef np.ndarray[np.float32_t, ndim=2] \
28
+ sorted_dets = dets[order, :]
29
+ _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
30
+ keep = keep[:num_out]
31
+ return list(order[keep])
external/landmark_detection/FaceBoxesV2/utils/nms/nms_kernel.cu ADDED
@@ -0,0 +1,144 @@
1
+ // ------------------------------------------------------------------
2
+ // Faster R-CNN
3
+ // Copyright (c) 2015 Microsoft
4
+ // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5
+ // Written by Shaoqing Ren
6
+ // ------------------------------------------------------------------
7
+
8
+ #include "gpu_nms.hpp"
9
+ #include <vector>
10
+ #include <iostream>
11
+
12
+ #define CUDA_CHECK(condition) \
13
+ /* Code block avoids redefinition of cudaError_t error */ \
14
+ do { \
15
+ cudaError_t error = condition; \
16
+ if (error != cudaSuccess) { \
17
+ std::cout << cudaGetErrorString(error) << std::endl; \
18
+ } \
19
+ } while (0)
20
+
21
+ #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22
+ int const threadsPerBlock = sizeof(unsigned long long) * 8;
23
+
24
+ __device__ inline float devIoU(float const * const a, float const * const b) {
25
+ float left = max(a[0], b[0]), right = min(a[2], b[2]);
26
+ float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27
+ float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28
+ float interS = width * height;
29
+ float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30
+ float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31
+ return interS / (Sa + Sb - interS);
32
+ }
33
+
34
+ __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35
+ const float *dev_boxes, unsigned long long *dev_mask) {
36
+ const int row_start = blockIdx.y;
37
+ const int col_start = blockIdx.x;
38
+
39
+ // if (row_start > col_start) return;
40
+
41
+ const int row_size =
42
+ min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43
+ const int col_size =
44
+ min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45
+
46
+ __shared__ float block_boxes[threadsPerBlock * 5];
47
+ if (threadIdx.x < col_size) {
48
+ block_boxes[threadIdx.x * 5 + 0] =
49
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50
+ block_boxes[threadIdx.x * 5 + 1] =
51
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52
+ block_boxes[threadIdx.x * 5 + 2] =
53
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54
+ block_boxes[threadIdx.x * 5 + 3] =
55
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56
+ block_boxes[threadIdx.x * 5 + 4] =
57
+ dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58
+ }
59
+ __syncthreads();
60
+
61
+ if (threadIdx.x < row_size) {
62
+ const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63
+ const float *cur_box = dev_boxes + cur_box_idx * 5;
64
+ int i = 0;
65
+ unsigned long long t = 0;
66
+ int start = 0;
67
+ if (row_start == col_start) {
68
+ start = threadIdx.x + 1;
69
+ }
70
+ for (i = start; i < col_size; i++) {
71
+ if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72
+ t |= 1ULL << i;
73
+ }
74
+ }
75
+ const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76
+ dev_mask[cur_box_idx * col_blocks + col_start] = t;
77
+ }
78
+ }
79
+
80
+ void _set_device(int device_id) {
81
+ int current_device;
82
+ CUDA_CHECK(cudaGetDevice(&current_device));
83
+ if (current_device == device_id) {
84
+ return;
85
+ }
86
+ // The call to cudaSetDevice must come before any calls to Get, which
87
+ // may perform initialization using the GPU.
88
+ CUDA_CHECK(cudaSetDevice(device_id));
89
+ }
90
+
91
+ void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92
+ int boxes_dim, float nms_overlap_thresh, int device_id) {
93
+ _set_device(device_id);
94
+
95
+ float* boxes_dev = NULL;
96
+ unsigned long long* mask_dev = NULL;
97
+
98
+ const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99
+
100
+ CUDA_CHECK(cudaMalloc(&boxes_dev,
101
+ boxes_num * boxes_dim * sizeof(float)));
102
+ CUDA_CHECK(cudaMemcpy(boxes_dev,
103
+ boxes_host,
104
+ boxes_num * boxes_dim * sizeof(float),
105
+ cudaMemcpyHostToDevice));
106
+
107
+ CUDA_CHECK(cudaMalloc(&mask_dev,
108
+ boxes_num * col_blocks * sizeof(unsigned long long)));
109
+
110
+ dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111
+ DIVUP(boxes_num, threadsPerBlock));
112
+ dim3 threads(threadsPerBlock);
113
+ nms_kernel<<<blocks, threads>>>(boxes_num,
114
+ nms_overlap_thresh,
115
+ boxes_dev,
116
+ mask_dev);
117
+
118
+ std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119
+ CUDA_CHECK(cudaMemcpy(&mask_host[0],
120
+ mask_dev,
121
+ sizeof(unsigned long long) * boxes_num * col_blocks,
122
+ cudaMemcpyDeviceToHost));
123
+
124
+ std::vector<unsigned long long> remv(col_blocks);
125
+ memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126
+
127
+ int num_to_keep = 0;
128
+ for (int i = 0; i < boxes_num; i++) {
129
+ int nblock = i / threadsPerBlock;
130
+ int inblock = i % threadsPerBlock;
131
+
132
+ if (!(remv[nblock] & (1ULL << inblock))) {
133
+ keep_out[num_to_keep++] = i;
134
+ unsigned long long *p = &mask_host[0] + i * col_blocks;
135
+ for (int j = nblock; j < col_blocks; j++) {
136
+ remv[j] |= p[j];
137
+ }
138
+ }
139
+ }
140
+ *num_out = num_to_keep;
141
+
142
+ CUDA_CHECK(cudaFree(boxes_dev));
143
+ CUDA_CHECK(cudaFree(mask_dev));
144
+ }
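+
+ // Added note: the kernel writes, for every box i, a bitmask over blocks of 64
+ // candidate boxes (threadsPerBlock bits per unsigned long long); bit j of word b is
+ // set when box i overlaps box b*64+j above the threshold. The host loop then keeps a
+ // box only if no previously kept box has marked it as suppressed.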
external/landmark_detection/FaceBoxesV2/utils/nms/py_cpu_nms.py ADDED
@@ -0,0 +1,38 @@
1
+ # --------------------------------------------------------
2
+ # Fast R-CNN
3
+ # Copyright (c) 2015 Microsoft
4
+ # Licensed under The MIT License [see LICENSE for details]
5
+ # Written by Ross Girshick
6
+ # --------------------------------------------------------
7
+
8
+ import numpy as np
9
+
10
+ def py_cpu_nms(dets, thresh):
11
+ """Pure Python NMS baseline."""
12
+ x1 = dets[:, 0]
13
+ y1 = dets[:, 1]
14
+ x2 = dets[:, 2]
15
+ y2 = dets[:, 3]
16
+ scores = dets[:, 4]
17
+
18
+ areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19
+ order = scores.argsort()[::-1]
20
+
21
+ keep = []
22
+ while order.size > 0:
23
+ i = order[0]
24
+ keep.append(i)
25
+ xx1 = np.maximum(x1[i], x1[order[1:]])
26
+ yy1 = np.maximum(y1[i], y1[order[1:]])
27
+ xx2 = np.minimum(x2[i], x2[order[1:]])
28
+ yy2 = np.minimum(y2[i], y2[order[1:]])
29
+
30
+ w = np.maximum(0.0, xx2 - xx1 + 1)
31
+ h = np.maximum(0.0, yy2 - yy1 + 1)
32
+ inter = w * h
33
+ ovr = inter / (areas[i] + areas[order[1:]] - inter)
34
+
35
+ inds = np.where(ovr <= thresh)[0]
36
+ order = order[inds + 1]
37
+
38
+ return keep
external/landmark_detection/FaceBoxesV2/utils/nms_wrapper.py ADDED
@@ -0,0 +1,15 @@
1
+ # --------------------------------------------------------
2
+ # Fast R-CNN
3
+ # Copyright (c) 2015 Microsoft
4
+ # Licensed under The MIT License [see LICENSE for details]
5
+ # Written by Ross Girshick
6
+ # --------------------------------------------------------
7
+
8
+ from .nms.cpu_nms import cpu_nms, cpu_soft_nms
9
+
10
+ def nms(dets, thresh):
11
+ """Dispatch to either CPU or GPU NMS implementations."""
12
+
13
+ if dets.shape[0] == 0:
14
+ return []
15
+ return cpu_nms(dets, thresh)
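+
+ # Usage sketch (added comment, hypothetical values): `dets` is an (N, 5) float32
+ # array with rows [x1, y1, x2, y2, score]; nms(dets, 0.3) returns the indices of
+ # the boxes to keep.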
external/landmark_detection/FaceBoxesV2/utils/prior_box.py ADDED
@@ -0,0 +1,43 @@
1
+ import torch
2
+ from itertools import product as product
3
+ import numpy as np
4
+ from math import ceil
5
+
6
+
7
+ class PriorBox(object):
8
+ def __init__(self, cfg, image_size=None, phase='train'):
9
+ super(PriorBox, self).__init__()
10
+ #self.aspect_ratios = cfg['aspect_ratios']
11
+ self.min_sizes = cfg['min_sizes']
12
+ self.steps = cfg['steps']
13
+ self.clip = cfg['clip']
14
+ self.image_size = image_size
15
+ self.feature_maps = [[ceil(self.image_size[0]/step), ceil(self.image_size[1]/step)] for step in self.steps]
16
+
17
+ def forward(self):
18
+ anchors = []
19
+ for k, f in enumerate(self.feature_maps):
20
+ min_sizes = self.min_sizes[k]
21
+ for i, j in product(range(f[0]), range(f[1])):
22
+ for min_size in min_sizes:
23
+ s_kx = min_size / self.image_size[1]
24
+ s_ky = min_size / self.image_size[0]
25
+ if min_size == 32:
26
+ dense_cx = [x*self.steps[k]/self.image_size[1] for x in [j+0, j+0.25, j+0.5, j+0.75]]
27
+ dense_cy = [y*self.steps[k]/self.image_size[0] for y in [i+0, i+0.25, i+0.5, i+0.75]]
28
+ for cy, cx in product(dense_cy, dense_cx):
29
+ anchors += [cx, cy, s_kx, s_ky]
30
+ elif min_size == 64:
31
+ dense_cx = [x*self.steps[k]/self.image_size[1] for x in [j+0, j+0.5]]
32
+ dense_cy = [y*self.steps[k]/self.image_size[0] for y in [i+0, i+0.5]]
33
+ for cy, cx in product(dense_cy, dense_cx):
34
+ anchors += [cx, cy, s_kx, s_ky]
35
+ else:
36
+ cx = (j + 0.5) * self.steps[k] / self.image_size[1]
37
+ cy = (i + 0.5) * self.steps[k] / self.image_size[0]
38
+ anchors += [cx, cy, s_kx, s_ky]
39
+ # back to torch land
40
+ output = torch.Tensor(anchors).view(-1, 4)
41
+ if self.clip:
42
+ output.clamp_(max=1, min=0)
43
+ return output
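+
+ # Note (added comment): the returned tensor holds one row per anchor in
+ # center-offset form (cx, cy, w, h), normalized by the image size; the decode()
+ # helper in FaceBoxesV2/utils/box_utils.py converts network offsets plus these
+ # priors back to corner-form boxes.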
external/landmark_detection/FaceBoxesV2/utils/timer.py ADDED
@@ -0,0 +1,40 @@
1
+ # --------------------------------------------------------
2
+ # Fast R-CNN
3
+ # Copyright (c) 2015 Microsoft
4
+ # Licensed under The MIT License [see LICENSE for details]
5
+ # Written by Ross Girshick
6
+ # --------------------------------------------------------
7
+
8
+ import time
9
+
10
+
11
+ class Timer(object):
12
+ """A simple timer."""
13
+ def __init__(self):
14
+ self.total_time = 0.
15
+ self.calls = 0
16
+ self.start_time = 0.
17
+ self.diff = 0.
18
+ self.average_time = 0.
19
+
20
+ def tic(self):
21
+ # using time.time instead of time.clock because time.clock
22
+ # does not normalize for multithreading
23
+ self.start_time = time.time()
24
+
25
+ def toc(self, average=True):
26
+ self.diff = time.time() - self.start_time
27
+ self.total_time += self.diff
28
+ self.calls += 1
29
+ self.average_time = self.total_time / self.calls
30
+ if average:
31
+ return self.average_time
32
+ else:
33
+ return self.diff
34
+
35
+ def clear(self):
36
+ self.total_time = 0.
37
+ self.calls = 0
38
+ self.start_time = 0.
39
+ self.diff = 0.
40
+ self.average_time = 0.
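+
+ # Usage sketch (added comment): t = Timer(); t.tic(); do_work(); elapsed = t.toc(average=False)
+ # toc() with the default average=True returns the running mean over all calls.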
external/landmark_detection/README.md ADDED
@@ -0,0 +1,110 @@
1
+ # STAR Loss: Reducing Semantic Ambiguity in Facial Landmark Detection.
2
+
3
+ Paper Link: [arxiv](https://arxiv.org/abs/2306.02763) | [CVPR 2023](https://openaccess.thecvf.com/content/CVPR2023/papers/Zhou_STAR_Loss_Reducing_Semantic_Ambiguity_in_Facial_Landmark_Detection_CVPR_2023_paper.pdf)
4
+
5
+
6
+ - Pytorch implementation of **S**elf-adap**T**ive **A**mbiguity **R**eduction (**STAR**) loss.
7
+ - STAR loss is a self-adaptive anisotropic direction loss, which can be used in heatmap regression-based methods for facial landmark detection.
8
+ - Specifically, we find that semantic ambiguity results in an anisotropic predicted distribution, which inspires us to use the predicted distribution to represent semantic ambiguity. We use PCA to characterize the predicted distribution and thereby estimate the direction and intensity of semantic ambiguity (a minimal sketch of this step is given below the framework figure). Based on this, STAR loss adaptively suppresses the prediction error along the ambiguity direction to mitigate the impact of ambiguous annotations during training. More details can be found in our paper.
9
+ <p align="center">
10
+ <img src="./images/framework.png" width="80%">
11
+ </p>
12
+
13
+
14
+
15
+
16
+ ## Dependencies
17
+
18
+ * python==3.7.3
19
+ * PyTorch==1.6.0
20
+ * requirements.txt
21
+
22
+ ## Dataset Preparation
23
+
24
+ - Step1: Download the raw images from [COFW](http://www.vision.caltech.edu/xpburgos/ICCV13/#dataset), [300W](https://ibug.doc.ic.ac.uk/resources/300-W/), and [WFLW](https://wywu.github.io/projects/LAB/WFLW.html).
25
+ - Step2: We follow the data preprocess in [ADNet](https://openaccess.thecvf.com/content/ICCV2021/papers/Huang_ADNet_Leveraging_Error-Bias_Towards_Normal_Direction_in_Face_Alignment_ICCV_2021_paper.pdf), and the metadata can be download from [the corresponding repository](https://github.com/huangyangyu/ADNet).
26
+ - Step3: Make them look like this:
27
+ ```script
28
+ # the dataset directory:
29
+ |-- ${image_dir}
30
+ |-- WFLW
31
+ | -- WFLW_images
32
+ |-- 300W
33
+ | -- afw
34
+ | -- helen
35
+ | -- ibug
36
+ | -- lfpw
37
+ |-- COFW
38
+ | -- train
39
+ | -- test
40
+ |-- ${annot_dir}
41
+ |-- WFLW
42
+ |-- train.tsv, test.tsv
43
+ |-- 300W
44
+ |-- train.tsv, test.tsv
45
+ |--COFW
46
+ |-- train.tsv, test.tsv
47
+ ```
48
+
49
+ ## Usage
50
+ * Work directory: set the ${ckpt_dir} in ./conf/alignment.py.
51
+ * Pretrained model:
52
+
53
+ | Dataset | Model |
54
+ |:-----------------------------------------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------|
55
+ | WFLW | [google](https://drive.google.com/file/d/1aOx0wYEZUfBndYy_8IYszLPG_D2fhxrT/view?usp=sharing) / [baidu](https://pan.baidu.com/s/10vvI-ovs3x9NrdmpnXK6sg?pwd=u0yu) |
56
+ | 300W | [google](https://drive.google.com/file/d/1Fiu3hjjkQRdKsWE9IgyNPdiJSz9_MzA5/view?usp=sharing) / [baidu](https://pan.baidu.com/s/1bjUhLq1zS1XSl1nX78fU7A?pwd=yb2s) |
57
+ | COFW | [google](https://drive.google.com/file/d/1NFcZ9jzql_jnn3ulaSzUlyhS05HWB9n_/view?usp=drive_link) / [baidu](https://pan.baidu.com/s/1XO6hDZ8siJLTgFcpyu1Tzw?pwd=m57n) |
58
+
59
+
60
+ ### Training
61
+ ```shell
62
+ python main.py --mode=train --device_ids=0,1,2,3 \
63
+ --image_dir=${image_dir} --annot_dir=${annot_dir} \
64
+ --data_definition={WFLW, 300W, COFW}
65
+ ```
66
+
67
+ ### Testing
68
+ ```shell
69
+ python main.py --mode=test --device_ids=0 \
70
+ --image_dir=${image_dir} --annot_dir=${annot_dir} \
71
+ --data_definition={WFLW, 300W, COFW} \
72
+ --pretrained_weight=${model_path} \
73
+ ```
74
+
75
+ ### Evaluation
76
+ ```shell
77
+ python evaluate.py --device_ids=0 \
78
+ --model_path=${model_path} --metadata_path=${metadata_path} \
79
+ --image_dir=${image_dir} --data_definition={WFLW, 300W, COFW} \
80
+ ```
81
+
82
+ To test on your own images, the following command can be used:
83
+ ```shell
84
+ python demo.py
85
+ ```
86
+
87
+
88
+ ## Results
89
+ The models trained with STAR Loss achieve **SOTA** performance on all of the COFW, 300W and WFLW datasets.
90
+
91
+ <p align="center">
92
+ <img src="./images/results.png" width="80%">
93
+ </p>
94
+
95
+ ## BibTeX Citation
96
+ Please consider citing our paper in your publications if this project helps your research. The BibTeX reference is as follows.
97
+ ```
98
+ @inproceedings{Zhou_2023_CVPR,
99
+ author = {Zhou, Zhenglin and Li, Huaxia and Liu, Hong and Wang, Nanyang and Yu, Gang and Ji, Rongrong},
100
+ title = {STAR Loss: Reducing Semantic Ambiguity in Facial Landmark Detection},
101
+ booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
102
+ month = {June},
103
+ year = {2023},
104
+ pages = {15475-15484}
105
+ }
106
+ ```
107
+
108
+ ## Acknowledgments
109
+ This repository is built on top of [ADNet](https://github.com/huangyangyu/ADNet).
110
+ Thanks for this strong baseline.
external/landmark_detection/conf/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .alignment import Alignment
external/landmark_detection/conf/alignment.py ADDED
@@ -0,0 +1,239 @@
1
+ import os.path as osp
2
+ from .base import Base
3
+
4
+
5
+ class Alignment(Base):
6
+ """
7
+ Alignment configure file, which contains training parameters of alignment.
8
+ """
9
+
10
+ def __init__(self, args):
11
+ super(Alignment, self).__init__('alignment')
12
+ self.ckpt_dir = '/mnt/workspace/humanAIGC/project/STAR/weights'
13
+ self.net = "stackedHGnet_v1"
14
+ self.nstack = 4
15
+ self.loader_type = "alignment"
16
+ self.data_definition = "300W" # COFW, 300W, WFLW
17
+ self.test_file = "test.tsv"
18
+
19
+ # image
20
+ self.channels = 3
21
+ self.width = 256
22
+ self.height = 256
23
+ self.means = (127.5, 127.5, 127.5)
24
+ self.scale = 1 / 127.5
25
+ self.aug_prob = 1.0
26
+
27
+ self.display_iteration = 10
28
+ self.val_epoch = 1
29
+ self.valset = "test.tsv"
30
+ self.norm_type = 'default'
31
+ self.encoder_type = 'default'
32
+ self.decoder_type = 'default'
33
+
34
+ # scheduler & optimizer
35
+ self.milestones = [200, 350, 450]
36
+ self.max_epoch = 260
37
+ self.optimizer = "adam"
38
+ self.learn_rate = 0.001
39
+ self.weight_decay = 0.00001
40
+ self.betas = [0.9, 0.999]
41
+ self.gamma = 0.1
42
+
43
+ # batch_size & workers
44
+ self.batch_size = 32
45
+ self.train_num_workers = 16
46
+ self.val_batch_size = 32
47
+ self.val_num_workers = 16
48
+ self.test_batch_size = 16
49
+ self.test_num_workers = 0
50
+
51
+ # tricks
52
+ self.ema = True
53
+ self.add_coord = True
54
+ self.use_AAM = True
55
+
56
+ # loss
57
+ self.loss_func = "STARLoss_v2"
58
+
59
+ # STAR Loss paras
60
+ self.star_w = 1
61
+ self.star_dist = 'smoothl1'
62
+
63
+ self.init_from_args(args)
64
+
65
+ # COFW
66
+ if self.data_definition == "COFW":
67
+ self.edge_info = (
68
+ (True, (0, 4, 2, 5)), # RightEyebrow
69
+ (True, (1, 6, 3, 7)), # LeftEyebrow
70
+ (True, (8, 12, 10, 13)), # RightEye
71
+ (False, (9, 14, 11, 15)), # LeftEye
72
+ (True, (18, 20, 19, 21)), # Nose
73
+ (True, (22, 26, 23, 27)), # LowerLip
74
+ (True, (22, 24, 23, 25)), # UpperLip
75
+ )
76
+ if self.norm_type == 'ocular':
77
+ self.nme_left_index = 8 # ocular
78
+ self.nme_right_index = 9 # ocular
79
+ elif self.norm_type in ['pupil', 'default']:
80
+ self.nme_left_index = 16 # pupil
81
+ self.nme_right_index = 17 # pupil
82
+ else:
83
+ raise NotImplementedError
84
+ self.classes_num = [29, 7, 29]
85
+ self.crop_op = True
86
+ self.flip_mapping = (
87
+ [0, 1], [4, 6], [2, 3], [5, 7], [8, 9], [10, 11], [12, 14], [16, 17], [13, 15], [18, 19], [22, 23],
88
+ )
89
+ self.image_dir = osp.join(self.image_dir, 'COFW')
90
+ # 300W
91
+ elif self.data_definition == "300W":
92
+ self.edge_info = (
93
+ (False, (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)), # FaceContour
94
+ (False, (17, 18, 19, 20, 21)), # RightEyebrow
95
+ (False, (22, 23, 24, 25, 26)), # LeftEyebrow
96
+ (False, (27, 28, 29, 30)), # NoseLine
97
+ (False, (31, 32, 33, 34, 35)), # Nose
98
+ (True, (36, 37, 38, 39, 40, 41)), # RightEye
99
+ (True, (42, 43, 44, 45, 46, 47)), # LeftEye
100
+ (True, (48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)), # OuterLip
101
+ (True, (60, 61, 62, 63, 64, 65, 66, 67)), # InnerLip
102
+ )
103
+ if self.norm_type in ['ocular', 'default']:
104
+ self.nme_left_index = 36 # ocular
105
+ self.nme_right_index = 45 # ocular
106
+ elif self.norm_type == 'pupil':
107
+ self.nme_left_index = [36, 37, 38, 39, 40, 41] # pupil
108
+ self.nme_right_index = [42, 43, 44, 45, 46, 47] # pupil
109
+ else:
110
+ raise NotImplementedError
111
+ self.classes_num = [68, 9, 68]
112
+ self.crop_op = True
113
+ self.flip_mapping = (
114
+ [0, 16], [1, 15], [2, 14], [3, 13], [4, 12], [5, 11], [6, 10], [7, 9],
115
+ [17, 26], [18, 25], [19, 24], [20, 23], [21, 22],
116
+ [31, 35], [32, 34],
117
+ [36, 45], [37, 44], [38, 43], [39, 42], [40, 47], [41, 46],
118
+ [48, 54], [49, 53], [50, 52], [61, 63], [60, 64], [67, 65], [58, 56], [59, 55],
119
+ )
120
+ self.image_dir = osp.join(self.image_dir, '300W')
121
+ # self.image_dir = osp.join(self.image_dir, '300VW_images')
122
+ # 300VW
123
+ elif self.data_definition == "300VW":
124
+ self.edge_info = (
125
+ (False, (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)), # FaceContour
126
+ (False, (17, 18, 19, 20, 21)), # RightEyebrow
127
+ (False, (22, 23, 24, 25, 26)), # LeftEyebrow
128
+ (False, (27, 28, 29, 30)), # NoseLine
129
+ (False, (31, 32, 33, 34, 35)), # Nose
130
+ (True, (36, 37, 38, 39, 40, 41)), # RightEye
131
+ (True, (42, 43, 44, 45, 46, 47)), # LeftEye
132
+ (True, (48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59)), # OuterLip
133
+ (True, (60, 61, 62, 63, 64, 65, 66, 67)), # InnerLip
134
+ )
135
+ if self.norm_type in ['ocular', 'default']:
136
+ self.nme_left_index = 36 # ocular
137
+ self.nme_right_index = 45 # ocular
138
+ elif self.norm_type == 'pupil':
139
+ self.nme_left_index = [36, 37, 38, 39, 40, 41] # pupil
140
+ self.nme_right_index = [42, 43, 44, 45, 46, 47] # pupil
141
+ else:
142
+ raise NotImplementedError
143
+ self.classes_num = [68, 9, 68]
144
+ self.crop_op = True
145
+ self.flip_mapping = (
146
+ [0, 16], [1, 15], [2, 14], [3, 13], [4, 12], [5, 11], [6, 10], [7, 9],
147
+ [17, 26], [18, 25], [19, 24], [20, 23], [21, 22],
148
+ [31, 35], [32, 34],
149
+ [36, 45], [37, 44], [38, 43], [39, 42], [40, 47], [41, 46],
150
+ [48, 54], [49, 53], [50, 52], [61, 63], [60, 64], [67, 65], [58, 56], [59, 55],
151
+ )
152
+ self.image_dir = osp.join(self.image_dir, '300VW_Dataset_2015_12_14')
153
+ # WFLW
154
+ elif self.data_definition == "WFLW":
155
+ self.edge_info = (
156
+ (False, (
157
+ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
158
+ 27,
159
+ 28, 29, 30, 31, 32)), # FaceContour
160
+ (True, (33, 34, 35, 36, 37, 38, 39, 40, 41)), # RightEyebrow
161
+ (True, (42, 43, 44, 45, 46, 47, 48, 49, 50)), # LeftEyebrow
162
+ (False, (51, 52, 53, 54)), # NoseLine
163
+ (False, (55, 56, 57, 58, 59)), # Nose
164
+ (True, (60, 61, 62, 63, 64, 65, 66, 67)), # RightEye
165
+ (True, (68, 69, 70, 71, 72, 73, 74, 75)), # LeftEye
166
+ (True, (76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87)), # OuterLip
167
+ (True, (88, 89, 90, 91, 92, 93, 94, 95)), # InnerLip
168
+ )
169
+ if self.norm_type in ['ocular', 'default']:
170
+ self.nme_left_index = 60 # ocular
171
+ self.nme_right_index = 72 # ocular
172
+ elif self.norm_type == 'pupil':
173
+ self.nme_left_index = 96 # pupils
174
+ self.nme_right_index = 97 # pupils
175
+ else:
176
+ raise NotImplementedError
177
+ self.classes_num = [98, 9, 98]
178
+ self.crop_op = True
179
+ self.flip_mapping = (
180
+ [0, 32], [1, 31], [2, 30], [3, 29], [4, 28], [5, 27], [6, 26], [7, 25], [8, 24], [9, 23], [10, 22],
181
+ [11, 21], [12, 20], [13, 19], [14, 18], [15, 17], # cheek
182
+ [33, 46], [34, 45], [35, 44], [36, 43], [37, 42], [38, 50], [39, 49], [40, 48], [41, 47], # eyebrow
183
+ [60, 72], [61, 71], [62, 70], [63, 69], [64, 68], [65, 75], [66, 74], [67, 73],
184
+ [55, 59], [56, 58],
185
+ [76, 82], [77, 81], [78, 80], [87, 83], [86, 84],
186
+ [88, 92], [89, 91], [95, 93], [96, 97]
187
+ )
188
+ self.image_dir = osp.join(self.image_dir, 'WFLW', 'WFLW_images')
189
+
190
+ self.label_num = self.nstack * 3 if self.use_AAM else self.nstack
191
+ self.loss_weights, self.criterions, self.metrics = [], [], []
192
+ for i in range(self.nstack):
193
+ factor = (2 ** i) / (2 ** (self.nstack - 1))
194
+ if self.use_AAM:
195
+ self.loss_weights += [factor * weight for weight in [1.0, 10.0, 10.0]]
196
+ self.criterions += [self.loss_func, "AWingLoss", "AWingLoss"]
197
+ self.metrics += ["NME", None, None]
198
+ else:
199
+ self.loss_weights += [factor * weight for weight in [1.0]]
200
+ self.criterions += [self.loss_func, ]
201
+ self.metrics += ["NME", ]
202
+
203
+ self.key_metric_index = (self.nstack - 1) * 3 if self.use_AAM else (self.nstack - 1)
204
+
205
+ # data
206
+ self.folder = self.get_foldername()
207
+ self.work_dir = osp.join(self.ckpt_dir, self.data_definition, self.folder)
208
+ self.model_dir = osp.join(self.work_dir, 'model')
209
+ self.log_dir = osp.join(self.work_dir, 'log')
210
+
211
+ self.train_tsv_file = osp.join(self.annot_dir, self.data_definition, "train.tsv")
212
+ self.train_pic_dir = self.image_dir
213
+
214
+ self.val_tsv_file = osp.join(self.annot_dir, self.data_definition, self.valset)
215
+ self.val_pic_dir = self.image_dir
216
+
217
+ self.test_tsv_file = osp.join(self.annot_dir, self.data_definition, self.test_file)
218
+ self.test_pic_dir = self.image_dir
219
+
220
+ # self.train_tsv_file = osp.join(self.annot_dir, '300VW', "train.tsv")
221
+ # self.train_pic_dir = self.image_dir
222
+
223
+ # self.val_tsv_file = osp.join(self.annot_dir, '300VW', self.valset)
224
+ # self.val_pic_dir = self.image_dir
225
+
226
+ # self.test_tsv_file = osp.join(self.annot_dir, '300VW', self.test_file)
227
+ # self.test_pic_dir = self.image_dir
228
+
229
+
230
+ def get_foldername(self):
231
+ str = ''
232
+ str += '{}_{}x{}_{}_ep{}_lr{}_bs{}'.format(self.data_definition, self.height, self.width,
233
+ self.optimizer, self.max_epoch, self.learn_rate, self.batch_size)
234
+ str += '_{}'.format(self.loss_func)
235
+ str += '_{}_{}'.format(self.star_dist, self.star_w) if self.loss_func == 'STARLoss' else ''
236
+ str += '_AAM' if self.use_AAM else ''
237
+ str += '_{}'.format(self.valset[:-4]) if self.valset != 'test.tsv' else ''
238
+ str += '_{}'.format(self.id)
239
+ return str
external/landmark_detection/conf/base.py ADDED
@@ -0,0 +1,94 @@
1
+ import uuid
2
+ import logging
3
+ import os.path as osp
4
+ from argparse import Namespace
5
+ # from tensorboardX import SummaryWriter
6
+
7
+ class Base:
8
+ """
9
+ Base configuration, which contains the basic training parameters and should be inherited by task-specific configuration files.
10
+ """
11
+
12
+ def __init__(self, config_name, ckpt_dir='./', image_dir='./', annot_dir='./'):
13
+ self.type = config_name
14
+ self.id = str(uuid.uuid4())
15
+ self.note = ""
16
+
17
+ self.ckpt_dir = ckpt_dir
18
+ self.image_dir = image_dir
19
+ self.annot_dir = annot_dir
20
+
21
+ self.loader_type = "alignment"
22
+ self.loss_func = "STARLoss"
23
+
24
+ # train
25
+ self.batch_size = 128
26
+ self.val_batch_size = 1
27
+ self.test_batch_size = 32
28
+ self.channels = 3
29
+ self.width = 256
30
+ self.height = 256
31
+
32
+ # mean values in r, g, b channel.
33
+ self.means = (127, 127, 127)
34
+ self.scale = 0.0078125
35
+
36
+ self.display_iteration = 100
37
+ self.milestones = [50, 80]
38
+ self.max_epoch = 100
39
+
40
+ self.net = "stackedHGnet_v1"
41
+ self.nstack = 4
42
+
43
+ # ["adam", "sgd"]
44
+ self.optimizer = "adam"
45
+ self.learn_rate = 0.1
46
+ self.momentum = 0.01 # caffe: 0.99
47
+ self.weight_decay = 0.0
48
+ self.nesterov = False
49
+ self.scheduler = "MultiStepLR"
50
+ self.gamma = 0.1
51
+
52
+ self.loss_weights = [1.0]
53
+ self.criterions = ["SoftmaxWithLoss"]
54
+ self.metrics = ["Accuracy"]
55
+ self.key_metric_index = 0
56
+ self.classes_num = [1000]
57
+ self.label_num = len(self.classes_num)
58
+
59
+ # model
60
+ self.ema = False
61
+ self.use_AAM = True
62
+
63
+ # visualization
64
+ self.writer = None
65
+
66
+ # log file
67
+ self.logger = None
68
+
69
+ def init_instance(self):
70
+ # self.writer = SummaryWriter(logdir=self.log_dir, comment=self.type)
71
+ log_formatter = logging.Formatter("%(asctime)s %(levelname)-8s: %(message)s")
72
+ root_logger = logging.getLogger()
73
+ file_handler = logging.FileHandler(osp.join(self.log_dir, "log.txt"))
74
+ file_handler.setFormatter(log_formatter)
75
+ file_handler.setLevel(logging.NOTSET)
76
+ root_logger.addHandler(file_handler)
77
+ console_handler = logging.StreamHandler()
78
+ console_handler.setFormatter(log_formatter)
79
+ console_handler.setLevel(logging.NOTSET)
80
+ root_logger.addHandler(console_handler)
81
+ root_logger.setLevel(logging.NOTSET)
82
+ self.logger = root_logger
83
+
84
+ def __del__(self):
85
+ # tensorboard --logdir self.log_dir
86
+ if self.writer is not None:
87
+ # self.writer.export_scalars_to_json(self.log_dir + "visual.json")
88
+ self.writer.close()
89
+
90
+ def init_from_args(self, args: Namespace):
91
+ args_vars = vars(args)
92
+ for key, value in args_vars.items():
93
+ if hasattr(self, key) and value is not None:
94
+ setattr(self, key, value)
external/landmark_detection/config.json ADDED
@@ -0,0 +1,15 @@
1
+ {
2
+ "Token":"bpt4JPotFA6bpdknR9ZDCw",
3
+ "business_flag": "shadow_cv_face",
4
+ "model_local_file_path": "/apdcephfs_cq3/share_1134483/charlinzhou/Documents/awesome-tools/jizhi/",
5
+ "host_num": 1,
6
+ "host_gpu_num": 1,
7
+ "GPUName": "V100",
8
+ "is_elasticity": true,
9
+ "enable_evicted_pulled_up": true,
10
+ "task_name": "20230312_slpt_star_bb_init_eigen_box_align_smoothl1-1",
11
+ "task_flag": "20230312_slpt_star_bb_init_eigen_box_align_smoothl1-1",
12
+ "model_name": "20230312_slpt_star_bb_init_eigen_box_align_smoothl1-1",
13
+ "image_full_name": "mirrors.tencent.com/haroldzcli/py36-pytorch1.7.1-torchvision0.8.2-cuda10.1-cudnn7.6",
14
+ "start_cmd": "./start_slpt.sh /apdcephfs_cq3/share_1134483/charlinzhou/Documents/SLPT_Training train.py --loss_func=star --bb_init --eigen_box --dist_func=align_smoothl1"
15
+ }
external/landmark_detection/data_processor/CheckFaceKeyPoint.py ADDED
@@ -0,0 +1,147 @@
1
+ import os
2
+
3
+ import cv2
4
+ import numpy as np
5
+ from PIL import Image
6
+
7
+ selected_indices_old = [
8
+ 2311,
9
+ 2416,
10
+ 2437,
11
+ 2460,
12
+ 2495,
13
+ 2518,
14
+ 2520,
15
+ 2627,
16
+ 4285,
17
+ 4315,
18
+ 6223,
19
+ 6457,
20
+ 6597,
21
+ 6642,
22
+ 6974,
23
+ 7054,
24
+ 7064,
25
+ 7182,
26
+ 7303,
27
+ 7334,
28
+ 7351,
29
+ 7368,
30
+ 7374,
31
+ 7493,
32
+ 7503,
33
+ 7626,
34
+ 8443,
35
+ 8562,
36
+ 8597,
37
+ 8701,
38
+ 8817,
39
+ 8953,
40
+ 11213,
41
+ 11261,
42
+ 11317,
43
+ 11384,
44
+ 11600,
45
+ 11755,
46
+ 11852,
47
+ 11891,
48
+ 11945,
49
+ 12010,
50
+ 12354,
51
+ 12534,
52
+ 12736,
53
+ 12880,
54
+ 12892,
55
+ 13004,
56
+ 13323,
57
+ 13371,
58
+ 13534,
59
+ 13575,
60
+ 14874,
61
+ 14949,
62
+ 14977,
63
+ 15052,
64
+ 15076,
65
+ 15291,
66
+ 15620,
67
+ 15758,
68
+ 16309,
69
+ 16325,
70
+ 16348,
71
+ 16390,
72
+ 16489,
73
+ 16665,
74
+ 16891,
75
+ 17147,
76
+ 17183,
77
+ 17488,
78
+ 17549,
79
+ 17657,
80
+ 17932,
81
+ 19661,
82
+ 20162,
83
+ 20200,
84
+ 20238,
85
+ 20286,
86
+ 20432,
87
+ 20834,
88
+ 20954,
89
+ 21015,
90
+ 21036,
91
+ 21117,
92
+ 21299,
93
+ 21611,
94
+ 21632,
95
+ 21649,
96
+ 22722,
97
+ 22759,
98
+ 22873,
99
+ 23028,
100
+ 23033,
101
+ 23082,
102
+ 23187,
103
+ 23232,
104
+ 23302,
105
+ 23413,
106
+ 23430,
107
+ 23446,
108
+ 23457,
109
+ 23548,
110
+ 23636,
111
+ 32060,
112
+ 32245,
113
+ ]
114
+
115
+ selected_indices = list()
116
+ with open('/home/gyalex/Desktop/face_anno.txt', 'r') as f:
117
+ lines = f.readlines()
118
+ for line in lines:
119
+ hh = line.strip().split()
120
+ if len(hh) > 0:
121
+ pid = hh[0].find('.')
122
+ if pid != -1:
123
+ s = hh[0][pid+1:len(hh[0])]
124
+ print(s)
125
+ selected_indices.append(int(s))
126
+
127
+ f.close()
128
+
129
+ dir = '/media/gyalex/Data/face_ldk_dataset/MHC_LightingPreset_Portrait_RT_0_19/MHC_LightingPreset_Portrait_RT_seq_000015'
130
+
131
+ for idx in range(500):
132
+ img = os.path.join(dir, "view_1/MHC_LightingPreset_Portrait_RT_seq_000015_FinalImage_" + str(idx).zfill(4) + ".jpeg")
133
+ lmd = os.path.join(dir, "mesh/mesh_screen" + str(idx+5).zfill(7) + ".npy")
134
+
135
+ img = cv2.imread(img)
136
+ # c = 511 / 2
137
+ # lmd = np.load(lmd) * c + c
138
+ # lmd[:, 1] = 511 - lmd[:, 1]
139
+ lmd = np.load(lmd)[selected_indices]
140
+ for i in range(lmd.shape[0]):
141
+ p = lmd[i]
142
+ x, y = round(float(p[0])), round(float(p[1]))
143
+ print(p)
144
+ cv2.circle(img, (x, y), 2, (0, 0, 255), -1)
145
+
146
+ cv2.imshow('win', img)
147
+ cv2.waitKey(0)
external/landmark_detection/data_processor/align.py ADDED
@@ -0,0 +1,193 @@
1
+ import numpy as np
2
+ import open3d as o3d
3
+ from scipy.spatial.transform import Rotation
4
+ from scipy.linalg import orthogonal_procrustes
5
+
6
+ from open3d.pipelines.registration import registration_ransac_based_on_correspondence
7
+
8
+
9
+ def rigid_transform_3D(A, B):
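+ # Best-fit rotation and translation (no scale) mapping point set A onto B via SVD of the cross-covariance (Kabsch); returns a 4x4 homogeneous matrix.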
10
+ assert A.shape == B.shape, "Input arrays must have the same shape"
11
+ assert A.shape[1] == 3, "Input arrays must be Nx3"
12
+
13
+ N = A.shape[0] # Number of points
14
+
15
+ # Compute centroids of A and B
16
+ centroid_A = np.mean(A, axis=0)
17
+ centroid_B = np.mean(B, axis=0)
18
+
19
+ # Center the points around the centroids
20
+ AA = A - centroid_A
21
+ BB = B - centroid_B
22
+
23
+ # H = AA^T * BB
24
+ H = np.dot(AA.T, BB)
25
+
26
+ # Singular Value Decomposition
27
+ U, S, Vt = np.linalg.svd(H)
28
+
29
+ # Compute rotation
30
+ R = np.dot(Vt.T, U.T)
31
+
32
+ # Ensure a proper rotation (det(R) should be +1)
33
+ if np.linalg.det(R) < 0:
34
+ Vt[2, :] *= -1
35
+ R = np.dot(Vt.T, U.T)
36
+
37
+ # Compute translation
38
+ t = centroid_B - np.dot(R, centroid_A)
39
+
40
+ # Construct the transform matrix (4x4)
41
+ transform_matrix = np.eye(4)
42
+ transform_matrix[:3, :3] = R
43
+ transform_matrix[:3, 3] = t
44
+
45
+ return transform_matrix
46
+
47
+
48
+ def compute_rigid_transform(points1, points2):
49
+ """
50
+ Compute the rigid transform (scale, rotation and translation) that maps points1 onto points2.
51
+
52
+ Args:
53
+ points1, points2: np.ndarray of shape (68, 3); the two sets of corresponding 3D points.
54
+
55
+ Returns:
56
+ scale: float, scale factor
57
+ R: np.ndarray, 3x3 rotation matrix
58
+ t: np.ndarray, translation vector of length 3
59
+ """
60
+ # Center both point sets
61
+ mean1 = np.mean(points1, axis=0)
62
+ centered_points1 = points1 - mean1
63
+ mean2 = np.mean(points2, axis=0)
64
+ centered_points2 = points2 - mean2
65
+
66
+ # Use orthogonal_procrustes to compute the rotation
67
+ R, _ = orthogonal_procrustes(centered_points1, centered_points2)
68
+ t = mean2 - R @ mean1 # translation vector
69
+
70
+ # Scale factor
71
+ scale = np.mean(np.linalg.norm(centered_points2, axis=1) /
72
+ np.linalg.norm(centered_points1, axis=1))
73
+
74
+ return scale, R, t
75
+
76
+
77
+ def compute_rigid_transform_new(points_A, points_B):
78
+ # Center both point sets
79
+ center_A = np.mean(points_A, axis=0)
80
+ center_B = np.mean(points_B, axis=0)
81
+ points_A_centered = points_A - center_A
82
+ points_B_centered = points_B - center_B
83
+
84
+ # Covariance matrix
85
+ cov_matrix = np.dot(points_A_centered.T, points_B_centered)
86
+
87
+ # SVD decomposition
88
+ U, S, Vt = np.linalg.svd(cov_matrix)
89
+
90
+ # Ensure the rotation matrix is orthogonal and right-handed; take Vt.T @ U.T as the rotation
91
+ rotation_matrix = np.dot(Vt.T, U.T)
92
+
93
+ # If the determinant is -1 (a reflection, not a proper rotation), flip the sign of one row of Vt
94
+ if np.linalg.det(rotation_matrix) < 0:
95
+ Vt[2,:] *= -1
96
+ rotation_matrix = np.dot(Vt.T, U.T)
97
+
98
+ # Scale factor
99
+ scale = np.trace(np.dot(points_A_centered.T, points_B_centered)) / np.trace(np.dot(points_A_centered.T, points_A_centered))
100
+
101
+ # Translation vector
102
+ translation_vector = center_B - scale * np.dot(rotation_matrix, center_A)
103
+
104
+ return scale, rotation_matrix, translation_vector
105
+
106
+
107
+
108
+
109
+ # Example usage
110
+ obj_A = '/home/gyalex/Desktop/our_face.obj'
111
+ obj_B = '/home/gyalex/Desktop/Neutral.obj'
112
+
113
+ mesh_A = o3d.io.read_triangle_mesh(obj_A)
114
+ mesh_B = o3d.io.read_triangle_mesh(obj_B)
115
+
116
+ vertices_A = np.asarray(mesh_A.vertices)
117
+ vertices_B = np.asarray(mesh_B.vertices)
118
+
119
+ list_A = list()
120
+ list_B = list()
121
+ with open('/home/gyalex/Desktop/our_marker.txt', 'r') as f:
122
+ lines_A = f.readlines()
123
+ for line in lines_A:
124
+ hh = line.strip().split()
125
+ list_A.append(int(hh[0]))
126
+
127
+ with open('/home/gyalex/Desktop/ARKit_landmarks.txt', 'r') as f:
128
+ lines_B = f.readlines()
129
+ for line in lines_B:
130
+ hh = line.strip().split()
131
+ list_B.append(int(hh[0]))
132
+
133
+ A = vertices_A[list_A,:] # first set of 3D points
134
+ B = vertices_B[list_B,:] # second set of 3D points
135
+
136
+ # scale, R, t = compute_rigid_transform(A, B)
137
+
138
+ # # Define a scale transform matrix
139
+ # scale_matrix = np.eye(4)
140
+ # scale_matrix[0, 0] = scale # scale along x
141
+ # scale_matrix[1, 1] = scale # scale along y
142
+ # scale_matrix[2, 2] = scale # scale along z
143
+
144
+ # transform_matrix = np.eye(4)
145
+ # transform_matrix[:3, :3] = scale
146
+ # transform_matrix[:3, 3] = R*t
147
+
148
+ # mesh_A.transform(transform_matrix)
149
+ # # mesh_A.transform(scale_matrix)
150
+
151
+ # o3d.io.write_triangle_mesh('/home/gyalex/Desktop/our_face_new.obj', mesh_A)
152
+
153
+ pcd_source = o3d.utility.Vector3dVector(A) # example source point-cloud data
154
+ pcd_target = o3d.utility.Vector3dVector(B) # example target point-cloud data (for illustration only)
155
+
156
+ corres_source = list()
157
+ for idx in range(68): corres_source.append(idx)
158
+ corres_target = list()
159
+ for idx in range(68): corres_target.append(idx)
160
+
161
+ # Gather the actual corresponding point coordinates from the index lists
162
+ corres_source_points = pcd_source
163
+ corres_target_points = pcd_target
164
+
165
+ corres = o3d.utility.Vector2iVector([[src, tgt] for src, tgt in zip(corres_source, corres_target)])
166
+
167
+ # Correspondence-based registration with RANSAC
168
+ reg_result = registration_ransac_based_on_correspondence(
169
+ pcd_source,
170
+ pcd_target,
171
+ corres,
172
+ estimation_method=o3d.pipelines.registration.TransformationEstimationPointToPoint(),
173
+ ransac_n=3,
174
+ criteria=o3d.pipelines.registration.RANSACConvergenceCriteria(max_iteration=100000, epsilon=1e-6)
175
+ )
176
+
177
+ # # Register with RANSAC
178
+ # convergence_criteria = o3d.pipelines.registration.RANSACConvergenceCriteria(max_iteration=50000, max_validation=500)
179
+ # ransac_result = o3d.pipelines.registration.registration_ransac_based_on_correspondence(
180
+ # pcd_source,
181
+ # pcd_target,
182
+ # corres,
183
+ # o3d.pipelines.registration.TransformationEstimationPointToPoint(),
184
+ # 3, # RANSAC threshold; adjust for the data
185
+ # convergence_criteria,
186
+ # [o3d.pipelines.registration.CorrespondenceCheckerBasedOnEdgeLength(0.9),
187
+ # o3d.pipelines.registration.CorrespondenceCheckerBasedOnDistance(0.05)],
188
+ # o3d.pipelines.registration.RANSACLoss())
189
+
190
+ # Apply the transform to the source mesh
191
+ # mesh_source_aligned = mesh_source.transform(reg_result.transformation)
192
+
193
+ a = 0
external/landmark_detection/data_processor/process_pcd.py ADDED
@@ -0,0 +1,250 @@
1
+ import os
2
+ import cv2
3
+ import numpy as np
4
+ import open3d as o3d
5
+ # import pyrender
6
+ # from pyrender import mesh, DirectionalLight, Material, PerspectiveCamera
7
+
8
+ os.environ['__GL_THREADED_OPTIMIZATIONS'] = '1'
9
+
10
+ cord_list = []
11
+ with open('./cord.txt', 'r') as f:
12
+ lines = f.readlines()
13
+ for line in lines:
14
+ m = line.split()
15
+ x = int(m[0])
16
+ y = int(m[1])
17
+
18
+ x = 1000 - x
19
+ y = 1000 - y
20
+
21
+ cord_list.append([x, y])
22
+
23
+
24
+ # Assumed path of the input TXT point files
25
+ output_folder = '/media/gyalex/Data/face_det_dataset/rgbd_data/rgbd'
26
+ if not os.path.exists(output_folder):
27
+ os.mkdir(output_folder)
28
+
29
+ for idx in range(32, 33):
30
+ txt_file_path = '/media/gyalex/Data/face_det_dataset/rgbd_data/PointImage'+ str(idx) + '.txt'
31
+ _, name = os.path.split(txt_file_path)
32
+ print(txt_file_path)
33
+
34
+ with open(txt_file_path, 'r') as file:
35
+ points = []
36
+ rgb_list = []
37
+ ori_rgb_list = []
38
+ normal_list = []
39
+
40
+ # Read the data line by line
41
+ for line in file:
42
+ # Strip the trailing newline and split the line
43
+ x, y, z, r, g, b, nx, ny, nz, w = line.split()
44
+ # Convert the strings to floats
45
+ x = float(x)
46
+ y = float(y)
47
+ z = float(z)
48
+ r = float(r)
49
+ g = float(g)
50
+ b = float(b)
51
+ nx = float(nx)
52
+ ny = float(ny)
53
+ nz = float(nz)
54
+ # Append the point to the lists
55
+ points.append((x, y, z))
56
+ rgb_list.append((r/255.0, g/255.0 , b/255.0))
57
+ normal_list.append((nx, ny, nz))
58
+
59
+ ori_r = int(r)
60
+ ori_g = int(g)
61
+ ori_b = int(b)
62
+ ori_rgb_list.append((ori_r, ori_g , ori_b))
63
+
64
+ np_points = np.asarray(points)
65
+
66
+ np_points_a = np_points
67
+
68
+ np_colors = np.asarray(rgb_list)
69
+ np_normals = np.asarray(normal_list)
70
+
71
+ np_colors_ori = np.asarray(ori_rgb_list)
72
+
73
+ pcd = o3d.geometry.PointCloud()
74
+ pcd.points = o3d.utility.Vector3dVector(np_points)
75
+ pcd.colors = o3d.utility.Vector3dVector(np_colors)
76
+ pcd.normals = o3d.utility.Vector3dVector(np_normals)
77
+
78
+ map_dict = {}
79
+
80
+ image = np.ones((1000, 1000, 3),dtype=np.uint8)*255
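+ # Splat every point (shifted by +400) into a 1000x1000 canvas and record in map_dict which point index produced each pixel.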
81
+ for i in range(np.array(pcd.points).shape[0]):
82
+ x = np.array(pcd.points)[i,0]+400
83
+ y = np.array(pcd.points)[i,1]+400
84
+
85
+ image[int(x),int(y),:] = (np.array(pcd.colors)[i,:]*255).astype(np.uint8)
86
+ image[int(x+1),int(y),:] = (np.array(pcd.colors)[i,:]*255).astype(np.uint8)
87
+ image[int(x),int(y+1),:] = (np.array(pcd.colors)[i,:]*255).astype(np.uint8)
88
+ image[int(x-1),int(y),:] = (np.array(pcd.colors)[i,:]*255).astype(np.uint8)
89
+ image[int(x),int(y-1),:] = (np.array(pcd.colors)[i,:]*255).astype(np.uint8)
90
+
91
+ map_dict[str(int(x)) + '_' + str(int(y))] = i
92
+ map_dict[str(int(x+1)) + '_' + str(int(y))] = i
93
+ map_dict[str(int(x)) + '_' + str(int(y+1))] = i
94
+ map_dict[str(int(x-1)) + '_' + str(int(y))] = i
95
+ map_dict[str(int(x)) + '_' + str(int(y-1))] = i
96
+
97
+ # if [int(y), int(x)] in cord_list:
98
+ # image[int(x),int(y),:] = np.array([0, 255, 0])
99
+
100
+ # if [int(y), int(x+1)] in cord_list:
101
+ # image[int(x+1),int(y),:] = np.array([0, 255, 0])
102
+
103
+ # if [int(y+1), int(x)] in cord_list:
104
+ # image[int(x),int(y+1),:] = np.array([0, 255, 0])
105
+
106
+ # if [int(y), int(x-1)] in cord_list:
107
+ # image[int(x-1),int(y),:] = np.array([0, 255, 0])
108
+
109
+ # if [int(y-1), int(x)] in cord_list:
110
+ # image[int(x),int(y-1),:] = np.array([0, 255, 0])
111
+
112
+ # if [int(y-1), int(x-1)] in cord_list:
113
+ # image[int(x-1),int(y-1),:] = np.array([0, 255, 0])
114
+
115
+ # if [int(y+1), int(x+1)] in cord_list:
116
+ # image[int(x+1),int(y+1),:] = np.array([0, 255, 0])
117
+
118
+ h_list = []
119
+ for m in cord_list:
120
+ a, b = m[0], m[1]
121
+ c = image[int(b),int(a),:][0]
122
+
123
+ flag = False
124
+
125
+ if image[int(b),int(a),:][1] != 255:
126
+ h_list.append(str(int(b))+'_'+str(int(a)))
127
+ flag = True
128
+ else:
129
+ if image[int(b)-2,int(a)-2,:][1] != 255:
130
+ h_list.append(str(int(b)-2)+'_'+str(int(a)-2))
131
+ flag = True
132
+ elif image[int(b)+2,int(a)+2,:][1] != 255:
133
+ h_list.append(str(int(b)+2)+'_'+str(int(a)+2))
134
+ flag = True
135
+ elif image[int(b),int(a)-3,:][1] != 255:
136
+ h_list.append(str(int(b))+'_'+str(int(a)-3))
137
+ flag = True
138
+
139
+ # if flag == False:
140
+ # cc = image[int(b),int(a),:][1]
141
+
142
+ # cv2.circle(image, (465,505), 2, (0, 255, 0), -1)
143
+
144
+ # cv2.imshow('win', image)
145
+ # cv2.waitKey(0)
146
+
147
+ with open('pid.txt', 'w') as f:
148
+ for h in h_list:
149
+ pid = map_dict[h]
150
+ s = str(pid) + '\n'
151
+ f.write(s)
152
+
153
+ np_colors[pid,:] = np.array([0, 255, 0])
154
+
155
+ f.close()
156
+
157
+ pcd0 = o3d.geometry.PointCloud()
158
+ pcd0.points = o3d.utility.Vector3dVector(np_points)
159
+ pcd0.colors = o3d.utility.Vector3dVector(np_colors)
160
+ pcd0.normals = o3d.utility.Vector3dVector(np_normals)
161
+
162
+ o3d.io.write_point_cloud('aa.ply', pcd0)
163
+
164
+
165
+ mm = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
166
+ image3 = cv2.flip(mm, -1)
167
+
168
+ # cv2.imwrite('./rgb.png', image3)
169
+
170
+ with open('./cord.txt', 'r') as f:
171
+ lines = f.readlines()
172
+ for line in lines:
173
+ m = line.split()
174
+ x = int(m[0])
175
+ y = int(m[1])
176
+
177
+ x = 1000 - x
178
+ y = 1000 - y
179
+
180
+ cv2.circle(image, (x,y), 2, (0, 255, 0), -1)
181
+
182
+ idx = map_dict[str(x)+'_'+str(y)]
183
+
184
+ a = 0
185
+
186
+ # cv2.imshow("win", image)
187
+ # cv2.waitKey(0)
188
+
189
+
190
+
191
+
192
+
193
+
194
+
195
+
196
+
197
+
198
+
199
+
200
+
201
+
202
+ # import matplotlib.pyplot as plt
203
+ # plt.imshow(image)
204
+ # plt.show()
205
+
206
+ # save_pcd_path = os.path.join(output_folder, name[:-3]+'ply')
207
+ # # o3d.io.write_point_cloud(save_pcd_path, pcd)
208
+
209
+ # # render
210
+ # import trimesh
211
+ # # fuze_trimesh = trimesh.load('/home/gyalex/Desktop/PointImage32.obj')
212
+ # # mesh = pyrender.Mesh.from_trimesh(fuze_trimesh)
213
+ # mesh = pyrender.Mesh.from_points(np_points, np_colors_ori, np_normals)
214
+
215
+ # import math
216
+ # camera = PerspectiveCamera(yfov=math.pi / 3, aspectRatio=1.0)
217
+ # camera_pose = np.array([[-1.0, 0.0, 0.0, 0], \
218
+ # [0.0, 1.0, 0.0, 0], \
219
+ # [0.0, 0.0, -1.0, 0], \
220
+ # [0.0, 0.0, 0.0, 1.0]])
221
+
222
+ # # Create the scene
223
+ # scene = pyrender.Scene()
224
+ # scene.add(mesh)
225
+ # scene.add(camera, pose=camera_pose)
226
+
227
+ # # light = pyrender.SpotLight(color=np.ones(3), intensity=3.0, innerConeAngle=np.pi/16.0, outerConeAngle=np.pi/6.0)
228
+ # # scene.add(light, pose=camera_pose)
229
+
230
+ # # Render the scene
231
+ # renderer = pyrender.OffscreenRenderer(viewport_width=1280, viewport_height=1024)
232
+ # color, depth = renderer.render(scene)
233
+
234
+ # # # Set up the scene and light source
235
+ # # scene = pyrender.Scene()
236
+ # # scene.add(point_cloud_mesh, 'point_cloud')
237
+ # # camera = PerspectiveCamera(yfov=45.0, aspectRatio=1.0)
238
+ # # scene.add(camera)
239
+
240
+ # # # Render the scene
241
+ # # renderer = pyrender.OffscreenRenderer(viewport_width=1280, viewport_height=1024)
242
+ # # color, depth = renderer.render(scene)
243
+
244
+ # # Save the rendered result as an image
245
+ # import cv2
246
+ # cv2.imshow('win', color)
247
+
248
+ # rgb_img = cv2.imread('/media/gyalex/Data/face_det_dataset/rgbd_data/color_32.bmp')
249
+ # cv2.imshow('win0', rgb_img)
250
+ # cv2.waitKey(0)
external/landmark_detection/evaluate.py ADDED
@@ -0,0 +1,258 @@
1
+ import os
2
+ import cv2
3
+ import math
4
+ import argparse
5
+ import numpy as np
6
+ from tqdm import tqdm
7
+
8
+ import torch
9
+
10
+ # private package
11
+ from lib import utility
12
+
13
+
14
+
15
+ class GetCropMatrix():
16
+ """
17
+ from_shape -> transform_matrix
18
+ """
19
+
20
+ def __init__(self, image_size, target_face_scale, align_corners=False):
21
+ self.image_size = image_size
22
+ self.target_face_scale = target_face_scale
23
+ self.align_corners = align_corners
24
+
25
+ def _compose_rotate_and_scale(self, angle, scale, shift_xy, from_center, to_center):
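+ # Compose a 3x3 similarity transform: rotate/scale about from_center by (angle, scale), then translate onto to_center plus shift_xy.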
26
+ cosv = math.cos(angle)
27
+ sinv = math.sin(angle)
28
+
29
+ fx, fy = from_center
30
+ tx, ty = to_center
31
+
32
+ acos = scale * cosv
33
+ asin = scale * sinv
34
+
35
+ a0 = acos
36
+ a1 = -asin
37
+ a2 = tx - acos * fx + asin * fy + shift_xy[0]
38
+
39
+ b0 = asin
40
+ b1 = acos
41
+ b2 = ty - asin * fx - acos * fy + shift_xy[1]
42
+
43
+ rot_scale_m = np.array([
44
+ [a0, a1, a2],
45
+ [b0, b1, b2],
46
+ [0.0, 0.0, 1.0]
47
+ ], np.float32)
48
+ return rot_scale_m
49
+
50
+ def process(self, scale, center_w, center_h):
51
+ if self.align_corners:
52
+ to_w, to_h = self.image_size - 1, self.image_size - 1
53
+ else:
54
+ to_w, to_h = self.image_size, self.image_size
55
+
56
+ rot_mu = 0
57
+ scale_mu = self.image_size / (scale * self.target_face_scale * 200.0)
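+ # scale * 200 px is the reference face-box size; scale_mu rescales that span to fill the image_size crop.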
58
+ shift_xy_mu = (0, 0)
59
+ matrix = self._compose_rotate_and_scale(
60
+ rot_mu, scale_mu, shift_xy_mu,
61
+ from_center=[center_w, center_h],
62
+ to_center=[to_w / 2.0, to_h / 2.0])
63
+ return matrix
64
+
65
+
66
+ class TransformPerspective():
67
+ """
68
+ image, matrix3x3 -> transformed_image
69
+ """
70
+
71
+ def __init__(self, image_size):
72
+ self.image_size = image_size
73
+
74
+ def process(self, image, matrix):
75
+ return cv2.warpPerspective(
76
+ image, matrix, dsize=(self.image_size, self.image_size),
77
+ flags=cv2.INTER_LINEAR, borderValue=0)
78
+
79
+
80
+ class TransformPoints2D():
81
+ """
82
+ points (nx2), matrix (3x3) -> points (nx2)
83
+ """
84
+
85
+ def process(self, srcPoints, matrix):
86
+ # nx3
87
+ desPoints = np.concatenate([srcPoints, np.ones_like(srcPoints[:, [0]])], axis=1)
88
+ desPoints = desPoints @ np.transpose(matrix) # nx3
89
+ desPoints = desPoints[:, :2] / desPoints[:, [2, 2]]
90
+ return desPoints.astype(srcPoints.dtype)
91
+
92
+
93
+ class Alignment:
94
+ def __init__(self, args, model_path, dl_framework, device_ids):
95
+ self.input_size = 256
96
+ self.target_face_scale = 1.0
97
+ self.dl_framework = dl_framework
98
+
99
+ # model
100
+ if self.dl_framework == "pytorch":
101
+ # conf
102
+ self.config = utility.get_config(args)
103
+ self.config.device_id = device_ids[0]
104
+ # set environment
105
+ utility.set_environment(self.config)
106
+ self.config.init_instance()
107
+ if self.config.logger is not None:
108
+ self.config.logger.info("Loaded configure file %s: %s" % (args.config_name, self.config.id))
109
+ self.config.logger.info("\n" + "\n".join(["%s: %s" % item for item in self.config.__dict__.items()]))
110
+
111
+ net = utility.get_net(self.config)
112
+ if device_ids == [-1]:
113
+ checkpoint = torch.load(model_path, map_location="cpu")
114
+ else:
115
+ checkpoint = torch.load(model_path)
116
+ net.load_state_dict(checkpoint["net"])
117
+ net = net.to(self.config.device_id)
118
+ net.eval()
119
+ self.alignment = net
120
+ else:
121
+ assert False
122
+
123
+ self.getCropMatrix = GetCropMatrix(image_size=self.input_size, target_face_scale=self.target_face_scale,
124
+ align_corners=True)
125
+ self.transformPerspective = TransformPerspective(image_size=self.input_size)
126
+ self.transformPoints2D = TransformPoints2D()
127
+
128
+ def norm_points(self, points, align_corners=False):
129
+ if align_corners:
130
+ # [0, SIZE-1] -> [-1, +1]
131
+ return points / torch.tensor([self.input_size - 1, self.input_size - 1]).to(points).view(1, 1, 2) * 2 - 1
132
+ else:
133
+ # [-0.5, SIZE-0.5] -> [-1, +1]
134
+ return (points * 2 + 1) / torch.tensor([self.input_size, self.input_size]).to(points).view(1, 1, 2) - 1
135
+
136
+ def denorm_points(self, points, align_corners=False):
137
+ if align_corners:
138
+ # [-1, +1] -> [0, SIZE-1]
139
+ return (points + 1) / 2 * torch.tensor([self.input_size - 1, self.input_size - 1]).to(points).view(1, 1, 2)
140
+ else:
141
+ # [-1, +1] -> [-0.5, SIZE-0.5]
142
+ return ((points + 1) * torch.tensor([self.input_size, self.input_size]).to(points).view(1, 1, 2) - 1) / 2
143
+
144
+ def preprocess(self, image, scale, center_w, center_h):
145
+ matrix = self.getCropMatrix.process(scale, center_w, center_h)
146
+ input_tensor = self.transformPerspective.process(image, matrix)
147
+ input_tensor = input_tensor[np.newaxis, :]
148
+
149
+ input_tensor = torch.from_numpy(input_tensor)
150
+ input_tensor = input_tensor.float().permute(0, 3, 1, 2)
151
+ input_tensor = input_tensor / 255.0 * 2.0 - 1.0
152
+ input_tensor = input_tensor.to(self.config.device_id)
153
+ return input_tensor, matrix
154
+
155
+ def postprocess(self, srcPoints, coeff):
156
+ # dstPoints = self.transformPoints2D.process(srcPoints, coeff)
157
+ # matrix^(-1) * src = dst
158
+ # src = matrix * dst
159
+ dstPoints = np.zeros(srcPoints.shape, dtype=np.float32)
160
+ for i in range(srcPoints.shape[0]):
161
+ dstPoints[i][0] = coeff[0][0] * srcPoints[i][0] + coeff[0][1] * srcPoints[i][1] + coeff[0][2]
162
+ dstPoints[i][1] = coeff[1][0] * srcPoints[i][0] + coeff[1][1] * srcPoints[i][1] + coeff[1][2]
163
+ return dstPoints
164
+
165
+ def analyze(self, image, scale, center_w, center_h):
166
+ input_tensor, matrix = self.preprocess(image, scale, center_w, center_h)
167
+
168
+ if self.dl_framework == "pytorch":
169
+ with torch.no_grad():
170
+ output = self.alignment(input_tensor)
171
+ landmarks = output[-1][0]
172
+ else:
173
+ assert False
174
+
175
+ landmarks = self.denorm_points(landmarks)
176
+ landmarks = landmarks.data.cpu().numpy()[0]
177
+ landmarks = self.postprocess(landmarks, np.linalg.inv(matrix))
178
+
179
+ return landmarks
180
+
181
+
182
+ def L2(p1, p2):
183
+ return np.linalg.norm(p1 - p2)
184
+
185
+
186
+ def NME(landmarks_gt, landmarks_pv):
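+ # Normalized Mean Error: mean landmark-to-landmark distance divided by the distance between the two reference eye landmarks (index pair depends on the 29/68/98-point convention).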
187
+ pts_num = landmarks_gt.shape[0]
188
+ if pts_num == 29:
189
+ left_index = 16
190
+ right_index = 17
191
+ elif pts_num == 68:
192
+ left_index = 36
193
+ right_index = 45
194
+ elif pts_num == 98:
195
+ left_index = 60
196
+ right_index = 72
197
+
198
+ nme = 0
199
+ eye_span = L2(landmarks_gt[left_index], landmarks_gt[right_index])
200
+ for i in range(pts_num):
201
+ error = L2(landmarks_pv[i], landmarks_gt[i])
202
+ nme += error / eye_span
203
+ nme /= pts_num
204
+ return nme
205
+
206
+
207
+ def evaluate(args, model_path, metadata_path, device_ids, mode):
208
+ alignment = Alignment(args, model_path, dl_framework="pytorch", device_ids=device_ids)
209
+ config = alignment.config
210
+ nme_sum = 0
211
+ with open(metadata_path, 'r') as f:
212
+ lines = f.readlines()
213
+ for k, line in enumerate(tqdm(lines)):
214
+ item = line.strip().split("\t")
215
+ image_name, landmarks_5pts, landmarks_gt, scale, center_w, center_h = item[:6]
216
+ # image & keypoints alignment
217
+ image_name = image_name.replace('\\', '/')
218
+ image_name = image_name.replace('//msr-facestore/Workspace/MSRA_EP_Allergan/users/yanghuan/training_data/wflw/rawImages/', '')
219
+ image_name = image_name.replace('./rawImages/', '')
220
+ image_path = os.path.join(config.image_dir, image_name)
221
+ landmarks_gt = np.array(list(map(float, landmarks_gt.split(","))), dtype=np.float32).reshape(-1, 2)
222
+ scale, center_w, center_h = float(scale), float(center_w), float(center_h)
223
+
224
+ image = cv2.imread(image_path)
225
+ landmarks_pv = alignment.analyze(image, scale, center_w, center_h)
226
+
227
+ # NME
228
+ if mode == "nme":
229
+ nme = NME(landmarks_gt, landmarks_pv)
230
+ nme_sum += nme
231
+ # print("Current NME(%d): %f" % (k + 1, (nme_sum / (k + 1))))
232
+ else:
233
+ pass
234
+
235
+ if mode == "nme":
236
+ print("Final NME: %f" % (100*nme_sum / (k + 1)))
237
+ else:
238
+ pass
239
+
240
+
241
+ if __name__ == "__main__":
242
+ parser = argparse.ArgumentParser(description="Evaluation script")
243
+ parser.add_argument("--config_name", type=str, default="alignment", help="set configure file name")
244
+ parser.add_argument("--model_path", type=str, default="./train.pkl", help="the path of model")
245
+ parser.add_argument("--data_definition", type=str, default='WFLW', help="COFW/300W/WFLW")
246
+ parser.add_argument("--metadata_path", type=str, default="", help="the path of metadata")
247
+ parser.add_argument("--image_dir", type=str, default="", help="the path of image")
248
+ parser.add_argument("--device_ids", type=str, default="0", help="set device ids, -1 means use cpu device, >= 0 means use gpu device")
249
+ parser.add_argument("--mode", type=str, default="nme", help="set the evaluate mode: nme")
250
+ args = parser.parse_args()
251
+
252
+ device_ids = list(map(int, args.device_ids.split(",")))
253
+ evaluate(
254
+ args,
255
+ model_path=args.model_path,
256
+ metadata_path=args.metadata_path,
257
+ device_ids=device_ids,
258
+ mode=args.mode)
external/landmark_detection/infer_folder.py ADDED
@@ -0,0 +1,253 @@
1
+ import cv2
2
+ import math
3
+ import copy
4
+ import numpy as np
5
+ import argparse
6
+ import torch
7
+ import json
8
+
9
+ # private package
10
+ from lib import utility
11
+ from FaceBoxesV2.faceboxes_detector import *
12
+
13
+ class GetCropMatrix():
14
+ """
15
+ from_shape -> transform_matrix
16
+ """
17
+
18
+ def __init__(self, image_size, target_face_scale, align_corners=False):
19
+ self.image_size = image_size
20
+ self.target_face_scale = target_face_scale
21
+ self.align_corners = align_corners
22
+
23
+ def _compose_rotate_and_scale(self, angle, scale, shift_xy, from_center, to_center):
24
+ cosv = math.cos(angle)
25
+ sinv = math.sin(angle)
26
+
27
+ fx, fy = from_center
28
+ tx, ty = to_center
29
+
30
+ acos = scale * cosv
31
+ asin = scale * sinv
32
+
33
+ a0 = acos
34
+ a1 = -asin
35
+ a2 = tx - acos * fx + asin * fy + shift_xy[0]
36
+
37
+ b0 = asin
38
+ b1 = acos
39
+ b2 = ty - asin * fx - acos * fy + shift_xy[1]
40
+
41
+ rot_scale_m = np.array([
42
+ [a0, a1, a2],
43
+ [b0, b1, b2],
44
+ [0.0, 0.0, 1.0]
45
+ ], np.float32)
46
+ return rot_scale_m
47
+
48
+ def process(self, scale, center_w, center_h):
49
+ if self.align_corners:
50
+ to_w, to_h = self.image_size - 1, self.image_size - 1
51
+ else:
52
+ to_w, to_h = self.image_size, self.image_size
53
+
54
+ rot_mu = 0
55
+ scale_mu = self.image_size / (scale * self.target_face_scale * 200.0)
56
+ shift_xy_mu = (0, 0)
57
+ matrix = self._compose_rotate_and_scale(
58
+ rot_mu, scale_mu, shift_xy_mu,
59
+ from_center=[center_w, center_h],
60
+ to_center=[to_w / 2.0, to_h / 2.0])
61
+ return matrix
62
+
63
+
64
+ class TransformPerspective():
65
+ """
66
+ image, matrix3x3 -> transformed_image
67
+ """
68
+
69
+ def __init__(self, image_size):
70
+ self.image_size = image_size
71
+
72
+ def process(self, image, matrix):
73
+ return cv2.warpPerspective(
74
+ image, matrix, dsize=(self.image_size, self.image_size),
75
+ flags=cv2.INTER_LINEAR, borderValue=0)
76
+
77
+
78
+ class TransformPoints2D():
79
+ """
80
+ points (nx2), matrix (3x3) -> points (nx2)
81
+ """
82
+
83
+ def process(self, srcPoints, matrix):
84
+ # nx3
85
+ desPoints = np.concatenate([srcPoints, np.ones_like(srcPoints[:, [0]])], axis=1)
86
+ desPoints = desPoints @ np.transpose(matrix) # nx3
87
+ desPoints = desPoints[:, :2] / desPoints[:, [2, 2]]
88
+ return desPoints.astype(srcPoints.dtype)
89
+
90
+ class Alignment:
91
+ def __init__(self, args, model_path, dl_framework, device_ids):
92
+ self.input_size = 256
93
+ self.target_face_scale = 1.0
94
+ self.dl_framework = dl_framework
95
+
96
+ # model
97
+ if self.dl_framework == "pytorch":
98
+ # conf
99
+ self.config = utility.get_config(args)
100
+ self.config.device_id = device_ids[0]
101
+ # set environment
102
+ utility.set_environment(self.config)
103
+ # self.config.init_instance()
104
+ # if self.config.logger is not None:
105
+ # self.config.logger.info("Loaded configure file %s: %s" % (args.config_name, self.config.id))
106
+ # self.config.logger.info("\n" + "\n".join(["%s: %s" % item for item in self.config.__dict__.items()]))
107
+
108
+ net = utility.get_net(self.config)
109
+ if device_ids == [-1]:
110
+ checkpoint = torch.load(model_path, map_location="cpu")
111
+ else:
112
+ checkpoint = torch.load(model_path)
113
+ net.load_state_dict(checkpoint["net"])
114
+
115
+ if self.config.device_id == -1:
116
+ net = net.cpu()
117
+ else:
118
+ net = net.to(self.config.device_id)
119
+
120
+ net.eval()
121
+ self.alignment = net
122
+ else:
123
+ assert False
124
+
125
+ self.getCropMatrix = GetCropMatrix(image_size=self.input_size, target_face_scale=self.target_face_scale,
126
+ align_corners=True)
127
+ self.transformPerspective = TransformPerspective(image_size=self.input_size)
128
+ self.transformPoints2D = TransformPoints2D()
129
+
130
+ def norm_points(self, points, align_corners=False):
131
+ if align_corners:
132
+ # [0, SIZE-1] -> [-1, +1]
133
+ return points / torch.tensor([self.input_size - 1, self.input_size - 1]).to(points).view(1, 1, 2) * 2 - 1
134
+ else:
135
+ # [-0.5, SIZE-0.5] -> [-1, +1]
136
+ return (points * 2 + 1) / torch.tensor([self.input_size, self.input_size]).to(points).view(1, 1, 2) - 1
137
+
138
+ def denorm_points(self, points, align_corners=False):
139
+ if align_corners:
140
+ # [-1, +1] -> [0, SIZE-1]
141
+ return (points + 1) / 2 * torch.tensor([self.input_size - 1, self.input_size - 1]).to(points).view(1, 1, 2)
142
+ else:
143
+ # [-1, +1] -> [-0.5, SIZE-0.5]
144
+ return ((points + 1) * torch.tensor([self.input_size, self.input_size]).to(points).view(1, 1, 2) - 1) / 2
145
+
146
+ def preprocess(self, image, scale, center_w, center_h):
147
+ matrix = self.getCropMatrix.process(scale, center_w, center_h)
148
+ input_tensor = self.transformPerspective.process(image, matrix)
149
+ input_tensor = input_tensor[np.newaxis, :]
150
+
151
+ input_tensor = torch.from_numpy(input_tensor)
152
+ input_tensor = input_tensor.float().permute(0, 3, 1, 2)
153
+ input_tensor = input_tensor / 255.0 * 2.0 - 1.0
154
+
155
+ if self.config.device_id == -1:
156
+ input_tensor = input_tensor.cpu()
157
+ else:
158
+ input_tensor = input_tensor.to(self.config.device_id)
159
+
160
+ return input_tensor, matrix
161
+
162
+ def postprocess(self, srcPoints, coeff):
163
+ # dstPoints = self.transformPoints2D.process(srcPoints, coeff)
164
+ # matrix^(-1) * src = dst
165
+ # src = matrix * dst
166
+ dstPoints = np.zeros(srcPoints.shape, dtype=np.float32)
167
+ for i in range(srcPoints.shape[0]):
168
+ dstPoints[i][0] = coeff[0][0] * srcPoints[i][0] + coeff[0][1] * srcPoints[i][1] + coeff[0][2]
169
+ dstPoints[i][1] = coeff[1][0] * srcPoints[i][0] + coeff[1][1] * srcPoints[i][1] + coeff[1][2]
170
+ return dstPoints
171
+
172
+ def analyze(self, image, scale, center_w, center_h):
173
+ input_tensor, matrix = self.preprocess(image, scale, center_w, center_h)
174
+
175
+ if self.dl_framework == "pytorch":
176
+ with torch.no_grad():
177
+ output = self.alignment(input_tensor)
178
+ landmarks = output[-1][0]
179
+ else:
180
+ assert False
181
+
182
+ landmarks = self.denorm_points(landmarks)
183
+ landmarks = landmarks.data.cpu().numpy()[0]
184
+ landmarks = self.postprocess(landmarks, np.linalg.inv(matrix))
185
+
186
+ return landmarks
187
+
188
+ if __name__ == '__main__':
189
+ parser = argparse.ArgumentParser(description="inference script")
190
+ parser.add_argument('--folder_path', type=str, help='Path to image folder')
191
+ args = parser.parse_args()
192
+
193
+ # args.folder_path = '/media/gyalex/Data/flame/ph_test/head_images/flame/image'
194
+
195
+ current_path = os.getcwd()
196
+
197
+ use_gpu = True
198
+ ########### face detection ############
199
+ if use_gpu:
200
+ device = torch.device("cuda:0")
201
+ else:
202
+ device = torch.device("cpu")
203
+
204
+ current_path = os.getcwd()
205
+ det_model_path = os.path.join(current_path, 'preprocess', 'submodules', 'Landmark_detection', 'FaceBoxesV2/weights/FaceBoxesV2.pth')
206
+ detector = FaceBoxesDetector('FaceBoxes', det_model_path, use_gpu, device)
207
+
208
+ ########### facial alignment ############
209
+ model_path = os.path.join(current_path, 'preprocess', 'submodules', 'Landmark_detection', 'weights/68_keypoints_model.pkl')
210
+
211
+ if use_gpu:
212
+ device_ids = [0]
213
+ else:
214
+ device_ids = [-1]
215
+
216
+ args.config_name = 'alignment'
217
+ alignment = Alignment(args, model_path, dl_framework="pytorch", device_ids=device_ids)
218
+
219
+ img_path_list = os.listdir(args.folder_path)
220
+ kpts_code = dict()
221
+
222
+ ########### inference ############
223
+ for file_name in img_path_list:
224
+ abs_path = os.path.join(args.folder_path, file_name)
225
+
226
+ image = cv2.imread(abs_path)
227
+ image_draw = copy.deepcopy(image)
228
+
229
+ detections, _ = detector.detect(image, 0.6, 1)
230
+ for idx in range(len(detections)):
231
+ x1_ori = detections[idx][2]
232
+ y1_ori = detections[idx][3]
233
+ x2_ori = x1_ori + detections[idx][4]
234
+ y2_ori = y1_ori + detections[idx][5]
235
+
236
+ scale = max(x2_ori - x1_ori, y2_ori - y1_ori) / 180
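+ # Convert the detector box into the (scale, center_w, center_h) triplet expected by Alignment.analyze; the longer box side is normalized by 180 px.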
237
+ center_w = (x1_ori + x2_ori) / 2
238
+ center_h = (y1_ori + y2_ori) / 2
239
+ scale, center_w, center_h = float(scale), float(center_w), float(center_h)
240
+
241
+ landmarks_pv = alignment.analyze(image, scale, center_w, center_h)
242
+ landmarks_pv_list = landmarks_pv.tolist()
243
+
244
+ for num in range(landmarks_pv.shape[0]):
245
+ cv2.circle(image_draw, (round(landmarks_pv[num][0]), round(landmarks_pv[num][1])),
246
+ 2, (0, 255, 0), -1)
247
+
248
+ kpts_code[file_name] = landmarks_pv_list
249
+ save_path = args.folder_path[:-5] + 'landmark'
250
+ cv2.imwrite(os.path.join(save_path, file_name), image_draw)
251
+
252
+ path = args.folder_path[:-5]
253
+ json.dump(kpts_code, open(os.path.join(path, 'keypoint.json'), 'w'))
external/landmark_detection/infer_image.py ADDED
@@ -0,0 +1,251 @@
1
+ import cv2
2
+ import math
3
+ import copy
4
+ import numpy as np
5
+ import argparse
6
+ import torch
7
+
8
+ # private package
9
+ from external.landmark_detection.lib import utility
10
+ from external.landmark_detection.FaceBoxesV2.faceboxes_detector import *
11
+
12
+ class GetCropMatrix():
13
+ """
14
+ from_shape -> transform_matrix
15
+ """
16
+
17
+ def __init__(self, image_size, target_face_scale, align_corners=False):
18
+ self.image_size = image_size
19
+ self.target_face_scale = target_face_scale
20
+ self.align_corners = align_corners
21
+
22
+ def _compose_rotate_and_scale(self, angle, scale, shift_xy, from_center, to_center):
23
+ cosv = math.cos(angle)
24
+ sinv = math.sin(angle)
25
+
26
+ fx, fy = from_center
27
+ tx, ty = to_center
28
+
29
+ acos = scale * cosv
30
+ asin = scale * sinv
31
+
32
+ a0 = acos
33
+ a1 = -asin
34
+ a2 = tx - acos * fx + asin * fy + shift_xy[0]
35
+
36
+ b0 = asin
37
+ b1 = acos
38
+ b2 = ty - asin * fx - acos * fy + shift_xy[1]
39
+
40
+ rot_scale_m = np.array([
41
+ [a0, a1, a2],
42
+ [b0, b1, b2],
43
+ [0.0, 0.0, 1.0]
44
+ ], np.float32)
45
+ return rot_scale_m
46
+
47
+ def process(self, scale, center_w, center_h):
48
+ if self.align_corners:
49
+ to_w, to_h = self.image_size - 1, self.image_size - 1
50
+ else:
51
+ to_w, to_h = self.image_size, self.image_size
52
+
53
+ rot_mu = 0
54
+ scale_mu = self.image_size / (scale * self.target_face_scale * 200.0)
55
+ shift_xy_mu = (0, 0)
56
+ matrix = self._compose_rotate_and_scale(
57
+ rot_mu, scale_mu, shift_xy_mu,
58
+ from_center=[center_w, center_h],
59
+ to_center=[to_w / 2.0, to_h / 2.0])
60
+ return matrix
61
+
62
+
63
+ class TransformPerspective():
64
+ """
65
+ image, matrix3x3 -> transformed_image
66
+ """
67
+
68
+ def __init__(self, image_size):
69
+ self.image_size = image_size
70
+
71
+ def process(self, image, matrix):
72
+ return cv2.warpPerspective(
73
+ image, matrix, dsize=(self.image_size, self.image_size),
74
+ flags=cv2.INTER_LINEAR, borderValue=0)
75
+
76
+
77
+ class TransformPoints2D():
78
+ """
79
+ points (nx2), matrix (3x3) -> points (nx2)
80
+ """
81
+
82
+ def process(self, srcPoints, matrix):
83
+ # nx3
84
+ desPoints = np.concatenate([srcPoints, np.ones_like(srcPoints[:, [0]])], axis=1)
85
+ desPoints = desPoints @ np.transpose(matrix) # nx3
86
+ desPoints = desPoints[:, :2] / desPoints[:, [2, 2]]
87
+ return desPoints.astype(srcPoints.dtype)
88
+
89
+ class Alignment:
90
+ def __init__(self, args, model_path, dl_framework, device_ids):
91
+ self.input_size = 256
92
+ self.target_face_scale = 1.0
93
+ self.dl_framework = dl_framework
94
+
95
+ # model
96
+ if self.dl_framework == "pytorch":
97
+ # conf
98
+ self.config = utility.get_config(args)
99
+ self.config.device_id = device_ids[0]
100
+ # set environment
101
+ # utility.set_environment(self.config)
102
+ # self.config.init_instance()
103
+ # if self.config.logger is not None:
104
+ # self.config.logger.info("Loaded configure file %s: %s" % (args.config_name, self.config.id))
105
+ # self.config.logger.info("\n" + "\n".join(["%s: %s" % item for item in self.config.__dict__.items()]))
106
+
107
+ net = utility.get_net(self.config)
108
+ if device_ids == [-1]:
109
+ checkpoint = torch.load(model_path, map_location="cpu")
110
+ else:
111
+ checkpoint = torch.load(model_path)
112
+ net.load_state_dict(checkpoint["net"])
113
+
114
+ if self.config.device_id == -1:
115
+ net = net.cpu()
116
+ else:
117
+ net = net.to(self.config.device_id)
118
+
119
+ net.eval()
120
+ self.alignment = net
121
+ else:
122
+ assert False
123
+
124
+ self.getCropMatrix = GetCropMatrix(image_size=self.input_size, target_face_scale=self.target_face_scale,
125
+ align_corners=True)
126
+ self.transformPerspective = TransformPerspective(image_size=self.input_size)
127
+ self.transformPoints2D = TransformPoints2D()
128
+
129
+ def norm_points(self, points, align_corners=False):
130
+ if align_corners:
131
+ # [0, SIZE-1] -> [-1, +1]
132
+ return points / torch.tensor([self.input_size - 1, self.input_size - 1]).to(points).view(1, 1, 2) * 2 - 1
133
+ else:
134
+ # [-0.5, SIZE-0.5] -> [-1, +1]
135
+ return (points * 2 + 1) / torch.tensor([self.input_size, self.input_size]).to(points).view(1, 1, 2) - 1
136
+
137
+ def denorm_points(self, points, align_corners=False):
138
+ if align_corners:
139
+ # [-1, +1] -> [0, SIZE-1]
140
+ return (points + 1) / 2 * torch.tensor([self.input_size - 1, self.input_size - 1]).to(points).view(1, 1, 2)
141
+ else:
142
+ # [-1, +1] -> [-0.5, SIZE-0.5]
143
+ return ((points + 1) * torch.tensor([self.input_size, self.input_size]).to(points).view(1, 1, 2) - 1) / 2
144
+
145
+ def preprocess(self, image, scale, center_w, center_h):
146
+ matrix = self.getCropMatrix.process(scale, center_w, center_h)
147
+ input_tensor = self.transformPerspective.process(image, matrix)
148
+ input_tensor = input_tensor[np.newaxis, :]
149
+
150
+ input_tensor = torch.from_numpy(input_tensor)
151
+ input_tensor = input_tensor.float().permute(0, 3, 1, 2)
152
+ input_tensor = input_tensor / 255.0 * 2.0 - 1.0
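+ # Normalize pixels from [0, 255] to [-1, 1] before feeding the network.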
153
+
154
+ if self.config.device_id == -1:
155
+ input_tensor = input_tensor.cpu()
156
+ else:
157
+ input_tensor = input_tensor.to(self.config.device_id)
158
+
159
+ return input_tensor, matrix
160
+
161
+ def postprocess(self, srcPoints, coeff):
162
+ # dstPoints = self.transformPoints2D.process(srcPoints, coeff)
163
+ # matrix^(-1) * src = dst
164
+ # src = matrix * dst
165
+ dstPoints = np.zeros(srcPoints.shape, dtype=np.float32)
166
+ for i in range(srcPoints.shape[0]):
167
+ dstPoints[i][0] = coeff[0][0] * srcPoints[i][0] + coeff[0][1] * srcPoints[i][1] + coeff[0][2]
168
+ dstPoints[i][1] = coeff[1][0] * srcPoints[i][0] + coeff[1][1] * srcPoints[i][1] + coeff[1][2]
169
+ return dstPoints
170
+
171
+ def analyze(self, image, scale, center_w, center_h):
172
+ input_tensor, matrix = self.preprocess(image, scale, center_w, center_h)
173
+
174
+ if self.dl_framework == "pytorch":
175
+ with torch.no_grad():
176
+ output = self.alignment(input_tensor)
177
+ landmarks = output[-1][0]
178
+ else:
179
+ assert False
180
+
181
+ landmarks = self.denorm_points(landmarks)
182
+ landmarks = landmarks.data.cpu().numpy()[0]
183
+ landmarks = self.postprocess(landmarks, np.linalg.inv(matrix))
184
+
185
+ return landmarks
186
+
187
+ # parser = argparse.ArgumentParser(description="Evaluation script")
188
+ # args = parser.parse_args()
189
+ # image_path = './rgb.png'
190
+ # image = cv2.imread(image_path)
191
+ #
192
+ # use_gpu = False
193
+ # ########### face detection ############
194
+ # if use_gpu:
195
+ # device = torch.device("cuda:0")
196
+ # else:
197
+ # device = torch.device("cpu")
198
+ #
199
+ # detector = FaceBoxesDetector('FaceBoxes', 'FaceBoxesV2/weights/FaceBoxesV2.pth', use_gpu, device)
200
+ #
201
+ # ########### facial alignment ############
202
+ # model_path = './weights/68_keypoints_model.pkl'
203
+ #
204
+ # if use_gpu:
205
+ # device_ids = [0]
206
+ # else:
207
+ # device_ids = [-1]
208
+ #
209
+ # args.config_name = 'alignment'
210
+ # alignment = Alignment(args, model_path, dl_framework="pytorch", device_ids=device_ids)
211
+ # image_draw = copy.deepcopy(image)
212
+ #
213
+ # ########### inference ############
214
+ # ldk_list = []
215
+ #
216
+ # detections, _ = detector.detect(image, 0.9, 1)
217
+ # for idx in range(len(detections)):
218
+ # x1_ori = detections[idx][2]
219
+ # y1_ori = detections[idx][3]
220
+ # x2_ori = x1_ori + detections[idx][4]
221
+ # y2_ori = y1_ori + detections[idx][5]
222
+ #
223
+ # scale = max(x2_ori - x1_ori, y2_ori - y1_ori) / 180
224
+ # center_w = (x1_ori + x2_ori) / 2
225
+ # center_h = (y1_ori + y2_ori) / 2
226
+ # scale, center_w, center_h = float(scale), float(center_w), float(center_h)
227
+ #
228
+ # landmarks_pv = alignment.analyze(image, scale, center_w, center_h)
229
+ #
230
+ # for num in range(landmarks_pv.shape[0]):
231
+ # cv2.circle(image_draw, (round(landmarks_pv[num][0]), round(landmarks_pv[num][1])),
232
+ # 2, (0, 255, 0), -1)
233
+ #
234
+ # ldk_list.append([round(landmarks_pv[num][0]), round(landmarks_pv[num][1])])
235
+ #
236
+ # cv2.imshow("win", image_draw)
237
+ #
238
+ # # ldk_img = cv2.imread('/home/gyalex/Desktop/image_landmark_149/all.jpg')
239
+ # # cv2.imshow("win1", ldk_img)
240
+ #
241
+ # cv2.waitKey(0)
242
+ #
243
+ # with open('./cord.txt', 'w') as f:
244
+ # for num in range(len(ldk_list)):
245
+ # s = str(ldk_list[num][0]) + ' ' + str(ldk_list[num][1]) + '\n'
246
+ # f.write(s)
247
+ #
248
+ # f.close()
249
+
250
+
251
+
external/landmark_detection/infer_video.py ADDED
@@ -0,0 +1,287 @@
1
+ import cv2
2
+ import math
3
+ import copy
4
+ import numpy as np
5
+ import argparse
6
+ import torch
7
+ import json
8
+
9
+ # private package
10
+ from lib import utility
11
+ from FaceBoxesV2.faceboxes_detector import *
12
+
13
+ class GetCropMatrix():
14
+ """
15
+ from_shape -> transform_matrix
16
+ """
17
+
18
+ def __init__(self, image_size, target_face_scale, align_corners=False):
19
+ self.image_size = image_size
20
+ self.target_face_scale = target_face_scale
21
+ self.align_corners = align_corners
22
+
23
+ def _compose_rotate_and_scale(self, angle, scale, shift_xy, from_center, to_center):
24
+ cosv = math.cos(angle)
25
+ sinv = math.sin(angle)
26
+
27
+ fx, fy = from_center
28
+ tx, ty = to_center
29
+
30
+ acos = scale * cosv
31
+ asin = scale * sinv
32
+
33
+ a0 = acos
34
+ a1 = -asin
35
+ a2 = tx - acos * fx + asin * fy + shift_xy[0]
36
+
37
+ b0 = asin
38
+ b1 = acos
39
+ b2 = ty - asin * fx - acos * fy + shift_xy[1]
40
+
41
+ rot_scale_m = np.array([
42
+ [a0, a1, a2],
43
+ [b0, b1, b2],
44
+ [0.0, 0.0, 1.0]
45
+ ], np.float32)
46
+ return rot_scale_m
47
+
48
+ def process(self, scale, center_w, center_h):
49
+ if self.align_corners:
50
+ to_w, to_h = self.image_size - 1, self.image_size - 1
51
+ else:
52
+ to_w, to_h = self.image_size, self.image_size
53
+
54
+ rot_mu = 0
55
+ scale_mu = self.image_size / (scale * self.target_face_scale * 200.0)
56
+ shift_xy_mu = (0, 0)
57
+ matrix = self._compose_rotate_and_scale(
58
+ rot_mu, scale_mu, shift_xy_mu,
59
+ from_center=[center_w, center_h],
60
+ to_center=[to_w / 2.0, to_h / 2.0])
61
+ return matrix
62
+
63
+
64
+ class TransformPerspective():
65
+ """
66
+ image, matrix3x3 -> transformed_image
67
+ """
68
+
69
+ def __init__(self, image_size):
70
+ self.image_size = image_size
71
+
72
+ def process(self, image, matrix):
73
+ return cv2.warpPerspective(
74
+ image, matrix, dsize=(self.image_size, self.image_size),
75
+ flags=cv2.INTER_LINEAR, borderValue=0)
76
+
77
+
78
+ class TransformPoints2D():
79
+ """
80
+ points (nx2), matrix (3x3) -> points (nx2)
81
+ """
82
+
83
+ def process(self, srcPoints, matrix):
84
+ # nx3
85
+ desPoints = np.concatenate([srcPoints, np.ones_like(srcPoints[:, [0]])], axis=1)
86
+ desPoints = desPoints @ np.transpose(matrix) # nx3
87
+ desPoints = desPoints[:, :2] / desPoints[:, [2, 2]]
88
+ return desPoints.astype(srcPoints.dtype)
89
+
90
+ class Alignment:
91
+ def __init__(self, args, model_path, dl_framework, device_ids):
92
+ self.input_size = 256
93
+ self.target_face_scale = 1.0
94
+ self.dl_framework = dl_framework
95
+
96
+ # model
97
+ if self.dl_framework == "pytorch":
98
+ # conf
99
+ self.config = utility.get_config(args)
100
+ self.config.device_id = device_ids[0]
101
+ # set environment
102
+ utility.set_environment(self.config)
103
+ # self.config.init_instance()
104
+ # if self.config.logger is not None:
105
+ # self.config.logger.info("Loaded configure file %s: %s" % (args.config_name, self.config.id))
106
+ # self.config.logger.info("\n" + "\n".join(["%s: %s" % item for item in self.config.__dict__.items()]))
107
+
108
+ net = utility.get_net(self.config)
109
+ if device_ids == [-1]:
110
+ checkpoint = torch.load(model_path, map_location="cpu")
111
+ else:
112
+ checkpoint = torch.load(model_path)
113
+ net.load_state_dict(checkpoint["net"])
114
+
115
+ if self.config.device_id == -1:
116
+ net = net.cpu()
117
+ else:
118
+ net = net.to(self.config.device_id)
119
+
120
+ net.eval()
121
+ self.alignment = net
122
+ else:
123
+ assert False
124
+
125
+ self.getCropMatrix = GetCropMatrix(image_size=self.input_size, target_face_scale=self.target_face_scale,
126
+ align_corners=True)
127
+ self.transformPerspective = TransformPerspective(image_size=self.input_size)
128
+ self.transformPoints2D = TransformPoints2D()
129
+
130
+ def norm_points(self, points, align_corners=False):
131
+ if align_corners:
132
+ # [0, SIZE-1] -> [-1, +1]
133
+ return points / torch.tensor([self.input_size - 1, self.input_size - 1]).to(points).view(1, 1, 2) * 2 - 1
134
+ else:
135
+ # [-0.5, SIZE-0.5] -> [-1, +1]
136
+ return (points * 2 + 1) / torch.tensor([self.input_size, self.input_size]).to(points).view(1, 1, 2) - 1
137
+
138
+ def denorm_points(self, points, align_corners=False):
139
+ if align_corners:
140
+ # [-1, +1] -> [0, SIZE-1]
141
+ return (points + 1) / 2 * torch.tensor([self.input_size - 1, self.input_size - 1]).to(points).view(1, 1, 2)
142
+ else:
143
+ # [-1, +1] -> [-0.5, SIZE-0.5]
144
+ return ((points + 1) * torch.tensor([self.input_size, self.input_size]).to(points).view(1, 1, 2) - 1) / 2
145
+
146
+ def preprocess(self, image, scale, center_w, center_h):
147
+ matrix = self.getCropMatrix.process(scale, center_w, center_h)
148
+ input_tensor = self.transformPerspective.process(image, matrix)
149
+ input_tensor = input_tensor[np.newaxis, :]
150
+
151
+ input_tensor = torch.from_numpy(input_tensor)
152
+ input_tensor = input_tensor.float().permute(0, 3, 1, 2)
153
+ input_tensor = input_tensor / 255.0 * 2.0 - 1.0
154
+
155
+ if self.config.device_id == -1:
156
+ input_tensor = input_tensor.cpu()
157
+ else:
158
+ input_tensor = input_tensor.to(self.config.device_id)
159
+
160
+ return input_tensor, matrix
161
+
162
+ def postprocess(self, srcPoints, coeff):
163
+ # dstPoints = self.transformPoints2D.process(srcPoints, coeff)
164
+ # matrix^(-1) * src = dst
165
+ # src = matrix * dst
166
+ dstPoints = np.zeros(srcPoints.shape, dtype=np.float32)
167
+ for i in range(srcPoints.shape[0]):
168
+ dstPoints[i][0] = coeff[0][0] * srcPoints[i][0] + coeff[0][1] * srcPoints[i][1] + coeff[0][2]
169
+ dstPoints[i][1] = coeff[1][0] * srcPoints[i][0] + coeff[1][1] * srcPoints[i][1] + coeff[1][2]
170
+ return dstPoints
171
+
172
+ def analyze(self, image, scale, center_w, center_h):
173
+ input_tensor, matrix = self.preprocess(image, scale, center_w, center_h)
174
+
175
+ if self.dl_framework == "pytorch":
176
+ with torch.no_grad():
177
+ output = self.alignment(input_tensor)
178
+ landmarks = output[-1][0]
179
+ else:
180
+ assert False
181
+
182
+ landmarks = self.denorm_points(landmarks)
183
+ landmarks = landmarks.data.cpu().numpy()[0]
184
+ landmarks = self.postprocess(landmarks, np.linalg.inv(matrix))
185
+
186
+ return landmarks
187
+
188
+ if __name__ == '__main__':
189
+ parser = argparse.ArgumentParser(description="inference script")
190
+ parser.add_argument('--video_path', type=str, help='Path to videos',default='/media/yuanzhen/HH/DATASET/VFTH/TESTVIDEO/Clip+7CzHzeeVRlE+P0+C0+F101007-101139.mp4')
191
+ args = parser.parse_args()
192
+
193
+ # args.video_path = '/media/gyalex/Data/flame/ph_test/test.mp4'
194
+
195
+ current_path = os.getcwd()
196
+
197
+ use_gpu = True
198
+ ########### face detection ############
199
+ if use_gpu:
200
+ device = torch.device("cuda:0")
201
+ else:
202
+ device = torch.device("cpu")
203
+
204
+ current_path = os.getcwd()
205
+ det_model_path = '/home/yuanzhen/code/landmark_detection/FaceBoxesV2/weights/FaceBoxesV2.pth'
206
+ detector = FaceBoxesDetector('FaceBoxes', det_model_path, use_gpu, device)
207
+
208
+ ########### facial alignment ############
209
+ model_path = '/home/yuanzhen/code/landmark_detection/weights/68_keypoints_model.pkl'
210
+
211
+ if use_gpu:
212
+ device_ids = [0]
213
+ else:
214
+ device_ids = [-1]
215
+
216
+ args.config_name = 'alignment'
217
+ alignment = Alignment(args, model_path, dl_framework="pytorch", device_ids=device_ids)
218
+
219
+ video_file = args.video_path
220
+ cap = cv2.VideoCapture(video_file)
221
+ frame_width = int(cap.get(3))
222
+ frame_height = int(cap.get(4))
223
+
224
+ # out_video_file = './output_video.mp4'
225
+ # fps = 30
226
+ # size = (frame_width, frame_height)
227
+ # out = cv2.VideoWriter(out_video_file, cv2.VideoWriter_fourcc(*'mp4v'), fps, size)
228
+
229
+ count = 0
230
+ kpts_code = dict()
231
+
232
+ keypoint_data_path = args.video_path.replace('.mp4','.json')
233
+ with open(keypoint_data_path,'r') as f:
234
+ keypoint_data = json.load(f)
235
+
236
+ ########### inference ############
237
+ path = video_file[:-4]
238
+ while(cap.isOpened()):
239
+ ret, image = cap.read()
240
+
241
+ if ret:
242
+ detections, _ = detector.detect(image, 0.8, 1)
243
+ image_draw = copy.deepcopy(image)
244
+
245
+ cv2.imwrite(os.path.join(path, 'image', str(count+1)+'.png'), image_draw)
246
+
247
+ for idx in range(len(detections)):
248
+ x1_ori = detections[idx][2]
249
+ y1_ori = detections[idx][3]
250
+ x2_ori = x1_ori + detections[idx][4]
251
+ y2_ori = y1_ori + detections[idx][5]
252
+
253
+ scale = max(x2_ori - x1_ori, y2_ori - y1_ori) / 180
254
+ center_w = (x1_ori + x2_ori) / 2
255
+ center_h = (y1_ori + y2_ori) / 2
256
+ scale, center_w, center_h = float(scale), float(center_w), float(center_h)
257
+
258
+ # landmarks_pv = alignment.analyze(image, scale, center_w, center_h)
259
+ landmarks_pv = np.array(keypoint_data[str(count+1)+'.png'])
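+ # Landmarks are read from the precomputed keypoint JSON; the direct alignment.analyze call above is left commented out.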
260
+
261
+ landmarks_pv_list = landmarks_pv.tolist()
262
+
263
+ for num in range(landmarks_pv.shape[0]):
264
+ cv2.circle(image_draw, (round(landmarks_pv[num][0]), round(landmarks_pv[num][1])),
265
+ 2, (0, 255, 0), -1)
266
+ cv2.putText(image_draw, str(num),
267
+ (round(landmarks_pv[num][0]) + 5, round(landmarks_pv[num][1]) + 5), # text position
268
+ cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1, cv2.LINE_AA)
269
+
270
+ kpts_code[str(count+1)+'.png'] = landmarks_pv_list
271
+ cv2.imwrite(os.path.join(path, 'landmark', str(count+1)+'.png'), image_draw)
272
+ else:
273
+ break
274
+
275
+ count += 1
276
+
277
+ cap.release()
278
+ # out.release()
279
+ # cv2.destroyAllWindows()
280
+
281
+ path = video_file[:-4]
282
+ json.dump(kpts_code, open(os.path.join(path, 'keypoint.json'), 'w'))
283
+
284
+ print(path)
285
+
286
+
287
+
external/landmark_detection/lib/__init__.py ADDED
@@ -0,0 +1,9 @@
1
+ from .dataset import get_encoder, get_decoder
2
+ from .dataset import AlignmentDataset, Augmentation
3
+ from .backbone import StackedHGNetV1
4
+ from .metric import NME, Accuracy
5
+ from .utils import time_print, time_string, time_for_file, time_string_short
6
+ from .utils import convert_secs2time, convert_size2str
7
+
8
+ from .utility import get_dataloader, get_config, get_net, get_criterions
9
+ from .utility import get_optimizer, get_scheduler
external/landmark_detection/lib/backbone/__init__.py ADDED
@@ -0,0 +1,5 @@
1
+ from .stackedHGNetV1 import StackedHGNetV1
2
+
3
+ __all__ = [
4
+ "StackedHGNetV1",
5
+ ]
external/landmark_detection/lib/backbone/core/coord_conv.py ADDED
@@ -0,0 +1,157 @@
1
+ import torch
2
+ import torch.nn as nn
3
+
4
+
5
+ class AddCoordsTh(nn.Module):
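+ # Appends normalized x/y coordinate channels (optionally a radius channel and boundary-masked coordinates) to the feature map, as in CoordConv.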
6
+ def __init__(self, x_dim, y_dim, with_r=False, with_boundary=False):
7
+ super(AddCoordsTh, self).__init__()
8
+ self.x_dim = x_dim
9
+ self.y_dim = y_dim
10
+ self.with_r = with_r
11
+ self.with_boundary = with_boundary
12
+
13
+ def forward(self, input_tensor, heatmap=None):
14
+ """
15
+ input_tensor: (batch, c, x_dim, y_dim)
16
+ """
17
+ batch_size_tensor = input_tensor.shape[0]
18
+
19
+ xx_ones = torch.ones([1, self.y_dim], dtype=torch.int32).to(input_tensor)
20
+ xx_ones = xx_ones.unsqueeze(-1)
21
+
22
+ xx_range = torch.arange(self.x_dim, dtype=torch.int32).unsqueeze(0).to(input_tensor)
23
+ xx_range = xx_range.unsqueeze(1)
24
+
25
+ xx_channel = torch.matmul(xx_ones.float(), xx_range.float())
26
+ xx_channel = xx_channel.unsqueeze(-1)
27
+
28
+ yy_ones = torch.ones([1, self.x_dim], dtype=torch.int32).to(input_tensor)
29
+ yy_ones = yy_ones.unsqueeze(1)
30
+
31
+ yy_range = torch.arange(self.y_dim, dtype=torch.int32).unsqueeze(0).to(input_tensor)
32
+ yy_range = yy_range.unsqueeze(-1)
33
+
34
+ yy_channel = torch.matmul(yy_range.float(), yy_ones.float())
35
+ yy_channel = yy_channel.unsqueeze(-1)
36
+
37
+ xx_channel = xx_channel.permute(0, 3, 2, 1)
38
+ yy_channel = yy_channel.permute(0, 3, 2, 1)
39
+
40
+ xx_channel = xx_channel / (self.x_dim - 1)
41
+ yy_channel = yy_channel / (self.y_dim - 1)
42
+
43
+ xx_channel = xx_channel * 2 - 1
44
+ yy_channel = yy_channel * 2 - 1
45
+
46
+ xx_channel = xx_channel.repeat(batch_size_tensor, 1, 1, 1)
47
+ yy_channel = yy_channel.repeat(batch_size_tensor, 1, 1, 1)
48
+
49
+ if self.with_boundary and type(heatmap) != type(None):
50
+ boundary_channel = torch.clamp(heatmap[:, -1:, :, :],
51
+ 0.0, 1.0)
52
+
53
+ zero_tensor = torch.zeros_like(xx_channel).to(xx_channel)
54
+ xx_boundary_channel = torch.where(boundary_channel>0.05,
55
+ xx_channel, zero_tensor)
56
+ yy_boundary_channel = torch.where(boundary_channel>0.05,
57
+ yy_channel, zero_tensor)
58
+ ret = torch.cat([input_tensor, xx_channel, yy_channel], dim=1)
59
+
60
+
61
+ if self.with_r:
62
+ rr = torch.sqrt(torch.pow(xx_channel, 2) + torch.pow(yy_channel, 2))
63
+ rr = rr / torch.max(rr)
64
+ ret = torch.cat([ret, rr], dim=1)
65
+
66
+ if self.with_boundary and type(heatmap) != type(None):
67
+ ret = torch.cat([ret, xx_boundary_channel,
68
+ yy_boundary_channel], dim=1)
69
+ return ret
70
+
71
+
72
+ class CoordConvTh(nn.Module):
73
+ """CoordConv layer as in the paper."""
74
+ def __init__(self, x_dim, y_dim, with_r, with_boundary,
75
+ in_channels, out_channels, first_one=False, relu=False, bn=False, *args, **kwargs):
76
+ super(CoordConvTh, self).__init__()
77
+ self.addcoords = AddCoordsTh(x_dim=x_dim, y_dim=y_dim, with_r=with_r,
78
+ with_boundary=with_boundary)
79
+ in_channels += 2
80
+ if with_r:
81
+ in_channels += 1
82
+ if with_boundary and not first_one:
83
+ in_channels += 2
84
+ self.conv = nn.Conv2d(in_channels=in_channels, out_channels=out_channels, *args, **kwargs)
85
+ self.relu = nn.ReLU() if relu else None
86
+ self.bn = nn.BatchNorm2d(out_channels) if bn else None
87
+
88
+ self.with_boundary = with_boundary
89
+ self.first_one = first_one
90
+
91
+
92
+ def forward(self, input_tensor, heatmap=None):
93
+ assert (self.with_boundary and not self.first_one) == (heatmap is not None)
94
+ ret = self.addcoords(input_tensor, heatmap)
95
+ ret = self.conv(ret)
96
+ if self.bn is not None:
97
+ ret = self.bn(ret)
98
+ if self.relu is not None:
99
+ ret = self.relu(ret)
100
+
101
+ return ret
102
+
103
+
104
+ '''
105
+ An alternative implementation for PyTorch with auto-infering the x-y dimensions.
106
+ '''
107
+ class AddCoords(nn.Module):
108
+
109
+ def __init__(self, with_r=False):
110
+ super().__init__()
111
+ self.with_r = with_r
112
+
113
+ def forward(self, input_tensor):
114
+ """
115
+ Args:
116
+ input_tensor: shape(batch, channel, x_dim, y_dim)
117
+ """
118
+ batch_size, _, x_dim, y_dim = input_tensor.size()
119
+
120
+ xx_channel = torch.arange(x_dim).repeat(1, y_dim, 1).to(input_tensor)
121
+ yy_channel = torch.arange(y_dim).repeat(1, x_dim, 1).transpose(1, 2).to(input_tensor)
122
+
123
+ xx_channel = xx_channel / (x_dim - 1)
124
+ yy_channel = yy_channel / (y_dim - 1)
125
+
126
+ xx_channel = xx_channel * 2 - 1
127
+ yy_channel = yy_channel * 2 - 1
128
+
129
+ xx_channel = xx_channel.repeat(batch_size, 1, 1, 1).transpose(2, 3)
130
+ yy_channel = yy_channel.repeat(batch_size, 1, 1, 1).transpose(2, 3)
131
+
132
+ ret = torch.cat([
133
+ input_tensor,
134
+ xx_channel.type_as(input_tensor),
135
+ yy_channel.type_as(input_tensor)], dim=1)
136
+
137
+ if self.with_r:
138
+ rr = torch.sqrt(torch.pow(xx_channel - 0.5, 2) + torch.pow(yy_channel - 0.5, 2))
139
+ ret = torch.cat([ret, rr], dim=1)
140
+
141
+ return ret
142
+
143
+
144
+ class CoordConv(nn.Module):
145
+
146
+ def __init__(self, in_channels, out_channels, with_r=False, **kwargs):
147
+ super().__init__()
148
+ self.addcoords = AddCoords(with_r=with_r)
149
+ in_channels += 2
150
+ if with_r:
151
+ in_channels += 1
152
+ self.conv = nn.Conv2d(in_channels, out_channels, **kwargs)
153
+
154
+ def forward(self, x):
155
+ ret = self.addcoords(x)
156
+ ret = self.conv(ret)
157
+ return ret
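
Note: the core idea of both CoordConv variants above is simply to concatenate normalized x/y coordinate channels (and optionally a radius channel) to the feature map before a convolution. The standalone sketch below illustrates that idea with plain PyTorch; the tensor shapes and channel counts are illustrative, not taken from the repository.

```python
# Minimal sketch of the coordinate-channel idea: append normalized x/y
# (and an optional radius) channels, then convolve. Shapes are illustrative.
import torch
import torch.nn as nn

def add_coords(feat: torch.Tensor, with_r: bool = False) -> torch.Tensor:
    b, _, h, w = feat.shape
    yy = torch.linspace(-1, 1, h, device=feat.device).view(1, 1, h, 1).expand(b, 1, h, w)
    xx = torch.linspace(-1, 1, w, device=feat.device).view(1, 1, 1, w).expand(b, 1, h, w)
    out = torch.cat([feat, xx, yy], dim=1)
    if with_r:
        rr = torch.sqrt(xx ** 2 + yy ** 2)   # distance from the image center
        out = torch.cat([out, rr], dim=1)
    return out

feat = torch.randn(2, 8, 64, 64)              # batch of 8-channel feature maps
augmented = add_coords(feat, with_r=True)      # 8 + 2 coord + 1 radius = 11 channels
conv = nn.Conv2d(11, 16, kernel_size=1)
print(conv(augmented).shape)                   # torch.Size([2, 16, 64, 64])
```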
external/landmark_detection/lib/backbone/stackedHGNetV1.py ADDED
@@ -0,0 +1,307 @@
+ import numpy as np
+
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+ from .core.coord_conv import CoordConvTh
+ from external.landmark_detection.lib.dataset import get_decoder
+
+
+ class Activation(nn.Module):
+     def __init__(self, kind: str = 'relu', channel=None):
+         super().__init__()
+         self.kind = kind
+
+         if '+' in kind:
+             norm_str, act_str = kind.split('+')
+         else:
+             norm_str, act_str = 'none', kind
+
+         self.norm_fn = {
+             'in': F.instance_norm,
+             'bn': nn.BatchNorm2d(channel),
+             'bn_noaffine': nn.BatchNorm2d(channel, affine=False, track_running_stats=True),
+             'none': None
+         }[norm_str]
+
+         self.act_fn = {
+             'relu': F.relu,
+             'softplus': nn.Softplus(),
+             'exp': torch.exp,
+             'sigmoid': torch.sigmoid,
+             'tanh': torch.tanh,
+             'none': None
+         }[act_str]
+
+         self.channel = channel
+
+     def forward(self, x):
+         if self.norm_fn is not None:
+             x = self.norm_fn(x)
+         if self.act_fn is not None:
+             x = self.act_fn(x)
+         return x
+
+     def extra_repr(self):
+         return f'kind={self.kind}, channel={self.channel}'
+
+
+ class ConvBlock(nn.Module):
+     def __init__(self, inp_dim, out_dim, kernel_size=3, stride=1, bn=False, relu=True, groups=1):
+         super(ConvBlock, self).__init__()
+         self.inp_dim = inp_dim
+         self.conv = nn.Conv2d(inp_dim, out_dim, kernel_size,
+                               stride, padding=(kernel_size - 1) // 2, groups=groups, bias=True)
+         self.relu = None
+         self.bn = None
+         if relu:
+             self.relu = nn.ReLU()
+         if bn:
+             self.bn = nn.BatchNorm2d(out_dim)
+
+     def forward(self, x):
+         x = self.conv(x)
+         if self.bn is not None:
+             x = self.bn(x)
+         if self.relu is not None:
+             x = self.relu(x)
+         return x
+
+
+ class ResBlock(nn.Module):
+     def __init__(self, inp_dim, out_dim, mid_dim=None):
+         super(ResBlock, self).__init__()
+         if mid_dim is None:
+             mid_dim = out_dim // 2
+         self.relu = nn.ReLU()
+         self.bn1 = nn.BatchNorm2d(inp_dim)
+         self.conv1 = ConvBlock(inp_dim, mid_dim, 1, relu=False)
+         self.bn2 = nn.BatchNorm2d(mid_dim)
+         self.conv2 = ConvBlock(mid_dim, mid_dim, 3, relu=False)
+         self.bn3 = nn.BatchNorm2d(mid_dim)
+         self.conv3 = ConvBlock(mid_dim, out_dim, 1, relu=False)
+         self.skip_layer = ConvBlock(inp_dim, out_dim, 1, relu=False)
+         if inp_dim == out_dim:
+             self.need_skip = False
+         else:
+             self.need_skip = True
+
+     def forward(self, x):
+         if self.need_skip:
+             residual = self.skip_layer(x)
+         else:
+             residual = x
+         out = self.bn1(x)
+         out = self.relu(out)
+         out = self.conv1(out)
+         out = self.bn2(out)
+         out = self.relu(out)
+         out = self.conv2(out)
+         out = self.bn3(out)
+         out = self.relu(out)
+         out = self.conv3(out)
+         out += residual
+         return out
+
+
+ class Hourglass(nn.Module):
+     def __init__(self, n, f, increase=0, up_mode='nearest',
+                  add_coord=False, first_one=False, x_dim=64, y_dim=64):
+         super(Hourglass, self).__init__()
+         nf = f + increase
+
+         Block = ResBlock
+
+         if add_coord:
+             self.coordconv = CoordConvTh(x_dim=x_dim, y_dim=y_dim,
+                                          with_r=True, with_boundary=True,
+                                          relu=False, bn=False,
+                                          in_channels=f, out_channels=f,
+                                          first_one=first_one,
+                                          kernel_size=1,
+                                          stride=1, padding=0)
+         else:
+             self.coordconv = None
+         self.up1 = Block(f, f)
+
+         # Lower branch
+         self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
+
+         self.low1 = Block(f, nf)
+         self.n = n
+         # Recursive hourglass
+         if self.n > 1:
+             self.low2 = Hourglass(n=n - 1, f=nf, increase=increase, up_mode=up_mode, add_coord=False)
+         else:
+             self.low2 = Block(nf, nf)
+         self.low3 = Block(nf, f)
+         self.up2 = nn.Upsample(scale_factor=2, mode=up_mode)
+
+     def forward(self, x, heatmap=None):
+         if self.coordconv is not None:
+             x = self.coordconv(x, heatmap)
+         up1 = self.up1(x)
+         pool1 = self.pool1(x)
+         low1 = self.low1(pool1)
+         low2 = self.low2(low1)
+         low3 = self.low3(low2)
+         up2 = self.up2(low3)
+         return up1 + up2
+
+
+ class E2HTransform(nn.Module):
+     def __init__(self, edge_info, num_points, num_edges):
+         super().__init__()
+
+         e2h_matrix = np.zeros([num_points, num_edges])
+         for edge_id, isclosed_indices in enumerate(edge_info):
+             is_closed, indices = isclosed_indices
+             for point_id in indices:
+                 e2h_matrix[point_id, edge_id] = 1
+         e2h_matrix = torch.from_numpy(e2h_matrix).float()
+
+         # pn x en x 1 x 1.
+         self.register_buffer('weight', e2h_matrix.view(
+             e2h_matrix.size(0), e2h_matrix.size(1), 1, 1))
+
+         # some keypoints are not covered by any edges,
+         # in these cases, we must add a constant bias to their heatmap weights.
+         bias = ((e2h_matrix @ torch.ones(e2h_matrix.size(1)).to(
+             e2h_matrix)) < 0.5).to(e2h_matrix)
+         # pn x 1.
+         self.register_buffer('bias', bias)
+
+     def forward(self, edgemaps):
+         # input: batch_size x en x hw x hh.
+         # output: batch_size x pn x hw x hh.
+         return F.conv2d(edgemaps, weight=self.weight, bias=self.bias)
+
+
+ class StackedHGNetV1(nn.Module):
+     def __init__(self, config, classes_num, edge_info,
+                  nstack=4, nlevels=4, in_channel=256, increase=0,
+                  add_coord=True, decoder_type='default'):
+         super(StackedHGNetV1, self).__init__()
+
+         self.cfg = config
+         self.coder_type = decoder_type
+         self.decoder = get_decoder(decoder_type=decoder_type)
+         self.nstack = nstack
+         self.add_coord = add_coord
+
+         self.num_heats = classes_num[0]
+
+         if self.add_coord:
+             convBlock = CoordConvTh(x_dim=self.cfg.width, y_dim=self.cfg.height,
+                                     with_r=True, with_boundary=False,
+                                     relu=True, bn=True,
+                                     in_channels=3, out_channels=64,
+                                     kernel_size=7,
+                                     stride=2, padding=3)
+         else:
+             convBlock = ConvBlock(3, 64, 7, 2, bn=True, relu=True)
+
+         pool = nn.MaxPool2d(kernel_size=2, stride=2)
+
+         Block = ResBlock
+
+         self.pre = nn.Sequential(
+             convBlock,
+             Block(64, 128),
+             pool,
+             Block(128, 128),
+             Block(128, in_channel)
+         )
+
+         self.hgs = nn.ModuleList(
+             [Hourglass(n=nlevels, f=in_channel, increase=increase, add_coord=self.add_coord, first_one=(_ == 0),
+                        x_dim=int(self.cfg.width / self.nstack), y_dim=int(self.cfg.height / self.nstack))
+              for _ in range(nstack)])
+
+         self.features = nn.ModuleList([
+             nn.Sequential(
+                 Block(in_channel, in_channel),
+                 ConvBlock(in_channel, in_channel, 1, bn=True, relu=True)
+             ) for _ in range(nstack)])
+
+         self.out_heatmaps = nn.ModuleList(
+             [ConvBlock(in_channel, self.num_heats, 1, relu=False, bn=False)
+              for _ in range(nstack)])
+
+         if self.cfg.use_AAM:
+             self.num_edges = classes_num[1]
+             self.num_points = classes_num[2]
+
+             self.e2h_transform = E2HTransform(edge_info, self.num_points, self.num_edges)
+             self.out_edgemaps = nn.ModuleList(
+                 [ConvBlock(in_channel, self.num_edges, 1, relu=False, bn=False)
+                  for _ in range(nstack)])
+             self.out_pointmaps = nn.ModuleList(
+                 [ConvBlock(in_channel, self.num_points, 1, relu=False, bn=False)
+                  for _ in range(nstack)])
+             self.merge_edgemaps = nn.ModuleList(
+                 [ConvBlock(self.num_edges, in_channel, 1, relu=False, bn=False)
+                  for _ in range(nstack - 1)])
+             self.merge_pointmaps = nn.ModuleList(
+                 [ConvBlock(self.num_points, in_channel, 1, relu=False, bn=False)
+                  for _ in range(nstack - 1)])
+             self.edgemap_act = Activation("sigmoid", self.num_edges)
+             self.pointmap_act = Activation("sigmoid", self.num_points)
+
+         self.merge_features = nn.ModuleList(
+             [ConvBlock(in_channel, in_channel, 1, relu=False, bn=False)
+              for _ in range(nstack - 1)])
+         self.merge_heatmaps = nn.ModuleList(
+             [ConvBlock(self.num_heats, in_channel, 1, relu=False, bn=False)
+              for _ in range(nstack - 1)])
+
+         self.nstack = nstack
+
+         self.heatmap_act = Activation("in+relu", self.num_heats)
+
+         self.inference = False
+
+     def set_inference(self, inference):
+         self.inference = inference
+
+     def forward(self, x):
+         x = self.pre(x)
+
+         y, fusionmaps = [], []
+         heatmaps = None
+         for i in range(self.nstack):
+             hg = self.hgs[i](x, heatmap=heatmaps)
+             feature = self.features[i](hg)
+
+             heatmaps0 = self.out_heatmaps[i](feature)
+             heatmaps = self.heatmap_act(heatmaps0)
+
+             if self.cfg.use_AAM:
+                 pointmaps0 = self.out_pointmaps[i](feature)
+                 pointmaps = self.pointmap_act(pointmaps0)
+                 edgemaps0 = self.out_edgemaps[i](feature)
+                 edgemaps = self.edgemap_act(edgemaps0)
+                 mask = self.e2h_transform(edgemaps) * pointmaps
+                 fusion_heatmaps = mask * heatmaps
+             else:
+                 fusion_heatmaps = heatmaps
+
+             landmarks = self.decoder.get_coords_from_heatmap(fusion_heatmaps)
+
+             if i < self.nstack - 1:
+                 x = x + self.merge_features[i](feature) + \
+                     self.merge_heatmaps[i](heatmaps)
+                 if self.cfg.use_AAM:
+                     x += self.merge_pointmaps[i](pointmaps)
+                     x += self.merge_edgemaps[i](edgemaps)
+
+             y.append(landmarks)
+             if self.cfg.use_AAM:
+                 y.append(pointmaps)
+                 y.append(edgemaps)
+
+             fusionmaps.append(fusion_heatmaps)
+
+         return y, fusionmaps, landmarks
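
Note: a hedged sketch of how this backbone might be instantiated for a dummy forward pass. It assumes the repository root is on `PYTHONPATH` and the dataset/encoder dependencies it imports are installed; the config fields, class counts, and input size below are illustrative guesses, not values taken from the training configs.

```python
# Sketch (assumptions noted above): 68-point landmarking, AAM heads disabled.
from types import SimpleNamespace

import torch

from external.landmark_detection.lib.backbone import StackedHGNetV1

cfg = SimpleNamespace(width=256, height=256, use_AAM=False)   # illustrative config
net = StackedHGNetV1(config=cfg,
                     classes_num=[68, 9, 68],  # only classes_num[0] is used when use_AAM=False
                     edge_info=[],             # unused when use_AAM=False
                     nstack=4, nlevels=4, in_channel=256,
                     add_coord=True, decoder_type='default')
net.eval()

with torch.no_grad():
    y, fusionmaps, landmarks = net(torch.randn(1, 3, 256, 256))

print(landmarks.shape)   # (1, 68, 2): per-landmark (x, y) in [-1, 1]
print(len(fusionmaps))   # one fused heatmap tensor per hourglass stack
```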
external/landmark_detection/lib/dataset/__init__.py ADDED
@@ -0,0 +1,11 @@
+ from .encoder import get_encoder
+ from .decoder import get_decoder
+ from .augmentation import Augmentation
+ from .alignmentDataset import AlignmentDataset
+
+ __all__ = [
+     "Augmentation",
+     "AlignmentDataset",
+     "get_encoder",
+     "get_decoder"
+ ]
external/landmark_detection/lib/dataset/alignmentDataset.py ADDED
@@ -0,0 +1,316 @@
+ import os
+ import sys
+ import cv2
+ import math
+ import copy
+ import hashlib
+ import imageio
+ import numpy as np
+ import pandas as pd
+ from scipy import interpolate
+ from PIL import Image, ImageEnhance, ImageFile
+
+ import torch
+ import torch.nn.functional as F
+ from torch.utils.data import Dataset
+
+ ImageFile.LOAD_TRUNCATED_IMAGES = True
+
+ sys.path.append("./")
+ from external.landmark_detection.lib.dataset.augmentation import Augmentation
+ from external.landmark_detection.lib.dataset.encoder import get_encoder
+
+
+ class AlignmentDataset(Dataset):
+
+     def __init__(self, tsv_flie, image_dir="", transform=None,
+                  width=256, height=256, channels=3,
+                  means=(127.5, 127.5, 127.5), scale=1 / 127.5,
+                  classes_num=None, crop_op=True, aug_prob=0.0, edge_info=None, flip_mapping=None, is_train=True,
+                  encoder_type='default',
+                  ):
+         super(AlignmentDataset, self).__init__()
+         self.use_AAM = True
+         self.encoder_type = encoder_type
+         self.encoder = get_encoder(height, width, encoder_type=encoder_type)
+         self.items = pd.read_csv(tsv_flie, sep="\t")
+         self.image_dir = image_dir
+         self.landmark_num = classes_num[0]
+         self.transform = transform
+
+         self.image_width = width
+         self.image_height = height
+         self.channels = channels
+         assert self.image_width == self.image_height
+
+         self.means = means
+         self.scale = scale
+
+         self.aug_prob = aug_prob
+         self.edge_info = edge_info
+         self.is_train = is_train
+         std_lmk_5pts = np.array([
+             196.0, 226.0,
+             316.0, 226.0,
+             256.0, 286.0,
+             220.0, 360.4,
+             292.0, 360.4], np.float32) / 256.0 - 1.0
+         std_lmk_5pts = np.reshape(std_lmk_5pts, (5, 2))  # [-1 1]
+         target_face_scale = 1.0 if crop_op else 1.25
+
+         self.augmentation = Augmentation(
+             is_train=self.is_train,
+             aug_prob=self.aug_prob,
+             image_size=self.image_width,
+             crop_op=crop_op,
+             std_lmk_5pts=std_lmk_5pts,
+             target_face_scale=target_face_scale,
+             flip_rate=0.5,
+             flip_mapping=flip_mapping,
+             random_shift_sigma=0.05,
+             random_rot_sigma=math.pi / 180 * 18,
+             random_scale_sigma=0.1,
+             random_gray_rate=0.2,
+             random_occ_rate=0.4,
+             random_blur_rate=0.3,
+             random_gamma_rate=0.2,
+             random_nose_fusion_rate=0.2)
+
+     def _circle(self, img, pt, sigma=1.0, label_type='Gaussian'):
+         # Check that any part of the gaussian is in-bounds
+         tmp_size = sigma * 3
+         ul = [int(pt[0] - tmp_size), int(pt[1] - tmp_size)]
+         br = [int(pt[0] + tmp_size + 1), int(pt[1] + tmp_size + 1)]
+         if (ul[0] > img.shape[1] - 1 or ul[1] > img.shape[0] - 1 or
+                 br[0] - 1 < 0 or br[1] - 1 < 0):
+             # If not, just return the image as is
+             return img
+
+         # Generate gaussian
+         size = 2 * tmp_size + 1
+         x = np.arange(0, size, 1, np.float32)
+         y = x[:, np.newaxis]
+         x0 = y0 = size // 2
+         # The gaussian is not normalized, we want the center value to equal 1
+         if label_type == 'Gaussian':
+             g = np.exp(- ((x - x0) ** 2 + (y - y0) ** 2) / (2 * sigma ** 2))
+         else:
+             g = sigma / (((x - x0) ** 2 + (y - y0) ** 2 + sigma ** 2) ** 1.5)
+
+         # Usable gaussian range
+         g_x = max(0, -ul[0]), min(br[0], img.shape[1]) - ul[0]
+         g_y = max(0, -ul[1]), min(br[1], img.shape[0]) - ul[1]
+         # Image range
+         img_x = max(0, ul[0]), min(br[0], img.shape[1])
+         img_y = max(0, ul[1]), min(br[1], img.shape[0])
+
+         img[img_y[0]:img_y[1], img_x[0]:img_x[1]] = 255 * g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
+         return img
+
+     def _polylines(self, img, lmks, is_closed, color=255, thickness=1, draw_mode=cv2.LINE_AA,
+                    interpolate_mode=cv2.INTER_AREA, scale=4):
+         h, w = img.shape
+         img_scale = cv2.resize(img, (w * scale, h * scale), interpolation=interpolate_mode)
+         lmks_scale = (lmks * scale + 0.5).astype(np.int32)
+         cv2.polylines(img_scale, [lmks_scale], is_closed, color, thickness * scale, draw_mode)
+         img = cv2.resize(img_scale, (w, h), interpolation=interpolate_mode)
+         return img
+
+     def _generate_edgemap(self, points, scale=0.25, thickness=1):
+         h, w = self.image_height, self.image_width
+         edgemaps = []
+         for is_closed, indices in self.edge_info:
+             edgemap = np.zeros([h, w], dtype=np.float32)
+             # align_corners: False.
+             part = copy.deepcopy(points[np.array(indices)])
+
+             part = self._fit_curve(part, is_closed)
+             part[:, 0] = np.clip(part[:, 0], 0, w - 1)
+             part[:, 1] = np.clip(part[:, 1], 0, h - 1)
+             edgemap = self._polylines(edgemap, part, is_closed, 255, thickness)
+
+             edgemaps.append(edgemap)
+         edgemaps = np.stack(edgemaps, axis=0) / 255.0
+         edgemaps = torch.from_numpy(edgemaps).float().unsqueeze(0)
+         edgemaps = F.interpolate(edgemaps, size=(int(w * scale), int(h * scale)), mode='bilinear',
+                                  align_corners=False).squeeze()
+         return edgemaps
+
+     def _fit_curve(self, lmks, is_closed=False, density=5):
+         try:
+             x = lmks[:, 0].copy()
+             y = lmks[:, 1].copy()
+             if is_closed:
+                 x = np.append(x, x[0])
+                 y = np.append(y, y[0])
+             tck, u = interpolate.splprep([x, y], s=0, per=is_closed, k=3)
+             # bins = (x.shape[0] - 1) * density + 1
+             # lmk_x, lmk_y = interpolate.splev(np.linspace(0, 1, bins), f)
+             intervals = np.array([])
+             for i in range(len(u) - 1):
+                 intervals = np.concatenate((intervals, np.linspace(u[i], u[i + 1], density, endpoint=False)))
+             if not is_closed:
+                 intervals = np.concatenate((intervals, [u[-1]]))
+             lmk_x, lmk_y = interpolate.splev(intervals, tck, der=0)
+             # der_x, der_y = interpolate.splev(intervals, tck, der=1)
+             curve_lmks = np.stack([lmk_x, lmk_y], axis=-1)
+             # curve_ders = np.stack([der_x, der_y], axis=-1)
+             # origin_indices = np.arange(0, curve_lmks.shape[0], density)
+
+             return curve_lmks
+         except:
+             return lmks
+
+     def _image_id(self, image_path):
+         if not os.path.exists(image_path):
+             image_path = os.path.join(self.image_dir, image_path)
+         return hashlib.md5(open(image_path, "rb").read()).hexdigest()
+
+     def _load_image(self, image_path):
+         if not os.path.exists(image_path):
+             image_path = os.path.join(self.image_dir, image_path)
+
+         try:
+             # img = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_COLOR)  # HWC, BGR, [0-255]
+             img = cv2.imread(image_path, cv2.IMREAD_COLOR)  # HWC, BGR, [0-255]
+             assert img is not None and len(img.shape) == 3 and img.shape[2] == 3
+         except:
+             try:
+                 img = imageio.imread(image_path)  # HWC, RGB, [0-255]
+                 img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)  # HWC, BGR, [0-255]
+                 assert img is not None and len(img.shape) == 3 and img.shape[2] == 3
+             except:
+                 try:
+                     gifImg = imageio.mimread(image_path)  # BHWC, RGB, [0-255]
+                     img = gifImg[0]  # HWC, RGB, [0-255]
+                     img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)  # HWC, BGR, [0-255]
+                     assert img is not None and len(img.shape) == 3 and img.shape[2] == 3
+                 except:
+                     img = None
+         return img
+
+     def _compose_rotate_and_scale(self, angle, scale, shift_xy, from_center, to_center):
+         cosv = math.cos(angle)
+         sinv = math.sin(angle)
+
+         fx, fy = from_center
+         tx, ty = to_center
+
+         acos = scale * cosv
+         asin = scale * sinv
+
+         a0 = acos
+         a1 = -asin
+         a2 = tx - acos * fx + asin * fy + shift_xy[0]
+
+         b0 = asin
+         b1 = acos
+         b2 = ty - asin * fx - acos * fy + shift_xy[1]
+
+         rot_scale_m = np.array([
+             [a0, a1, a2],
+             [b0, b1, b2],
+             [0.0, 0.0, 1.0]
+         ], np.float32)
+         return rot_scale_m
+
+     def _transformPoints2D(self, points, matrix):
+         """
+         points (nx2), matrix (3x3) -> points (nx2)
+         """
+         dtype = points.dtype
+
+         # nx3
+         points = np.concatenate([points, np.ones_like(points[:, [0]])], axis=1)
+         points = points @ np.transpose(matrix)  # nx3
+         points = points[:, :2] / points[:, [2, 2]]
+         return points.astype(dtype)
+
+     def _transformPerspective(self, image, matrix, target_shape):
+         """
+         image, matrix3x3 -> transformed_image
+         """
+         return cv2.warpPerspective(
+             image, matrix,
+             dsize=(target_shape[1], target_shape[0]),
+             flags=cv2.INTER_LINEAR, borderValue=0)
+
+     def _norm_points(self, points, h, w, align_corners=False):
+         if align_corners:
+             # [0, SIZE-1] -> [-1, +1]
+             des_points = points / torch.tensor([w - 1, h - 1]).to(points).view(1, 2) * 2 - 1
+         else:
+             # [-0.5, SIZE-0.5] -> [-1, +1]
+             des_points = (points * 2 + 1) / torch.tensor([w, h]).to(points).view(1, 2) - 1
+         des_points = torch.clamp(des_points, -1, 1)
+         return des_points
+
+     def _denorm_points(self, points, h, w, align_corners=False):
+         if align_corners:
+             # [-1, +1] -> [0, SIZE-1]
+             des_points = (points + 1) / 2 * torch.tensor([w - 1, h - 1]).to(points).view(1, 1, 2)
+         else:
+             # [-1, +1] -> [-0.5, SIZE-0.5]
+             des_points = ((points + 1) * torch.tensor([w, h]).to(points).view(1, 1, 2) - 1) / 2
+         return des_points
+
+     def __len__(self):
+         return len(self.items)
+
+     def __getitem__(self, index):
+         sample = dict()
+
+         image_path = self.items.iloc[index, 0]
+         landmarks_5pts = self.items.iloc[index, 1]
+         landmarks_5pts = np.array(list(map(float, landmarks_5pts.split(","))), dtype=np.float32).reshape(5, 2)
+         landmarks_target = self.items.iloc[index, 2]
+         landmarks_target = np.array(list(map(float, landmarks_target.split(","))), dtype=np.float32).reshape(
+             self.landmark_num, 2)
+         scale = float(self.items.iloc[index, 3])
+         center_w, center_h = float(self.items.iloc[index, 4]), float(self.items.iloc[index, 5])
+         if len(self.items.iloc[index]) > 6:
+             tags = np.array(list(map(lambda x: int(float(x)), self.items.iloc[index, 6].split(","))))
+         else:
+             tags = np.array([])
+
+         # image & keypoints alignment
+         image_path = image_path.replace('\\', '/')
+         # wflw testset
+         image_path = image_path.replace(
+             '//msr-facestore/Workspace/MSRA_EP_Allergan/users/yanghuan/training_data/wflw/rawImages/', '')
+         # trainset
+         image_path = image_path.replace('./rawImages/', '')
+         image_path = os.path.join(self.image_dir, image_path)
+
+         # image path
+         sample["image_path"] = image_path
+
+         img = self._load_image(image_path)  # HWC, BGR, [0, 255]
+         assert img is not None
+
+         # augmentation
+         # landmarks_target = [-0.5, edge-0.5]
+         img, landmarks_target, matrix = \
+             self.augmentation.process(img, landmarks_target, landmarks_5pts, scale, center_w, center_h)
+
+         landmarks = self._norm_points(torch.from_numpy(landmarks_target), self.image_height, self.image_width)
+
+         sample["label"] = [landmarks, ]
+
+         if self.use_AAM:
+             pointmap = self.encoder.generate_heatmap(landmarks_target)
+             edgemap = self._generate_edgemap(landmarks_target)
+             sample["label"] += [pointmap, edgemap]
+
+         sample['matrix'] = matrix
+
+         # image normalization
+         img = img.transpose(2, 0, 1).astype(np.float32)  # CHW, BGR, [0, 255]
+         img[0, :, :] = (img[0, :, :] - self.means[0]) * self.scale
+         img[1, :, :] = (img[1, :, :] - self.means[1]) * self.scale
+         img[2, :, :] = (img[2, :, :] - self.means[2]) * self.scale
+         sample["data"] = torch.from_numpy(img)  # CHW, BGR, [-1, 1]
+
+         sample["tags"] = tags
+
+         return sample
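
Note: `_norm_points` above uses the align_corners=False convention, where a pixel coordinate `p` in `[-0.5, SIZE-0.5]` maps to `(2*p + 1)/SIZE - 1` in `[-1, 1]`. The standalone sketch below just restates that formula with a few illustrative values to make the mapping concrete.

```python
# Sketch of the align_corners=False normalization used by _norm_points.
# Input values are illustrative (a 256x256 crop).
import torch

def norm_points(points, h, w):
    # points: (n, 2) in (x, y) pixel coordinates
    return torch.clamp((points * 2 + 1) / torch.tensor([w, h], dtype=points.dtype) - 1, -1, 1)

pts = torch.tensor([[0.0, 0.0], [127.5, 127.5], [255.0, 255.0]])
print(norm_points(pts, h=256, w=256))
# tensor([[-0.9961, -0.9961],
#         [ 0.0000,  0.0000],
#         [ 0.9961,  0.9961]])
```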
external/landmark_detection/lib/dataset/augmentation.py ADDED
@@ -0,0 +1,355 @@
+ import os
+ import cv2
+ import math
+ import random
+ import numpy as np
+ from skimage import transform
+
+
+ class Augmentation:
+     def __init__(self,
+                  is_train=True,
+                  aug_prob=1.0,
+                  image_size=256,
+                  crop_op=True,
+                  std_lmk_5pts=None,
+                  target_face_scale=1.0,
+                  flip_rate=0.5,
+                  flip_mapping=None,
+                  random_shift_sigma=0.05,
+                  random_rot_sigma=math.pi / 180 * 18,
+                  random_scale_sigma=0.1,
+                  random_gray_rate=0.2,
+                  random_occ_rate=0.4,
+                  random_blur_rate=0.3,
+                  random_gamma_rate=0.2,
+                  random_nose_fusion_rate=0.2):
+         self.is_train = is_train
+         self.aug_prob = aug_prob
+         self.crop_op = crop_op
+         self._flip = Flip(flip_mapping, flip_rate)
+         if self.crop_op:
+             self._cropMatrix = GetCropMatrix(
+                 image_size=image_size,
+                 target_face_scale=target_face_scale,
+                 align_corners=True)
+         else:
+             self._alignMatrix = GetAlignMatrix(
+                 image_size=image_size,
+                 target_face_scale=target_face_scale,
+                 std_lmk_5pts=std_lmk_5pts)
+         self._randomGeometryMatrix = GetRandomGeometryMatrix(
+             target_shape=(image_size, image_size),
+             from_shape=(image_size, image_size),
+             shift_sigma=random_shift_sigma,
+             rot_sigma=random_rot_sigma,
+             scale_sigma=random_scale_sigma,
+             align_corners=True)
+         self._transform = Transform(image_size=image_size)
+         self._randomTexture = RandomTexture(
+             random_gray_rate=random_gray_rate,
+             random_occ_rate=random_occ_rate,
+             random_blur_rate=random_blur_rate,
+             random_gamma_rate=random_gamma_rate,
+             random_nose_fusion_rate=random_nose_fusion_rate)
+
+     def process(self, img, lmk, lmk_5pts=None, scale=1.0, center_w=0, center_h=0, is_train=True):
+         if self.is_train and random.random() < self.aug_prob:
+             img, lmk, lmk_5pts, center_w, center_h = self._flip.process(img, lmk, lmk_5pts, center_w, center_h)
+             matrix_geoaug = self._randomGeometryMatrix.process()
+             if self.crop_op:
+                 matrix_pre = self._cropMatrix.process(scale, center_w, center_h)
+             else:
+                 matrix_pre = self._alignMatrix.process(lmk_5pts)
+             matrix = matrix_geoaug @ matrix_pre
+             aug_img, aug_lmk = self._transform.process(img, lmk, matrix)
+             aug_img = self._randomTexture.process(aug_img)
+         else:
+             if self.crop_op:
+                 matrix = self._cropMatrix.process(scale, center_w, center_h)
+             else:
+                 matrix = self._alignMatrix.process(lmk_5pts)
+             aug_img, aug_lmk = self._transform.process(img, lmk, matrix)
+         return aug_img, aug_lmk, matrix
+
+
+ class GetCropMatrix:
+     def __init__(self, image_size, target_face_scale, align_corners=False):
+         self.image_size = image_size
+         self.target_face_scale = target_face_scale
+         self.align_corners = align_corners
+
+     def _compose_rotate_and_scale(self, angle, scale, shift_xy, from_center, to_center):
+         cosv = math.cos(angle)
+         sinv = math.sin(angle)
+
+         fx, fy = from_center
+         tx, ty = to_center
+
+         acos = scale * cosv
+         asin = scale * sinv
+
+         a0 = acos
+         a1 = -asin
+         a2 = tx - acos * fx + asin * fy + shift_xy[0]
+
+         b0 = asin
+         b1 = acos
+         b2 = ty - asin * fx - acos * fy + shift_xy[1]
+
+         rot_scale_m = np.array([
+             [a0, a1, a2],
+             [b0, b1, b2],
+             [0.0, 0.0, 1.0]
+         ], np.float32)
+         return rot_scale_m
+
+     def process(self, scale, center_w, center_h):
+         if self.align_corners:
+             to_w, to_h = self.image_size - 1, self.image_size - 1
+         else:
+             to_w, to_h = self.image_size, self.image_size
+
+         rot_mu = 0
+         scale_mu = self.image_size / (scale * self.target_face_scale * 200.0)
+         shift_xy_mu = (0, 0)
+         matrix = self._compose_rotate_and_scale(
+             rot_mu, scale_mu, shift_xy_mu,
+             from_center=[center_w, center_h],
+             to_center=[to_w / 2.0, to_h / 2.0])
+         return matrix
+
+
+ class GetAlignMatrix:
+     def __init__(self, image_size, target_face_scale, std_lmk_5pts):
+         """
+         points in std_lmk_5pts range from -1 to 1.
+         """
+         self.std_lmk_5pts = (std_lmk_5pts * target_face_scale + 1) * \
+                             np.array([image_size, image_size], np.float32) / 2.0
+
+     def process(self, lmk_5pts):
+         assert lmk_5pts.shape[-2:] == (5, 2)
+         tform = transform.SimilarityTransform()
+         tform.estimate(lmk_5pts, self.std_lmk_5pts)
+         return tform.params
+
+
+ class GetRandomGeometryMatrix:
+     def __init__(self, target_shape, from_shape,
+                  shift_sigma=0.1, rot_sigma=18 * math.pi / 180, scale_sigma=0.1,
+                  shift_mu=0.0, rot_mu=0.0, scale_mu=1.0,
+                  shift_normal=True, rot_normal=True, scale_normal=True,
+                  align_corners=False):
+         self.target_shape = target_shape
+         self.from_shape = from_shape
+         self.shift_config = (shift_mu, shift_sigma, shift_normal)
+         self.rot_config = (rot_mu, rot_sigma, rot_normal)
+         self.scale_config = (scale_mu, scale_sigma, scale_normal)
+         self.align_corners = align_corners
+
+     def _compose_rotate_and_scale(self, angle, scale, shift_xy, from_center, to_center):
+         cosv = math.cos(angle)
+         sinv = math.sin(angle)
+
+         fx, fy = from_center
+         tx, ty = to_center
+
+         acos = scale * cosv
+         asin = scale * sinv
+
+         a0 = acos
+         a1 = -asin
+         a2 = tx - acos * fx + asin * fy + shift_xy[0]
+
+         b0 = asin
+         b1 = acos
+         b2 = ty - asin * fx - acos * fy + shift_xy[1]
+
+         rot_scale_m = np.array([
+             [a0, a1, a2],
+             [b0, b1, b2],
+             [0.0, 0.0, 1.0]
+         ], np.float32)
+         return rot_scale_m
+
+     def _random(self, mu_sigma_normal, size=None):
+         mu, sigma, is_normal = mu_sigma_normal
+         if is_normal:
+             return np.random.normal(mu, sigma, size=size)
+         else:
+             return np.random.uniform(low=mu - sigma, high=mu + sigma, size=size)
+
+     def process(self):
+         if self.align_corners:
+             from_w, from_h = self.from_shape[1] - 1, self.from_shape[0] - 1
+             to_w, to_h = self.target_shape[1] - 1, self.target_shape[0] - 1
+         else:
+             from_w, from_h = self.from_shape[1], self.from_shape[0]
+             to_w, to_h = self.target_shape[1], self.target_shape[0]
+
+         if self.shift_config[:2] != (0.0, 0.0) or \
+                 self.rot_config[:2] != (0.0, 0.0) or \
+                 self.scale_config[:2] != (1.0, 0.0):
+             shift_xy = self._random(self.shift_config, size=[2]) * \
+                        min(to_h, to_w)
+             rot_angle = self._random(self.rot_config)
+             scale = self._random(self.scale_config)
+             matrix_geoaug = self._compose_rotate_and_scale(
+                 rot_angle, scale, shift_xy,
+                 from_center=[from_w / 2.0, from_h / 2.0],
+                 to_center=[to_w / 2.0, to_h / 2.0])
+
+         return matrix_geoaug
+
+
+ class Transform:
+     def __init__(self, image_size):
+         self.image_size = image_size
+
+     def _transformPoints2D(self, points, matrix):
+         """
+         points (nx2), matrix (3x3) -> points (nx2)
+         """
+         dtype = points.dtype
+
+         # nx3
+         points = np.concatenate([points, np.ones_like(points[:, [0]])], axis=1)
+         points = points @ np.transpose(matrix)
+         points = points[:, :2] / points[:, [2, 2]]
+         return points.astype(dtype)
+
+     def _transformPerspective(self, image, matrix):
+         """
+         image, matrix3x3 -> transformed_image
+         """
+         return cv2.warpPerspective(
+             image, matrix,
+             dsize=(self.image_size, self.image_size),
+             flags=cv2.INTER_LINEAR, borderValue=0)
+
+     def process(self, image, landmarks, matrix):
+         t_landmarks = self._transformPoints2D(landmarks, matrix)
+         t_image = self._transformPerspective(image, matrix)
+         return t_image, t_landmarks
+
+
+ class RandomTexture:
+     def __init__(self, random_gray_rate=0, random_occ_rate=0, random_blur_rate=0, random_gamma_rate=0, random_nose_fusion_rate=0):
+         self.random_gray_rate = random_gray_rate
+         self.random_occ_rate = random_occ_rate
+         self.random_blur_rate = random_blur_rate
+         self.random_gamma_rate = random_gamma_rate
+         self.random_nose_fusion_rate = random_nose_fusion_rate
+         self.texture_augs = (
+             (self.add_occ, self.random_occ_rate),
+             (self.add_blur, self.random_blur_rate),
+             (self.add_gamma, self.random_gamma_rate),
+             (self.add_nose_fusion, self.random_nose_fusion_rate)
+         )
+
+     def add_gray(self, image):
+         assert image.ndim == 3 and image.shape[-1] == 3
+         image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
+         image = np.tile(np.expand_dims(image, -1), [1, 1, 3])
+         return image
+
+     def add_occ(self, image):
+         h, w, c = image.shape
+         rh = 0.2 + 0.6 * random.random()  # [0.2, 0.8]
+         rw = rh - 0.2 + 0.4 * random.random()
+         cx = int((h - 1) * random.random())
+         cy = int((w - 1) * random.random())
+         dh = int(h / 2 * rh)
+         dw = int(w / 2 * rw)
+         x0 = max(0, cx - dw // 2)
+         y0 = max(0, cy - dh // 2)
+         x1 = min(w - 1, cx + dw // 2)
+         y1 = min(h - 1, cy + dh // 2)
+         image[y0:y1 + 1, x0:x1 + 1] = 0
+         return image
+
+     def add_blur(self, image):
+         blur_kratio = 0.05 * random.random()
+         blur_ksize = int((image.shape[0] + image.shape[1]) / 2 * blur_kratio)
+         if blur_ksize > 1:
+             image = cv2.blur(image, (blur_ksize, blur_ksize))
+         return image
+
+     def add_gamma(self, image):
+         if random.random() < 0.5:
+             gamma = 0.25 + 0.75 * random.random()
+         else:
+             gamma = 1.0 + 3.0 * random.random()
+         image = (((image / 255.0) ** gamma) * 255).astype("uint8")
+         return image
+
+     def add_nose_fusion(self, image):
+         h, w, c = image.shape
+         nose = np.array(bytearray(os.urandom(h * w * c)), dtype=image.dtype).reshape(h, w, c)
+         alpha = 0.5 * random.random()
+         image = (1 - alpha) * image + alpha * nose
+         return image.astype(np.uint8)
+
+     def process(self, image):
+         image = image.copy()
+         if random.random() < self.random_occ_rate:
+             image = self.add_occ(image)
+         if random.random() < self.random_blur_rate:
+             image = self.add_blur(image)
+         if random.random() < self.random_gamma_rate:
+             image = self.add_gamma(image)
+         if random.random() < self.random_nose_fusion_rate:
+             image = self.add_nose_fusion(image)
+         """
+         orders = list(range(len(self.texture_augs)))
+         random.shuffle(orders)
+         for order in orders:
+             if random.random() < self.texture_augs[order][1]:
+                 image = self.texture_augs[order][0](image)
+         """
+
+         if random.random() < self.random_gray_rate:
+             image = self.add_gray(image)
+
+         return image
+
+
+ class Flip:
+     def __init__(self, flip_mapping, random_rate):
+         self.flip_mapping = flip_mapping
+         self.random_rate = random_rate
+
+     def process(self, image, landmarks, landmarks_5pts, center_w, center_h):
+         if random.random() >= self.random_rate or self.flip_mapping is None:
+             return image, landmarks, landmarks_5pts, center_w, center_h
+
+         # COFW
+         if landmarks.shape[0] == 29:
+             flip_offset = 0
+         # 300W, WFLW
+         elif landmarks.shape[0] in (68, 98):
+             flip_offset = -1
+         else:
+             flip_offset = -1
+
+         h, w, _ = image.shape
+         # image_flip = cv2.flip(image, 1)
+         image_flip = np.fliplr(image).copy()
+         landmarks_flip = landmarks.copy()
+         for i, j in self.flip_mapping:
+             landmarks_flip[i] = landmarks[j]
+             landmarks_flip[j] = landmarks[i]
+         landmarks_flip[:, 0] = w + flip_offset - landmarks_flip[:, 0]
+         if landmarks_5pts is not None:
+             flip_mapping = ([0, 1], [3, 4])
+             landmarks_5pts_flip = landmarks_5pts.copy()
+             for i, j in flip_mapping:
+                 landmarks_5pts_flip[i] = landmarks_5pts[j]
+                 landmarks_5pts_flip[j] = landmarks_5pts[i]
+             landmarks_5pts_flip[:, 0] = w + flip_offset - landmarks_5pts_flip[:, 0]
+         else:
+             landmarks_5pts_flip = None
+
+         center_w = w + flip_offset - center_w
+         return image_flip, landmarks_flip, landmarks_5pts_flip, center_w, center_h
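
Note: `GetCropMatrix` builds a 3x3 similarity transform that rotates, scales, and shifts about the detected face center so that it lands at the crop center. The standalone sketch below re-states the same composition formula and checks that property numerically; the face center, bounding-box scale, and image size are illustrative values, not repository defaults.

```python
# Standalone sketch of the rotate/scale/shift composition used by GetCropMatrix.
# Numbers are illustrative.
import math
import numpy as np

def compose_rotate_and_scale(angle, scale, shift_xy, from_center, to_center):
    cosv, sinv = math.cos(angle), math.sin(angle)
    fx, fy = from_center
    tx, ty = to_center
    acos, asin = scale * cosv, scale * sinv
    return np.array([
        [acos, -asin, tx - acos * fx + asin * fy + shift_xy[0]],
        [asin,  acos, ty - asin * fx - acos * fy + shift_xy[1]],
        [0.0,   0.0,  1.0]], np.float32)

image_size, face_scale, bbox_scale = 256, 1.0, 1.2
center_w, center_h = 310.0, 205.0                      # face center in the source image
m = compose_rotate_and_scale(0.0,
                             image_size / (bbox_scale * face_scale * 200.0),
                             (0, 0),
                             from_center=[center_w, center_h],
                             to_center=[(image_size - 1) / 2.0, (image_size - 1) / 2.0])
src = np.array([center_w, center_h, 1.0], np.float32)
print(m @ src)   # ~[127.5, 127.5, 1.0]: the face center maps to the crop center
```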
external/landmark_detection/lib/dataset/decoder/__init__.py ADDED
@@ -0,0 +1,8 @@
+ from .decoder_default import decoder_default
+
+ def get_decoder(decoder_type='default'):
+     if decoder_type == 'default':
+         decoder = decoder_default()
+     else:
+         raise NotImplementedError
+     return decoder
external/landmark_detection/lib/dataset/decoder/decoder_default.py ADDED
@@ -0,0 +1,38 @@
+ import torch
+
+
+ class decoder_default:
+     def __init__(self, weight=1, use_weight_map=False):
+         self.weight = weight
+         self.use_weight_map = use_weight_map
+
+     def _make_grid(self, h, w):
+         yy, xx = torch.meshgrid(
+             torch.arange(h).float() / (h - 1) * 2 - 1,
+             torch.arange(w).float() / (w - 1) * 2 - 1)
+         return yy, xx
+
+     def get_coords_from_heatmap(self, heatmap):
+         """
+         inputs:
+         - heatmap: batch x npoints x h x w
+
+         outputs:
+         - coords: batch x npoints x 2 (x,y), [-1, +1]
+         - radius_sq: batch x npoints
+         """
+         batch, npoints, h, w = heatmap.shape
+         if self.use_weight_map:
+             heatmap = heatmap * self.weight
+
+         yy, xx = self._make_grid(h, w)
+         yy = yy.view(1, 1, h, w).to(heatmap)
+         xx = xx.view(1, 1, h, w).to(heatmap)
+
+         heatmap_sum = torch.clamp(heatmap.sum([2, 3]), min=1e-6)
+
+         yy_coord = (yy * heatmap).sum([2, 3]) / heatmap_sum  # batch x npoints
+         xx_coord = (xx * heatmap).sum([2, 3]) / heatmap_sum  # batch x npoints
+         coords = torch.stack([xx_coord, yy_coord], dim=-1)
+
+         return coords
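
Note: `get_coords_from_heatmap` is a soft-argmax, i.e. the heatmap-weighted average of a normalized coordinate grid where index `k` maps to `k/(size-1)*2 - 1`. The self-contained sketch below restates that computation and checks that a single hot pixel decodes to its grid position; the heatmap size and peak location are illustrative.

```python
# Self-contained check of the soft-argmax idea implemented by decoder_default.
# A one-hot heatmap should decode to the hot pixel's position on the [-1, 1] grid.
import torch

def soft_argmax(heatmap):
    b, n, h, w = heatmap.shape
    yy, xx = torch.meshgrid(
        torch.arange(h).float() / (h - 1) * 2 - 1,
        torch.arange(w).float() / (w - 1) * 2 - 1,
        indexing='ij')
    total = torch.clamp(heatmap.sum([2, 3]), min=1e-6)
    y = (yy.view(1, 1, h, w) * heatmap).sum([2, 3]) / total
    x = (xx.view(1, 1, h, w) * heatmap).sum([2, 3]) / total
    return torch.stack([x, y], dim=-1)

heatmap = torch.zeros(1, 1, 64, 64)
heatmap[0, 0, 16, 48] = 1.0          # row 16, column 48
print(soft_argmax(heatmap))          # ~[[[0.5238, -0.4921]]] = (48/63*2-1, 16/63*2-1)
```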