Walter0807 · rdai324 · Apr 6, 2026 · Apr 6, 2026 · Apr 6, 2026 · Apr 6, 2026
diff --git a/configs/pose3d/MB_ft_h36m_global_lite.yaml b/configs/pose3d/MB_ft_h36m_global_lite.yaml
@@ -44,7 +44,7 @@ lambda_av: 0.0
 
 # Augmentation
 synthetic: False
-flip: True
+flip: False
 mask_ratio: 0.
 mask_T_ratio: 0.
 noise: False
diff --git a/configs/pretrain/MB_lite.yaml b/configs/pretrain/MB_lite.yaml
@@ -1,17 +1,18 @@
 # General  
-train_2d: True
-no_eval: False
-finetune: False
+train_2d: False
+no_eval: True
+eval_last: False
+finetune: True
 partial_train: null
 
 # Traning 
-epochs: 90
-checkpoint_frequency: 30
-batch_size: 64 
+epochs: 10
+checkpoint_frequency: 1
+batch_size: 16
 dropout: 0.0
-learning_rate: 0.0005
-weight_decay: 0.01
-lr_decay: 0.99
+learning_rate: 0.00025
+weight_decay: 0.02
+lr_decay: 0.98
 pretrain_3d_curriculum: 30
 
 # Model
@@ -24,16 +25,16 @@ num_heads: 8
 att_fuse: True
 
 # Data
-data_root: data/motion3d/MB3D_f243s81/
-subset_list: [AMASS, H36M-SH]
+data_root: ../DL_Project/pose_3d_v3/
+subset_list: [frame_81]
 dt_file: h36m_sh_conf_cam_source_final.pkl
 clip_len: 243
 data_stride: 81
-rootrel: True
+rootrel: False
 sample_stride: 1
 num_joints: 17
 no_conf: False
-gt_2d: False
+gt_2d: True
 
 # Loss
 lambda_3d_velocity: 20.0
@@ -44,10 +45,10 @@ lambda_a: 0.0
 lambda_av: 0.0
 
 # Augmentation
-synthetic: True   # synthetic: don't use 2D detection results, fake it (from 3D)
-flip: True
-mask_ratio: 0.05
-mask_T_ratio: 0.1
-noise: True
+synthetic: False   # synthetic: don't use 2D detection results, fake it (from 3D)
+flip: False
+mask_ratio: 0.0
+mask_T_ratio: 0
+noise: False
 noise_path: params/synthetic_noise.pth
 d2c_params_path: params/d2c_params.pkl
diff --git a/infer_wild.py b/infer_wild.py
@@ -12,6 +12,9 @@
 from lib.data.dataset_wild import WildDetDataset
 from lib.utils.vismo import render_and_save
 
+import os
+os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE'
+
 def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument("--config", type=str, default="configs/pose3d/MB_ft_h36m_global_lite.yaml", help="Path to the config file.")
@@ -28,23 +31,28 @@ def parse_args():
 opts = parse_args()
 args = get_config(opts.config)
 
+print('Loading checkpoint', opts.evaluate)
+checkpoint = torch.load(opts.evaluate, map_location=lambda storage, loc: storage)
+
 model_backbone = load_backbone(args)
 if torch.cuda.is_available():
     model_backbone = nn.DataParallel(model_backbone)
     model_backbone = model_backbone.cuda()
+    model_backbone.load_state_dict(checkpoint['model_pos'], strict=True)
+else:  # CPU path: the model is not wrapped in nn.DataParallel here
+    state_dict = checkpoint['model_pos']  # keys may carry DataParallel's 'module.' prefix
+    state_dict = {(k[len('module.'):] if k.startswith('module.') else k): v for k, v in state_dict.items()}  # strip only a leading prefix
+    model_backbone.load_state_dict(state_dict, strict=True)
 
-print('Loading checkpoint', opts.evaluate)
-checkpoint = torch.load(opts.evaluate, map_location=lambda storage, loc: storage)
-model_backbone.load_state_dict(checkpoint['model_pos'], strict=True)
 model_pos = model_backbone
 model_pos.eval()
 testloader_params = {
           'batch_size': 1,
           'shuffle': False,
-          'num_workers': 8,
+          'num_workers': 0,
           'pin_memory': True,
-          'prefetch_factor': 4,
-          'persistent_workers': True,
+        #   'prefetch_factor': 4,
+        #   'persistent_workers': True,
           'drop_last': False
 }
 
@@ -81,7 +89,8 @@ def parse_args():
         if args.rootrel:
             predicted_3d_pos[:,:,0,:]=0                    # [N,T,17,3]
         else:
-            predicted_3d_pos[:,0,0,2]=0
+            # zero the root joint's z in every frame (the old code only zeroed frame 0)
+            predicted_3d_pos[:, :, 0, 2] = 0
             pass
         if args.gt_2d:
             predicted_3d_pos[...,:2] = batch_input[...,:2]

diff --git a/infer_wild_mesh.py b/infer_wild_mesh.py
@@ -61,7 +61,10 @@ def solve_scale(x, y):
 # root_rel
 # args.rootrel = True
 
-smpl = SMPL(args.data_root, batch_size=1).cuda()
+if torch.cuda.is_available():
+    smpl = SMPL(args.data_root, batch_size=1).cuda()
+else:
+    smpl = SMPL(args.data_root, batch_size=1)
 J_regressor = smpl.J_regressor_h36m
 
 end = time.time()
@@ -71,14 +74,20 @@ def solve_scale(x, y):
 model = MeshRegressor(args, backbone=model_backbone, dim_rep=args.dim_rep, hidden_dim=args.hidden_dim, dropout_ratio=args.dropout)
 print(f'init whole model time: {(time.time()-end):02f}s')
 
+chk_filename = opts.evaluate if opts.evaluate else opts.resume
+print('Loading checkpoint', chk_filename)
+checkpoint = torch.load(chk_filename, map_location=lambda storage, loc: storage)
+
 if torch.cuda.is_available():
     model = nn.DataParallel(model)
     model = model.cuda()
+    model.load_state_dict(checkpoint['model'], strict=True)
+else:  # CPU path: the model is not wrapped in nn.DataParallel here
+    state_dict = checkpoint['model']  # keys may carry DataParallel's 'module.' prefix
+    state_dict = {(k[len('module.'):] if k.startswith('module.') else k): v for k, v in state_dict.items()}  # strip only a leading prefix
+    model.load_state_dict(state_dict, strict=True)
+
 
-chk_filename = opts.evaluate if opts.evaluate else opts.resume
-print('Loading checkpoint', chk_filename)
-checkpoint = torch.load(chk_filename, map_location=lambda storage, loc: storage)
-model.load_state_dict(checkpoint['model'], strict=True)
 model.eval()
 
 testloader_params = {

diff --git a/lib/data/dataset_motion_3d.py b/lib/data/dataset_motion_3d.py
@@ -8,7 +8,7 @@
 from torch.utils.data import Dataset, DataLoader
 from lib.data.augmentation import Augmenter3D
 from lib.utils.tools import read_pkl
-from lib.utils.utils_data import flip_data
+from lib.utils.utils_data import flip_data, crop_scale
 
 class MotionDataset(Dataset):
     def __init__(self, args, subset_list, data_split): # data_split: train/test
@@ -38,31 +38,73 @@ def __init__(self, args, subset_list, data_split):
         self.synthetic = args.synthetic
         self.aug = Augmenter3D(args)
         self.gt_2d = args.gt_2d
+
+    def __coco_to_h36m(self, kps_coco, n_dim=2):
+        """
+        kps_coco: np.array of shape (N, 17, n_dim), joints in COCO order (last axis expected to have n_dim channels)
+        returns:  float32 np.array of shape (N, 17, n_dim), joints reordered/synthesized into H36M order
+        """
+        # Same function as in `motionbert/motionbert_convert.ipynb` in the DL_project repo.
+        N = kps_coco.shape[0]
+        h36m = np.zeros((N, 17, n_dim), dtype=np.float32)
+
+        # Synthesize H36M joints not present in COCO
+        # H36M[0] = Hip root = average of left and right hips
+        h36m[:, 0] = (kps_coco[:, 11] + kps_coco[:, 12]) / 2  # (LHip + RHip) / 2
+
+        # H36M[7] = Spine mid = average of hip root and neck
+        neck = (kps_coco[:, 5] + kps_coco[:, 6]) / 2  # avg of shoulders as proxy
+        h36m[:, 7] = (h36m[:, 0] + neck) / 2
+
+        # H36M[8] = Thorax/Neck = average of shoulders
+        h36m[:, 8] = neck
+
+        # H36M[10] = Head top, approximated by the average of the ears (no nose fallback is applied)
+        h36m[:, 10] = (kps_coco[:, 3] + kps_coco[:, 4]) / 2  # avg of ears
+
+        # Direct mappings: h36m[:, <H36M index>] = kps_coco[:, <COCO index>]
+        h36m[:, 1]  = kps_coco[:, 12]  # RHip
+        h36m[:, 2]  = kps_coco[:, 14]  # RKnee
+        h36m[:, 3]  = kps_coco[:, 16]  # RAnkle
+        h36m[:, 4]  = kps_coco[:, 11]  # LHip
+        h36m[:, 5]  = kps_coco[:, 13]  # LKnee
+        h36m[:, 6]  = kps_coco[:, 15]  # LAnkle
+        h36m[:, 9]  = kps_coco[:, 0]   # Nose
+        h36m[:, 11] = kps_coco[:, 5]   # LShoulder
+        h36m[:, 12] = kps_coco[:, 7]   # LElbow
+        h36m[:, 13] = kps_coco[:, 9]   # LWrist
+        h36m[:, 14] = kps_coco[:, 6]   # RShoulder
+        h36m[:, 15] = kps_coco[:, 8]   # RElbow
+        h36m[:, 16] = kps_coco[:, 10]  # RWrist
+
+        return h36m
 
     def __getitem__(self, index):
         'Generates one sample of data'
         # Select sample
         file_path = self.file_list[index]
         motion_file = read_pkl(file_path)
-        motion_3d = motion_file["data_label"]  
+        motion_3d = self.__coco_to_h36m(motion_file["data_label"], 3)  
         if self.data_split=="train":
             if self.synthetic or self.gt_2d:
                 motion_3d = self.aug.augment3D(motion_3d)
                 motion_2d = np.zeros(motion_3d.shape, dtype=np.float32)
                 motion_2d[:,:,:2] = motion_3d[:,:,:2]
                 motion_2d[:,:,2] = 1                        # No 2D detection, use GT xy and c=1.
             elif motion_file["data_input"] is not None:     # Have 2D detection 
-                motion_2d = motion_file["data_input"]
+                motion_2d = self.__coco_to_h36m(motion_file["data_input"], 3)
+                motion_2d = crop_scale(motion_2d) 
                 if self.flip and random.random() > 0.5:                        # Training augmentation - random flipping
                     motion_2d = flip_data(motion_2d)
                     motion_3d = flip_data(motion_3d)
             else:
                 raise ValueError('Training illegal.') 
         elif self.data_split=="test":                                           
-            motion_2d = motion_file["data_input"]
+            motion_2d = self.__coco_to_h36m(motion_file["data_input"], 3)
+            motion_2d = crop_scale(motion_2d) 
             if self.gt_2d:
                 motion_2d[:,:,:2] = motion_3d[:,:,:2]
                 motion_2d[:,:,2] = 1
         else:
-            raise ValueError('Data split unknown.')    
+            raise ValueError('Data split unknown.')   
         return torch.FloatTensor(motion_2d), torch.FloatTensor(motion_3d)
diff --git a/lib/data/dataset_wild.py b/lib/data/dataset_wild.py
@@ -44,6 +44,9 @@ def halpe2h36m(x):
     {25, "RHeel"},
     '''
     T, V, C = x.shape
+    if V == 17:
+        return x
+
     y = np.zeros([T,17,C])
     y[:,0,:] = x[:,19,:]
     y[:,1,:] = x[:,12,:]

diff --git a/lib/utils/vismo.py b/lib/utils/vismo.py
@@ -263,12 +263,12 @@ def motion2video_3d(motion, save_path, fps=25, keep_imgs = False):
         ax.set_xlim(-512, 0)
         ax.set_ylim(-256, 256)
         ax.set_zlim(-512, 0)
-        # ax.set_xlabel('X')
-        # ax.set_ylabel('Y')
-        # ax.set_zlabel('Z')
+        ax.set_xlabel('X')
+        ax.set_ylabel('Y')
+        ax.set_zlabel('Z')
         ax.view_init(elev=12., azim=80)
-        plt.tick_params(left = False, right = False , labelleft = False ,
-                        labelbottom = False, bottom = False)
+        plt.tick_params(left = False, right = False , labelleft = True ,
+                        labelbottom = True, bottom = False)
         for i in range(len(joint_pairs)):
             limb = joint_pairs[i]
             xs, ys, zs = [np.array([j3d[limb[0], j], j3d[limb[1], j]]) for j in range(3)]