commaai · haraschax · May 21, 2026 · May 22, 2026 · May 22, 2026 · May 22, 2026
diff --git a/selfdrive/modeld/SConscript b/selfdrive/modeld/SConscript
@@ -19,9 +19,8 @@ def get_camera_configs():
     "tizi": (_ar_ox_fisheye.width, _ar_ox_fisheye.height),
     "mici": (_os_fisheye.width, _os_fisheye.height),
   }
-  if release or PC or 'CI' in os.environ:
-    return set(DEVICE_RESOLUTIONS.values())
-  return [DEVICE_RESOLUTIONS[HARDWARE.get_device_type()]]
+  return set(DEVICE_RESOLUTIONS.values())
+
 
 CAMERA_CONFIGS = get_camera_configs()
 
@@ -48,7 +47,7 @@ if 'CUDA' in available:
   tg_flags = f'DEV={tg_backend}'
 elif 'QCOM' in available:
   tg_backend = 'QCOM'
-  tg_flags = f'DEV={tg_backend} IMAGE=1 FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0 OPENPILOT_HACKS=1'
+  tg_flags = f'DEV={tg_backend} IMAGE=1 FLOAT16=1 JIT_BATCH_SIZE=0 OPENPILOT_HACKS=1'
 else:
   tg_backend = 'CPU'
   tg_flags = f'DEV=CPU' if arch == 'Darwin' else 'DEV=CPU:LLVM'
@@ -121,12 +120,12 @@ lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files + script_file
 
 dm_w, dm_h = DM_INPUT_SIZE
 compile_dm_warp_script = [File(f"{modeld_dir}/compile_dm_warp.py")]
-for cam_w, cam_h in CAMERA_CONFIGS:
-  dm_pkl_path = File(f"models/dm_warp_{cam_w}x{cam_h}_tinygrad.pkl").abspath
-  cmd = (f'{tg_flags} {mac_brew_string} python3 {modeld_dir}/compile_dm_warp.py '
-         f'--camera-resolution {cam_w}x{cam_h} --warp-to {dm_w}x{dm_h} '
-         f'--output {dm_pkl_path}')
-  lenv.Command(dm_pkl_path, tinygrad_files + compile_dm_warp_script + compile_modeld_script + [tg_devices_node], cmd)
+dm_pkl_path = File("models/dm_warp_tinygrad.pkl").abspath
+camera_res_args = ' '.join(f'{cw}x{ch}' for cw, ch in CAMERA_CONFIGS)
+cmd = (f'{tg_flags} {mac_brew_string} python3 {modeld_dir}/compile_dm_warp.py '
+       f'--camera-resolutions {camera_res_args} --warp-to {dm_w}x{dm_h} '
+       f'--output {dm_pkl_path}')
+lenv.Command(dm_pkl_path, tinygrad_files + compile_dm_warp_script + compile_modeld_script + [Value(camera_res_args), tg_devices_node], cmd)
 
 def tg_compile(flags, model_name):
   pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"'

diff --git a/selfdrive/modeld/camera_frame.py b/selfdrive/modeld/camera_frame.py
@@ -0,0 +1,4 @@
+from collections import namedtuple
+
+
+NV12Frame = namedtuple("NV12Frame", ['width', 'height', 'stride', 'y_height', 'uv_height', 'size'])
diff --git a/selfdrive/modeld/compile_dm_warp.py b/selfdrive/modeld/compile_dm_warp.py
@@ -8,49 +8,54 @@
 from tinygrad.engine.jit import TinyJit
 
 from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
-from openpilot.selfdrive.modeld.compile_modeld import NV12Frame, warp_perspective_tinygrad, _parse_size
+from openpilot.selfdrive.modeld.compile_modeld import NV12Frame, bind_camera_vars, make_camera_vars, warp_perspective_tinygrad, _parse_size
 
 
-def make_warp_dm(nv12: NV12Frame, dm_w, dm_h):
-  cam_w, cam_h, stride, _, _, _ = nv12
-  stride_pad = stride - cam_w
-
-  def warp_dm(input_frame, M_inv):
+def make_warp_dm(dm_w, dm_h):
+  def warp_dm(input_frame, M_inv, cam_w, cam_h, chroma_w, chroma_h, stride, uv_offset):
     M_inv = M_inv.to(Device.DEFAULT).realize()
-    return warp_perspective_tinygrad(input_frame[:cam_h*stride], M_inv,
-                                     (dm_w, dm_h), (cam_h, cam_w), stride_pad, border_fill_val=0).reshape(-1, dm_h * dm_w)
+    return warp_perspective_tinygrad(input_frame, M_inv, (dm_w, dm_h),
+                                     (cam_h, cam_w), stride, border_fill_val=0).reshape(-1, dm_h * dm_w)
   return warp_dm
 
 
-def compile_dm_warp(nv12: NV12Frame, dm_w, dm_h, pkl_path):
-  print(f"Compiling DM warp for {nv12.width}x{nv12.height} -> {dm_w}x{dm_h}...")
+def compile_dm_warp(camera_configs: list[NV12Frame], dm_w, dm_h, pkl_path):
+  print(f"Compiling DM warp for {len(camera_configs)} camera sizes -> {dm_w}x{dm_h}...")
 
-  warp_dm_jit = TinyJit(make_warp_dm(nv12, dm_w, dm_h), prune=True)
+  camera_vars, max_frame_size = make_camera_vars(camera_configs)
+  warp_dm_jit = TinyJit(make_warp_dm(dm_w, dm_h), prune=True)
 
   for i in range(10):
-    frame = Tensor.randint(nv12.size, low=0, high=256, dtype='uint8').realize()
+    nv12 = camera_configs[i % len(camera_configs)]
+    frame = Tensor.randint(max_frame_size, low=0, high=256, dtype='uint8').realize()
     M_inv = Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')
     Device.default.synchronize()
     st = time.perf_counter()
-    warp_dm_jit(frame, M_inv).realize()
+    warp_dm_jit(frame, M_inv, **bind_camera_vars(camera_vars, nv12)).realize()
     mt = time.perf_counter()
     Device.default.synchronize()
     et = time.perf_counter()
-    print(f"  [{i+1}/10] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
+    print(f"  [{i+1}/10] {nv12.width}x{nv12.height} enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
 
   with open(pkl_path, "wb") as f:
-    pickle.dump(warp_dm_jit, f)
+    pickle.dump({
+      'warp': warp_dm_jit,
+      'camera_configs': {nv12[:2]: nv12 for nv12 in camera_configs},
+      'max_frame_size': max_frame_size,
+    }, f)
   print(f"  Saved to {pkl_path}")
 
 
 if __name__ == "__main__":
   p = argparse.ArgumentParser()
-  p.add_argument('--camera-resolution', type=_parse_size, required=True, help='camera resolution WxH')
+  p.add_argument('--camera-resolution', type=_parse_size, help='camera resolution WxH')
+  p.add_argument('--camera-resolutions', type=_parse_size, nargs='+', help='camera resolutions WxH (one or more)')
   p.add_argument('--warp-to', type=_parse_size, required=True, help='DM input WxH')
   p.add_argument('--output', required=True)
   args = p.parse_args()
 
-  cam_w, cam_h = args.camera_resolution
-  nv12 = NV12Frame(cam_w, cam_h, *get_nv12_info(cam_w, cam_h))
+  camera_resolutions = args.camera_resolutions or ([args.camera_resolution] if args.camera_resolution else None)
+  assert camera_resolutions is not None, "one of --camera-resolution or --camera-resolutions is required"
+  camera_configs = [NV12Frame(cam_w, cam_h, *get_nv12_info(cam_w, cam_h)) for cam_w, cam_h in camera_resolutions]
   dm_w, dm_h = args.warp_to
-  compile_dm_warp(nv12, dm_w, dm_h, args.output)
+  compile_dm_warp(camera_configs, dm_w, dm_h, args.output)
diff --git a/selfdrive/modeld/compile_modeld.py b/selfdrive/modeld/compile_modeld.py
@@ -5,7 +5,7 @@
 import pickle
 import time
 from functools import partial
-from collections import namedtuple, defaultdict
+from collections import defaultdict
 
 import numpy as np
 
@@ -26,23 +26,50 @@ def fetch_fw(path, name, sha256):
 _patch_tinygrad_fetch_fw()
 
 from tinygrad.tensor import Tensor
+from tinygrad import Variable
 from tinygrad.helpers import Context
 from tinygrad.device import Device
 from tinygrad.engine.jit import TinyJit
 
 from openpilot.common.file_chunker import read_file_chunked
-from openpilot.system.hardware.hw import Paths
+from openpilot.selfdrive.modeld.camera_frame import NV12Frame
 
 
-NV12Frame = namedtuple("NV12Frame", ['width', 'height', 'stride', 'y_height', 'uv_height', 'size'])
-
 UV_SCALE_MATRIX = np.array([[0.5, 0, 0], [0, 0.5, 0], [0, 0, 1]], dtype=np.float32)
 UV_SCALE_MATRIX_INV = np.linalg.inv(UV_SCALE_MATRIX)
 
 WARP_DEV = os.getenv('WARP_DEV')
 
 
-def warp_perspective_tinygrad(src_flat, M_inv, dst_shape, src_shape, stride_pad, border_fill_val=None):
+def make_camera_vars(camera_configs: list[NV12Frame]):
+  max_cam_w = max(nv12.width for nv12 in camera_configs)
+  max_cam_h = max(nv12.height for nv12 in camera_configs)
+  max_stride = max(nv12.stride for nv12 in camera_configs)
+  max_uv_offset = max(nv12.stride * nv12.y_height for nv12 in camera_configs)
+  max_frame_size = max(nv12.size for nv12 in camera_configs)
+  return {
+    'cam_w': Variable('cam_w', 1, max_cam_w),
+    'cam_h': Variable('cam_h', 1, max_cam_h),
+    'chroma_w': Variable('chroma_w', 1, max_cam_w // 2),
+    'chroma_h': Variable('chroma_h', 1, max_cam_h // 2),
+    'stride': Variable('stride', 1, max_stride),
+    'uv_offset': Variable('uv_offset', 1, max_uv_offset),
+  }, max_frame_size
+
+
+def bind_camera_vars(camera_vars, nv12: NV12Frame):
+  values = {
+    'cam_w': nv12.width,
+    'cam_h': nv12.height,
+    'chroma_w': nv12.width // 2,
+    'chroma_h': nv12.height // 2,
+    'stride': nv12.stride,
+    'uv_offset': nv12.stride * nv12.y_height,
+  }
+  return {k: v.bind(values[k]) for k, v in camera_vars.items()}
+
+
+def warp_perspective_tinygrad(src_flat, M_inv, dst_shape, src_shape, stride, src_offset=0, x_step=1, channel=0, border_fill_val=None):
   w_dst, h_dst = dst_shape
   h_src, w_src = src_shape
 
@@ -61,7 +88,7 @@ def warp_perspective_tinygrad(src_flat, M_inv, dst_shape, src_shape, stride_pad,
   y_round = Tensor.round(src_y)
   x_nn_clipped = x_round.clip(0, w_src - 1).cast('int')
   y_nn_clipped = y_round.clip(0, h_src - 1).cast('int')
-  idx = y_nn_clipped * (w_src + stride_pad) + x_nn_clipped
+  idx = y_nn_clipped * stride + x_nn_clipped * x_step + src_offset + channel
   sampled = src_flat[idx]
 
   if border_fill_val is None:
@@ -84,26 +111,18 @@ def frames_to_tensor(frames):
   return in_img1
 
 
-def make_frame_prepare(nv12: NV12Frame, model_w, model_h):
-  cam_w, cam_h, stride, y_height, uv_height, _ = nv12
-  uv_offset = stride * y_height
-  stride_pad = stride - cam_w
-
-  def frame_prepare_tinygrad(input_frame, M_inv):
+def make_frame_prepare(model_w, model_h):
+  def frame_prepare_tinygrad(input_frame, M_inv, cam_w, cam_h, chroma_w, chroma_h, stride, uv_offset):
     # UV_SCALE @ M_inv @ UV_SCALE_INV simplifies to elementwise scaling
     M_inv_uv = M_inv * Tensor([[1.0, 1.0, 0.5], [1.0, 1.0, 0.5], [2.0, 2.0, 1.0]], device=WARP_DEV)
-    # deinterleave NV12 UV plane (UVUV... -> separate U, V)
-    uv = input_frame[uv_offset:uv_offset + uv_height * stride].reshape(uv_height, stride)
     with Context(SPLIT_REDUCEOP=0):
-      y = warp_perspective_tinygrad(input_frame[:cam_h*stride],
-                                    M_inv, (model_w, model_h),
-                                    (cam_h, cam_w), stride_pad).realize()
-      u = warp_perspective_tinygrad(uv[:cam_h//2, :cam_w:2].flatten(),
-                                    M_inv_uv, (model_w//2, model_h//2),
-                                    (cam_h//2, cam_w//2), 0).realize()
-      v = warp_perspective_tinygrad(uv[:cam_h//2, 1:cam_w:2].flatten(),
-                                    M_inv_uv, (model_w//2, model_h//2),
-                                    (cam_h//2, cam_w//2), 0).realize()
+      y = warp_perspective_tinygrad(input_frame, M_inv, (model_w, model_h),
+                                    (cam_h, cam_w), stride).realize()
+      # Gather directly from interleaved NV12 UV memory so symbolic widths avoid step=2 slicing.
+      u = warp_perspective_tinygrad(input_frame, M_inv_uv, (model_w//2, model_h//2),
+                                    (chroma_h, chroma_w), stride, uv_offset, x_step=2, channel=0).realize()
+      v = warp_perspective_tinygrad(input_frame, M_inv_uv, (model_w//2, model_h//2),
+                                    (chroma_h, chroma_w), stride, uv_offset, x_step=2, channel=1).realize()
     yuv = y.cat(u).cat(v).reshape((model_h * 3 // 2, model_w))
     tensor = frames_to_tensor(yuv)
     return tensor
@@ -148,21 +167,22 @@ def sample_desire(buf, frame_skip):
   return buf.reshape(-1, frame_skip, *buf.shape[1:]).max(1).flatten(0, 1).unsqueeze(0)
 
 
-def make_run_policy(vision_runner, policy_runner, nv12: NV12Frame, model_w, model_h,
+def make_run_policy(vision_runner, policy_runner, model_w, model_h,
                     vision_features_slice, frame_skip, prepare_only=False):
-  frame_prepare = make_frame_prepare(nv12, model_w, model_h)
+  frame_prepare = make_frame_prepare(model_w, model_h)
   sample_skip_fn = partial(sample_skip, frame_skip=frame_skip)
   sample_desire_fn = partial(sample_desire, frame_skip=frame_skip)
 
-  def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, tfm, big_tfm, frame, big_frame):
+  def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, tfm, big_tfm,
+                 frame, big_frame, cam_w, cam_h, chroma_w, chroma_h, stride, uv_offset):
     tfm = tfm.to(WARP_DEV)
     big_tfm = big_tfm.to(WARP_DEV)
     desire = desire.to(Device.DEFAULT)
     traffic_convention = traffic_convention.to(Device.DEFAULT)
     Tensor.realize(tfm, big_tfm, desire, traffic_convention)
 
-    warped_frame = frame_prepare(frame, tfm).unsqueeze(0).to(Device.DEFAULT)
-    warped_big_frame = frame_prepare(big_frame, big_tfm).unsqueeze(0).to(Device.DEFAULT)
+    warped_frame = frame_prepare(frame, tfm, cam_w, cam_h, chroma_w, chroma_h, stride, uv_offset).unsqueeze(0).to(Device.DEFAULT)
+    warped_big_frame = frame_prepare(big_frame, big_tfm, cam_w, cam_h, chroma_w, chroma_h, stride, uv_offset).unsqueeze(0).to(Device.DEFAULT)
     img = shift_and_sample(img_q, warped_frame, sample_skip_fn)
     big_img = shift_and_sample(big_img_q, warped_big_frame, sample_skip_fn)
 
@@ -182,21 +202,24 @@ def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, t
   return run_policy
 
 
-def compile_modeld(nv12: NV12Frame, model_w, model_h, prepare_only, frame_skip,
+def compile_modeld(camera_configs: list[NV12Frame], model_w, model_h, prepare_only, frame_skip,
                    vision_runner, policy_runner, vision_metadata, policy_metadata):
-  print(f"Compiling combined policy JIT for {nv12.width}x{nv12.height} (prepare_only={prepare_only})...")
+  print(f"Compiling combined policy JIT for {len(camera_configs)} camera sizes (prepare_only={prepare_only})...")
 
   vision_features_slice = vision_metadata['output_slices']['hidden_state']
   vision_input_shapes = vision_metadata['input_shapes']
   policy_input_shapes = policy_metadata['input_shapes']
 
-  _run = make_run_policy(vision_runner, policy_runner, nv12, model_w, model_h,
+  camera_vars, max_frame_size = make_camera_vars(camera_configs)
+  max_nv12 = max(camera_configs, key=lambda n: n.size)
+
+  _run = make_run_policy(vision_runner, policy_runner, model_w, model_h,
                          vision_features_slice, frame_skip, prepare_only)
   run_policy_jit = TinyJit(_run, prune=True)
 
   SEED = 42
 
-  def random_inputs_run_fn(fn, seed, test_val=None, test_buffers=None, expect_match=True):
+  def random_inputs_run_fn(fn, seed, test_val=None, test_buffers=None, expect_match=True, camera_config=None):
     input_queues, npy = make_input_queues(vision_input_shapes, policy_input_shapes, frame_skip, Device.DEFAULT)
     np.random.seed(seed)
     Tensor.manual_seed(seed)
@@ -205,17 +228,19 @@ def random_inputs_run_fn(fn, seed, test_val=None, test_buffers=None, expect_matc
     n_runs = 1 if testing else 3
 
     for i in range(n_runs):
-      frame = Tensor.randint(nv12.size, low=0, high=256, dtype='uint8', device=WARP_DEV).realize()
-      big_frame = Tensor.randint(nv12.size, low=0, high=256, dtype='uint8', device=WARP_DEV).realize()
+      nv12 = camera_config or camera_configs[0]
+      camera_args = bind_camera_vars(camera_vars, nv12)
+      frame = Tensor.randint(max_frame_size, low=0, high=256, dtype='uint8', device=WARP_DEV).realize()
+      big_frame = Tensor.randint(max_frame_size, low=0, high=256, dtype='uint8', device=WARP_DEV).realize()
       for v in npy.values():
         v[:] = np.random.randn(*v.shape).astype(v.dtype)
       Device.default.synchronize()
       st = time.perf_counter()
-      outs = fn(**input_queues, frame=frame, big_frame=big_frame)
+      outs = fn(**input_queues, frame=frame, big_frame=big_frame, **camera_args)
       mt = time.perf_counter()
       Device.default.synchronize()
       et = time.perf_counter()
-      print(f"  [{i+1}/{n_runs}] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
+      print(f"  [{i+1}/{n_runs}] {nv12.width}x{nv12.height} enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
 
       if i == 0:
         val = [np.copy(v.numpy()) for v in outs]
@@ -236,6 +261,11 @@ def random_inputs_run_fn(fn, seed, test_val=None, test_buffers=None, expect_matc
   run_policy_jit = pickle.loads(pickle.dumps(run_policy_jit))
   random_inputs_run_fn(run_policy_jit, SEED, test_val, test_buffers, expect_match=True)
   random_inputs_run_fn(run_policy_jit, SEED+1, test_val, test_buffers, expect_match=False)
+  for i, nv12 in enumerate(camera_configs[1:]):
+    print(f'symbolic replay {nv12.width}x{nv12.height}')
+    random_inputs_run_fn(run_policy_jit, SEED+2+i, camera_config=nv12)
+  run_policy_jit.max_frame_size = max_frame_size
+  run_policy_jit.max_camera_size = (max_nv12.width, max_nv12.height)
   return run_policy_jit
 
 
@@ -245,6 +275,8 @@ def _parse_size(s):
 
 
 def read_file_chunked_to_shm(path):
+  from openpilot.system.hardware.hw import Paths
+
   shm_path = os.path.join(Paths.shm_path(), os.path.basename(path))
   atexit.register(lambda: os.path.exists(shm_path) and os.remove(shm_path))
   with open(shm_path, 'wb') as f:
@@ -274,14 +306,15 @@ def read_file_chunked_to_shm(path):
   out['metadata']['vision'] = make_metadata_dict(vision_path)
   out['metadata']['policy'] = make_metadata_dict(policy_path)
 
-  for cam_w, cam_h in args.camera_resolutions:
-    nv12 = NV12Frame(cam_w, cam_h, *get_nv12_info(cam_w, cam_h))
-    model_w, model_h = args.model_size
-    out[(cam_w,cam_h)] = {
-      name: compile_modeld(nv12, model_w, model_h, prepare_only, args.frame_skip,
-                           vision_runner, policy_runner, out['metadata']['vision'], out['metadata']['policy'])
-      for name, prepare_only in [('warp_enqueue', True), ('run_policy', False)]
-    }
+  camera_configs = [NV12Frame(cam_w, cam_h, *get_nv12_info(cam_w, cam_h)) for cam_w, cam_h in args.camera_resolutions]
+  model_w, model_h = args.model_size
+  out['camera_configs'] = {nv12[:2]: nv12 for nv12 in camera_configs}
+  out['max_frame_size'] = max(nv12.size for nv12 in camera_configs)
+  out['symbolic'] = {
+    name: compile_modeld(camera_configs, model_w, model_h, prepare_only, args.frame_skip,
+                         vision_runner, policy_runner, out['metadata']['vision'], out['metadata']['policy'])
+    for name, prepare_only in [('warp_enqueue', True), ('run_policy', False)]
+  }
 
   with open(args.output, "wb") as f:
     pickle.dump(out, f)
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,4 @@
		from collections import namedtuple


		NV12Frame = namedtuple("NV12Frame", ['width', 'height', 'stride', 'y_height', 'uv_height', 'size'])