Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 9 additions & 10 deletions selfdrive/modeld/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@ def get_camera_configs():
"tizi": (_ar_ox_fisheye.width, _ar_ox_fisheye.height),
"mici": (_os_fisheye.width, _os_fisheye.height),
}
if release or PC or 'CI' in os.environ:
return set(DEVICE_RESOLUTIONS.values())
return [DEVICE_RESOLUTIONS[HARDWARE.get_device_type()]]
return set(DEVICE_RESOLUTIONS.values())


CAMERA_CONFIGS = get_camera_configs()

Expand All @@ -48,7 +47,7 @@ if 'CUDA' in available:
tg_flags = f'DEV={tg_backend}'
elif 'QCOM' in available:
tg_backend = 'QCOM'
tg_flags = f'DEV={tg_backend} IMAGE=1 FLOAT16=1 NOLOCALS=1 JIT_BATCH_SIZE=0 OPENPILOT_HACKS=1'
tg_flags = f'DEV={tg_backend} IMAGE=1 FLOAT16=1 JIT_BATCH_SIZE=0 OPENPILOT_HACKS=1'
else:
tg_backend = 'CPU'
tg_flags = f'DEV=CPU' if arch == 'Darwin' else 'DEV=CPU:LLVM'
Expand Down Expand Up @@ -121,12 +120,12 @@ lenv.Command(fn + "_metadata.pkl", [fn + ".onnx"] + tinygrad_files + script_file

dm_w, dm_h = DM_INPUT_SIZE
compile_dm_warp_script = [File(f"{modeld_dir}/compile_dm_warp.py")]
for cam_w, cam_h in CAMERA_CONFIGS:
dm_pkl_path = File(f"models/dm_warp_{cam_w}x{cam_h}_tinygrad.pkl").abspath
cmd = (f'{tg_flags} {mac_brew_string} python3 {modeld_dir}/compile_dm_warp.py '
f'--camera-resolution {cam_w}x{cam_h} --warp-to {dm_w}x{dm_h} '
f'--output {dm_pkl_path}')
lenv.Command(dm_pkl_path, tinygrad_files + compile_dm_warp_script + compile_modeld_script + [tg_devices_node], cmd)
dm_pkl_path = File("models/dm_warp_tinygrad.pkl").abspath
camera_res_args = ' '.join(f'{cw}x{ch}' for cw, ch in CAMERA_CONFIGS)
cmd = (f'{tg_flags} {mac_brew_string} python3 {modeld_dir}/compile_dm_warp.py '
f'--camera-resolutions {camera_res_args} --warp-to {dm_w}x{dm_h} '
f'--output {dm_pkl_path}')
lenv.Command(dm_pkl_path, tinygrad_files + compile_dm_warp_script + compile_modeld_script + [Value(camera_res_args), tg_devices_node], cmd)

def tg_compile(flags, model_name):
pythonpath_string = 'PYTHONPATH="${PYTHONPATH}:' + env.Dir("#tinygrad_repo").abspath + '"'
Expand Down
4 changes: 4 additions & 0 deletions selfdrive/modeld/camera_frame.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from collections import namedtuple


NV12Frame = namedtuple("NV12Frame", ['width', 'height', 'stride', 'y_height', 'uv_height', 'size'])
43 changes: 24 additions & 19 deletions selfdrive/modeld/compile_dm_warp.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,49 +8,54 @@
from tinygrad.engine.jit import TinyJit

from openpilot.system.camerad.cameras.nv12_info import get_nv12_info
from openpilot.selfdrive.modeld.compile_modeld import NV12Frame, warp_perspective_tinygrad, _parse_size
from openpilot.selfdrive.modeld.compile_modeld import NV12Frame, bind_camera_vars, make_camera_vars, warp_perspective_tinygrad, _parse_size


def make_warp_dm(nv12: NV12Frame, dm_w, dm_h):
cam_w, cam_h, stride, _, _, _ = nv12
stride_pad = stride - cam_w

def warp_dm(input_frame, M_inv):
def make_warp_dm(dm_w, dm_h):
def warp_dm(input_frame, M_inv, cam_w, cam_h, chroma_w, chroma_h, stride, uv_offset):
M_inv = M_inv.to(Device.DEFAULT).realize()
return warp_perspective_tinygrad(input_frame[:cam_h*stride], M_inv,
(dm_w, dm_h), (cam_h, cam_w), stride_pad, border_fill_val=0).reshape(-1, dm_h * dm_w)
return warp_perspective_tinygrad(input_frame, M_inv, (dm_w, dm_h),
(cam_h, cam_w), stride, border_fill_val=0).reshape(-1, dm_h * dm_w)
return warp_dm


def compile_dm_warp(nv12: NV12Frame, dm_w, dm_h, pkl_path):
print(f"Compiling DM warp for {nv12.width}x{nv12.height} -> {dm_w}x{dm_h}...")
def compile_dm_warp(camera_configs: list[NV12Frame], dm_w, dm_h, pkl_path):
print(f"Compiling DM warp for {len(camera_configs)} camera sizes -> {dm_w}x{dm_h}...")

warp_dm_jit = TinyJit(make_warp_dm(nv12, dm_w, dm_h), prune=True)
camera_vars, max_frame_size = make_camera_vars(camera_configs)
warp_dm_jit = TinyJit(make_warp_dm(dm_w, dm_h), prune=True)

for i in range(10):
frame = Tensor.randint(nv12.size, low=0, high=256, dtype='uint8').realize()
nv12 = camera_configs[i % len(camera_configs)]
frame = Tensor.randint(max_frame_size, low=0, high=256, dtype='uint8').realize()
M_inv = Tensor(Tensor.randn(3, 3).mul(8).realize().numpy(), device='NPY')
Device.default.synchronize()
st = time.perf_counter()
warp_dm_jit(frame, M_inv).realize()
warp_dm_jit(frame, M_inv, **bind_camera_vars(camera_vars, nv12)).realize()
mt = time.perf_counter()
Device.default.synchronize()
et = time.perf_counter()
print(f" [{i+1}/10] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
print(f" [{i+1}/10] {nv12.width}x{nv12.height} enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")

with open(pkl_path, "wb") as f:
pickle.dump(warp_dm_jit, f)
pickle.dump({
'warp': warp_dm_jit,
'camera_configs': {nv12[:2]: nv12 for nv12 in camera_configs},
'max_frame_size': max_frame_size,
}, f)
print(f" Saved to {pkl_path}")


if __name__ == "__main__":
p = argparse.ArgumentParser()
p.add_argument('--camera-resolution', type=_parse_size, required=True, help='camera resolution WxH')
p.add_argument('--camera-resolution', type=_parse_size, help='camera resolution WxH')
p.add_argument('--camera-resolutions', type=_parse_size, nargs='+', help='camera resolutions WxH (one or more)')
p.add_argument('--warp-to', type=_parse_size, required=True, help='DM input WxH')
p.add_argument('--output', required=True)
args = p.parse_args()

cam_w, cam_h = args.camera_resolution
nv12 = NV12Frame(cam_w, cam_h, *get_nv12_info(cam_w, cam_h))
camera_resolutions = args.camera_resolutions or ([args.camera_resolution] if args.camera_resolution else None)
assert camera_resolutions is not None, "one of --camera-resolution or --camera-resolutions is required"
camera_configs = [NV12Frame(cam_w, cam_h, *get_nv12_info(cam_w, cam_h)) for cam_w, cam_h in camera_resolutions]
dm_w, dm_h = args.warp_to
compile_dm_warp(nv12, dm_w, dm_h, args.output)
compile_dm_warp(camera_configs, dm_w, dm_h, args.output)
121 changes: 77 additions & 44 deletions selfdrive/modeld/compile_modeld.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pickle
import time
from functools import partial
from collections import namedtuple, defaultdict
from collections import defaultdict

import numpy as np

Expand All @@ -26,23 +26,50 @@ def fetch_fw(path, name, sha256):
_patch_tinygrad_fetch_fw()

from tinygrad.tensor import Tensor
from tinygrad import Variable
from tinygrad.helpers import Context
from tinygrad.device import Device
from tinygrad.engine.jit import TinyJit

from openpilot.common.file_chunker import read_file_chunked
from openpilot.system.hardware.hw import Paths
from openpilot.selfdrive.modeld.camera_frame import NV12Frame


NV12Frame = namedtuple("NV12Frame", ['width', 'height', 'stride', 'y_height', 'uv_height', 'size'])

UV_SCALE_MATRIX = np.array([[0.5, 0, 0], [0, 0.5, 0], [0, 0, 1]], dtype=np.float32)
UV_SCALE_MATRIX_INV = np.linalg.inv(UV_SCALE_MATRIX)

WARP_DEV = os.getenv('WARP_DEV')


def warp_perspective_tinygrad(src_flat, M_inv, dst_shape, src_shape, stride_pad, border_fill_val=None):
def make_camera_vars(camera_configs: list[NV12Frame]):
max_cam_w = max(nv12.width for nv12 in camera_configs)
max_cam_h = max(nv12.height for nv12 in camera_configs)
max_stride = max(nv12.stride for nv12 in camera_configs)
max_uv_offset = max(nv12.stride * nv12.y_height for nv12 in camera_configs)
max_frame_size = max(nv12.size for nv12 in camera_configs)
return {
'cam_w': Variable('cam_w', 1, max_cam_w),
'cam_h': Variable('cam_h', 1, max_cam_h),
'chroma_w': Variable('chroma_w', 1, max_cam_w // 2),
'chroma_h': Variable('chroma_h', 1, max_cam_h // 2),
'stride': Variable('stride', 1, max_stride),
'uv_offset': Variable('uv_offset', 1, max_uv_offset),
}, max_frame_size


def bind_camera_vars(camera_vars, nv12: NV12Frame):
values = {
'cam_w': nv12.width,
'cam_h': nv12.height,
'chroma_w': nv12.width // 2,
'chroma_h': nv12.height // 2,
'stride': nv12.stride,
'uv_offset': nv12.stride * nv12.y_height,
}
return {k: v.bind(values[k]) for k, v in camera_vars.items()}


def warp_perspective_tinygrad(src_flat, M_inv, dst_shape, src_shape, stride, src_offset=0, x_step=1, channel=0, border_fill_val=None):
w_dst, h_dst = dst_shape
h_src, w_src = src_shape

Expand All @@ -61,7 +88,7 @@ def warp_perspective_tinygrad(src_flat, M_inv, dst_shape, src_shape, stride_pad,
y_round = Tensor.round(src_y)
x_nn_clipped = x_round.clip(0, w_src - 1).cast('int')
y_nn_clipped = y_round.clip(0, h_src - 1).cast('int')
idx = y_nn_clipped * (w_src + stride_pad) + x_nn_clipped
idx = y_nn_clipped * stride + x_nn_clipped * x_step + src_offset + channel
sampled = src_flat[idx]

if border_fill_val is None:
Expand All @@ -84,26 +111,18 @@ def frames_to_tensor(frames):
return in_img1


def make_frame_prepare(nv12: NV12Frame, model_w, model_h):
cam_w, cam_h, stride, y_height, uv_height, _ = nv12
uv_offset = stride * y_height
stride_pad = stride - cam_w

def frame_prepare_tinygrad(input_frame, M_inv):
def make_frame_prepare(model_w, model_h):
def frame_prepare_tinygrad(input_frame, M_inv, cam_w, cam_h, chroma_w, chroma_h, stride, uv_offset):
# UV_SCALE @ M_inv @ UV_SCALE_INV simplifies to elementwise scaling
M_inv_uv = M_inv * Tensor([[1.0, 1.0, 0.5], [1.0, 1.0, 0.5], [2.0, 2.0, 1.0]], device=WARP_DEV)
# deinterleave NV12 UV plane (UVUV... -> separate U, V)
uv = input_frame[uv_offset:uv_offset + uv_height * stride].reshape(uv_height, stride)
with Context(SPLIT_REDUCEOP=0):
y = warp_perspective_tinygrad(input_frame[:cam_h*stride],
M_inv, (model_w, model_h),
(cam_h, cam_w), stride_pad).realize()
u = warp_perspective_tinygrad(uv[:cam_h//2, :cam_w:2].flatten(),
M_inv_uv, (model_w//2, model_h//2),
(cam_h//2, cam_w//2), 0).realize()
v = warp_perspective_tinygrad(uv[:cam_h//2, 1:cam_w:2].flatten(),
M_inv_uv, (model_w//2, model_h//2),
(cam_h//2, cam_w//2), 0).realize()
y = warp_perspective_tinygrad(input_frame, M_inv, (model_w, model_h),
(cam_h, cam_w), stride).realize()
# Gather directly from interleaved NV12 UV memory so symbolic widths avoid step=2 slicing.
u = warp_perspective_tinygrad(input_frame, M_inv_uv, (model_w//2, model_h//2),
(chroma_h, chroma_w), stride, uv_offset, x_step=2, channel=0).realize()
v = warp_perspective_tinygrad(input_frame, M_inv_uv, (model_w//2, model_h//2),
(chroma_h, chroma_w), stride, uv_offset, x_step=2, channel=1).realize()
yuv = y.cat(u).cat(v).reshape((model_h * 3 // 2, model_w))
tensor = frames_to_tensor(yuv)
return tensor
Expand Down Expand Up @@ -148,21 +167,22 @@ def sample_desire(buf, frame_skip):
return buf.reshape(-1, frame_skip, *buf.shape[1:]).max(1).flatten(0, 1).unsqueeze(0)


def make_run_policy(vision_runner, policy_runner, nv12: NV12Frame, model_w, model_h,
def make_run_policy(vision_runner, policy_runner, model_w, model_h,
vision_features_slice, frame_skip, prepare_only=False):
frame_prepare = make_frame_prepare(nv12, model_w, model_h)
frame_prepare = make_frame_prepare(model_w, model_h)
sample_skip_fn = partial(sample_skip, frame_skip=frame_skip)
sample_desire_fn = partial(sample_desire, frame_skip=frame_skip)

def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, tfm, big_tfm, frame, big_frame):
def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, tfm, big_tfm,
frame, big_frame, cam_w, cam_h, chroma_w, chroma_h, stride, uv_offset):
tfm = tfm.to(WARP_DEV)
big_tfm = big_tfm.to(WARP_DEV)
desire = desire.to(Device.DEFAULT)
traffic_convention = traffic_convention.to(Device.DEFAULT)
Tensor.realize(tfm, big_tfm, desire, traffic_convention)

warped_frame = frame_prepare(frame, tfm).unsqueeze(0).to(Device.DEFAULT)
warped_big_frame = frame_prepare(big_frame, big_tfm).unsqueeze(0).to(Device.DEFAULT)
warped_frame = frame_prepare(frame, tfm, cam_w, cam_h, chroma_w, chroma_h, stride, uv_offset).unsqueeze(0).to(Device.DEFAULT)
warped_big_frame = frame_prepare(big_frame, big_tfm, cam_w, cam_h, chroma_w, chroma_h, stride, uv_offset).unsqueeze(0).to(Device.DEFAULT)
img = shift_and_sample(img_q, warped_frame, sample_skip_fn)
big_img = shift_and_sample(big_img_q, warped_big_frame, sample_skip_fn)

Expand All @@ -182,21 +202,24 @@ def run_policy(img_q, big_img_q, feat_q, desire_q, desire, traffic_convention, t
return run_policy


def compile_modeld(nv12: NV12Frame, model_w, model_h, prepare_only, frame_skip,
def compile_modeld(camera_configs: list[NV12Frame], model_w, model_h, prepare_only, frame_skip,
vision_runner, policy_runner, vision_metadata, policy_metadata):
print(f"Compiling combined policy JIT for {nv12.width}x{nv12.height} (prepare_only={prepare_only})...")
print(f"Compiling combined policy JIT for {len(camera_configs)} camera sizes (prepare_only={prepare_only})...")

vision_features_slice = vision_metadata['output_slices']['hidden_state']
vision_input_shapes = vision_metadata['input_shapes']
policy_input_shapes = policy_metadata['input_shapes']

_run = make_run_policy(vision_runner, policy_runner, nv12, model_w, model_h,
camera_vars, max_frame_size = make_camera_vars(camera_configs)
max_nv12 = max(camera_configs, key=lambda n: n.size)

_run = make_run_policy(vision_runner, policy_runner, model_w, model_h,
vision_features_slice, frame_skip, prepare_only)
run_policy_jit = TinyJit(_run, prune=True)

SEED = 42

def random_inputs_run_fn(fn, seed, test_val=None, test_buffers=None, expect_match=True):
def random_inputs_run_fn(fn, seed, test_val=None, test_buffers=None, expect_match=True, camera_config=None):
input_queues, npy = make_input_queues(vision_input_shapes, policy_input_shapes, frame_skip, Device.DEFAULT)
np.random.seed(seed)
Tensor.manual_seed(seed)
Expand All @@ -205,17 +228,19 @@ def random_inputs_run_fn(fn, seed, test_val=None, test_buffers=None, expect_matc
n_runs = 1 if testing else 3

for i in range(n_runs):
frame = Tensor.randint(nv12.size, low=0, high=256, dtype='uint8', device=WARP_DEV).realize()
big_frame = Tensor.randint(nv12.size, low=0, high=256, dtype='uint8', device=WARP_DEV).realize()
nv12 = camera_config or camera_configs[0]
camera_args = bind_camera_vars(camera_vars, nv12)
frame = Tensor.randint(max_frame_size, low=0, high=256, dtype='uint8', device=WARP_DEV).realize()
big_frame = Tensor.randint(max_frame_size, low=0, high=256, dtype='uint8', device=WARP_DEV).realize()
for v in npy.values():
v[:] = np.random.randn(*v.shape).astype(v.dtype)
Device.default.synchronize()
st = time.perf_counter()
outs = fn(**input_queues, frame=frame, big_frame=big_frame)
outs = fn(**input_queues, frame=frame, big_frame=big_frame, **camera_args)
mt = time.perf_counter()
Device.default.synchronize()
et = time.perf_counter()
print(f" [{i+1}/{n_runs}] enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")
print(f" [{i+1}/{n_runs}] {nv12.width}x{nv12.height} enqueue {(mt-st)*1e3:6.2f} ms -- total {(et-st)*1e3:6.2f} ms")

if i == 0:
val = [np.copy(v.numpy()) for v in outs]
Expand All @@ -236,6 +261,11 @@ def random_inputs_run_fn(fn, seed, test_val=None, test_buffers=None, expect_matc
run_policy_jit = pickle.loads(pickle.dumps(run_policy_jit))
random_inputs_run_fn(run_policy_jit, SEED, test_val, test_buffers, expect_match=True)
random_inputs_run_fn(run_policy_jit, SEED+1, test_val, test_buffers, expect_match=False)
for i, nv12 in enumerate(camera_configs[1:]):
print(f'symbolic replay {nv12.width}x{nv12.height}')
random_inputs_run_fn(run_policy_jit, SEED+2+i, camera_config=nv12)
run_policy_jit.max_frame_size = max_frame_size
run_policy_jit.max_camera_size = (max_nv12.width, max_nv12.height)
return run_policy_jit


Expand All @@ -245,6 +275,8 @@ def _parse_size(s):


def read_file_chunked_to_shm(path):
from openpilot.system.hardware.hw import Paths

shm_path = os.path.join(Paths.shm_path(), os.path.basename(path))
atexit.register(lambda: os.path.exists(shm_path) and os.remove(shm_path))
with open(shm_path, 'wb') as f:
Expand Down Expand Up @@ -274,14 +306,15 @@ def read_file_chunked_to_shm(path):
out['metadata']['vision'] = make_metadata_dict(vision_path)
out['metadata']['policy'] = make_metadata_dict(policy_path)

for cam_w, cam_h in args.camera_resolutions:
nv12 = NV12Frame(cam_w, cam_h, *get_nv12_info(cam_w, cam_h))
model_w, model_h = args.model_size
out[(cam_w,cam_h)] = {
name: compile_modeld(nv12, model_w, model_h, prepare_only, args.frame_skip,
vision_runner, policy_runner, out['metadata']['vision'], out['metadata']['policy'])
for name, prepare_only in [('warp_enqueue', True), ('run_policy', False)]
}
camera_configs = [NV12Frame(cam_w, cam_h, *get_nv12_info(cam_w, cam_h)) for cam_w, cam_h in args.camera_resolutions]
model_w, model_h = args.model_size
out['camera_configs'] = {nv12[:2]: nv12 for nv12 in camera_configs}
out['max_frame_size'] = max(nv12.size for nv12 in camera_configs)
out['symbolic'] = {
name: compile_modeld(camera_configs, model_w, model_h, prepare_only, args.frame_skip,
vision_runner, policy_runner, out['metadata']['vision'], out['metadata']['policy'])
for name, prepare_only in [('warp_enqueue', True), ('run_policy', False)]
}

with open(args.output, "wb") as f:
pickle.dump(out, f)
Expand Down
Loading
Loading