Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 2 additions & 5 deletions selfdrive/modeld/SConscript
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,12 @@ frame_skip = ModelConstants.MODEL_RUN_FREQ // ModelConstants.MODEL_CONTEXT_FREQ
for usbgpu in [False, True] if USBGPU else [False]:
target_pkl_path = File(modeld_pkl_path(usbgpu)).abspath
file_prefix, cmd_flags = ('big_', usbgpu_tg_flags) if usbgpu else ('', tg_flags)
driving_onnx_deps = [p for m in [f'{file_prefix}driving_vision', f'{file_prefix}driving_on_policy', f'{file_prefix}driving_off_policy']
for p in get_existing_chunks(File(f"models/{m}.onnx").abspath)]
driving_onnx_deps = get_existing_chunks(File(f"models/{file_prefix}driving.onnx").abspath)
camera_res_args = ' '.join(f'{cw}x{ch}' for cw, ch in CAMERA_CONFIGS)
cmd = (f'{cmd_flags} {mac_brew_string} python3 {modeld_dir}/compile_modeld.py '
f'--model-size {model_w}x{model_h} '
f'--camera-resolutions {camera_res_args} '
f'--vision-onnx {File(f"models/{file_prefix}driving_vision.onnx").abspath} '
f'--off-policy-onnx {File(f"models/{file_prefix}driving_off_policy.onnx").abspath} '
f'--on-policy-onnx {File(f"models/{file_prefix}driving_on_policy.onnx").abspath} '
f'--driving-onnx {File(f"models/{file_prefix}driving.onnx").abspath} '
f'--output {target_pkl_path} --frame-skip {frame_skip}')
onnx_sizes_sum = sum(os.path.getsize(f) for f in driving_onnx_deps)
chunk_targets = get_chunk_targets(target_pkl_path, estimate_pickle_max_size(onnx_sizes_sum))
Expand Down
65 changes: 27 additions & 38 deletions selfdrive/modeld/compile_modeld.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,8 +113,8 @@ def frame_prepare_tinygrad(input_frame, M_inv):
return frame_prepare_tinygrad


def make_warp_input_queues(vision_input_shapes, frame_skip, device):
img = vision_input_shapes['img'] # (1, 12, 128, 256)
def make_warp_input_queues(model_input_shapes, frame_skip, device):
img = model_input_shapes['img'] # (1, 12, 128, 256)
n_frames = img[1] // 6
img_buf_shape = (frame_skip * (n_frames - 1) + 1, 6, img[2], img[3])

Expand All @@ -130,14 +130,13 @@ def make_warp_input_queues(vision_input_shapes, frame_skip, device):
return input_queues, npy


def make_input_queues(vision_input_shapes, policy_input_shapes, frame_skip, device):
input_queues, npy = make_warp_input_queues(vision_input_shapes, frame_skip, device)
def make_input_queues(model_input_shapes, frame_skip, device):
input_queues, npy = make_warp_input_queues(model_input_shapes, frame_skip, device)

fb = policy_input_shapes['features_buffer'] # (1, 25, 512)
dp = policy_input_shapes['desire_pulse'] # (1, 25, 8)
tc = policy_input_shapes['traffic_convention'] # (1, 2)
#TODO action_t is hardcoded to match tc for future compatibility
at = tc
fb = model_input_shapes['features_buffer'] # full model-run feature queue, time-major
dp = model_input_shapes['desire_pulse'] # sampled temporal desire input, (1, T, 8)
tc = model_input_shapes['traffic_convention'] # (1, 2)
at = model_input_shapes['action_t'] # (1, 2)

policy_npy = {
'desire': np.zeros(dp[2], dtype=np.float32),
Expand All @@ -146,7 +145,7 @@ def make_input_queues(vision_input_shapes, policy_input_shapes, frame_skip, devi
}
npy.update(policy_npy)
input_queues.update({
'feat_q': Tensor(np.zeros((frame_skip * (fb[1] - 1) + 1, fb[0], fb[2]), dtype=np.float32), device=device).contiguous().realize(),
'feat_q': Tensor(np.zeros(fb, dtype=np.float32), device=device).contiguous().realize(),
'desire_q': Tensor(np.zeros((frame_skip * dp[1], dp[0], dp[2]), dtype=np.float32), device=device).contiguous().realize(),
**{k: Tensor(v, device='NPY').realize() for k, v in policy_npy.items()},
})
Expand Down Expand Up @@ -183,31 +182,28 @@ def warp_enqueue(img_q, big_img_q, tfm, big_tfm, frame, big_frame):
return warp_enqueue


def make_run_policy(model_runners, model_metadata, frame_skip):
def make_run_policy(model_runner, vision_features_slice, frame_skip):
sample_desire_fn = partial(sample_desire, frame_skip=frame_skip)
sample_skip_fn = partial(sample_skip, frame_skip=frame_skip)
vision_features_slice = model_metadata['vision']['output_slices']['hidden_state']

def run_policy(img, big_img, feat_q, desire_q, desire, traffic_convention, action_t):
desire = desire.to(Device.DEFAULT)
traffic_convention = traffic_convention.to(Device.DEFAULT)
action_t = action_t.to(Device.DEFAULT)
Tensor.realize(desire, traffic_convention, action_t)
desire_buf = shift_and_sample(desire_q, desire.reshape(1, 1, -1), sample_desire_fn)
vision_out = next(iter(model_runners['vision']({'img': img, 'big_img': big_img}).values())).cast('float32')

new_feat = vision_out[:, vision_features_slice].reshape(1, -1).unsqueeze(0)
feat_buf = shift_and_sample(feat_q, new_feat, sample_skip_fn)

inputs = {
'features_buffer': feat_buf,
'img': img,
'big_img': big_img,
'features_buffer': feat_q,
'desire_pulse': desire_buf,
'traffic_convention': traffic_convention,
'action_t': action_t,
}
on_policy_out = next(iter(model_runners['on_policy'](inputs).values())).cast('float32')
off_policy_out = next(iter(model_runners['off_policy'](inputs).values())).cast('float32')
return vision_out, on_policy_out, off_policy_out
model_out = next(iter(model_runner(inputs).values())).cast('float32')
new_feat = model_out[:, vision_features_slice].reshape(1, -1).unsqueeze(0)
feat_q.assign(feat_q[1:].cat(new_feat, dim=0).contiguous()).realize()
return model_out,
return run_policy


Expand Down Expand Up @@ -277,38 +273,31 @@ def read_file_chunked_to_shm(path):
p.add_argument('--model-size', type=_parse_size, required=True, help='model input WxH')
p.add_argument('--camera-resolutions', type=_parse_size, nargs='+', required=True,
help='camera resolutions WxH (one or more)')
p.add_argument('--vision-onnx', required=True)
p.add_argument('--off-policy-onnx', required=True)
p.add_argument('--on-policy-onnx', required=True)
p.add_argument('--driving-onnx', required=True)
p.add_argument('--output', required=True)
p.add_argument('--frame-skip', type=int, required=True)
args = p.parse_args()

model_paths = {
'vision': read_file_chunked_to_shm(args.vision_onnx),
'off_policy': read_file_chunked_to_shm(args.off_policy_onnx),
'on_policy': read_file_chunked_to_shm(args.on_policy_onnx),
}
model_path = read_file_chunked_to_shm(args.driving_onnx)
model_w, model_h = args.model_size

model_runners = {name: OnnxRunner(path) for name, path in model_paths.items()}
out = {'metadata': {name: make_metadata_dict(path) for name, path in model_paths.items()}}

assert out['metadata']['off_policy']['input_shapes'] == out['metadata']['on_policy']['input_shapes']
model_runner = OnnxRunner(model_path)
model_metadata = make_metadata_dict(model_path)
model_input_shapes = model_metadata['input_shapes']
out = {'metadata': {'driving': model_metadata}}

run_policy_jit = TinyJit(make_run_policy(model_runners, out['metadata'], args.frame_skip), prune=True)
run_policy_jit = TinyJit(make_run_policy(model_runner, model_metadata['output_slices']['hidden_state'], args.frame_skip), prune=True)

make_policy_queues = partial(make_input_queues, out['metadata']['vision']['input_shapes'],
out['metadata']['on_policy']['input_shapes'], args.frame_skip)
make_random_model_inputs = partial(make_random_images, keys=['img', 'big_img'], shape=out['metadata']['vision']['input_shapes']['img'])
make_policy_queues = partial(make_input_queues, model_input_shapes, args.frame_skip)
make_random_model_inputs = partial(make_random_images, keys=['img', 'big_img'], shape=model_input_shapes['img'])
out['run_policy'] = compile_jit(run_policy_jit, make_random_model_inputs, POLICY_INPUTS,
make_policy_queues)

for cam_w, cam_h in args.camera_resolutions:
nv12 = NV12Frame(cam_w, cam_h, *get_nv12_info(cam_w, cam_h))
make_random_warp_inputs = partial(make_random_images, keys=['frame', 'big_frame'], shape=nv12.size, device=WARP_DEV)
warp_enqueue = TinyJit(make_warp(nv12, model_w, model_h, args.frame_skip), prune=True)
make_warp_queues = partial(make_warp_input_queues, out['metadata']['vision']['input_shapes'], args.frame_skip)
make_warp_queues = partial(make_warp_input_queues, model_input_shapes, args.frame_skip)
out[(cam_w,cam_h)] = compile_jit(warp_enqueue, make_random_warp_inputs, WARP_INPUTS, make_warp_queues)

with open(args.output, "wb") as f:
Expand Down
36 changes: 12 additions & 24 deletions selfdrive/modeld/modeld.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,33 +79,22 @@ def __init__(self, cam_w: int, cam_h: int, usbgpu: bool):
input_devices = get_tg_input_devices(PROCESS_NAME, usbgpu)
self.WARP_DEV, self.QUEUE_DEV = input_devices['WARP_DEV'], input_devices['QUEUE_DEV']
jits = pickle.loads(read_file_chunked(modeld_pkl_path(usbgpu)))
vision_metadata = jits['metadata']['vision']
self.vision_input_shapes = vision_metadata['input_shapes']
self.vision_input_names = list(self.vision_input_shapes.keys())
self.vision_output_slices = vision_metadata['output_slices']

off_policy_metadata = jits['metadata']['off_policy']
self.off_policy_output_slices = off_policy_metadata['output_slices']

policy_metadata = jits['metadata']['on_policy']
self.policy_input_shapes = policy_metadata['input_shapes']
self.policy_output_slices = policy_metadata['output_slices']
model_metadata = jits['metadata']['driving']
model_input_shapes = model_metadata['input_shapes']
self.vision_input_names = [name for name in model_input_shapes if 'img' in name]
self.output_slices = model_metadata['output_slices']

self.prev_desire = np.zeros(ModelConstants.DESIRE_LEN, dtype=np.float32)

self.frame_skip = ModelConstants.MODEL_RUN_FREQ // ModelConstants.MODEL_CONTEXT_FREQ
self.input_queues, self.npy = make_input_queues(self.vision_input_shapes, self.policy_input_shapes, self.frame_skip, device=self.QUEUE_DEV)
self.input_queues, self.npy = make_input_queues(model_input_shapes, self.frame_skip, device=self.QUEUE_DEV)
self.full_frames: dict[str, Tensor] = {}
self._blob_cache: dict[int, Tensor] = {}
self.parser = Parser()
self.frame_buf_params = {k: get_nv12_info(cam_w, cam_h) for k in ('img', 'big_img')}
self.run_policy = jits['run_policy']
self.warp_enqueue = jits[(cam_w,cam_h)]

def slice_outputs(self, model_outputs: np.ndarray, output_slices: dict[str, slice]) -> dict[str, np.ndarray]:
parsed_model_outputs = {k: model_outputs[np.newaxis, v] for k,v in output_slices.items()}
return parsed_model_outputs

def run(self, bufs: dict[str, VisionBuf], transforms: dict[str, np.ndarray],
inputs: dict[str, np.ndarray], prepare_only: bool) -> dict[str, np.ndarray] | None:
for key in bufs.keys():
Expand All @@ -131,20 +120,19 @@ def run(self, bufs: dict[str, VisionBuf], transforms: dict[str, np.ndarray],
if prepare_only:
return None

vision_output, on_policy_output, off_policy_output = self.run_policy(
model_output, = self.run_policy(
**{k: self.input_queues[k] for k in POLICY_INPUTS if k in self.input_queues}, img=img, big_img=big_img
)

vision_output = vision_output.numpy().flatten()
off_policy_output = off_policy_output.numpy().flatten()
on_policy_output = on_policy_output.numpy().flatten()
vision_outputs_dict = self.parser.parse_vision_outputs(self.slice_outputs(vision_output, self.vision_output_slices))
off_policy_outputs_dict = self.parser.parse_off_policy_outputs(self.slice_outputs(off_policy_output, self.off_policy_output_slices))
policy_outputs_dict = self.parser.parse_policy_outputs(self.slice_outputs(on_policy_output, self.policy_output_slices))
model_output = model_output.numpy().flatten()
parsed_outputs = {k: model_output[np.newaxis, v] for k,v in self.output_slices.items()}
vision_outputs_dict = self.parser.parse_vision_outputs(parsed_outputs)
off_policy_outputs_dict = self.parser.parse_off_policy_outputs(parsed_outputs)
policy_outputs_dict = self.parser.parse_policy_outputs(parsed_outputs)
combined_outputs_dict = {**vision_outputs_dict, **off_policy_outputs_dict, **policy_outputs_dict}

if SEND_RAW_PRED:
combined_outputs_dict['raw_pred'] = np.concatenate([vision_output.copy(), on_policy_output.copy(), off_policy_output.copy()])
combined_outputs_dict['raw_pred'] = model_output.copy()
return combined_outputs_dict


Expand Down
17 changes: 8 additions & 9 deletions selfdrive/modeld/models/README.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
## Neural networks in openpilot
To view the architecture of the ONNX networks, you can use [netron](https://netron.app/)

## Driving Model (vision model + temporal policy model)
### Vision inputs (Full size: 799906 x float32)
## Driving Model
`driving.onnx` and `big_driving.onnx` each contain the vision model, off-policy temporal policy, and on-policy temporal policy in one graph.

### Inputs
* **image stream**
* Two consecutive images (256 * 512 * 3 in RGB) recorded at 20 Hz : 393216 = 2 * 6 * 128 * 256
* Each 256 * 512 image is represented in YUV420 with 6 channels : 6 * 128 * 256
Expand All @@ -15,20 +17,17 @@ To view the architecture of the ONNX networks, you can use [netron](https://netr
* Channels 0,1,2,3 represent the full-res Y channel and are represented in numpy as Y[::2, ::2], Y[::2, 1::2], Y[1::2, ::2], and Y[1::2, 1::2]
* Channel 4 represents the half-res U channel
* Channel 5 represents the half-res V channel
### Policy inputs
* **desire**
* one-hot encoded buffer that commands the model to execute certain actions; the bit needs to be sent for the past 5 seconds (at 20FPS) : 100 * 8
* **traffic convention**
* one-hot encoded vector that tells the model whether traffic is right-hand or left-hand : 2
* **lateral control params**
* speed and steering delay for predicting the desired curvature: 2
* **previous desired curvatures**
* vector of previously predicted desired curvatures: 100 * 1
* **action t**
* lateral and longitudinal action times: 2
* **feature buffer**
* a buffer of intermediate features including the current feature to form a 5 seconds temporal context (at 20FPS) : 100 * 512
* a full-rate queue of previous intermediate vision features; the graph appends the current feature and samples the temporal policy context


### Driving Model output format (Full size: XXX x float32)
### Output
Refer to the **output_slices** handling in `run` and **parse_vision_outputs/parse_off_policy_outputs/parse_policy_outputs** in modeld.


Expand Down
3 changes: 3 additions & 0 deletions selfdrive/modeld/models/big_driving.onnx
Git LFS file not shown
3 changes: 0 additions & 3 deletions selfdrive/modeld/models/big_driving_off_policy.onnx

This file was deleted.

3 changes: 0 additions & 3 deletions selfdrive/modeld/models/big_driving_on_policy.onnx

This file was deleted.

3 changes: 0 additions & 3 deletions selfdrive/modeld/models/big_driving_vision.onnx

This file was deleted.

3 changes: 3 additions & 0 deletions selfdrive/modeld/models/driving.onnx
Git LFS file not shown
3 changes: 0 additions & 3 deletions selfdrive/modeld/models/driving_off_policy.onnx

This file was deleted.

3 changes: 0 additions & 3 deletions selfdrive/modeld/models/driving_on_policy.onnx

This file was deleted.

3 changes: 0 additions & 3 deletions selfdrive/modeld/models/driving_vision.onnx

This file was deleted.

Loading