From d7c9391e36102588e1b5cc9b46d132633c9e4267 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 16:44:54 +0000 Subject: [PATCH 01/15] [Feature] Timeout template added --- src/finn/custom_op/fpgadataflow/hlsbackend.py | 14 ++++++ src/finn/custom_op/fpgadataflow/templates.py | 45 +++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/src/finn/custom_op/fpgadataflow/hlsbackend.py b/src/finn/custom_op/fpgadataflow/hlsbackend.py index d8210fd684..c03a9029db 100644 --- a/src/finn/custom_op/fpgadataflow/hlsbackend.py +++ b/src/finn/custom_op/fpgadataflow/hlsbackend.py @@ -474,3 +474,17 @@ def get_ap_int_max_w(self): ret = max([instream, outstream]) assert ret <= 8191, "AP_INT_MAX_W=%d is larger than allowed maximum of 8191" % ret return ret + + def timeout_value(self): + """Set timeout value for HLS functions defined for one clock cycle""" + self.code_gen_dict["$TIMEOUT_VALUE$"] = ["100"] + + def timeout_condition(self): + """Set timeout condition for HLS functions defined for one clock cycle""" + self.code_gen_dict["$TIMEOUT_CONDITION$"] = ["out_{}.empty()".format(self.hls_sname())] + + def timeout_read_stream(self): + """Set reading output stream procedure for HLS functions defined for one clock cycle""" + self.code_gen_dict["$TIMEOUT_READ_STREAM$"] = [ + "debug_out_{} << out_{}.read();".format(self.hls_sname(), self.hls_sname()) + ] diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 3d89a0ab23..7ef74118ec 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -58,6 +58,51 @@ """ +# template for single node execution with timeout (for single clock hls operations) +docompute_template_timeout = """ +#define AP_INT_MAX_W $AP_INT_MAX_W$ +#include "cnpy.h" +#include "npy2apintstream.hpp" +#include "npy2vectorstream.hpp" +#include +#include "bnn-library.h" + +// includes for network parameters +$GLOBALS$ + +// defines for 
network parameters +$DEFINES$ + +int main(){ +$PRAGMAS$ + +$STREAMDECLARATIONS$ + +$READNPYDATA$ + +unsigned timeout = 0; +while(timeout < $TIMEOUT_VALUE$){ + +$DOCOMPUTE$ + +if($TIMEOUT_CONDITION$){ +timeout++; +} + +else{ +$TIMEOUT_READ_STREAM$ +timeout = 0; +} +} + +$DATAOUTSTREAM$ + +$SAVEASCNPY$ + +} + +""" + # templates for single node ip generation # cpp file From 6da0ce4d10db86f2eea3bb513164c752401956d8 Mon Sep 17 00:00:00 2001 From: mdaniowi Date: Fri, 20 Sep 2024 16:02:40 +0100 Subject: [PATCH 02/15] [Feature] npy2vectorstream.hpp include added to docompute_template --- src/finn/custom_op/fpgadataflow/templates.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/finn/custom_op/fpgadataflow/templates.py b/src/finn/custom_op/fpgadataflow/templates.py index 7ef74118ec..d2100a7516 100644 --- a/src/finn/custom_op/fpgadataflow/templates.py +++ b/src/finn/custom_op/fpgadataflow/templates.py @@ -32,6 +32,7 @@ #define AP_INT_MAX_W $AP_INT_MAX_W$ #include "cnpy.h" #include "npy2apintstream.hpp" +#include "npy2vectorstream.hpp" #include #include "bnn-library.h" From 39a2efef2fca5f356b9d32017227f1a044a0a0da Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 15:01:54 +0000 Subject: [PATCH 03/15] [Feature] Moving operations past Split added, MoveIdenticalOpPastJoinOp refactored and derived from by MoveTransposePastJoinAdd, MoveMulPastJoinAdd, MoveAddPastJoinAdd, MoveTransposePastJoinConcat, MoveAffinePastJoinConcat --- src/finn/transformation/streamline/reorder.py | 399 ++++++++++++++++-- .../test_move_identical_op_past_join_add.py | 150 +++++++ ...test_move_identical_op_past_join_concat.py | 183 ++++++++ .../test_move_identical_op_past_join_op.py | 114 ----- .../test_move_identical_op_past_split.py | 145 +++++++ 5 files changed, 839 insertions(+), 152 deletions(-) create mode 100644 tests/transformation/streamline/test_move_identical_op_past_join_add.py create mode 100644 
tests/transformation/streamline/test_move_identical_op_past_join_concat.py delete mode 100644 tests/transformation/streamline/test_move_identical_op_past_join_op.py create mode 100644 tests/transformation/streamline/test_move_identical_op_past_split.py diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py index 9a7e9d0723..33751cb4d8 100644 --- a/src/finn/transformation/streamline/reorder.py +++ b/src/finn/transformation/streamline/reorder.py @@ -518,7 +518,9 @@ def apply(self, model): class MoveLinearPastEltwiseAdd(Transformation): - """Move linear operations (mul, add) past elementwise add operations where possible. + """ + DEPRECATED, use MoveAddPastJoinAdd() and MoveMulPastJoinAdd() + Move linear operations (mul, add) past elementwise add operations where possible. Specifically,matches and transforms the following patterns: (x*C) + (y*C) -> (x + y) * C (x+A) + (y+B) -> (x + y) + (A + B) @@ -918,6 +920,121 @@ def __init__(self): super().__init__(["Transpose"]) +def permute_shape(shape, perm): + new_shape = np.zeros(len(shape)) + for i, p in enumerate(perm): + new_shape[i] = shape[p] + return [int(el) for el in new_shape] + + +class MoveScalarLinearPastSplit(Transformation): + """ + Move scalar Mul and Add nodes past channel split operation. 
+ """ + + def __init__(self): + super().__init__() + self.ops_to_move = ["Mul", "Add"] + self.fork_ops = ["Split"] + + def apply(self, model): + graph = model.graph + graph_modified = False + node_ind = 0 + for n in graph.node: + node_ind += 1 + # if n.op_type in self.fork_ops and model.is_fork_node(n): + if n.op_type in self.fork_ops: + producer = model.find_producer(n.input[0]) + if producer is not None and producer.op_type in self.ops_to_move: + linear_param = model.get_initializer(producer.input[1]) + # Check if single input + if len(producer.input) != 2 or linear_param is None: + continue + # Check if scalar + if np.prod(linear_param.shape) != 1: + continue + split_outputs = n.output + for split_output_idx, old_split_output in enumerate(split_outputs): + new_mul_node = deepcopy(producer) + new_split_output = model.make_new_valueinfo_name() + model.set_tensor_datatype( + new_split_output, model.get_tensor_datatype(producer.input[0]) + ) + + model.set_tensor_shape( + new_split_output, model.get_tensor_shape(old_split_output) + ) + + n.output[split_output_idx] = new_split_output + new_mul_node.input[0] = new_split_output + new_mul_node.output[0] = old_split_output + + graph.node.insert(node_ind, new_mul_node) + node_ind += 1 + + # remove the mul node + n.input[0] = producer.input[0] + graph.node.remove(producer) + graph_modified = True + + if graph_modified: + model = model.transform(SortGraph(), make_deepcopy=False, cleanup=False) + + return (model, graph_modified) + + +class MoveTransposePastSplit(Transformation): + def __init__(self): + super().__init__() + self.ops_to_move = ["Transpose"] + self.fork_ops = ["Split"] + + def apply(self, model): + graph = model.graph + graph_modified = False + node_ind = 0 + for n in graph.node: + node_ind += 1 + # if n.op_type in self.fork_ops and model.is_fork_node(n): + if n.op_type in self.fork_ops: + producer = model.find_producer(n.input[0]) + if producer is not None and producer.op_type in self.ops_to_move: + 
initial_perm = get_by_name(producer.attribute, "perm").ints + reverse_perm = np.argsort(initial_perm) + split_outputs = n.output + for split_output_idx, old_split_output in enumerate(split_outputs): + new_trans_node = deepcopy(producer) + new_split_output = model.make_new_valueinfo_name() + old_split_output_shape = model.get_tensor_shape(old_split_output) + model.set_tensor_datatype( + new_split_output, model.get_tensor_datatype(producer.input[0]) + ) + + model.set_tensor_shape( + new_split_output, permute_shape(old_split_output_shape, reverse_perm) + ) + + n.output[split_output_idx] = new_split_output + new_trans_node.input[0] = new_split_output + new_trans_node.output[0] = old_split_output + + graph.node.insert(node_ind, new_trans_node) + node_ind += 1 + + # remove the transpose node and change the split axis + old_split_axis = get_by_name(n.attribute, "axis").i + get_by_name(n.attribute, "axis").i = initial_perm[old_split_axis] + n.input[0] = producer.input[0] + graph.node.remove(producer) + graph_modified = True + + if graph_modified: + model = model.transform(SortGraph(), make_deepcopy=False, cleanup=False) + + return (model, graph_modified) + + class MoveMaxPoolPastMultiThreshold(Transformation): """Move MaxPool nodes past MultiThreshold nodes on linear segments of the graph.""" @@ -1188,13 +1305,8 @@ def apply(self, model): class MoveIdenticalOpPastJoinOp(Transformation): """ - Move identical operations on different branches past the common join node. - This transformation assumes that the identical operations only change the - data layout. For linear operations, see the transformation MoveLinearPastEltwiseAdd. - Specifically, this transformation matches and transforms the following patterns: - f(x) + f(y) -> f(x + y) - where f(.) is currently only supporting 'Transpose', and an 'Add' node is - the join node. + Move multiple identical operations on different branches past the common join node. 
+ It assumes the shape to be preserved by the join op in the default move_node() method """ def __init__(self, identical_op_list, join_node_list): @@ -1202,52 +1314,77 @@ def __init__(self, identical_op_list, join_node_list): self.ops_to_move = identical_op_list self.join_node_op = join_node_list - def move_node(self, model, n, prod0, prod1): - # Found! move one of the identical_ops to output, remove the other one - identical_op0_in0 = prod0.input[0] - identical_op1_in0 = prod1.input[0] - add_in0 = n.input[0] - add_out = n.output[0] + def move_node(self, model, n, producers): + """ + Should be overwritten for some operations + + Returns: + bool: whether moving the node was successful + """ + identical_ops_inputs = [p.input[0] for p in producers] + # join_in0 = n.input[0] + join_out = n.output[0] - # Rewire - n.input[0] = identical_op0_in0 - n.input[1] = identical_op1_in0 + # Rewire join op inputs + for i in range(len(n.input)): + n.input[i] = identical_ops_inputs[i] # Output tensor of the join node must have the same shape as # its input tensor (original shape is preserved) - new_shape = model.get_tensor_shape(identical_op0_in0) + new_join_output = model.make_new_valueinfo_name() + new_shape = model.get_tensor_shape(identical_ops_inputs[0]) + new_layout = model.get_tensor_layout(identical_ops_inputs[0]) # Set new tensor shape - model.set_tensor_shape(tensor_name=add_in0, tensor_shape=new_shape) - - n.output[0] = add_in0 - prod0.input[0] = add_in0 - prod0.output[0] = add_out - - model.graph.node.remove(prod1) + model.set_tensor_shape(new_join_output, new_shape) + if new_layout: + model.set_tensor_layout(new_join_output, new_layout) + + # Rewire join op outputs (reuse the first join input tensor) + n.output[0] = new_join_output + producers[0].input[0] = new_join_output + producers[0].output[0] = join_out + + for prod in producers[1:]: + model.graph.node.remove(prod) + + return True + + def are_producers_identical(self, model, producers): + """ + Checks only op_types 
+ Should be overwritten for additional checks + """ + op_types = [prod.op_type for prod in producers] + for op in op_types: + if op != op_types[0]: + return False + return True def apply(self, model): graph = model.graph graph_modified = False for n in graph.node: if n.op_type in self.join_node_op and model.is_join_node(n): - in0 = n.input[0] - in1 = n.input[1] - if in0 is None or in1 is None: + inputs = n.input + if None in inputs: continue - prod0 = model.find_producer(in0) - prod1 = model.find_producer(in1) - # Checks if the join node is preceded by - # two different, but identical operations - if prod0 == prod1: + producers = [model.find_producer(inp) for inp in inputs] + if producers[0].op_type not in self.ops_to_move: + continue + identical_ops = self.are_producers_identical(model, producers) + if not identical_ops: + warnings.warn("Producers not identical, skipping") continue - identical_op = prod0.op_type == prod1.op_type - - if identical_op and prod0.op_type in self.ops_to_move: - self.move_node(model, n, prod0, prod1) - graph_modified = True + # check for producers that are fork nodes (need to fork them before our transform) + for prod in producers: + if model.is_fork_node(prod) and not model.is_join_node(prod): + model = model.transform(MoveOpPastFork(self.ops_to_move)) + # topology modified, "ask" ModelWrapper to apply this transform again + return (model, True) + graph_modified = self.move_node(model, n, producers) if graph_modified: model = model.transform(SortGraph(), make_deepcopy=False, cleanup=False) @@ -1258,3 +1395,189 @@ def apply(self, model): class MoveTransposePastJoinAdd(MoveIdenticalOpPastJoinOp): def __init__(self): super().__init__(["Transpose"], ["Add"]) + + def are_producers_identical(self, model, producers): + if not super().are_producers_identical(model, producers): + return False + first_perm = get_by_name(producers[0].attribute, "perm").ints + for producer in producers: + if first_perm != get_by_name(producer.attribute, 
"perm").ints: + False + return True + + +class MoveMulPastJoinAdd(MoveIdenticalOpPastJoinOp): + def __init__(self): + super().__init__(["Mul"], ["Add"]) + + def are_producers_identical(self, model, producers): + if not super().are_producers_identical(model, producers): + return False + first_mul = model.get_initializer(producers[0].input[1]) + if first_mul is None: + return False + for producer in producers: + if first_mul != model.get_initializer(producer.input[1]): + return False + return True + + +class MoveAddPastJoinAdd(MoveIdenticalOpPastJoinOp): + def __init__(self): + super().__init__(["Add"], ["Add"]) + + def are_producers_identical(self, model, producers): + if not super().are_producers_identical(model, producers): + return False + for producer in producers: + if model.get_initializer(producer.input[1]) is None: + return False + return True + + def move_node(self, model, n, producers): + """ + We use the base move_node method to move the first producer + past the join node (and delete the rest) + """ + add_inits = [model.get_initializer(producer.input[1]) for producer in producers] + new_init = np.sum(add_inits) + model.set_initializer(producers[0].input[1], new_init) + super().move_node(model, n, producers) + + return True + + +class MoveTransposePastJoinConcat(MoveIdenticalOpPastJoinOp): + def __init__(self): + super().__init__(["Transpose"], ["Concat"]) + + def are_producers_identical(self, model, producers): + if not super().are_producers_identical(model, producers): + return False + first_perm = get_by_name(producers[0].attribute, "perm").ints + for producer in producers: + if first_perm != get_by_name(producer.attribute, "perm").ints: + False + return True + + def move_node(self, model, n, producers): + trans_inputs = [prod.input[0] for prod in producers] + # concat_in0 = n.input[0] + concat_out = n.output[0] + # Rewire concat inputs + for i in range(len(n.input)): + n.input[i] = trans_inputs[i] + + new_concat_out = model.make_new_valueinfo_name() # 
reuse tensor + # reverse the permutation of the concat output + transpose_perm = get_by_name(producers[0].attribute, "perm").ints + reverse_perm = np.argsort(transpose_perm) + new_concat_out_shape = permute_shape(model.get_tensor_shape(concat_out), reverse_perm) + new_concat_out_layout = model.get_tensor_layout(trans_inputs[0]) + # Set tensor layout and shape of the new concatenation output + model.set_tensor_shape(new_concat_out, new_concat_out_shape) + if new_concat_out_layout: + model.set_tensor_layout(new_concat_out, new_concat_out_layout) + # Change concatenation axis + old_concat_axis = get_by_name(n.attribute, "axis").i + get_by_name(n.attribute, "axis").i = transpose_perm[old_concat_axis] + + # Rewire concat output + n.output[0] = new_concat_out + producers[0].input[0] = new_concat_out + producers[0].output[0] = concat_out + + for prod in producers[1:]: + model.graph.node.remove(prod) + + return True + + +class MoveAffinePastJoinConcat(MoveIdenticalOpPastJoinOp): + """ + Applies to scalar linear or channelwise affine ops with the same parameter value + """ + + def __init__(self, linear_ops=["Mul", "Add"]): + super().__init__(linear_ops, ["Concat"]) + + def are_producers_identical_scalar_ops(self, model, producers): + first_param = model.get_initializer(producers[0].input[1]) + for producer in producers: + producer_param = model.get_initializer(producer.input[1]) + if (first_param != producer_param).any() or np.prod(producer_param.shape) != 1: + return False + + return True + + def are_producers_channelwise_ops(self, channel_dim, model, producers): + for producer in producers: + producer_input = producer.input[0] + num_channels = model.get_tensor_shape(producer_input)[channel_dim] + producer_param = model.get_initializer(producer.input[1]) + if ( + len(producer_param.shape) < channel_dim + or producer_param.shape[channel_dim] != num_channels + ): + return False + + return True + + def move_node(self, model, n, producers): + # check if single input + for 
producer in producers: + producer_init = model.get_initializer(producer.input[1]) + if len(producer.input) != 2 or producer_init is None: + warnings.warn("Producer found that is not single-input, skipping") + return False + + # decide if producers are identical scalar ops or channelwise ops + channelwise_op = False + identical_scalar_op = self.are_producers_identical_scalar_ops(model, producers) + if not identical_scalar_op: + channel_dim = get_by_name(n.attribute, "axis").i + channelwise_op = self.are_producers_channelwise_ops(channel_dim, model, producers) + if not channelwise_op: + warnings.warn( + "Producers are neither identical scalar ops nor channelwise ops, skipping" + ) + return False + + # Rewire concat inputs + producers_inputs = [prod.input[0] for prod in producers] + concat_out = n.output[0] + for i in range(len(n.input)): + n.input[i] = producers_inputs[i] + # Set tensor layout and shape of the new concatenation output + new_concat_out = model.make_new_valueinfo_name() + new_concat_out_layout = model.get_tensor_layout(producers_inputs[0]) + model.set_tensor_shape(new_concat_out, model.get_tensor_shape(concat_out)) + if new_concat_out_layout: + model.set_tensor_layout(new_concat_out, new_concat_out_layout) + model.set_tensor_datatype(new_concat_out, model.get_tensor_datatype(producers_inputs[0])) + + if channelwise_op: + # concatenate op params of producers into one mul tensor + producers_params = [model.get_initializer(prod.input[1]) for prod in producers] + new_mul_tensor = np.concatenate(producers_params, axis=channel_dim) + model.set_initializer(producers[0].input[1], new_mul_tensor) + + # Rewire concat output + n.output[0] = new_concat_out + producers[0].input[0] = new_concat_out + producers[0].output[0] = concat_out + + for prod in producers[1:]: + model.graph.node.remove(prod) + + return True + + +class MoveMulPastJoinConcat(MoveAffinePastJoinConcat): + def __init__(self): + super().__init__(["Mul"]) + + +class 
MoveAddPastJoinConcat(MoveAffinePastJoinConcat): + def __init__(self): + super().__init__(["Add"]) diff --git a/tests/transformation/streamline/test_move_identical_op_past_join_add.py b/tests/transformation/streamline/test_move_identical_op_past_join_add.py new file mode 100644 index 0000000000..7226d31589 --- /dev/null +++ b/tests/transformation/streamline/test_move_identical_op_past_join_add.py @@ -0,0 +1,150 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+import pytest + +import numpy as np +from onnx import TensorProto +from onnx import helper as oh +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model + +import finn.core.onnx_exec as oxe +from finn.transformation.streamline.reorder import ( + MoveAddPastJoinAdd, + MoveMulPastJoinAdd, + MoveTransposePastJoinAdd, +) + + +def create_add_model(identical_op): + perm = None + if "Transpose" in identical_op: + perm = identical_op.split("_")[1] + identical_op = identical_op.split("_")[0] + perm = [int(char) for char in perm] + if perm == [0, 2, 3, 1]: + in_shape = [1, 64, 10, 9] + out_shape = [1, 10, 9, 64] + elif perm == [0, 3, 1, 2]: + in_shape = [1, 10, 9, 64] + out_shape = [1, 64, 10, 9] + else: + in_shape = [1, 64, 10, 9] + out_shape = in_shape + op_value = 1.5 + + op1_node = oh.make_node(identical_op, inputs=["in1"], outputs=["op1_out"]) + + op2_node = oh.make_node(identical_op, inputs=["in2"], outputs=["op2_out"]) + + if identical_op == "Transpose": + new_attr = oh.make_attribute("perm", perm) + op1_node.attribute.append(new_attr) + op2_node.attribute.append(new_attr) + elif identical_op == "Mul" or identical_op == "Add": + op1_init = oh.make_tensor_value_info("op1_param", TensorProto.FLOAT, [1]) + op2_init = oh.make_tensor_value_info("op2_param", TensorProto.FLOAT, [1]) + op1_node.input.append(op1_init.name) + op2_node.input.append(op2_init.name) + + add_node = oh.make_node("Add", inputs=["op1_out", "op2_out"], outputs=["out_join1"]) + + in1 = oh.make_tensor_value_info("in1", TensorProto.FLOAT, in_shape) + in2 = oh.make_tensor_value_info("in2", TensorProto.FLOAT, in_shape) + op1_out = oh.make_tensor_value_info("op1_out", TensorProto.FLOAT, out_shape) + op2_out = oh.make_tensor_value_info("op2_out", TensorProto.FLOAT, out_shape) + out_join1 = oh.make_tensor_value_info("out_join1", TensorProto.FLOAT, out_shape) + + graph = oh.make_graph( + nodes=[op1_node, op2_node, add_node], + name="test_graph", + 
inputs=[in1, in2], + outputs=[out_join1], + value_info=[ + op1_out, + op2_out, + ], + ) + + onnx_model = qonnx_make_model(graph, producer_name="test_model") + model = ModelWrapper(onnx_model) + if identical_op == "Mul" or identical_op == "Add": + model.set_initializer("op1_param", np.array(op_value).astype(np.float32)) + model.set_initializer("op2_param", np.array(op_value).astype(np.float32)) + + return model + + +transform_dict = { + "Transpose_0231": MoveTransposePastJoinAdd(), + "Transpose_0312": MoveTransposePastJoinAdd(), + "Mul": MoveMulPastJoinAdd(), + "Add": MoveAddPastJoinAdd(), +} + + +@pytest.mark.streamline +# Permutation of transpose node +@pytest.mark.parametrize("identical_op", ["Transpose_0231", "Transpose_0312", "Mul", "Add"]) +def test_move_identical_op_past_join_op(identical_op): + model = create_add_model(identical_op) + # build_dir = os.environ["FINN_BUILD_DIR"] + # model.save(join(build_dir, "add_pytest_model_{}.onnx".format(identical_op))) + + # Create input data + input0_tensor_name = model.graph.input[0].name + input1_tensor_name = model.graph.input[1].name + + # Note: it is assumed that both tensors have the same shape and data type + input_shape = model.get_tensor_shape(input0_tensor_name) + input_dtype = model.get_tensor_datatype(input0_tensor_name) + input_val = gen_finn_dt_tensor(input_dtype, input_shape) + input_dict = {} + input_dict[input0_tensor_name] = input_val + input_dict[input1_tensor_name] = input_val + + model_transformed = model.transform(transform_dict[identical_op]) + # model_transformed.save(join(build_dir, "add_pytest_model_{}_trans.onnx".format(identical_op))) + + assert oxe.compare_execution(model, model_transformed, input_dict) + + # Check if order changed + node0_optype_model = model.find_consumers(model.graph.input[0].name)[0].op_type + node1_optype_model = model.find_consumers(model.graph.input[1].name)[0].op_type + node0_optype_model_transformed = model_transformed.find_consumers( + 
model_transformed.graph.input[0].name + )[0].op_type + node1_optype_model_transformed = model_transformed.find_consumers( + model_transformed.graph.input[1].name + )[0].op_type + last_node_optype_model_transformed = model_transformed.find_producer( + model_transformed.graph.output[0].name + ).op_type + assert node0_optype_model == last_node_optype_model_transformed + assert node1_optype_model == last_node_optype_model_transformed + assert node0_optype_model_transformed == node1_optype_model_transformed == "Add" diff --git a/tests/transformation/streamline/test_move_identical_op_past_join_concat.py b/tests/transformation/streamline/test_move_identical_op_past_join_concat.py new file mode 100644 index 0000000000..2dcf90d10a --- /dev/null +++ b/tests/transformation/streamline/test_move_identical_op_past_join_concat.py @@ -0,0 +1,183 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import pytest + +import numpy as np +import os +from onnx import TensorProto +from onnx import helper as oh +from os.path import join +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model + +import finn.core.onnx_exec as oxe +from finn.transformation.streamline.reorder import ( + MoveAddPastJoinConcat, + MoveMulPastJoinConcat, + MoveTransposePastJoinConcat, +) + + +def create_concat_model(identical_op): + perm = None + channelwise = False + if "Transpose" in identical_op: + perm = identical_op.split("_")[1] + identical_op = identical_op.split("_")[0] + perm = [int(char) for char in perm] + if "channelwise" in identical_op: + channelwise = True + identical_op = identical_op.split("_")[0] + if perm == [0, 2, 3, 1]: + in_shape1 = [1, 64, 10, 9] + in_shape2 = [1, 32, 10, 9] + out_shape1 = [1, 10, 9, 64] + out_shape2 = [1, 10, 9, 32] + out_join_shape = [1, 10, 9, 96] + concat_axis = 3 + elif perm == [0, 3, 1, 2]: + in_shape1 = [1, 10, 9, 64] + in_shape2 = [1, 10, 9, 32] + out_shape1 = [1, 64, 10, 9] + out_shape2 = [1, 32, 10, 9] + out_join_shape = [1, 96, 10, 9] + concat_axis = 1 + else: + in_shape1 = [1, 64, 10, 9] + in_shape2 = [1, 32, 10, 9] + out_shape1 = in_shape1 + out_shape2 = in_shape2 + out_join_shape = [1, 96, 10, 9] + concat_axis = 1 + if channelwise: + op1_param_shape = [1, 64, 1, 1] + op2_param_shape = [1, 32, 1, 1] + op1_param = np.ones((1, 64, 1, 1)) * 2 + op2_param = 
np.ones((1, 32, 1, 1)) * 3 + else: + op1_param_shape = [1] + op2_param_shape = [1] + op1_param = 1.5 + op2_param = 1.5 + + op1_node = oh.make_node(identical_op, inputs=["in1"], outputs=["op1_out"]) + + op2_node = oh.make_node(identical_op, inputs=["in2"], outputs=["op2_out"]) + + if identical_op == "Transpose": + new_attr = oh.make_attribute("perm", perm) + op1_node.attribute.append(new_attr) + op2_node.attribute.append(new_attr) + elif identical_op == "Mul" or identical_op == "Add": + op1_init = oh.make_tensor_value_info("op1_param", TensorProto.FLOAT, op1_param_shape) + op2_init = oh.make_tensor_value_info("op2_param", TensorProto.FLOAT, op2_param_shape) + op1_node.input.append(op1_init.name) + op2_node.input.append(op2_init.name) + + concat_node = oh.make_node( + "Concat", inputs=["op1_out", "op2_out"], outputs=["out_join1"], axis=concat_axis + ) + + in1 = oh.make_tensor_value_info("in1", TensorProto.FLOAT, in_shape1) + in2 = oh.make_tensor_value_info("in2", TensorProto.FLOAT, in_shape2) + op1_out = oh.make_tensor_value_info("op1_out", TensorProto.FLOAT, out_shape1) + op2_out = oh.make_tensor_value_info("op2_out", TensorProto.FLOAT, out_shape2) + out_join1 = oh.make_tensor_value_info("out_join1", TensorProto.FLOAT, out_join_shape) + + graph = oh.make_graph( + nodes=[op1_node, op2_node, concat_node], + name="test_graph", + inputs=[in1, in2], + outputs=[out_join1], + value_info=[ + op1_out, + op2_out, + ], + ) + + onnx_model = qonnx_make_model(graph, producer_name="test_model") + model = ModelWrapper(onnx_model) + if identical_op == "Mul" or identical_op == "Add": + model.set_initializer("op1_param", np.array(op1_param).astype(np.float32)) + model.set_initializer("op2_param", np.array(op2_param).astype(np.float32)) + + return model + + +transform_dict = { + "Transpose_0231": MoveTransposePastJoinConcat(), + "Transpose_0312": MoveTransposePastJoinConcat(), + "Mul": MoveMulPastJoinConcat(), + "Mul_channelwise": MoveMulPastJoinConcat(), + "Add": 
MoveAddPastJoinConcat(), + "Add_channelwise": MoveAddPastJoinConcat(), +} + + +@pytest.mark.streamline +# Permutation of transpose node +@pytest.mark.parametrize( + "identical_op", + ["Transpose_0231", "Transpose_0312", "Mul", "Add", "Mul_channelwise", "Add_channelwise"], +) +def test_move_identical_op_past_join_concat(identical_op): + model = create_concat_model(identical_op) + build_dir = os.environ["FINN_BUILD_DIR"] + model.save(join(build_dir, "concat_pytest_model_{}.onnx".format(identical_op))) + + # Create input data + input0_tensor_name = model.graph.input[0].name + input1_tensor_name = model.graph.input[1].name + + # Note: it is assumed that both tensors have the same shape and data type + input_dict = {} + input_dict[input0_tensor_name] = gen_finn_dt_tensor( + model.get_tensor_datatype(input0_tensor_name), model.get_tensor_shape(input0_tensor_name) + ) + input_dict[input1_tensor_name] = gen_finn_dt_tensor( + model.get_tensor_datatype(input1_tensor_name), model.get_tensor_shape(input1_tensor_name) + ) + + model_transformed = model.transform(transform_dict[identical_op]) + model_transformed.save( + join(build_dir, "concat_pytest_model_{}_trans.onnx".format(identical_op)) + ) + + assert oxe.compare_execution(model, model_transformed, input_dict) + + # Check if order changed + node0_input0_model = model.find_consumers(model.graph.input[0].name)[0].op_type + node1_input1_model = model.find_consumers(model.graph.input[1].name)[0].op_type + node0_input0_model_transformed = model_transformed.find_consumers( + model_transformed.graph.input[0].name + )[0].op_type + node1_input1_model_transformed = model_transformed.find_consumers( + model_transformed.graph.input[1].name + )[0].op_type + assert node0_input0_model != node0_input0_model_transformed + assert node1_input1_model != node1_input1_model_transformed diff --git a/tests/transformation/streamline/test_move_identical_op_past_join_op.py b/tests/transformation/streamline/test_move_identical_op_past_join_op.py 
deleted file mode 100644 index dd83681fc2..0000000000 --- a/tests/transformation/streamline/test_move_identical_op_past_join_op.py +++ /dev/null @@ -1,114 +0,0 @@ -# Copyright (c) 2020, Xilinx -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of FINN nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-import pytest - -from onnx import TensorProto -from onnx import helper as oh -from qonnx.core.modelwrapper import ModelWrapper -from qonnx.util.basic import gen_finn_dt_tensor, qonnx_make_model - -import finn.core.onnx_exec as oxe -from finn.transformation.streamline.reorder import MoveTransposePastJoinAdd - - -def create_model(perm): - if perm == [0, 3, 1, 2]: - in_shape = [1, 128, 1, 256] - out_shape = [1, 256, 128, 1] - if perm == [0, 2, 3, 1]: - in_shape = [1, 256, 128, 1] - out_shape = [1, 128, 1, 256] - - Transpose1_node = oh.make_node( - "Transpose", inputs=["in_transpose1"], outputs=["out_transpose1"], perm=perm - ) - - Transpose2_node = oh.make_node( - "Transpose", inputs=["in_transpose2"], outputs=["out_transpose2"], perm=perm - ) - - Join1_node = oh.make_node( - "Add", inputs=["out_transpose1", "out_transpose2"], outputs=["out_join1"] - ) - - in_transpose1 = oh.make_tensor_value_info("in_transpose1", TensorProto.FLOAT, in_shape) - in_transpose2 = oh.make_tensor_value_info("in_transpose2", TensorProto.FLOAT, in_shape) - out_transpose1 = oh.make_tensor_value_info("out_transpose1", TensorProto.FLOAT, out_shape) - out_transpose2 = oh.make_tensor_value_info("out_transpose2", TensorProto.FLOAT, out_shape) - out_join1 = oh.make_tensor_value_info("out_join1", TensorProto.FLOAT, out_shape) - - graph = oh.make_graph( - nodes=[Transpose1_node, Transpose2_node, Join1_node], - name="test_graph", - inputs=[in_transpose1, in_transpose2], - outputs=[out_join1], - value_info=[ - out_transpose1, - out_transpose2, - ], - ) - - onnx_model = qonnx_make_model(graph, producer_name="test_model") - model = ModelWrapper(onnx_model) - - return model - - -@pytest.mark.streamline -# Permutation of transpose node -@pytest.mark.parametrize("perm", [[0, 3, 1, 2], [0, 2, 3, 1]]) -def test_move_identical_op_past_join_op(perm): - model = create_model(perm) - - # Create input data - input0_tensor_name = model.graph.input[0].name - input1_tensor_name = model.graph.input[1].name - - # Note: 
it is assumed that both tensors have the same shape and data type - input_shape = model.get_tensor_shape(input0_tensor_name) - input_dtype = model.get_tensor_datatype(input0_tensor_name) - input_val = gen_finn_dt_tensor(input_dtype, input_shape) - input_dict = {} - input_dict[input0_tensor_name] = input_val - input_dict[input1_tensor_name] = input_val - - model_transformed = model.transform(MoveTransposePastJoinAdd()) - - assert oxe.compare_execution(model, model_transformed, input_dict) - - # Check if order changed - node0_input0_model = model.find_consumers(model.graph.input[0].name)[0].op_type - node1_input1_model = model.find_consumers(model.graph.input[1].name)[0].op_type - node0_input0_model_transformed = model_transformed.find_consumers( - model_transformed.graph.input[0].name - )[0].op_type - node1_input1_model_transformed = model_transformed.find_consumers( - model_transformed.graph.input[1].name - )[0].op_type - assert node0_input0_model != node0_input0_model_transformed - assert node1_input1_model != node1_input1_model_transformed diff --git a/tests/transformation/streamline/test_move_identical_op_past_split.py b/tests/transformation/streamline/test_move_identical_op_past_split.py new file mode 100644 index 0000000000..a104f179be --- /dev/null +++ b/tests/transformation/streamline/test_move_identical_op_past_split.py @@ -0,0 +1,145 @@ +# Copyright (c) 2020, Xilinx +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. 
+# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import pytest + +import numpy as np +from onnx import TensorProto +from onnx import helper as oh +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.transformation.general import GiveUniqueNodeNames +from qonnx.util.basic import gen_finn_dt_tensor + +import finn.core.onnx_exec as oxe +from finn.transformation.streamline.reorder import ( + MoveScalarLinearPastSplit, + MoveTransposePastSplit, +) + + +def create_split_model(identical_op): + perm = None + if "Transpose" in identical_op: + perm = identical_op.split("_")[1] + identical_op = identical_op.split("_")[0] + perm = [int(char) for char in perm] + if perm == [0, 2, 3, 1]: + in_shape = [1, 96, 10, 9] + out_shape = [1, 10, 9, 96] + out1_split_shape = [1, 10, 9, 32] + out2_split_shape = [1, 10, 9, 64] + split_axis = 3 + elif perm == [0, 3, 1, 2]: + in_shape = [1, 10, 9, 96] + out_shape = [1, 96, 10, 9] + out1_split_shape = [1, 32, 10, 9] + out2_split_shape = [1, 64, 10, 9] + split_axis = 1 + else: + in_shape = [1, 96, 10, 9] + out_shape = in_shape 
+ out1_split_shape = [1, 32, 10, 9] + out2_split_shape = [1, 64, 10, 9] + split_axis = 1 + op_value = 1.5 + split = [32, 64] + + op_node = oh.make_node(identical_op, inputs=["in1"], outputs=["op_out"]) + + if identical_op == "Transpose": + new_attr = oh.make_attribute("perm", perm) + op_node.attribute.append(new_attr) + elif identical_op == "Mul" or identical_op == "Add": + op_init = oh.make_tensor_value_info("op_param", TensorProto.FLOAT, [1]) + op_node.input.append(op_init.name) + + in1 = oh.make_tensor_value_info("in1", TensorProto.FLOAT, in_shape) + op_out = oh.make_tensor_value_info("op_out", TensorProto.FLOAT, out_shape) + out1_split = oh.make_tensor_value_info("out1_split", TensorProto.FLOAT, out1_split_shape) + out2_split = oh.make_tensor_value_info("out2_split", TensorProto.FLOAT, out2_split_shape) + split_init = oh.make_tensor_value_info("split", TensorProto.INT64, [2]) + + split_node = oh.make_node( + "Split", [op_out.name, split_init.name], [out1_split.name, out2_split.name], axis=split_axis + ) + + graph = oh.make_graph( + nodes=[op_node, split_node], + name="test_graph", + inputs=[in1], + outputs=[out1_split, out2_split], + value_info=[op_out], + ) + + model = oh.make_model(graph) + model = ModelWrapper(model) + model.set_initializer(split_init.name, np.array(split, dtype=np.int64)) + if identical_op == "Mul" or identical_op == "Add": + model.set_initializer(op_init.name, np.array(op_value).astype(np.float32)) + model = model.transform(GiveUniqueNodeNames()) + + return model + + +transform_dict = { + "Transpose_0231": MoveTransposePastSplit(), + "Transpose_0312": MoveTransposePastSplit(), + "Mul": MoveScalarLinearPastSplit(), + "Add": MoveScalarLinearPastSplit(), +} + + +@pytest.mark.streamline +# Permutation of transpose node +@pytest.mark.parametrize("identical_op", ["Transpose_0231", "Transpose_0312", "Mul", "Add"]) +def test_move_identical_op_past_join_concat(identical_op): + model = create_split_model(identical_op) + # build_dir = 
os.environ["FINN_BUILD_DIR"] + # model.save(join(build_dir, "split_pytest_model_{}.onnx".format(identical_op))) + + # Create input data + input0_tensor_name = model.graph.input[0].name + + # Note: it is assumed that both tensors have the same shape and data type + input_dict = {} + input_dict[input0_tensor_name] = gen_finn_dt_tensor( + model.get_tensor_datatype(input0_tensor_name), model.get_tensor_shape(input0_tensor_name) + ) + + model_transformed = model.transform(transform_dict[identical_op]) + # model_transformed.save( + # join(build_dir, "split_pytest_model_{}_trans.onnx".format(identical_op)) + # ) + + assert oxe.compare_execution(model, model_transformed, input_dict) + + # Check if order changed + node0_input0_model = model.find_consumers(model.graph.input[0].name)[0].op_type + node0_input0_model_transformed = model_transformed.find_consumers( + model_transformed.graph.input[0].name + )[0].op_type + assert node0_input0_model != node0_input0_model_transformed From 51a9199858166673893f8d7bea0d3f8805769232 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Wed, 18 Sep 2024 14:50:31 +0000 Subject: [PATCH 04/15] [Deprecated] MoveLinearPastEltwiseAdd() removed from the codebase --- src/finn/transformation/streamline/reorder.py | 81 -------- .../streamline/test_linear_past_eltwise.py | 192 ------------------ 2 files changed, 273 deletions(-) delete mode 100644 tests/transformation/streamline/test_linear_past_eltwise.py diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py index 33751cb4d8..8688145453 100644 --- a/src/finn/transformation/streamline/reorder.py +++ b/src/finn/transformation/streamline/reorder.py @@ -517,87 +517,6 @@ def apply(self, model): return (model, graph_modified) -class MoveLinearPastEltwiseAdd(Transformation): - """ - DEPRECATED, use MoveAddPastJoinAdd() and MoveMulPastJoinAdd() - Move linear operations (mul, add) past elementwise add operations where possible. 
- Specifically,matches and transforms the following patterns: - (x*C) + (y*C) -> (x + y) * C - (x+A) + (y+B) -> (x + y) + (A + B) - where x and y are dynamic inputs, A, B, C are constant tensors (in general). - """ - - def move_node(self, graph, n, prod0, prod1, node_ind): - # found! move one of the muls to output, remove the other one - lin0_in0 = prod0.input[0] - lin1_in0 = prod1.input[0] - in0 = n.input[0] - out = n.output[0] - # TODO: check shapes don't change through scalar mul or add - # connect the eltwise add inputs to mul inputs - n.input[0] = lin0_in0 - n.input[1] = lin1_in0 - # connect mul0 output to eltwise add output - prod0.output[0] = out - # connect the input of mul0 and output of eltwise add together - n.output[0] = in0 - prod0.input[0] = in0 - # move prod0 node past eltwise add node, and remove prod1 - graph.node.remove(prod1) - graph.node.remove(prod0) - graph.node.insert(node_ind - 2, prod0) - - def apply(self, model): - graph = model.graph - node_ind = 0 - graph_modified = False - nodes = [n for n in graph.node] - for n in nodes: - node_ind += 1 - if n.op_type == "Add": - # check for tensors on both inputs (eltwise add) - # scalar add has an initializer on one input - in0 = n.input[0] - in1 = n.input[1] - if in0 is None or in1 is None: - continue - A = model.get_initializer(in0) - B = model.get_initializer(in1) - if A is not None or B is not None: - continue - # check for mul with same initializer on both inputs - prod0 = model.find_producer(in0) - prod1 = model.find_producer(in1) - # Also check case when both branches are empty and come - # from the same node: (prod0 == prod1) - # Other transform should handle that - if prod0 is None or prod1 is None or (prod0 == prod1): - continue - if len(prod0.input) < 2 or len(prod1.input) < 2: - continue - init0 = model.get_initializer(prod0.input[1]) - init1 = model.get_initializer(prod1.input[1]) - # if either initializer is None, skip - if init0 is None or init1 is None: - continue - if prod0.op_type 
== "Mul" and prod1.op_type == "Mul": - if np.array_equal(init0, init1): - self.move_node(graph, n, prod0, prod1, node_ind) - node_ind -= 1 - graph_modified = True - elif prod0.op_type == "Add" and prod1.op_type == "Add": - init = init0 + init1 - # update initializer of prod0, which we'll move - model.set_initializer(prod0.input[1], init) - self.move_node(graph, n, prod0, prod1, node_ind) - node_ind -= 1 - graph_modified = True - else: - continue - model = model.transform(InferShapes()) - return (model, graph_modified) - - class MoveScalarLinearPastInvariants(Transformation): """Move scalar linear operations (mul, add) past functions which are invariant to them. Specifically, matches and transforms the following patterns: diff --git a/tests/transformation/streamline/test_linear_past_eltwise.py b/tests/transformation/streamline/test_linear_past_eltwise.py deleted file mode 100644 index 70fc395652..0000000000 --- a/tests/transformation/streamline/test_linear_past_eltwise.py +++ /dev/null @@ -1,192 +0,0 @@ -# Copyright (c) 2020, Xilinx -# All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions are met: -# -# * Redistributions of source code must retain the above copyright notice, this -# list of conditions and the following disclaimer. -# -# * Redistributions in binary form must reproduce the above copyright notice, -# this list of conditions and the following disclaimer in the documentation -# and/or other materials provided with the distribution. -# -# * Neither the name of FINN nor the names of its -# contributors may be used to endorse or promote products derived from -# this software without specific prior written permission. 
-# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import pytest - -import numpy as np -import os -from onnx import TensorProto, helper -from qonnx.core.modelwrapper import ModelWrapper -from qonnx.transformation.fold_constants import FoldConstants -from qonnx.transformation.general import GiveReadableTensorNames, GiveUniqueNodeNames -from qonnx.transformation.infer_shapes import InferShapes -from qonnx.util.basic import qonnx_make_model - -import finn.core.onnx_exec as oxe -from finn.transformation.streamline.reorder import MoveLinearPastEltwiseAdd - -export_onnx_path = "test_linear_past_eltwise.onnx" -np_default_dtype = np.float32 - -# construct a synthetic graph to test: -# topk insertion, topk conversion to hls, add conversion to hls -# graph should just be a sum - - -def make_model(shape): - inp1 = helper.make_tensor_value_info("inp1", TensorProto.FLOAT, shape) - inp2 = helper.make_tensor_value_info("inp2", TensorProto.FLOAT, shape) - inp1_add = helper.make_tensor_value_info("inp1_add", TensorProto.FLOAT, shape) - inp1_add_ct = helper.make_tensor_value_info("inp1_add_ct", TensorProto.FLOAT, [1]) - inp2_add = helper.make_tensor_value_info("inp2_add", TensorProto.FLOAT, shape) - inp2_add_ct = 
helper.make_tensor_value_info("inp2_add_ct", TensorProto.FLOAT, [1]) - inp1_mul = helper.make_tensor_value_info("inp1_mul", TensorProto.FLOAT, shape) - inp1_mul_ct = helper.make_tensor_value_info("inp1_mul_ct", TensorProto.FLOAT, [1]) - inp2_mul = helper.make_tensor_value_info("inp2_mul", TensorProto.FLOAT, shape) - inp2_mul_ct = helper.make_tensor_value_info("inp2_mul_ct", TensorProto.FLOAT, [1]) - outp = helper.make_tensor_value_info("outp", TensorProto.FLOAT, shape) - - add1_node = helper.make_node("Add", [inp1.name, inp1_add_ct.name], [inp1_add.name]) - add2_node = helper.make_node("Add", [inp2.name, inp2_add_ct.name], [inp2_add.name]) - mul1_node = helper.make_node("Mul", [inp1_add.name, inp1_mul_ct.name], [inp1_mul.name]) - mul2_node = helper.make_node("Mul", [inp2_add.name, inp2_mul_ct.name], [inp2_mul.name]) - eltwise_add_node = helper.make_node("Add", [inp1_mul.name, inp2_mul.name], [outp.name]) - graph = helper.make_graph( - nodes=[add1_node, add2_node, mul1_node, mul2_node, eltwise_add_node], - name="graph", - inputs=[inp1, inp2], - outputs=[outp], - ) - - model = qonnx_make_model(graph, producer_name="add-model") - model = ModelWrapper(model) - - # set initializers for scalar add/mul nodes - model.set_initializer(add1_node.input[1], np.array([7.0], dtype=np_default_dtype)) - model.set_initializer(add2_node.input[1], np.array([8.0], dtype=np_default_dtype)) - model.set_initializer(mul1_node.input[1], np.array([3.0], dtype=np_default_dtype)) - model.set_initializer(mul2_node.input[1], np.array([3.0], dtype=np_default_dtype)) - - return model - - -@pytest.mark.streamline -# channels -@pytest.mark.parametrize("ch", [64]) -# ifmdim -@pytest.mark.parametrize("ifmdim", [-1, 7]) -def test_linear_past_eltwise_add(ch, ifmdim): - # generate test vectors of correct shape - if ifmdim == -1: - input_tensor_shape = (1, ch) - else: - input_tensor_shape = (1, ch, ifmdim, ifmdim) - - model = make_model(input_tensor_shape) - model.save(export_onnx_path) - model = 
ModelWrapper(export_onnx_path) - model = model.transform(InferShapes()) - model = model.transform(FoldConstants()) - model = model.transform(GiveUniqueNodeNames()) - model = model.transform(GiveReadableTensorNames()) - - x1 = np.random.randn(*input_tensor_shape).astype(np.float32) - x2 = np.random.randn(*input_tensor_shape).astype(np.float32) - - # generate expected value from streamlined net - input_dict = {model.graph.input[0].name: x1, model.graph.input[1].name: x2} - - output_dict = oxe.execute_onnx(model, input_dict, True) - produced_sum = output_dict[model.graph.output[0].name] - expected_sum = 3.0 * ((x1 + x2) + 15.0) - assert np.isclose(expected_sum, produced_sum, atol=1e-3).all() - assert len(model.get_nodes_by_op_type("Add")) == 3 - assert len(model.get_nodes_by_op_type("Mul")) == 2 - - model = model.transform(MoveLinearPastEltwiseAdd()) - - # verify again, to check we didnt break anything - output_dict = oxe.execute_onnx(model, input_dict, True) - produced_sum = output_dict[model.graph.output[0].name] - assert np.isclose(expected_sum, produced_sum, atol=1e-3).all() - assert len(model.get_nodes_by_op_type("Add")) == 2 - assert len(model.get_nodes_by_op_type("Mul")) == 1 - - os.remove(export_onnx_path) - - -@pytest.mark.streamline -@pytest.mark.parametrize("ch", [64, 1]) -# ifmdim -@pytest.mark.parametrize("ifmdim", [-1, 7]) -def test_linear_past_eltwise_add_multiple_forks(ch, ifmdim): - # generate test vectors of correct shape - if ifmdim == -1: - input_shape = (1, ch) - else: - input_shape = (1, ch, ifmdim, ifmdim) - - top_in = helper.make_tensor_value_info("top_in", TensorProto.FLOAT, input_shape) - top_out = helper.make_tensor_value_info("top_out", TensorProto.FLOAT, input_shape) - - num_of_params = 6 - value_info = [] - for i in range(num_of_params): - value_info += [helper.make_tensor_value_info("p" + str(i), TensorProto.FLOAT, input_shape)] - - modelproto = qonnx_make_model( - helper.make_graph( - name="test", - inputs=[top_in], - outputs=[top_out], 
- value_info=value_info, - nodes=[ - helper.make_node("Add", ["top_in", "p0"], ["fork1"]), - helper.make_node("Mul", ["fork1", "p1"], ["t2"]), - helper.make_node("Mul", ["fork1", "p2"], ["t3"]), - helper.make_node("Add", ["t2", "t3"], ["t4"]), - helper.make_node("Mul", ["t4", "p3"], ["fork2"]), - helper.make_node("Add", ["fork2", "p4"], ["t5"]), - helper.make_node("Add", ["fork2", "p5"], ["t6"]), - helper.make_node("Add", ["t5", "t6"], ["top_out"]), - ], - ) - ) - model = ModelWrapper(modelproto) - model = model.transform(InferShapes()) - - np.random.seed(0) - for i in range(num_of_params): - model.set_initializer("p" + str(i), np.random.rand(*input_shape).astype(np.float32)) - - # need equal mults: - model.set_initializer("p2", model.get_initializer("p1")) - - # Transform - new_model = model.transform(MoveLinearPastEltwiseAdd()) - inp_dict = {"top_in": np.random.rand(*input_shape).astype(np.float32)} - - # Test - assert oxe.compare_execution(model, new_model, inp_dict) - assert new_model.graph.node[0].op_type == "Add" - assert new_model.graph.node[1].op_type == "Add" - assert new_model.graph.node[2].op_type == "Mul" - assert new_model.graph.node[3].op_type == "Mul" - assert new_model.graph.node[4].op_type == "Add" - assert new_model.graph.node[5].op_type == "Add" - assert len(new_model.graph.node) == 6 From c92b919d0919e31206754e51c588f5bd474ecf7b Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 15:36:25 +0000 Subject: [PATCH 05/15] [Feature] The Concat op code is not hardcoded in the compiler anymore and it now accepts different datatypes of inputs. 
It uses the new implementation from finn-hlslib --- src/finn/custom_op/fpgadataflow/concat.py | 71 ++++++-- .../custom_op/fpgadataflow/hls/concat_hls.py | 166 ++++++++++-------- 2 files changed, 144 insertions(+), 93 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/concat.py b/src/finn/custom_op/fpgadataflow/concat.py index 210b6b7fdd..214c5a4bd5 100644 --- a/src/finn/custom_op/fpgadataflow/concat.py +++ b/src/finn/custom_op/fpgadataflow/concat.py @@ -27,7 +27,9 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import math import numpy as np +import warnings from qonnx.core.datatype import DataType from qonnx.util.basic import roundup_to_integer_multiple @@ -36,17 +38,18 @@ class StreamingConcat(HWCustomOp): """Abstraction layer for HW implementation of Concat. - Only supports concatenating along the last axis.""" + Only supports concatenating along the last (channel) axis.""" def __init__(self, onnx_node, **kwargs): super().__init__(onnx_node, **kwargs) def get_nodeattr_types(self): my_attrs = { + "SIMD": ("i", True, 0), # number of elements from each stream to concat - "ElemsPerStream": ("ints", True, []), - # FINN DataTypes for inputs; output datatype inferred from input - "inputDataType": ("s", True, ""), + "ChannelsPerStream": ("ints", True, []), + # FINN DataTypes for inputs; output datatype inferred from inputs + "inputDataTypes": ("strings", True, [""]), # number of input vectors for non-concat axes, examples: # [1] is a single vector (like a FC layer with batch=1) # [4] is four vectors (like a FC layer with batch=4) @@ -57,21 +60,24 @@ def get_nodeattr_types(self): return my_attrs def get_n_inputs(self): - return len(self.get_nodeattr("ElemsPerStream")) + return len(self.get_nodeattr("ChannelsPerStream")) def get_total_elems(self): - elems_per_stream = self.get_nodeattr("ElemsPerStream") + elems_per_stream = 
self.get_nodeattr("ChannelsPerStream") return int(np.sum(elems_per_stream)) def get_normal_input_shape(self, ind=0): - elems_per_stream = self.get_nodeattr("ElemsPerStream") + elems_per_stream = self.get_nodeattr("ChannelsPerStream") elems = elems_per_stream[ind] vecs = list(self.get_nodeattr("numInputVectors")) ishape = tuple(vecs + [elems]) return ishape def get_folded_input_shape(self, ind=0): - return self.get_normal_input_shape(ind) + simd = self.get_nodeattr("SIMD") + folds = self.get_nodeattr("ChannelsPerStream")[ind] // simd + vecs = list(self.get_nodeattr("numInputVectors")) + return tuple(vecs + [folds, simd]) def get_normal_output_shape(self, ind=0): total_elems = self.get_total_elems() @@ -79,7 +85,11 @@ def get_normal_output_shape(self, ind=0): return tuple(vecs + [total_elems]) def get_folded_output_shape(self, ind=0): - return self.get_normal_output_shape() + total_elems = self.get_total_elems() + simd = self.get_nodeattr("SIMD") + folds = total_elems // simd + vecs = list(self.get_nodeattr("numInputVectors")) + return tuple(vecs + [folds, simd]) def make_shape_compatible_op(self, model): # check all input shapes @@ -94,7 +104,16 @@ def infer_node_datatype(self, model): # check all input datatypes for i, inp in enumerate(self.onnx_node.input): idt = model.get_tensor_datatype(inp) - assert idt == self.get_input_datatype() + if idt != self.get_input_datatype(i): + warn_str = "inputDataType changing for %s: %s -> %s " % ( + self.onnx_node.name, + str(self.get_input_datatype(i)), + str(idt), + ) + warnings.warn(warn_str) + old_datatypes_attr = self.get_nodeattr("inputDataTypes") + old_datatypes_attr[i] = idt.name + self.set_nodeattr("inputDataTypes", old_datatypes_attr) odt = self.get_output_datatype() model.set_tensor_datatype(self.onnx_node.output[0], odt) @@ -103,21 +122,37 @@ def verify_node(self): def get_input_datatype(self, ind=0): # input dt identical for all inputs - return DataType[self.get_nodeattr("inputDataType")] + return 
DataType[self.get_nodeattr("inputDataTypes")[ind]] def get_output_datatype(self, ind=0): - return self.get_input_datatype() + # infer output datatype from declared inputDataTypes + min_input = 0 + max_input = 0 + for i in range(len(self.get_nodeattr("inputDataTypes"))): + idt = self.get_input_datatype(i) + if idt.min() < min_input: + min_input = idt.min() + if idt.max() > max_input: + max_input = idt.max() + # if the input range is always greater than 0, then acc_max <= 2^P - 1 + if min_input >= 0: + out_bit_width = math.ceil(np.log2(max_input + 1)) + odt = DataType[f"UINT{out_bit_width}"] + # if the input range is signed, then acc_min >= -2^{P-1} and acc_max <= + # 2^{P - 1} - 1, which means 2^{P - 1} >= max(-acc_min, 1 + acc_max) + else: + max_abs_input = max(-min_input, 1 + max_input) + out_bit_width = math.ceil(np.log2(max_abs_input) + 1) + odt = DataType[f"INT{out_bit_width}"] + return odt def get_instream_width(self, ind=0): - elems_per_stream = self.get_nodeattr("ElemsPerStream") - elems = elems_per_stream[ind] - ibits = self.get_input_datatype().bitwidth() - return elems * ibits + ibits = self.get_input_datatype(ind).bitwidth() + return ibits * self.get_nodeattr("SIMD") def get_outstream_width(self, ind=0): obits = self.get_output_datatype().bitwidth() - total_elems = self.get_total_elems() - out_width = total_elems * obits + out_width = obits * self.get_nodeattr("SIMD") return out_width def get_number_output_values(self): diff --git a/src/finn/custom_op/fpgadataflow/hls/concat_hls.py b/src/finn/custom_op/fpgadataflow/hls/concat_hls.py index 008fa9cee8..641581a12d 100644 --- a/src/finn/custom_op/fpgadataflow/hls/concat_hls.py +++ b/src/finn/custom_op/fpgadataflow/hls/concat_hls.py @@ -30,6 +30,7 @@ import numpy as np import os +from finn.custom_op.fpgadataflow import templates from finn.custom_op.fpgadataflow.concat import StreamingConcat from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend from finn.util.data_packing import npy_to_rtlsim_input, 
rtlsim_output_to_npy @@ -48,47 +49,6 @@ def get_nodeattr_types(self): my_attrs.update(HLSBackend.get_nodeattr_types(self)) return my_attrs - def generate_params(self, model, path): - elems_per_stream = self.get_nodeattr("ElemsPerStream") - inp_streams = [] - commands = [] - idt = self.get_input_datatype() - total_elems = self.get_total_elems() - total_bw = idt.bitwidth() * total_elems - for i, elems in enumerate(elems_per_stream): - bw = idt.bitwidth() * elems - inp_stream = "hls::stream > &in%d" % (bw, i) - inp_streams.append(inp_stream) - cmd = "in%d.read()" % i - commands.append(cmd) - out_stream = "hls::stream > &out" % (total_bw) - inp_streams.append(out_stream) - - impl_hls_code = [] - impl_hls_code.append("void StreamingConcat(") - impl_hls_code.append(",".join(inp_streams)) - impl_hls_code.append(", unsigned int numReps) {") - impl_hls_code.append("for(unsigned int i = 0; i < numReps; i++) {") - impl_hls_code.append("#pragma HLS PIPELINE II=1") - impl_hls_code.append("ap_uint<%d> out_elem;" % total_bw) - # FIXME: the order of streams for concatenation works out differently - # for cppsim vs rtlsim, addressed via reversing the order of commands - # for now - impl_hls_code.append("#ifdef __SYNTHESIS__") - impl_hls_code.append("out_elem = (" + ",".join(commands[::-1]) + ");") - impl_hls_code.append("#else") - impl_hls_code.append("out_elem = (" + ",".join(commands) + ");") - impl_hls_code.append("#endif") - impl_hls_code.append("out.write(out_elem);") - impl_hls_code.append("}") - impl_hls_code.append("}") - impl_hls_code = "\n".join(impl_hls_code) - - impl_filename = "{}/concat_impl.hpp".format(path) - f_impl = open(impl_filename, "w") - f_impl.write(impl_hls_code) - f_impl.close() - def execute_node(self, context, graph): mode = self.get_nodeattr("exec_mode") node = self.onnx_node @@ -96,8 +56,7 @@ def execute_node(self, context, graph): ishapes = [self.get_normal_input_shape(x) for x in range(n_inps)] folded_ishapes = [self.get_folded_input_shape(x) for x 
in range(n_inps)] exp_oshape = self.get_normal_output_shape() - folded_oshape = self.get_folded_output_shape() - export_idt = self.get_input_datatype() + export_idts = [self.get_input_datatype(i) for i in range(n_inps)] if mode == "cppsim": code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") @@ -127,8 +86,10 @@ def execute_node(self, context, graph): # load output npy file super().npy_to_dynamic_output(context) assert ( - context[node.output[0]].shape == folded_oshape - ), "cppsim did not produce expected folded output shape" + context[node.output[0]].shape == exp_oshape + ), "cppsim did not produce expected folded output shape. Got: {}, expected: {}".format( + context[node.output[0]].shape, exp_oshape + ) context[node.output[0]] = context[node.output[0]].reshape(*exp_oshape) elif mode == "rtlsim": sim = self.get_rtlsim() @@ -137,7 +98,7 @@ def execute_node(self, context, graph): nbits = self.get_instream_width(i) rtlsim_inp = npy_to_rtlsim_input( "%s/input_%d.npy" % (code_gen_dir, i), - export_idt, + export_idts[i], nbits, reverse_inner=True, ) @@ -177,33 +138,54 @@ def execute_node(self, context, graph): context[node.output[0]].shape == exp_oshape ), """Output shape doesn't match expected shape.""" + def code_generation_cppsim(self, model): + """Generates c++ code for simulation (cppsim).""" + node = self.onnx_node + path = self.get_nodeattr("code_gen_dir_cppsim") + self.code_gen_dict["$AP_INT_MAX_W$"] = [str(self.get_ap_int_max_w())] + self.generate_params(model, path) + self.global_includes() + self.defines("cppsim") + self.read_npy_data() + self.strm_decl() + self.pragmas() + self.docompute() + self.dataoutstrm() + self.save_as_npy() + self.timeout_value() + self.timeout_condition() + self.timeout_read_stream() + + template = templates.docompute_template_timeout + + for key in self.code_gen_dict: + # transform list into long string separated by '\n' + code_gen_line = "\n".join(self.code_gen_dict[key]) + template = template.replace(key, code_gen_line) + 
code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + f = open(os.path.join(code_gen_dir, "execute_{}.cpp".format(node.op_type)), "w") + f.write(template) + f.close() + self.code_gen_dict.clear() + def global_includes(self): - self.code_gen_dict["$GLOBALS$"] = ['#include "concat_impl.hpp"'] + self.code_gen_dict["$GLOBALS$"] = ['#include "concat.hpp"'] def defines(self, var): - num_reps = self.get_nodeattr("numInputVectors") - num_reps = np.prod(num_reps) - self.code_gen_dict["$DEFINES$"] = ["#define NumReps %d" % num_reps] + self.code_gen_dict["$DEFINES$"] = ["#define SIMD {}".format(self.get_nodeattr("SIMD"))] def read_npy_data(self): n_inputs = self.get_n_inputs() code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") npy_type = "float" self.code_gen_dict["$READNPYDATA$"] = [] - idt = self.get_input_datatype() - idt_bw = idt.bitwidth() - elem_hls_type = idt.get_hls_datatype_str() - elem_bits = idt_bw for i in range(n_inputs): - packed_bits = self.get_instream_width(i) - packed_hls_type = "ap_uint<%d>" % packed_bits + input_elem_hls_type = self.get_input_datatype(i).get_hls_datatype_str() npy_in = "%s/input_%d.npy" % (code_gen_dir, i) self.code_gen_dict["$READNPYDATA$"].append( - 'npy2apintstream<%s, %s, %d, %s>("%s", in%d_%s);' + 'npy2vectorstream<%s, %s, SIMD>("%s", in%d_%s);' % ( - packed_hls_type, - elem_hls_type, - elem_bits, + input_elem_hls_type, npy_type, npy_in, i, @@ -215,41 +197,70 @@ def strm_decl(self): self.code_gen_dict["$STREAMDECLARATIONS$"] = [] n_inputs = self.get_n_inputs() for i in range(n_inputs): - packed_bits = self.get_instream_width(i) - packed_hls_type = "ap_uint<%d>" % packed_bits + input_elem_hls_type = self.get_input_datatype(i).get_hls_datatype_str() stream_name = "in%d_%s" % (i, self.hls_sname()) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream<%s> %s ("%s");' % (packed_hls_type, stream_name, stream_name) + 'hls::stream> %s ("%s");' + % (input_elem_hls_type, stream_name, stream_name) + ) + 
self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream> out_{} ("out_{}");'.format( + self.get_output_datatype().get_hls_datatype_str(), + self.hls_sname(), + self.hls_sname(), ) + ) self.code_gen_dict["$STREAMDECLARATIONS$"].append( - 'hls::stream> out_{} ("out_{}");'.format( - self.get_outstream_width(), self.hls_sname(), self.hls_sname() + 'hls::stream> debug_out_{} ("debug_out_{}");'.format( + self.get_output_datatype().get_hls_datatype_str(), + self.hls_sname(), + self.hls_sname(), ) ) def docompute(self): self.code_gen_dict["$DOCOMPUTE$"] = [] n_inputs = self.get_n_inputs() + input_folds = [str(self.get_folded_input_shape(i)[-2]) for i in range(n_inputs)] in_streams = [] for i in range(n_inputs): in_streams.append("in%d_%s" % (i, self.hls_sname())) - in_stream_names = ",".join(in_streams) - comp_call = "StreamingConcat(%s, out_%s, NumReps);" % ( - in_stream_names, - self.hls_sname(), + in_stream_names = ", ".join(in_streams) + in_stream_folds = ", ".join(input_folds) + comp_call = "StreamingConcat<{}>(out_{}, {});".format( + in_stream_folds, self.hls_sname(), in_stream_names ) self.code_gen_dict["$DOCOMPUTE$"] = [comp_call] + def dataoutstrm(self): + npy_type = "float" + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + oshape = self.get_folded_output_shape() + oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") + npy_out = "%s/output.npy" % code_gen_dir + self.code_gen_dict["$DATAOUTSTREAM$"] = [ + 'vectorstream2npy<%s, %s, SIMD>(debug_out_%s, %s, "%s");' + % ( + self.get_output_datatype().get_hls_datatype_str(), + npy_type, + self.hls_sname(), + oshape_cpp_str, + npy_out, + ) + ] + def blackboxfunction(self): n_inputs = self.get_n_inputs() in_streams = [] for i in range(n_inputs): - iwidth = self.get_instream_width(i) - in_streams.append("hls::stream> &in%d_%s" % (iwidth, i, self.hls_sname())) - in_streams = ",".join(in_streams) - total_width = self.get_input_datatype().bitwidth() * self.get_total_elems() - out_stream = 
"hls::stream> &out_%s" % ( - total_width, + input_elem_hls_type = self.get_input_datatype(i).get_hls_datatype_str() + in_streams.append( + "hls::stream> &in%d_%s" + % (input_elem_hls_type, i, self.hls_sname()) + ) + in_streams = ", ".join(in_streams) + out_stream = "hls::stream> &out_%s" % ( + self.get_output_datatype().get_hls_datatype_str(), self.hls_sname(), ) blackbox_hls = "void %s(%s, %s)" % (self.onnx_node.name, in_streams, out_stream) @@ -264,4 +275,9 @@ def pragmas(self): self.code_gen_dict["$PRAGMAS$"].append( "#pragma HLS INTERFACE axis port=out_" + self.hls_sname() ) + for i in range(n_inputs): + pragmas.append( + "#pragma HLS aggregate variable=in%d_%s compact=bit" % (i, self.hls_sname()) + ) + pragmas.append("#pragma HLS aggregate variable=out_%s compact=bit" % self.hls_sname()) self.code_gen_dict["$PRAGMAS$"].append("#pragma HLS INTERFACE ap_ctrl_none port=return") From d185219640282c97f7a144c7a6a0294177202f87 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 16:03:23 +0000 Subject: [PATCH 06/15] [Feature] InferConcatLayer transformation now accepts different datatypes among inputs and sets the SIMD parameter --- .../fpgadataflow/convert_to_hw_layers.py | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py index b02bc89db8..121a5484af 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py @@ -1211,21 +1211,24 @@ def apply(self, model): if (axis != -1) and (axis != last_axis): continue # check datatype coherence - dt0 = model.get_tensor_datatype(node.input[0]) - if dt0 is None: - continue - dt_coherent = all([model.get_tensor_datatype(x) == dt0 for x in node.input]) - if not dt_coherent: + if any([model.get_tensor_datatype(x) is None for x in node.input]): + warnings.warn( + "Inputs with 
undefined datatype detected, skipping InferConcatLayer()" + ) continue # skip conversion if any inputs are static - all_static = all([model.get_initializer(x) is None for x in node.input]) - if not all_static: + any_static = any([model.get_initializer(x) is not None for x in node.input]) + if any_static: continue # skip conversion if inputs are not integers - if not dt0.is_integer(): + all_integer = all([model.get_tensor_datatype(x).is_integer() for x in node.input]) + if not all_integer: + warnings.warn( + "Inputs with non-integer datatype detected, skipping InferConcatLayer()" + ) continue # ready for conversion - elems_per_stream = [model.get_tensor_shape(x)[-1] for x in node.input] + channels_per_stream = [model.get_tensor_shape(x)[-1] for x in node.input] inp_vec = list(model.get_tensor_shape(node.input[0])[:-1]) new_node = helper.make_node( "StreamingConcat", @@ -1233,9 +1236,10 @@ def apply(self, model): node.output, domain="finn.custom_op.fpgadataflow", backend="fpgadataflow", - name="Concat_" + node.name, - ElemsPerStream=elems_per_stream, - inputDataType=dt0.name, + name="StreamingConcat_" + node.name, + SIMD=1, + ChannelsPerStream=channels_per_stream, + inputDataTypes=[model.get_tensor_datatype(x).name for x in node.input], numInputVectors=inp_vec, inFIFODepths=[2] * len(node.input), ) From c8d36fb51ad8580bbd002c454ab3a478da3ac817 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 16:11:14 +0000 Subject: [PATCH 07/15] [Feature] test_fpgadataflow_concat.py test case checks different datatypes among inputs --- .../fpgadataflow/test_fpgadataflow_concat.py | 26 ++++++++++++------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/tests/fpgadataflow/test_fpgadataflow_concat.py b/tests/fpgadataflow/test_fpgadataflow_concat.py index 25c738d049..719d61905f 100644 --- a/tests/fpgadataflow/test_fpgadataflow_concat.py +++ b/tests/fpgadataflow/test_fpgadataflow_concat.py @@ -52,7 +52,7 @@ from 
finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers -def make_concat_model(i_shapes, idt): +def make_concat_model(i_shapes, idts): class ConcatModel(nn.Module): def forward(self, *args): return torch.cat(args, -1) @@ -67,20 +67,25 @@ def forward(self, *args): torch.onnx.export(torch_model, input_t, model_bytes, opset_version=11) model = onnx.ModelProto.FromString(model_bytes.getvalue()) model = ModelWrapper(model) - for inp in model.graph.input: + for inp, idt in zip(model.graph.input, idts): model.set_tensor_datatype(inp.name, idt) return model @pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim"]) -@pytest.mark.parametrize("idt", [DataType["INT4"]]) +# input datatypes and expected inferred out datatype +@pytest.mark.parametrize( + "test_idts", [([DataType["INT3"], DataType["UINT4"], DataType["UINT6"]], DataType["INT7"])] +) @pytest.mark.fpgadataflow @pytest.mark.vivado @pytest.mark.slow -def test_fpgadataflow_concat(exec_mode, idt): +def test_fpgadataflow_concat(exec_mode, test_idts): + idts = test_idts[0] + exp_odt = test_idts[1] i_shapes = [(1, 2, 4), (1, 2, 6), (1, 2, 1)] - i_data = [gen_finn_dt_tensor(idt, x) for x in i_shapes] - model = make_concat_model(i_shapes, idt) + i_data = [gen_finn_dt_tensor(idt, x) for x, idt in zip(i_shapes, idts)] + model = make_concat_model(i_shapes, idts) assert len(i_shapes) == len(model.graph.input) assert len(model.graph.output) == 1 exp_oshape = list(i_shapes[0][:-1]) + [sum(x[-1] for x in i_shapes)] @@ -96,6 +101,7 @@ def test_fpgadataflow_concat(exec_mode, idt): model = model.transform(InferConcatLayer()) assert model.graph.node[0].op_type == "StreamingConcat" assert model.graph.node[0].domain == "finn.custom_op.fpgadataflow" + assert model.get_tensor_datatype(model.graph.output[0].name) == exp_odt ret = execute_onnx(model, inp_dict) assert (ret[oname] == exp_out).all() model = model.transform(SpecializeLayers("xc7z020clg400-1")) @@ -120,12 +126,13 @@ def test_fpgadataflow_concat(exec_mode, 
idt): @pytest.mark.vivado @pytest.mark.slow def test_fpgadataflow_concat_stitchedip(): - idt = DataType["INT4"] + idts = [DataType["INT3"], DataType["UINT4"], DataType["UINT6"]] + exp_odt = DataType["INT7"] fpga_part = "xc7z020clg400-1" clk_ns = 10 i_shapes = [(1, 2, 4), (1, 2, 6), (1, 2, 1)] - i_data = [gen_finn_dt_tensor(idt, x) for x in i_shapes] - model = make_concat_model(i_shapes, idt) + i_data = [gen_finn_dt_tensor(idt, x) for x, idt in zip(i_shapes, idts)] + model = make_concat_model(i_shapes, idts) assert len(i_shapes) == len(model.graph.input) assert len(model.graph.output) == 1 exp_oshape = list(i_shapes[0][:-1]) + [sum(x[-1] for x in i_shapes)] @@ -141,6 +148,7 @@ def test_fpgadataflow_concat_stitchedip(): model = model.transform(InferConcatLayer()) assert model.graph.node[0].op_type == "StreamingConcat" assert model.graph.node[0].domain == "finn.custom_op.fpgadataflow" + assert model.get_tensor_datatype(model.graph.output[0].name) == exp_odt model = model.transform(SpecializeLayers(fpga_part)) assert model.graph.node[0].op_type == "StreamingConcat_hls" assert model.graph.node[0].domain == "finn.custom_op.fpgadataflow.hls" From 8f87454c45c688496d6e4e1650229e81e8417867 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 17:05:34 +0000 Subject: [PATCH 08/15] [Feature] New Split custom_op added --- src/finn/custom_op/fpgadataflow/__init__.py | 2 + .../custom_op/fpgadataflow/hls/__init__.py | 2 + .../custom_op/fpgadataflow/hls/split_hls.py | 278 ++++++++++++++++++ src/finn/custom_op/fpgadataflow/split.py | 164 +++++++++++ 4 files changed, 446 insertions(+) create mode 100644 src/finn/custom_op/fpgadataflow/hls/split_hls.py create mode 100644 src/finn/custom_op/fpgadataflow/split.py diff --git a/src/finn/custom_op/fpgadataflow/__init__.py b/src/finn/custom_op/fpgadataflow/__init__.py index aed2ab7fe1..6f48bc6308 100644 --- a/src/finn/custom_op/fpgadataflow/__init__.py +++ b/src/finn/custom_op/fpgadataflow/__init__.py @@ -42,6 +42,7 @@ 
from finn.custom_op.fpgadataflow.lookup import Lookup from finn.custom_op.fpgadataflow.matrixvectoractivation import MVAU from finn.custom_op.fpgadataflow.pool import Pool +from finn.custom_op.fpgadataflow.split import StreamingSplit from finn.custom_op.fpgadataflow.streamingdataflowpartition import ( StreamingDataflowPartition, ) @@ -77,6 +78,7 @@ custom_op["Lookup"] = Lookup custom_op["Pool"] = Pool custom_op["StreamingConcat"] = StreamingConcat +custom_op["StreamingSplit"] = StreamingSplit custom_op["StreamingDataWidthConverter"] = StreamingDataWidthConverter custom_op["StreamingEltwise"] = StreamingEltwise custom_op["StreamingMaxPool"] = StreamingMaxPool diff --git a/src/finn/custom_op/fpgadataflow/hls/__init__.py b/src/finn/custom_op/fpgadataflow/hls/__init__.py index 405c47a08d..e5b24413eb 100644 --- a/src/finn/custom_op/fpgadataflow/hls/__init__.py +++ b/src/finn/custom_op/fpgadataflow/hls/__init__.py @@ -43,6 +43,7 @@ from finn.custom_op.fpgadataflow.hls.lookup_hls import Lookup_hls from finn.custom_op.fpgadataflow.hls.matrixvectoractivation_hls import MVAU_hls from finn.custom_op.fpgadataflow.hls.pool_hls import Pool_hls +from finn.custom_op.fpgadataflow.hls.split_hls import StreamingSplit_hls from finn.custom_op.fpgadataflow.hls.streamingdatawidthconverter_hls import ( StreamingDataWidthConverter_hls, ) @@ -71,6 +72,7 @@ custom_op["Lookup_hls"] = Lookup_hls custom_op["Pool_hls"] = Pool_hls custom_op["StreamingConcat_hls"] = StreamingConcat_hls +custom_op["StreamingSplit_hls"] = StreamingSplit_hls custom_op["StreamingEltwise_hls"] = StreamingEltwise_hls custom_op["StreamingDataWidthConverter_hls"] = StreamingDataWidthConverter_hls custom_op["StreamingMaxPool_hls"] = StreamingMaxPool_hls diff --git a/src/finn/custom_op/fpgadataflow/hls/split_hls.py b/src/finn/custom_op/fpgadataflow/hls/split_hls.py new file mode 100644 index 0000000000..d6f9d43f51 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/hls/split_hls.py @@ -0,0 +1,278 @@ +# Copyright (C) 2024, 
Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +import os + +from finn.custom_op.fpgadataflow import templates +from finn.custom_op.fpgadataflow.hlsbackend import HLSBackend +from finn.custom_op.fpgadataflow.split import StreamingSplit +from finn.util.data_packing import npy_to_rtlsim_input, rtlsim_output_to_npy + + +class StreamingSplit_hls(StreamingSplit, HLSBackend): + """Streaming split node with dynamically generated HLS. 
+ Only supports splitting along the last axis.""" + + def __init__(self, onnx_node, **kwargs): + super().__init__(onnx_node, **kwargs) + + def get_nodeattr_types(self): + my_attrs = {} + my_attrs.update(StreamingSplit.get_nodeattr_types(self)) + my_attrs.update(HLSBackend.get_nodeattr_types(self)) + return my_attrs + + def execute_node(self, context, graph): + mode = self.get_nodeattr("exec_mode") + node = self.onnx_node + ishape = self.get_normal_input_shape() + folded_ishape = self.get_folded_input_shape() + n_outputs = self.get_n_outputs() + exp_oshapes = [self.get_normal_output_shape(i) for i in range(len(node.output))] + export_idt = self.get_input_datatype() + + if mode == "cppsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + elif mode == "rtlsim": + code_gen_dir = self.get_nodeattr("code_gen_dir_ipgen") + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + inp = context[node.input[0]] + assert str(inp.dtype) == "float32", "Input datatype is not float32" + assert inp.shape == ishape, "Input shape mismatch for " + node.input[0] + # reshape input into folded form + inp = inp.reshape(folded_ishape) + # make copy before saving array + reshaped_input = inp.copy() + np.save(os.path.join(code_gen_dir, "input_0.npy"), reshaped_input) + + if mode == "cppsim": + # execute the precompiled model + super().exec_precompiled_singlenode_model() + # load output npy file + super().npy_to_dynamic_outputs(context, ["output_%d.npy" % i for i in range(n_outputs)]) + for i in range(n_outputs): + assert ( + context[node.output[i]].shape == exp_oshapes[i] + ), "cppsim did not produce expected folded output shape: {}, expected: {}".format( + context[node.output[i]].shape, exp_oshapes[i] + ) + elif mode == "rtlsim": + sim = self.get_rtlsim() + io_dict = {"inputs": {}, "outputs": {}} + + nbits = self.get_instream_width() + rtlsim_inp = 
npy_to_rtlsim_input( + "%s/input_0.npy" % code_gen_dir, + export_idt, + nbits, + # reverse_inner=True, + ) + io_dict["inputs"]["in0"] = rtlsim_inp + super().reset_rtlsim(sim) + super().toggle_clk(sim) + + for i in range(n_outputs): + io_dict["outputs"]["out_arr_%d" % i] = [] + self.rtlsim_multi_io(sim, io_dict, sname="_") + odt = self.get_output_datatype() + target_bits = odt.bitwidth() + packed_bits = self.get_outstream_width() + for i in range(n_outputs): + out_npy_path = "%s/output_%d.npy" % (code_gen_dir, i) + out_shape = self.get_folded_output_shape(i) + rtlsim_output_to_npy( + io_dict["outputs"]["out_arr_%d" % i], + out_npy_path, + odt, + out_shape, + packed_bits, + target_bits, + # reverse_inner=True, + ) + # load and reshape output + output = np.load(out_npy_path) + output = np.asarray([output], dtype=np.float32).reshape(*exp_oshapes[i]) + context[node.output[i]] = output + else: + raise Exception( + """Invalid value for attribute exec_mode! Is currently set to: {} + has to be set to one of the following value ("cppsim", "rtlsim")""".format( + mode + ) + ) + + for i in range(n_outputs): + assert ( + context[node.output[i]].shape == exp_oshapes[i] + ), "cppsim did not produce expected folded output shape. 
Got: {}, expected: {}".format( + context[node.output[i]].shape, exp_oshapes[i] + ) + + def code_generation_cppsim(self, model): + """Generates c++ code for simulation (cppsim).""" + node = self.onnx_node + path = self.get_nodeattr("code_gen_dir_cppsim") + self.code_gen_dict["$AP_INT_MAX_W$"] = [str(self.get_ap_int_max_w())] + self.generate_params(model, path) + self.global_includes() + self.defines("cppsim") + self.read_npy_data() + self.strm_decl() + self.pragmas() + self.docompute() + self.dataoutstrm() + self.save_as_npy() + self.timeout_value() + self.timeout_condition() + self.timeout_read_stream() + + template = templates.docompute_template_timeout + + for key in self.code_gen_dict: + # transform list into long string separated by '\n' + code_gen_line = "\n".join(self.code_gen_dict[key]) + template = template.replace(key, code_gen_line) + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + f = open(os.path.join(code_gen_dir, "execute_{}.cpp".format(node.op_type)), "w") + f.write(template) + f.close() + self.code_gen_dict.clear() + + def global_includes(self): + self.code_gen_dict["$GLOBALS$"] = ['#include "split.hpp"'] + + def defines(self, var): + self.code_gen_dict["$DEFINES$"] = ["#define NUM_OUTPUTS " + str(self.get_n_outputs())] + + def read_npy_data(self): + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + npy_type = "float" + self.code_gen_dict["$READNPYDATA$"] = [] + simd = self.get_nodeattr("SIMD") + input_elem_hls_type = self.get_input_datatype().get_hls_datatype_str() + npy_in = "%s/input_0.npy" % code_gen_dir + self.code_gen_dict["$READNPYDATA$"].append( + 'npy2vectorstream<%s, %s, %d>("%s", in0);' + % (input_elem_hls_type, npy_type, simd, npy_in) + ) + + def strm_decl(self): + self.code_gen_dict["$STREAMDECLARATIONS$"] = [] + simd = self.get_nodeattr("SIMD") + input_elem_hls_type = self.get_input_datatype().get_hls_datatype_str() + stream_name = "in0" + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + 'hls::stream> %s ("%s");' 
+ % (input_elem_hls_type, simd, stream_name, stream_name) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + "hls::stream> out_arr[NUM_OUTPUTS];".format( + self.get_output_datatype().get_hls_datatype_str(), simd + ) + ) + self.code_gen_dict["$STREAMDECLARATIONS$"].append( + "hls::stream> debug_out_arr[NUM_OUTPUTS];".format( + self.get_output_datatype().get_hls_datatype_str(), simd + ) + ) + + def docompute(self): + self.code_gen_dict["$DOCOMPUTE$"] = [] + n_outputs = self.get_n_outputs() + output_folds = [str(self.get_folded_output_shape(i)[-2]) for i in range(n_outputs)] + out_stream_folds = ", ".join(output_folds) + comp_call = "StreamingSplit<{}>(in0, out_arr);".format(out_stream_folds) + self.code_gen_dict["$DOCOMPUTE$"] = [comp_call] + + def dataoutstrm(self): + npy_type = "float" + simd = self.get_nodeattr("SIMD") + code_gen_dir = self.get_nodeattr("code_gen_dir_cppsim") + n_outputs = self.get_n_outputs() + self.code_gen_dict["$DATAOUTSTREAM$"] = [] + for i in range(n_outputs): + oshape = self.get_folded_output_shape(i) + oshape_cpp_str = str(oshape).replace("(", "{").replace(")", "}") + npy_out = "%s/output_%d.npy" % (code_gen_dir, i) + self.code_gen_dict["$DATAOUTSTREAM$"].append( + 'vectorstream2npy<%s, %s, %d>(debug_out_arr[%d], %s, "%s");' + % ( + self.get_output_datatype(i).get_hls_datatype_str(), + npy_type, + simd, + i, + oshape_cpp_str, + npy_out, + ) + ) + + def blackboxfunction(self): + input_elem_hls_type = self.get_input_datatype().get_hls_datatype_str() + simd = self.get_nodeattr("SIMD") + in_stream = "hls::stream> &in0" % (input_elem_hls_type, simd) + out_streams = "hls::stream> (&out_arr)[NUM_OUTPUTS]" % ( + input_elem_hls_type, + simd, + ) + blackbox_hls = "void %s(%s, %s)" % (self.onnx_node.name, in_stream, out_streams) + self.code_gen_dict["$BLACKBOXFUNCTION$"] = [blackbox_hls] + + def pragmas(self): + pragmas = [] + pragmas.append("#pragma HLS INTERFACE axis port=in0") + for i in range(self.get_n_outputs()): + 
pragmas.append("#pragma HLS INTERFACE axis port=out_arr[%d]" % i) + pragmas.append("#pragma HLS INTERFACE ap_ctrl_none port=return") + pragmas.append("#pragma HLS aggregate variable=in0 compact=bit") + for i in range(self.get_n_outputs()): + pragmas.append("#pragma HLS aggregate variable=out_arr[%d] compact=bit" % i) + self.code_gen_dict["$PRAGMAS$"] = pragmas + + def timeout_condition(self): + condition = [] + for i in range(self.get_n_outputs()): + condition.append("out_arr[{}].empty()".format(i)) + condition = " && ".join(condition) + self.code_gen_dict["$TIMEOUT_CONDITION$"] = [condition] + + def timeout_read_stream(self): + read_stream_command = """ +for(int i = 0; i < NUM_OUTPUTS; i++){ + if(!out_arr[i].empty()) + debug_out_arr[i] << out_arr[i].read(); +} +""" + self.code_gen_dict["$TIMEOUT_READ_STREAM$"] = [read_stream_command] diff --git a/src/finn/custom_op/fpgadataflow/split.py b/src/finn/custom_op/fpgadataflow/split.py new file mode 100644 index 0000000000..e6ec551bc4 --- /dev/null +++ b/src/finn/custom_op/fpgadataflow/split.py @@ -0,0 +1,164 @@ +# Copyright (C) 2024, Advanced Micro Devices, Inc. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import numpy as np +import warnings +from onnx import helper +from qonnx.core.datatype import DataType +from qonnx.util.basic import roundup_to_integer_multiple + +from finn.custom_op.fpgadataflow.hwcustomop import HWCustomOp + + +class StreamingSplit(HWCustomOp): + """Abstraction layer for HW implementation of Split. 
+ Only supports splitting along the last (channel) axis.""" + + def __init__(self, onnx_node, **kwargs): + super().__init__(onnx_node, **kwargs) + + def get_nodeattr_types(self): + my_attrs = { + "SIMD": ("i", True, 0), + # number of channels in each output stream + "ChannelsPerStream": ("ints", True, []), + # FINN DataType of the input; all outputs use the same datatype + "inputDataType": ("s", True, ""), + # number of input vectors for non-split axes, examples: + # [1] is a single vector (like a FC layer with batch=1) + # [4] is four vectors (like a FC layer with batch=4) + # [1, 4, 4] is four * four vectors (like a conv layer with batch=1) + "numInputVectors": ("ints", False, [1]), + } + my_attrs.update(super().get_nodeattr_types()) + return my_attrs + + def get_n_outputs(self): + return len(self.get_nodeattr("ChannelsPerStream")) + + def get_total_elems(self): + elems_per_stream = self.get_nodeattr("ChannelsPerStream") + return int(np.sum(elems_per_stream)) + + def get_normal_input_shape(self, ind=0): + total_elems = self.get_total_elems() + vecs = list(self.get_nodeattr("numInputVectors")) + ishape = tuple(vecs + [total_elems]) + return ishape + + def get_folded_input_shape(self, ind=0): + simd = self.get_nodeattr("SIMD") + folds = self.get_total_elems() // simd + vecs = list(self.get_nodeattr("numInputVectors")) + return tuple(vecs + [folds, simd]) + + def get_normal_output_shape(self, ind=0): + elems = self.get_nodeattr("ChannelsPerStream")[ind] + vecs = list(self.get_nodeattr("numInputVectors")) + return tuple(vecs + [elems]) + + def get_folded_output_shape(self, ind=0): + elems = self.get_nodeattr("ChannelsPerStream")[ind] + simd = self.get_nodeattr("SIMD") + folds = elems // simd + vecs = list(self.get_nodeattr("numInputVectors")) + return tuple(vecs + [folds, simd]) + + def make_shape_compatible_op(self, model): + # check input shape + exp_ishape = self.get_normal_input_shape() + ishape = tuple(model.get_tensor_shape(self.onnx_node.input[0])) + assert
ishape == exp_ishape, "Unexpected input shape" + + assert len(self.onnx_node.output) == self.get_n_outputs(), "Unexpected number of outputs" + ret = helper.make_node("Split", self.onnx_node.input, self.onnx_node.output, axis=-1) + return ret + + def infer_node_datatype(self, model): + # check input datatype + inp = self.onnx_node.input[0] + idt = model.get_tensor_datatype(inp) + if idt != self.get_input_datatype(): + warn_str = "inputDataType changing for %s: %s -> %s " % ( + self.onnx_node.name, + str(self.get_input_datatype()), + str(idt), + ) + warnings.warn(warn_str) + self.set_nodeattr("inputDataType", idt.name) + odt = self.get_output_datatype() + for out in self.onnx_node.output: + model.set_tensor_datatype(out, odt) + + def verify_node(self): + pass + + def get_input_datatype(self, ind=0): + return DataType[self.get_nodeattr("inputDataType")] + + def get_output_datatype(self, ind=0): + # all output datatypes are the same as the input datatype + return self.get_input_datatype() + + def get_instream_width(self, ind=0): + ibits = self.get_input_datatype().bitwidth() + return ibits * self.get_nodeattr("SIMD") + + def get_outstream_width(self, ind=0): + obits = self.get_output_datatype().bitwidth() + out_width = obits * self.get_nodeattr("SIMD") + return out_width + + def get_number_output_values(self): + num_output_values = 0 + for i in range(self.get_n_outputs()): + num_output_values += np.prod(self.get_folded_output_shape(i)[:-1]) + return num_output_values + + def get_exp_cycles(self): + return np.prod(self.get_folded_input_shape()[:-1]) + + def execute_node(self, context, graph): + node = self.onnx_node + split = self.get_nodeattr("ChannelsPerStream") + np_split_param = np.cumsum(split[:-1]) + np_result = np.split(context[node.input[0]], np_split_param, axis=-1) + for i, out in enumerate(node.output): + context[out] = np_result[i] + + def get_instream_width_padded(self, ind=0): + in_width = self.get_instream_width() + return 
roundup_to_integer_multiple(in_width, 8) + + def get_verilog_top_module_intf_names(self): + intf_names = super().get_verilog_top_module_intf_names() + intf_names["s_axis"] = [("in0", self.get_instream_width_padded())] + intf_names["m_axis"] = [] + for i in range(self.get_n_outputs()): + intf_names["m_axis"].append(("out_arr_%d" % i, self.get_instream_width_padded())) + return intf_names From 8ea47f37f288195564b908c7a374b1ce913ef450 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 17:08:54 +0000 Subject: [PATCH 09/15] [Feature] Change signal name option added to hwcustomop.rtlsim_multi_io, useful for array interfaces --- src/finn/custom_op/fpgadataflow/hwcustomop.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/finn/custom_op/fpgadataflow/hwcustomop.py b/src/finn/custom_op/fpgadataflow/hwcustomop.py index b40b8f3074..602a923424 100644 --- a/src/finn/custom_op/fpgadataflow/hwcustomop.py +++ b/src/finn/custom_op/fpgadataflow/hwcustomop.py @@ -284,11 +284,11 @@ def rtlsim(self, sim, inp, inp2=None): sim.stop_vcd_trace() return outputs - def rtlsim_multi_io(self, sim, io_dict): + def rtlsim_multi_io(self, sim, io_dict, sname=None): "Run rtlsim for this node, supports multiple i/o streams." 
# signal name - sname = "_" + self.hls_sname() + "_" + sname = "_" + self.hls_sname() + "_" if sname is None else sname trace_file = self.get_nodeattr("rtlsim_trace") if trace_file == "default": From 59cfce74a4ba3788d0bf0596a6b0976ea5a030a0 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 17:11:43 +0000 Subject: [PATCH 10/15] [Feature] InferSplitlayer() added --- .../fpgadataflow/convert_to_hw_layers.py | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py index b02bc89db8..e4f10af3eb 100644 --- a/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py +++ b/src/finn/transformation/fpgadataflow/convert_to_hw_layers.py @@ -1250,6 +1250,72 @@ def apply(self, model): return (model, graph_modified) +class InferSplitLayer(Transformation): + """Convert suitable Split nodes (operating on last/-1 axis) + into StreamingSplit HW layers.""" + + def apply(self, model): + graph = model.graph + node_ind = 0 + graph_modified = False + for node in graph.node: + node_ind += 1 + if node.op_type == "Split": + split_param = node.input[1] + if model.get_initializer(split_param) is None: + warnings.warn("Split param not constant, skipping InferSplitLayer()") + continue + ishape = model.get_tensor_shape(node.input[0]) + axis = get_by_name(node.attribute, "axis") + if (axis is None) or (ishape is None): + continue + axis = axis.i + last_axis = len(ishape) - 1 + # skip conversion if not using last axis + if (axis != -1) and (axis != last_axis): + warnings.warn( + "StreamingSplit supports only last axis, skipping InferSplitLayer()" + ) + continue + # only one input allowed (two including split_param) + if len(node.input) != 2: + warnings.warn("Only one input allowed, skipping InferSplitLayer()") + continue + # skip conversion if the input is static + if model.get_initializer(node.input[0]) is not None: +
warnings.warn("Static input detected, skipping InferSplitLayer()") + continue + # skip conversion if inputs are not integers + if not model.get_tensor_datatype(node.input[0]).is_integer(): + warnings.warn("Non-integer input detected, skipping InferSplitLayer()") + continue + # ready for conversion + channels_per_stream = [model.get_tensor_shape(x)[-1] for x in node.output] + inp_vec = list(model.get_tensor_shape(node.input[0])[:-1]) + new_node = helper.make_node( + "StreamingSplit", + node.input, + node.output, + domain="finn.custom_op.fpgadataflow", + backend="fpgadataflow", + name="StreamingSplit_" + node.name, + SIMD=1, + ChannelsPerStream=channels_per_stream, + inputDataType=model.get_tensor_datatype(node.input[0]).name, + numInputVectors=inp_vec, + outFIFODepths=[2] * len(node.output), + ) + graph.node.insert(node_ind, new_node) + # remove old node + graph.node.remove(node) + graph_modified = True + + if graph_modified: + model = model.transform(InferShapes()) + model = model.transform(InferDataTypes()) + return (model, graph_modified) + + class InferStreamingEltwise(Transformation): """Convert eltwise Add, Sub or Sub -> Abs to StreamingEltwise layer with AddEltwise, SubEltwise or AbsDiffEltwise op.""" From 6960e1505d2c220c7363488852fb82157282f4e0 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 16 Sep 2024 17:17:07 +0000 Subject: [PATCH 11/15] [Feature] fpgadataflow test for split added --- tests/fpgadataflow/test_fpgadataflow_split.py | 150 ++++++++++++++++++ 1 file changed, 150 insertions(+) create mode 100644 tests/fpgadataflow/test_fpgadataflow_split.py diff --git a/tests/fpgadataflow/test_fpgadataflow_split.py b/tests/fpgadataflow/test_fpgadataflow_split.py new file mode 100644 index 0000000000..5859b6d5a6 --- /dev/null +++ b/tests/fpgadataflow/test_fpgadataflow_split.py @@ -0,0 +1,150 @@ +# Copyright (c) 2021, Xilinx +# Copyright (C) 2023, Advanced Micro Devices, Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, this +# list of conditions and the following disclaimer. +# +# * Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# * Neither the name of FINN nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import pytest + +import numpy as np +import onnx +from onnx import helper as oh +from qonnx.core.datatype import DataType +from qonnx.core.modelwrapper import ModelWrapper +from qonnx.transformation.general import GiveUniqueNodeNames + +from finn.core.onnx_exec import execute_onnx +from finn.transformation.fpgadataflow.compile_cppsim import CompileCppSim +from finn.transformation.fpgadataflow.convert_to_hw_layers import InferSplitLayer +from finn.transformation.fpgadataflow.create_stitched_ip import CreateStitchedIP +from finn.transformation.fpgadataflow.hlssynth_ip import HLSSynthIP +from finn.transformation.fpgadataflow.insert_fifo import InsertFIFO +from finn.transformation.fpgadataflow.prepare_cppsim import PrepareCppSim +from finn.transformation.fpgadataflow.prepare_ip import PrepareIP +from finn.transformation.fpgadataflow.prepare_rtlsim import PrepareRTLSim +from finn.transformation.fpgadataflow.set_exec_mode import SetExecMode +from finn.transformation.fpgadataflow.specialize_layers import SpecializeLayers + + +def make_split_model(IN_SHAPE, IN_DTYPE, SPLIT, AXIS): + out_shapes = [IN_SHAPE[:-1] + [s] for s in SPLIT] + outputs = [] + for i in range(len(SPLIT)): + name = "global_out_" + str(i) + out = oh.make_tensor_value_info(name, onnx.TensorProto.FLOAT, out_shapes[i]) + outputs.append(out) + + inp = oh.make_tensor_value_info("global_in", onnx.TensorProto.FLOAT, IN_SHAPE) + split_init = onnx.numpy_helper.from_array( + np.array(SPLIT, dtype=np.int64), name="Split_0_param0" + ) + split_node = oh.make_node( + "Split", [inp.name, split_init.name], [out.name for out in outputs], axis=AXIS + ) + graph = oh.make_graph(nodes=[split_node], name="split_test", inputs=[inp], outputs=outputs) + model = oh.make_model(graph) + model = ModelWrapper(model) + for out in outputs: + model.set_tensor_datatype(out.name, IN_DTYPE) + model.set_tensor_layout(out.name, ["N", "H", "W", "C"]) + model.set_tensor_datatype(inp.name, IN_DTYPE) + model.set_tensor_layout(inp.name, ["N", 
"H", "W", "C"]) + model.set_initializer(split_init.name, np.array(SPLIT, dtype=np.int64)) + model = model.transform(GiveUniqueNodeNames()) + + return model + + +@pytest.mark.parametrize("exec_mode", ["cppsim", "rtlsim", "stitched_rtlsim"]) +@pytest.mark.parametrize("idt", [DataType["INT3"]]) +@pytest.mark.fpgadataflow +@pytest.mark.vivado +@pytest.mark.slow +def test_fpgadataflow_split(exec_mode, idt): + fpga_part = "xc7z020clg400-1" + clk_ns = 10 + i_shape = [1, 5, 5, 10] + split = [2, 2, 6] + split_axis = 3 + model = make_split_model(i_shape, idt, split, split_axis) + assert len(model.graph.output) == len(split) + exp_oshapes = [] + for s in split: + oshape = i_shape.copy() + oshape[split_axis] = s + exp_oshapes.append(oshape) + onames = [o.name for o in model.graph.output] + assert all(model.get_tensor_shape(oname) == exp_oshapes[i] for i, oname in enumerate(onames)) + + inputs = [] + for out_shape in exp_oshapes: + inputs.append(np.random.randint(idt.min(), idt.max() + 1, out_shape).astype(np.float32)) + test_input = np.concatenate(inputs, axis=split_axis) + input_dict = {model.graph.input[0].name: test_input} + ret = execute_onnx(model, input_dict) + for i, (k, v) in enumerate(ret.items()): + assert (v == inputs[i]).all() + + # call transformation to convert to HW and verify conversion + model = model.transform(InferSplitLayer()) + assert model.graph.node[0].op_type == "StreamingSplit" + assert model.graph.node[0].domain == "finn.custom_op.fpgadataflow" + ret = execute_onnx(model, input_dict) + for i, (k, v) in enumerate(ret.items()): + assert (v == inputs[i]).all() + + model = model.transform(SpecializeLayers(fpga_part)) + assert model.graph.node[0].op_type == "StreamingSplit_hls" + assert model.graph.node[0].domain == "finn.custom_op.fpgadataflow.hls" + if exec_mode == "cppsim": + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareCppSim()) + model = model.transform(CompileCppSim()) + model = 
model.transform(SetExecMode("cppsim")) + elif exec_mode == "rtlsim": + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(fpga_part, clk_ns)) + model = model.transform(HLSSynthIP()) + model = model.transform(SetExecMode("rtlsim")) + model = model.transform(PrepareRTLSim()) + elif exec_mode == "stitched_rtlsim": + model = model.transform(InsertFIFO(create_shallow_fifos=True)) + model = model.transform(SpecializeLayers(fpga_part)) + model = model.transform(GiveUniqueNodeNames()) + model = model.transform(PrepareIP(fpga_part, clk_ns)) + model = model.transform(HLSSynthIP()) + model = model.transform( + CreateStitchedIP( + fpga_part, + clk_ns, + vitis=False, + ) + ) + model.set_metadata_prop("exec_mode", "rtlsim") + model.set_metadata_prop("rtlsim_trace", "trace.vcd") + ret_sim = execute_onnx(model, input_dict) + for i, (k, v) in enumerate(ret_sim.items()): + assert (v == inputs[i]).all() From c8c8d49cef0c9374ccca4337bc60701fae3ef450 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 23 Sep 2024 13:34:20 +0000 Subject: [PATCH 12/15] [Update] Finn-hlslib commit updated --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index a4fc124fa4..078eb33ec0 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -32,7 +32,7 @@ FINN_EXP_COMMIT="0724be21111a21f0d81a072fccc1c446e053f851" BREVITAS_COMMIT="d4834bd2a0fad3c1fbc0ff7e1346d5dcb3797ea4" PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1" CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4" -HLSLIB_COMMIT="16e5847a5e3ef76cffe84c8fad2f010d593457d3" +HLSLIB_COMMIT="2c066e87f5b8d309693c5d46c206473ca20ac68c" OMX_COMMIT="0b59762f9e4c4f7e5aa535ee9bc29f292434ca7a" AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b" XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" From 823588bc1de3881e8a9dab0b91c7c0f4ad17be65 Mon Sep 17 00:00:00 2001 From: Michal Danilowicz Date: Mon, 23 Sep 2024 13:36:12 +0000 Subject: 
[PATCH 13/15] [Update] Finn-hlslib commit updated --- fetch-repos.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fetch-repos.sh b/fetch-repos.sh index a4fc124fa4..078eb33ec0 100755 --- a/fetch-repos.sh +++ b/fetch-repos.sh @@ -32,7 +32,7 @@ FINN_EXP_COMMIT="0724be21111a21f0d81a072fccc1c446e053f851" BREVITAS_COMMIT="d4834bd2a0fad3c1fbc0ff7e1346d5dcb3797ea4" PYVERILATOR_COMMIT="ce0a08c20cb8c1d1e84181d6f392390f846adbd1" CNPY_COMMIT="4e8810b1a8637695171ed346ce68f6984e585ef4" -HLSLIB_COMMIT="16e5847a5e3ef76cffe84c8fad2f010d593457d3" +HLSLIB_COMMIT="2c066e87f5b8d309693c5d46c206473ca20ac68c" OMX_COMMIT="0b59762f9e4c4f7e5aa535ee9bc29f292434ca7a" AVNET_BDF_COMMIT="2d49cfc25766f07792c0b314489f21fe916b639b" XIL_BDF_COMMIT="8cf4bb674a919ac34e3d99d8d71a9e60af93d14e" From dbb387ee7f187d60a1f4ad6c9ddd2163f382c314 Mon Sep 17 00:00:00 2001 From: Christoph Berganski Date: Tue, 21 Jan 2025 15:06:57 +0100 Subject: [PATCH 14/15] [Streamline] Add MoveTransposePastJoinMul transformation --- src/finn/transformation/streamline/reorder.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/finn/transformation/streamline/reorder.py b/src/finn/transformation/streamline/reorder.py index 8688145453..9797e6abf3 100644 --- a/src/finn/transformation/streamline/reorder.py +++ b/src/finn/transformation/streamline/reorder.py @@ -1325,6 +1325,20 @@ def are_producers_identical(self, model, producers): return True +class MoveTransposePastJoinMul(MoveIdenticalOpPastJoinOp): + def __init__(self): + super().__init__(["Transpose"], ["Mul"]) + + def are_producers_identical(self, model, producers): + if not super().are_producers_identical(model, producers): + return False + first_perm = get_by_name(producers[0].attribute, "perm").ints + for producer in producers: + if first_perm != get_by_name(producer.attribute, "perm").ints: + return False + return True + + class MoveMulPastJoinAdd(MoveIdenticalOpPastJoinOp): def __init__(self): super().__init__(["Mul"], ["Add"]) From 
b50a63b039240afb11eb825c4a719763604ecd7a Mon Sep 17 00:00:00 2001 From: Christoph Berganski Date: Tue, 28 Jan 2025 17:35:00 +0100 Subject: [PATCH 15/15] [Folding] Add Split, Concat and RTL-based FMPadding to auto-folding --- .../transformation/fpgadataflow/set_folding.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/finn/transformation/fpgadataflow/set_folding.py b/src/finn/transformation/fpgadataflow/set_folding.py index 3ce17a27ff..0dd089597d 100644 --- a/src/finn/transformation/fpgadataflow/set_folding.py +++ b/src/finn/transformation/fpgadataflow/set_folding.py @@ -114,9 +114,13 @@ def apply(self, model): simd_ops = [ "DownSampler_hls", "FMPadding_hls", + "FMPadding_rtl", "FMPadding_Pixel_hls", "ConvolutionInputGenerator_hls", "ConvolutionInputGenerator_rtl", + # Streaming Split and Concat are SIMD operations + "StreamingSplit_hls", + "StreamingConcat_hls" ] # these ops are preceded by depthwise SWG and have special behavior, # as explained in the SetFolding docstring @@ -215,7 +219,16 @@ def apply(self, model): # depthwise SWGs are handled separately continue else: - max_simd = node_inst.get_nodeattr("NumChannels") + # Note: Keep original behavior for all custom-ops defining + # the NumChannels attribute as it is + try: + max_simd = node_inst.get_nodeattr("NumChannels") + # Note: Some of the recent additions do not define the + # NumChannels attribute + except AttributeError: + # We can extract the channels from the normal, i.e., not + # folded, shape of the input in these cases + max_simd = node_inst.get_normal_input_shape()[-1] self.optimize_attribute_val(node_inst, max_simd, "SIMD") else: warnings.warn("SetFolding doesn't know how to handle op_type " + op_type)