diff --git a/.gitignore b/.gitignore index 8d3c051bae..112fc29b95 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# tag files +tags + # gdb history .gdb_history @@ -176,7 +179,7 @@ cython_debug/ # IDE .vscode/* -# Docker container +# Docker container docker/config # yarn prettier module (for linting workflows) diff --git a/bin/sp-ization-benchmarking/include/sp-ization-benchmarking/nasnet_bench_graph_generator.h b/bin/sp-ization-benchmarking/include/sp-ization-benchmarking/nasnet_bench_graph_generator.h index a331676401..eaea06b1a5 100644 --- a/bin/sp-ization-benchmarking/include/sp-ization-benchmarking/nasnet_bench_graph_generator.h +++ b/bin/sp-ization-benchmarking/include/sp-ization-benchmarking/nasnet_bench_graph_generator.h @@ -9,133 +9,29 @@ * https://github.com/google-research/nasbench/blob/b94247037ee470418a3e56dcb83814e9be83f3a8/nasbench/api.py */ -#include "utils/containers/all_of.h" -#include "utils/containers/repeat.h" -#include "utils/containers/transform.h" -#include "utils/graph/algorithms.h" -#include "utils/graph/digraph/algorithms/get_edges.h" -#include "utils/graph/digraph/algorithms/get_initial_nodes.h" -#include "utils/graph/digraph/algorithms/get_terminal_nodes.h" -#include "utils/graph/digraph/algorithms/is_acyclic.h" -#include "utils/graph/digraph/algorithms/materialize_digraph_view.h" -#include "utils/graph/digraph/algorithms/transitive_reduction.h" -#include "utils/graph/instances/adjacency_digraph.h" -#include "utils/graph/node/algorithms.h" -#include "utils/graph/series_parallel/digraph_generation.h" +#include "utils/graph/digraph/digraph.h" +#include "utils/graph/digraph/digraph_view.h" #include "utils/nonnegative_int/nonnegative_int.h" #include #include -using AdjacencyMatrix = std::vector>; namespace FlexFlow { -const nonnegative_int MIN_NODES = nonnegative_int{6}; -const nonnegative_int MAX_NODES = nonnegative_int{8}; -const nonnegative_int MIN_EDGES = nonnegative_int{8}; -const nonnegative_int MAX_EDGES = 
nonnegative_int{11}; -const nonnegative_int NUM_CELLS = nonnegative_int{9}; - struct NasNetBenchConfig { - AdjacencyMatrix adjacency_matrix; + std::vector> adjacency_matrix; }; -bool is_valid_config(NasNetBenchConfig const &config) { - AdjacencyMatrix const &matrix = config.adjacency_matrix; - const size_t size = matrix.size(); - - auto is_valid_size = [](nonnegative_int s) { - return s >= MIN_NODES && s <= MAX_NODES; - }; - - auto is_square_matrix = [&](auto const &m) { - return all_of(m, [&](const auto &row) { return row.size() == size; }); - }; - - auto is_upper_triangular = [&](auto const &m) { - for (size_t i = 0; i < size; ++i) { - for (size_t j = 0; j <= i; ++j) { - if (matrix[i][j]) { - return false; - } - } - } - return true; - }; - - return is_valid_size(nonnegative_int{size}) && is_square_matrix(matrix) && - is_upper_triangular(matrix); -} - -bool is_valid_cell(DiGraphView const &g) { - nonnegative_int n_edges = nonnegative_int{get_edges(g).size()}; - nonnegative_int n_nodes = nonnegative_int{num_nodes(g)}; - return (is_acyclic(g)) && (get_initial_nodes(g).size() == 1) && - (get_terminal_nodes(g).size() == 1) && (n_edges <= MAX_EDGES) && - (n_edges >= MIN_EDGES) && (n_nodes <= MAX_NODES) && - (n_nodes >= MIN_NODES) && - (n_edges > n_nodes); // filter linear cell and diamond cell -} +bool is_valid_config(NasNetBenchConfig const &config); -NasNetBenchConfig generate_random_config() { - static std::uniform_int_distribution<> size_dist( - MIN_NODES.unwrap_nonnegative(), MAX_NODES.unwrap_nonnegative()); - Binary bin = Binary(0, 1); +bool is_valid_cell(DiGraphView const &g); - size_t num_nodes = - Uniform(MIN_NODES.unwrap_nonnegative(), MAX_NODES.unwrap_nonnegative())(); - std::vector> matrix(num_nodes, - std::vector(num_nodes, false)); - - for (size_t i = 0; i < num_nodes; ++i) { - for (size_t j = i + 1; j < num_nodes; ++j) { - matrix[i][j] = bin(); - } - } - - return {matrix}; -} +NasNetBenchConfig generate_random_config(); std::optional - 
maybe_generate_nasnet_bench_cell(NasNetBenchConfig const &config) { - if (!is_valid_config(config)) { - return std::nullopt; - } - - DiGraph g = DiGraph::create(); - std::vector nodes = add_nodes(g, config.adjacency_matrix.size()); - - for (size_t i = 0; i < nodes.size(); ++i) { - for (size_t j = i + 1; j < nodes.size(); ++j) { - if (config.adjacency_matrix[i][j]) { - g.add_edge(DirectedEdge{nodes[i], nodes[j]}); - } - } - } - - g = materialize_digraph_view(transitive_reduction(g)); - - if (!is_valid_cell(g)) { - return std::nullopt; - } + maybe_generate_nasnet_bench_cell(NasNetBenchConfig const &config); - return g; -} +DiGraph generate_nasnet_bench_cell(); -DiGraph generate_nasnet_bench_cell() { - while (true) { - NasNetBenchConfig config = generate_random_config(); - std::optional maybe_cell = - maybe_generate_nasnet_bench_cell(config); - if (maybe_cell) { - return maybe_cell.value(); - } - } -} +DiGraph generate_nasnet_bench_network(); -DiGraph generate_nasnet_bench_network() { - DiGraph g = series_composition( - transform(repeat(NUM_CELLS, generate_nasnet_bench_cell), - [](DiGraph const &cell) -> DiGraphView { return cell; })); - return g; -} } // namespace FlexFlow diff --git a/bin/sp-ization-benchmarking/src/sp-ization-benchmarking/nasnet_bench_graph_generator.cc b/bin/sp-ization-benchmarking/src/sp-ization-benchmarking/nasnet_bench_graph_generator.cc new file mode 100644 index 0000000000..125759b0de --- /dev/null +++ b/bin/sp-ization-benchmarking/src/sp-ization-benchmarking/nasnet_bench_graph_generator.cc @@ -0,0 +1,137 @@ +#include "sp-ization-benchmarking/nasnet_bench_graph_generator.h" +#include "utils/containers/all_of.h" +#include "utils/containers/repeat.h" +#include "utils/containers/transform.h" +#include "utils/graph/algorithms.h" +#include "utils/graph/digraph/algorithms/get_edges.h" +#include "utils/graph/digraph/algorithms/get_initial_nodes.h" +#include "utils/graph/digraph/algorithms/get_terminal_nodes.h" +#include 
"utils/graph/digraph/algorithms/is_acyclic.h" +#include "utils/graph/digraph/algorithms/materialize_digraph_view.h" +#include "utils/graph/digraph/algorithms/transitive_reduction.h" +#include "utils/graph/instances/adjacency_digraph.h" +#include "utils/graph/node/algorithms.h" +#include "utils/graph/series_parallel/digraph_generation.h" + +namespace FlexFlow { + +static const nonnegative_int MIN_NODES = nonnegative_int{6}; +static const nonnegative_int MAX_NODES = nonnegative_int{8}; +static const nonnegative_int MIN_EDGES = nonnegative_int{8}; +static const nonnegative_int MAX_EDGES = nonnegative_int{11}; +static const nonnegative_int NUM_CELLS = nonnegative_int{9}; + +using AdjacencyMatrix = std::vector>; + +bool is_valid_config(NasNetBenchConfig const &config) { + AdjacencyMatrix const &matrix = config.adjacency_matrix; + const size_t size = matrix.size(); + + auto is_valid_size = [](nonnegative_int s) { + return s >= MIN_NODES && s <= MAX_NODES; + }; + + auto is_square_matrix = [&](auto const &m) { + return all_of(m, [&](const auto &row) { return row.size() == size; }); + }; + + auto is_upper_triangular = [&](auto const &m) { + for (size_t i = 0; i < size; ++i) { + for (size_t j = 0; j <= i; ++j) { + if (matrix[i][j]) { + return false; + } + } + } + return true; + }; + + return is_valid_size(nonnegative_int{size}) && is_square_matrix(matrix) && + is_upper_triangular(matrix); +} + +bool is_valid_cell(DiGraphView const &g) { + nonnegative_int n_edges = nonnegative_int{get_edges(g).size()}; + nonnegative_int n_nodes = nonnegative_int{num_nodes(g)}; + return (is_acyclic(g)) && (get_initial_nodes(g).size() == 1) && + (get_terminal_nodes(g).size() == 1) && (n_edges <= MAX_EDGES) && + (n_edges >= MIN_EDGES) && (n_nodes <= MAX_NODES) && + (n_nodes >= MIN_NODES) && + (n_edges > n_nodes); // filter linear cell and diamond cell +} + +NasNetBenchConfig generate_random_config() { + static std::uniform_int_distribution<> size_dist( + MIN_NODES.unwrap_nonnegative(), 
MAX_NODES.unwrap_nonnegative()); + Binary bin = Binary(0, 1); + + size_t num_nodes = + Uniform(MIN_NODES.unwrap_nonnegative(), MAX_NODES.unwrap_nonnegative())(); + std::vector> matrix(num_nodes, + std::vector(num_nodes, false)); + + for (size_t i = 0; i < num_nodes; ++i) { + for (size_t j = i + 1; j < num_nodes; ++j) { + matrix[i][j] = bin(); + } + } + + return {matrix}; +} + +std::optional + maybe_generate_nasnet_bench_cell(NasNetBenchConfig const &config) { + if (!is_valid_config(config)) { + return std::nullopt; + } + + DiGraph g = DiGraph::create(); + std::vector nodes = add_nodes(g, config.adjacency_matrix.size()); + + for (size_t i = 0; i < nodes.size(); ++i) { + for (size_t j = i + 1; j < nodes.size(); ++j) { + if (config.adjacency_matrix[i][j]) { + g.add_edge(DirectedEdge{nodes[i], nodes[j]}); + } + } + } + + g = materialize_digraph_view(transitive_reduction(g)); + + if (!is_valid_cell(g)) { + return std::nullopt; + } + + return g; +} + +DiGraph generate_nasnet_bench_cell() { + while (true) { + NasNetBenchConfig config = generate_random_config(); + std::optional maybe_cell = + maybe_generate_nasnet_bench_cell(config); + if (maybe_cell) { + return maybe_cell.value(); + } + } +} + +DiGraph generate_nasnet_bench_cell() { + while (true) { + NasNetBenchConfig config = generate_random_config(); + std::optional maybe_cell = + maybe_generate_nasnet_bench_cell(config); + if (maybe_cell) { + return maybe_cell.value(); + } + } +} + +DiGraph generate_nasnet_bench_network() { + DiGraph g = series_composition( + transform(repeat(NUM_CELLS, generate_nasnet_bench_cell), + [](DiGraph const &cell) -> DiGraphView { return cell; })); + return g; +} + +} // namespace FlexFlow diff --git a/bin/sp-ization-benchmarking/src/sp-ization-benchmarking/sample_graphs.cc b/bin/sp-ization-benchmarking/src/sp-ization-benchmarking/sample_graphs.cc new file mode 100644 index 0000000000..bdd2525a4d --- /dev/null +++ b/bin/sp-ization-benchmarking/src/sp-ization-benchmarking/sample_graphs.cc @@ 
-0,0 +1 @@ +#include "sp-ization-benchmarking/sample_graphs.h" diff --git a/cmake/flexflow-utils.cmake b/cmake/flexflow-utils.cmake index 795668e32a..7d5d189b1c 100644 --- a/cmake/flexflow-utils.cmake +++ b/cmake/flexflow-utils.cmake @@ -8,7 +8,7 @@ macro(ff_parse_args) endmacro() function(define_ff_vars target) - target_compile_definitions(${target} PRIVATE + target_compile_definitions(${target} PRIVATE MAX_OPNAME=${FF_MAX_OPNAME} MAX_NUM_OUTPUTS=${FF_MAX_NUM_OUTPUTS} MAX_NUM_INPUTS=${FF_MAX_NUM_INPUTS} @@ -41,24 +41,24 @@ function(ff_set_cxx_properties target) CXX_EXTENSIONS NO ) target_compile_options(${target} - PUBLIC - $<$:> - "-ffile-prefix-map=${CMAKE_SOURCE_DIR}=." - "-fsanitize=undefined" + PUBLIC + $<$:> + "-ffile-prefix-map=${CMAKE_SOURCE_DIR}=." + "-fsanitize=undefined" "-fno-sanitize-recover=all" # add C++ compile flags here ) target_link_options(${target} - PUBLIC - $<$:> - "-fsanitize=undefined" + PUBLIC + $<$:> + "-fsanitize=undefined" "-fno-sanitize-recover=all" ) endfunction() function(ff_add_library) ff_parse_args( - PREFIX + PREFIX FF_LIBRARY ARGS NAME @@ -71,10 +71,10 @@ function(ff_add_library) PARSE ${ARGN} ) - + project(${FF_LIBRARY_NAME}) file(GLOB_RECURSE SRC - CONFIGURE_DEPENDS + CONFIGURE_DEPENDS LIST_DIRECTORIES False ${FF_LIBRARY_SRC_PATTERNS}) @@ -103,7 +103,7 @@ endfunction() function(ff_add_test_executable) ff_parse_args( - PREFIX + PREFIX FF_TEST_EXEC ARGS NAME @@ -145,7 +145,7 @@ endfunction() function(ff_add_benchmark_executable) ff_parse_args( - PREFIX + PREFIX FF_BENCHMARK_EXEC ARGS NAME @@ -172,6 +172,11 @@ function(ff_add_benchmark_executable) ${FF_BENCHMARK_EXEC_NAME} ${SRC}) + target_include_directories( + ${FF_BENCHMARK_EXEC_NAME} + PRIVATE + ${FF_BENCHMARK_EXEC_PRIVATE_INCLUDE}) + target_link_libraries( ${FF_BENCHMARK_EXEC_NAME} ${FF_BENCHMARK_EXEC_DEPS} @@ -184,7 +189,7 @@ endfunction() function(ff_add_executable) ff_parse_args( - PREFIX + PREFIX FF_EXEC ARGS NAME diff --git a/flake.lock b/flake.lock index 
ca71a446a9..e52833e4ed 100644 --- a/flake.lock +++ b/flake.lock @@ -66,11 +66,11 @@ ] }, "locked": { - "lastModified": 1773786960, - "narHash": "sha256-XGta5Z2idBD9bAvdmx+6kN0GQpNruwNYq1BSONH1Sgo=", + "lastModified": 1778104328, + "narHash": "sha256-bn0G8xDqBrVjp5htw1i3u8fPdPMVtoZXFzX7hJ6m9YY=", "ref": "refs/heads/master", - "rev": "da1097f7ef7ecc659a2ed740203c1be8262de7fa", - "revCount": 147, + "rev": "535ac756b2674dc10051b37be978b9d2cb9f817d", + "revCount": 156, "type": "git", "url": "https://git.sr.ht/~lockshaw/proj" }, diff --git a/lib/compiler/benchmark/src/compiler/series_parallel/computation_graph/get_computation_graph_series_parallel_decomposition.cc b/lib/compiler/benchmark/src/compiler/series_parallel/computation_graph/get_computation_graph_series_parallel_decomposition.cc index 1b548b0e0e..d6bf636822 100644 --- a/lib/compiler/benchmark/src/compiler/series_parallel/computation_graph/get_computation_graph_series_parallel_decomposition.cc +++ b/lib/compiler/benchmark/src/compiler/series_parallel/computation_graph/get_computation_graph_series_parallel_decomposition.cc @@ -17,7 +17,7 @@ static void benchmark_get_computation_graph_series_parallel_decomposition( BENCHMARK_CAPTURE(benchmark_get_computation_graph_series_parallel_decomposition, split_test, - get_split_test_computation_graph(/*batch_size=*/8_n)); + get_split_test_computation_graph(/*batch_size=*/8_p)); BENCHMARK_CAPTURE( benchmark_get_computation_graph_series_parallel_decomposition, diff --git a/lib/compiler/include/compiler/search_result.struct.toml b/lib/compiler/include/compiler/search_result.struct.toml deleted file mode 100644 index 7e7e59d7c9..0000000000 --- a/lib/compiler/include/compiler/search_result.struct.toml +++ /dev/null @@ -1,17 +0,0 @@ -namespace = "FlexFlow" -name = "SearchResult" -features = [ -] - -includes = [ - "pcg/parallel_computation_graph/parallel_computation_graph.h", - "compiler/machine_mapping/machine_mapping.h", -] - -[[fields]] -name = "pcg" -type = 
"::FlexFlow::ParallelComputationGraph" - -[[fields]] -name = "machine_mapping" -type = "::FlexFlow::MachineMapping" \ No newline at end of file diff --git a/lib/local-execution/include/local-execution/computation_graph_instance/computation_graph_instance.h b/lib/local-execution/include/local-execution/computation_graph_instance.h similarity index 100% rename from lib/local-execution/include/local-execution/computation_graph_instance/computation_graph_instance.h rename to lib/local-execution/include/local-execution/computation_graph_instance.h diff --git a/lib/local-execution/src/local-execution/computation_graph_instance/computation_graph_instance.cc b/lib/local-execution/src/local-execution/computation_graph_instance.cc similarity index 99% rename from lib/local-execution/src/local-execution/computation_graph_instance/computation_graph_instance.cc rename to lib/local-execution/src/local-execution/computation_graph_instance.cc index 961dfae3f1..ae4e4ada0e 100644 --- a/lib/local-execution/src/local-execution/computation_graph_instance/computation_graph_instance.cc +++ b/lib/local-execution/src/local-execution/computation_graph_instance.cc @@ -1,4 +1,4 @@ -#include "local-execution/computation_graph_instance/computation_graph_instance.h" +#include "local-execution/computation_graph_instance.h" #include "local-execution/per_device_op_state_initialization.h" #include "local-execution/task_execution.h" #include "local-execution/tensor_allocation.h" diff --git a/lib/local-execution/src/local-execution/cost_estimator/local_cost_estimator.cc b/lib/local-execution/src/local-execution/cost_estimator/local_cost_estimator.cc index 89010c543e..e3e88c7eca 100644 --- a/lib/local-execution/src/local-execution/cost_estimator/local_cost_estimator.cc +++ b/lib/local-execution/src/local-execution/cost_estimator/local_cost_estimator.cc @@ -4,7 +4,7 @@ #include "kernels/device.h" #include "kernels/local_cpu_allocator.h" #include "kernels/local_cuda_allocator.h" -#include 
"local-execution/computation_graph_instance/computation_graph_instance.h" +#include "local-execution/computation_graph_instance.h" #include "local-execution/cost_estimator/tracked_allocator.h" #include "op-attrs/computation_graph_op_attrs.h" #include "op-attrs/pcg_operator_attrs.h" diff --git a/lib/local-execution/test/src/local-execution/test_e2e.cc b/lib/local-execution/test/src/local-execution/computation_graph_instance.cc similarity index 71% rename from lib/local-execution/test/src/local-execution/test_e2e.cc rename to lib/local-execution/test/src/local-execution/computation_graph_instance.cc index da62d22071..aaeb253b5b 100644 --- a/lib/local-execution/test/src/local-execution/test_e2e.cc +++ b/lib/local-execution/test/src/local-execution/computation_graph_instance.cc @@ -1,3 +1,4 @@ +#include "local-execution/computation_graph_instance.h" #include "kernels/compare_tensor_accessors.h" #include "kernels/copy_tensor_accessor.h" #include "kernels/device_handle_t.h" @@ -7,7 +8,6 @@ #include "kernels/managed_ff_stream.h" #include "kernels/managed_per_device_ff_handle.h" #include "kernels/tensor_accessor_reductions.h" -#include "local-execution/computation_graph_instance/computation_graph_instance.h" #include "op-attrs/ops/loss_functions/loss_attrs.dtg.h" #include "pcg/computation_graph.h" #include "pcg/computation_graph_builder.h" @@ -357,4 +357,149 @@ TEST_SUITE(FF_CUDA_TEST_SUITE) { GenericTensorAccessorR last_epoch = loss_values.back(); CHECK(did_loss_decrease(first_epoch_loss, last_epoch)); } + + TEST_CASE("LossFunctions") { + // initialize runtime + ManagedFFStream managed_stream{}; + ManagedPerDeviceFFHandle managed_handle = initialize_single_gpu_handle( + /*workSpaceSize=*/1024 * 1024, + /*allowTensorOpMathConversion=*/true); + + Allocator allocator = create_local_cuda_memory_allocator(); + + positive_int batch_size = 10_p; + positive_int data_dim = 16_p; + positive_int output_dim = 32_p; + + // construct computation graph + ComputationGraph 
computation_graph = make_empty_computation_graph(); + + TensorShape input_tensor_shape = TensorShape{ + TensorDims{FFOrdered{batch_size, data_dim}}, DataType::FLOAT}; + + TensorShape weight_shape = TensorShape{ + TensorDims{FFOrdered{data_dim, output_dim}}, DataType::FLOAT}; + + LayerAddedResult inputs_layer = + add_input_layer(computation_graph, input_tensor_shape); + tensor_guid_t inputs_tensor = + require_only_key(inputs_layer.outputs, TensorSlotName::OUTPUT); + + LayerAddedResult weights_layer = add_layer( + computation_graph, + LayerAttrs{ComputationGraphOpAttrs{WeightAttrs{ + weight_shape, InitializerAttrs{ZeroInitializerAttrs{}}}}, + std::nullopt}, + {}, + {}); + tensor_guid_t weights_tensor = + require_only_key(weights_layer.outputs, TensorSlotName::OUTPUT); + + LayerAddedResult linear_operator = add_layer( + computation_graph, + LayerAttrs{ComputationGraphOpAttrs{LinearAttrs{output_dim, + /*use_bias=*/false, + DataType::FLOAT, + Activation::RELU, + std::nullopt}}, + std::nullopt}, + { + { + TensorSlotName::INPUT, + inputs_tensor, + }, + }, + { + { + TensorSlotName::WEIGHT, + weights_tensor, + }, + }); + tensor_guid_t logit_tensor = + require_only_key(linear_operator.outputs, TensorSlotName::OUTPUT); + + OptimizerAttrs optimizer_attrs = OptimizerAttrs{ + SGDOptimizerAttrs{ + /*lr=*/0.0, + /*momentum=*/0.0, + /*nesterov=*/false, + /*weight_decay=*/0.0, + }, + }; + + device_id_t device_idx = + make_device_id_t_from_idx(nonnegative_int{0}, DeviceType::GPU); + device_handle_t ff_handle = + gpu_make_device_handle_t(managed_handle.raw_handle()); + + std::unordered_map input_tensors; + + auto compute_loss = [&](LossAttrs const &loss_attrs, + GenericTensorAccessorR label_tensor) { + ComputationGraphInstance computation_graph_instance = + create_computation_graph_instance( + /*cg=*/computation_graph, + /*optimizer=*/optimizer_attrs, + /*loss=*/ + LossConfig{ + /*loss_attrs=*/loss_attrs, + /*label_tensor=*/label_tensor, + /*logit_tensor=*/logit_tensor, + }, + 
/*input_tensors=*/input_tensors, + /*allocator=*/allocator, + /*profiling_settings=*/ProfilingSettings{0, 1}, + /*device_handle=*/ff_handle, + /*iteration_config=*/FFIterationConfig{1_p}, + /*device_idx=*/device_idx); + + perform_all_passes_for_computation_graph_instance( + /*instance=*/computation_graph_instance, + /*profiling_settings=*/ProfilingSettings{0, 0}, + /*ff_handle=*/ff_handle, + /*iteration_config=*/FFIterationConfig{1_p}, + /*device_idx=*/device_idx); + assert_unwrap(computation_graph_instance.get_loss_tensor_accessor()); + }; + + SUBCASE("SparseCategoricalCrossEntropyLossAttrs") { + TensorShape label_tensor_shape = + TensorShape{TensorDims{FFOrdered{batch_size, 1_p}}, DataType::FLOAT}; + GenericTensorAccessorW label_tensor = + allocator.allocate_tensor(label_tensor_shape); + + LossAttrs loss_attrs = LossAttrs{ + SparseCategoricalCrossEntropyLossAttrs{/*replace_labels=*/false}}; + + compute_loss(loss_attrs, label_tensor); + } + + SUBCASE("NonconfigurableLossAttrs") { + TensorShape label_tensor_shape = TensorShape{ + TensorDims{FFOrdered{batch_size, output_dim}}, DataType::FLOAT}; + GenericTensorAccessorW label_tensor = + allocator.allocate_tensor(label_tensor_shape); + + SUBCASE("LossFunction::CATEGORICAL_CROSSENTROPY") { + LossAttrs loss_attrs = LossAttrs{ + NonconfigurableLossAttrs{LossFunction::CATEGORICAL_CROSSENTROPY}}; + + compute_loss(loss_attrs, label_tensor); + } + + SUBCASE("LossFunction::MEAN_SQUARED_ERROR_AVG_REDUCE") { + LossAttrs loss_attrs = LossAttrs{NonconfigurableLossAttrs{ + LossFunction::MEAN_SQUARED_ERROR_AVG_REDUCE}}; + + compute_loss(loss_attrs, label_tensor); + } + + SUBCASE("LossFunction::IDENTITY") { + LossAttrs loss_attrs = + LossAttrs{NonconfigurableLossAttrs{LossFunction::IDENTITY}}; + + compute_loss(loss_attrs, label_tensor); + } + } + } } diff --git a/lib/local-execution/test/src/local-execution/local_cost_estimator.cc b/lib/local-execution/test/src/local-execution/cost_estimator/local_cost_estimator.cc similarity index 
100% rename from lib/local-execution/test/src/local-execution/local_cost_estimator.cc rename to lib/local-execution/test/src/local-execution/cost_estimator/local_cost_estimator.cc diff --git a/lib/local-execution/test/src/local-execution/loss_functions.cc b/lib/local-execution/test/src/local-execution/loss_functions.cc deleted file mode 100644 index 39aa5f138a..0000000000 --- a/lib/local-execution/test/src/local-execution/loss_functions.cc +++ /dev/null @@ -1,162 +0,0 @@ -#include "kernels/device_handle_t.h" -#include "kernels/local_cuda_allocator.h" -#include "kernels/managed_ff_stream.h" -#include "kernels/managed_per_device_ff_handle.h" -#include "local-execution/computation_graph_instance/computation_graph_instance.h" -#include "op-attrs/ops/loss_functions/loss_attrs.dtg.h" -#include "pcg/computation_graph.h" -#include "pcg/computation_graph_builder.h" -#include "pcg/device_id_t.h" -#include "pcg/optimizer_attrs.dtg.h" -#include "utils/containers/require_only_key.h" -#include "utils/optional.h" -#include - -using namespace ::FlexFlow; - -TEST_SUITE(FF_CUDA_TEST_SUITE) { - TEST_CASE("LossFunctions") { - // initialize runtime - ManagedFFStream managed_stream{}; - ManagedPerDeviceFFHandle managed_handle = initialize_single_gpu_handle( - /*workSpaceSize=*/1024 * 1024, - /*allowTensorOpMathConversion=*/true); - - Allocator allocator = create_local_cuda_memory_allocator(); - - positive_int batch_size = 10_p; - positive_int data_dim = 16_p; - positive_int output_dim = 32_p; - - // construct computation graph - ComputationGraph computation_graph = make_empty_computation_graph(); - - TensorShape input_tensor_shape = TensorShape{ - TensorDims{FFOrdered{batch_size, data_dim}}, DataType::FLOAT}; - - TensorShape weight_shape = TensorShape{ - TensorDims{FFOrdered{data_dim, output_dim}}, DataType::FLOAT}; - - LayerAddedResult inputs_layer = - add_input_layer(computation_graph, input_tensor_shape); - tensor_guid_t inputs_tensor = - require_only_key(inputs_layer.outputs, 
TensorSlotName::OUTPUT); - - LayerAddedResult weights_layer = add_layer( - computation_graph, - LayerAttrs{ComputationGraphOpAttrs{WeightAttrs{ - weight_shape, InitializerAttrs{ZeroInitializerAttrs{}}}}, - std::nullopt}, - {}, - {}); - tensor_guid_t weights_tensor = - require_only_key(weights_layer.outputs, TensorSlotName::OUTPUT); - - LayerAddedResult linear_operator = add_layer( - computation_graph, - LayerAttrs{ComputationGraphOpAttrs{LinearAttrs{output_dim, - /*use_bias=*/false, - DataType::FLOAT, - Activation::RELU, - std::nullopt}}, - std::nullopt}, - { - { - TensorSlotName::INPUT, - inputs_tensor, - }, - }, - { - { - TensorSlotName::WEIGHT, - weights_tensor, - }, - }); - tensor_guid_t logit_tensor = - require_only_key(linear_operator.outputs, TensorSlotName::OUTPUT); - - OptimizerAttrs optimizer_attrs = OptimizerAttrs{ - SGDOptimizerAttrs{ - /*lr=*/0.0, - /*momentum=*/0.0, - /*nesterov=*/false, - /*weight_decay=*/0.0, - }, - }; - - device_id_t device_idx = - make_device_id_t_from_idx(nonnegative_int{0}, DeviceType::GPU); - device_handle_t ff_handle = - gpu_make_device_handle_t(managed_handle.raw_handle()); - - std::unordered_map input_tensors; - - auto compute_loss = [&](LossAttrs const &loss_attrs, - GenericTensorAccessorR label_tensor) { - ComputationGraphInstance computation_graph_instance = - create_computation_graph_instance( - /*cg=*/computation_graph, - /*optimizer=*/optimizer_attrs, - /*loss=*/ - LossConfig{ - /*loss_attrs=*/loss_attrs, - /*label_tensor=*/label_tensor, - /*logit_tensor=*/logit_tensor, - }, - /*input_tensors=*/input_tensors, - /*allocator=*/allocator, - /*profiling_settings=*/ProfilingSettings{0, 1}, - /*device_handle=*/ff_handle, - /*iteration_config=*/FFIterationConfig{1_p}, - /*device_idx=*/device_idx); - - perform_all_passes_for_computation_graph_instance( - /*instance=*/computation_graph_instance, - /*profiling_settings=*/ProfilingSettings{0, 0}, - /*ff_handle=*/ff_handle, - /*iteration_config=*/FFIterationConfig{1_p}, - 
/*device_idx=*/device_idx); - assert_unwrap(computation_graph_instance.get_loss_tensor_accessor()); - }; - - SUBCASE("SparseCategoricalCrossEntropyLossAttrs") { - TensorShape label_tensor_shape = - TensorShape{TensorDims{FFOrdered{batch_size, 1_p}}, DataType::FLOAT}; - GenericTensorAccessorW label_tensor = - allocator.allocate_tensor(label_tensor_shape); - - LossAttrs loss_attrs = LossAttrs{ - SparseCategoricalCrossEntropyLossAttrs{/*replace_labels=*/false}}; - - compute_loss(loss_attrs, label_tensor); - } - - SUBCASE("NonconfigurableLossAttrs") { - TensorShape label_tensor_shape = TensorShape{ - TensorDims{FFOrdered{batch_size, output_dim}}, DataType::FLOAT}; - GenericTensorAccessorW label_tensor = - allocator.allocate_tensor(label_tensor_shape); - - SUBCASE("LossFunction::CATEGORICAL_CROSSENTROPY") { - LossAttrs loss_attrs = LossAttrs{ - NonconfigurableLossAttrs{LossFunction::CATEGORICAL_CROSSENTROPY}}; - - compute_loss(loss_attrs, label_tensor); - } - - SUBCASE("LossFunction::MEAN_SQUARED_ERROR_AVG_REDUCE") { - LossAttrs loss_attrs = LossAttrs{NonconfigurableLossAttrs{ - LossFunction::MEAN_SQUARED_ERROR_AVG_REDUCE}}; - - compute_loss(loss_attrs, label_tensor); - } - - SUBCASE("LossFunction::IDENTITY") { - LossAttrs loss_attrs = - LossAttrs{NonconfigurableLossAttrs{LossFunction::IDENTITY}}; - - compute_loss(loss_attrs, label_tensor); - } - } - } -} diff --git a/lib/local-pcg-execution/include/local-pcg-execution/parallel_model_training_instance.h b/lib/local-pcg-execution/include/local-pcg-execution/parallel_model_training_instance.h deleted file mode 100644 index 8cfc261774..0000000000 --- a/lib/local-pcg-execution/include/local-pcg-execution/parallel_model_training_instance.h +++ /dev/null @@ -1,47 +0,0 @@ -#ifndef _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_PARALLEL_MODEL_TRAINING_INSTANCE_H -#define _FLEXFLOW_LIB_LOCAL_PCG_EXECUTION_INCLUDE_LOCAL_PCG_EXECUTION_PARALLEL_MODEL_TRAINING_INSTANCE_H - -#include 
"compiler/mapped_parallel_computation_graph.dtg.h" -#include "kernels/allocation.h" -#include "local-execution/local_atomic_tensor_backing.dtg.h" -#include "local-execution/local_task_registry.dtg.h" -#include "local-pcg-execution/local_parallel_tensor_backing.dtg.h" -#include "local-pcg-execution/task_group_execution_times.dtg.h" -#include "op-attrs/ops/loss_functions/loss_attrs.dtg.h" -#include "pcg/optimizer_attrs.dtg.h" -#include "pcg/parallel_computation_graph/parallel_layer_guid_t.dtg.h" -#include "task-spec/runtime_task_invocation/runtime_arg_config.dtg.h" -#include "task-spec/symbolic/training_symbolic_computation_graph_from_pcg_conversion.dtg.h" - -namespace FlexFlow { - -struct ParallelModelTrainingInstance { - ParallelModelTrainingInstance(Allocator const &, - LossAttrs const &, - OptimizerAttrs const &); - -public: - std::unordered_map> - forward(); - std::unordered_map> - backward(); - void update(); - GenericTensorAccessorR get_loss_tensor_accessor() const; - -private: - Allocator allocator; - LossAttrs loss_attrs; - OptimizerAttrs optimizer_attrs; - TrainingSymbolicComputationGraphFromPcgConversion symbolic_cg; - MappedParallelComputationGraph mapped_pcg; - LocalParallelTensorBacking local_tensor_backing; - LocalAtomicTensorBacking local_atomic_tensor_backing; - LocalTaskRegistry local_task_registry; - RuntimeArgConfig runtime_arg_config; -}; - -} // namespace FlexFlow - -#endif diff --git a/lib/op-attrs/include/op-attrs/num_ptensor_shard_dims_t.dtg.toml b/lib/op-attrs/include/op-attrs/num_ptensor_shard_dims_t.dtg.toml index 45372cf7e8..8ca055441d 100644 --- a/lib/op-attrs/include/op-attrs/num_ptensor_shard_dims_t.dtg.toml +++ b/lib/op-attrs/include/op-attrs/num_ptensor_shard_dims_t.dtg.toml @@ -9,11 +9,11 @@ features = [ "json", ] -doctstring = """\ -A wrapper type describing the number of shard dims (i.e., not including replia dims) in a parallel tensor, +docstring = """\ +A wrapper type describing the number of shard dims (i.e., not including 
 replica dims) in a parallel tensor, to prevent accidentally confusing the number of shard dims and the total number of parallel dims. -The conversion to/from @ref num_ptensor_parallel_dims_t is trivial, and provided by the +The conversion to/from @ref num_ptensor_parallel_dims_t is trivial, and provided by the functions @ref num_ptensor_parallel_dims_from_shard_dims and @ref num_ptensor_shard_dims_from_parallel_dims. """ diff --git a/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_device_specific_ptr.h b/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_device_specific_ptr.h index 726aef84ba..db5c4a399b 100644 --- a/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_device_specific_ptr.h +++ b/lib/realm-execution/include/realm-execution/tasks/serializer/serializable_device_specific_ptr.h @@ -3,6 +3,7 @@ #include "realm-execution/device_specific_ptr.h" #include "realm-execution/tasks/serializer/serializable_device_specific_ptr.dtg.h" +#include "utils/containers/transform.h" namespace FlexFlow { diff --git a/lib/realm-execution/src/realm-execution/device_specific_ptr.cc b/lib/realm-execution/src/realm-execution/device_specific_ptr.cc new file mode 100644 index 0000000000..977f57555d --- /dev/null +++ b/lib/realm-execution/src/realm-execution/device_specific_ptr.cc @@ -0,0 +1,10 @@ +#include "realm-execution/device_specific_ptr.h" +#include "utils/archetypes/value_type.h" + +namespace FlexFlow { + +using T = value_type<0>; + +template struct DeviceSpecificPtr; + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/realm.cc b/lib/realm-execution/src/realm-execution/realm.cc new file mode 100644 index 0000000000..38b3281f8b --- /dev/null +++ b/lib/realm-execution/src/realm-execution/realm.cc @@ -0,0 +1 @@ +#include "realm-execution/realm.h" diff --git a/lib/realm-execution/src/realm-execution/tasks/serializer/serializable_device_specific_ptr.cc 
b/lib/realm-execution/src/realm-execution/tasks/serializer/serializable_device_specific_ptr.cc new file mode 100644 index 0000000000..13ea814889 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/tasks/serializer/serializable_device_specific_ptr.cc @@ -0,0 +1,14 @@ +#include "realm-execution/tasks/serializer/serializable_device_specific_ptr.h" +#include "utils/archetypes/value_type.h" + +namespace FlexFlow { + +using T = value_type<0>; + +template SerializableDeviceSpecificPtr + device_specific_ptr_to_serializable(DeviceSpecificPtr const &); + +template DeviceSpecificPtr device_specific_ptr_from_serializable( + SerializableDeviceSpecificPtr const &); + +} // namespace FlexFlow diff --git a/lib/realm-execution/src/realm-execution/tasks/serializer/task_arg_serializer.cc b/lib/realm-execution/src/realm-execution/tasks/serializer/task_arg_serializer.cc new file mode 100644 index 0000000000..e17e24ba68 --- /dev/null +++ b/lib/realm-execution/src/realm-execution/tasks/serializer/task_arg_serializer.cc @@ -0,0 +1,12 @@ +#include "realm-execution/tasks/serializer/task_arg_serializer.h" +#include "utils/archetypes/jsonable_value_type.h" + +namespace FlexFlow { + +using T = jsonable_value_type<0>; + +template std::string serialize_task_args(T const &); + +template T deserialize_task_args(void const *, size_t); + +} // namespace FlexFlow diff --git a/lib/task-spec/include/task-spec/dynamic_graph/dynamic_node_invocation.h b/lib/task-spec/include/task-spec/dynamic_graph/dynamic_node_invocation.h deleted file mode 100644 index 94a4886b49..0000000000 --- a/lib/task-spec/include/task-spec/dynamic_graph/dynamic_node_invocation.h +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_DYNAMIC_NODE_INVOCATION_H -#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_DYNAMIC_GRAPH_DYNAMIC_NODE_INVOCATION_H - -#include "task-spec/dynamic_graph/dynamic_node_attrs.dtg.h" - -namespace FlexFlow { - -bool 
invocation_fully_satisfies_expansion_conditions( - std::function const &node_condition, - std::function const &slot_condition, - std::function const &) { - -] - -} // namespace FlexFlow - -#endif diff --git a/lib/task-spec/include/task-spec/ops/impl/parallel_op.h b/lib/task-spec/include/task-spec/ops/impl/parallel_op.h deleted file mode 100644 index 7061821b62..0000000000 --- a/lib/task-spec/include/task-spec/ops/impl/parallel_op.h +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_OPS_IMPL_PARALLEL_OP_H -#define _FLEXFLOW_LIB_TASK_SPEC_INCLUDE_TASK_SPEC_OPS_IMPL_PARALLEL_OP_H - -#include "parallel_op_info.h" -#include "utils/optional.h" - -namespace FlexFlow { - -struct ParallelOpJoinResult { - std::optional op = std::nullopt; - bool join_did_succeed = false; -}; - -ParallelOpJoinResult try_join_parallel_ops(ParallelOpInfo const &, - ParallelOpInfo const &); - -/* class ParallelOp : public Op { */ -/* public: */ -/* ParallelOp(FFModel &model, */ -/* OperatorType type, */ -/* char const *_name, */ -/* const ParallelTensor input); */ -/* virtual void init(FFModel const &) = 0; */ -/* virtual void forward(FFModel const &) = 0; */ -/* virtual void backward(FFModel const &) = 0; */ -/* virtual void create_input_partition(FFModel &model) = 0; */ -/* virtual bool measure_operator_cost(Simulator *sim, */ -/* MachineView const &pc, */ -/* CostMetrics &cost_metrics) const = 0; */ -/* virtual bool append_parallel_op_info( */ -/* std::vector ¶llel_ops) const = 0; */ -/* virtual bool is_parallel_op() const; */ - -/* public: */ -/* Legion::LogicalPartition input_lp, output_grad_lp; */ -/* }; */ - -} // namespace FlexFlow - -#endif diff --git a/lib/task-spec/src/task-spec/serialization.cc b/lib/task-spec/src/task-spec/serialization.cc new file mode 100644 index 0000000000..a2ad6eabfa --- /dev/null +++ b/lib/task-spec/src/task-spec/serialization.cc @@ -0,0 +1 @@ +#include "task-spec/serialization.h" diff --git a/lib/task-spec/test/CMakeLists.txt 
b/lib/task-spec/test/CMakeLists.txt index 354d9358a5..9665dba88e 100644 --- a/lib/task-spec/test/CMakeLists.txt +++ b/lib/task-spec/test/CMakeLists.txt @@ -2,8 +2,8 @@ ff_add_test_executable( NAME task-spec-tests SRC_PATTERNS - src/task-spec/dynamic_graph/*.cc - PRIVATE_INCLUDE + src/*.cc + PRIVATE_INCLUDE src/ DEPS doctest diff --git a/lib/task-spec/test/src/task-spec/device_specific.cc b/lib/task-spec/test/src/task-spec/device_specific.cc index b5ee11d109..34ef9b2bef 100644 --- a/lib/task-spec/test/src/task-spec/device_specific.cc +++ b/lib/task-spec/test/src/task-spec/device_specific.cc @@ -5,13 +5,17 @@ using namespace ::FlexFlow; TEST_SUITE(FF_TEST_SUITE) { TEST_CASE("DeviceSpecific") { - DeviceSpecific device_specific = + DeviceSpecific device_specific1 = + DeviceSpecific::create(device_id_t{gpu_id_t{0_n}}, + "hello world"); + + DeviceSpecific device_specific2 = DeviceSpecific::create(device_id_t{gpu_id_t{1_n}}, "hello world"); - std::string result = fmt::to_string(device_specific); - std::string correct = "hi"; + std::string result1 = fmt::to_string(device_specific1); + std::string result2 = fmt::to_string(device_specific2); - ASSERT(result == correct); + CHECK(result1 != result2); } } diff --git a/lib/task-spec/test/src/task-spec/op_ordered_slot_signature.cc b/lib/task-spec/test/src/task-spec/op_ordered_slot_signature.cc deleted file mode 100644 index c9da5953da..0000000000 --- a/lib/task-spec/test/src/task-spec/op_ordered_slot_signature.cc +++ /dev/null @@ -1,10 +0,0 @@ -#include "task-spec/op_ordered_slot_signature.h" -#include - -using namespace ::FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("get_op_ordered_slot_signature_for_binding") { - CHECK_MESSAGE(false, "TODO: get_op_ordered_slot_signature_for_binding"); - } -} diff --git a/lib/task-spec/test/src/task-spec/training_tensor_group.cc b/lib/task-spec/test/src/task-spec/training_tensor_group.cc deleted file mode 100644 index b40c38ce69..0000000000 --- 
a/lib/task-spec/test/src/task-spec/training_tensor_group.cc +++ /dev/null @@ -1,36 +0,0 @@ -#include "task-spec/training_tensor_group.h" -#include "test/utils/doctest/fmt/unordered_set.h" -#include - -using namespace ::FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("get_all_training_tensors_in_tensor_group") { - forward_tensor_guid_t forward_tensor = forward_tensor_guid_t{3}; - gradient_tensor_guid_t gradient_tensor = gradient_tensor_guid_t{5}; - optimizer_tensor_guid_t optimizer_tensor1 = optimizer_tensor_guid_t{8}; - optimizer_tensor_guid_t optimizer_tensor2 = optimizer_tensor_guid_t{3}; - - std::vector optimizer_tensors = { - optimizer_tensor1, - optimizer_tensor2, - }; - - TrainingTensorGroup training_tensor_group = TrainingTensorGroup{ - /*forward_tensor=*/forward_tensor, - /*gradient_tensor=*/gradient_tensor, - /*optimizer_tensors=*/optimizer_tensors, - }; - - std::unordered_set result = - get_all_training_tensors_in_tensor_group(training_tensor_group); - std::unordered_set correct = { - training_tensor_guid_t{forward_tensor}, - training_tensor_guid_t{gradient_tensor}, - training_tensor_guid_t{optimizer_tensor1}, - training_tensor_guid_t{optimizer_tensor2}, - }; - - CHECK(result == correct); - } -} diff --git a/lib/task-spec/test/src/task-spec/training_tensor_group_with_attrs.cc b/lib/task-spec/test/src/task-spec/training_tensor_group_with_attrs.cc deleted file mode 100644 index f769a877ad..0000000000 --- a/lib/task-spec/test/src/task-spec/training_tensor_group_with_attrs.cc +++ /dev/null @@ -1,84 +0,0 @@ -#include "task-spec/training_tensor_group_with_attrs.h" -#include - -using namespace ::FlexFlow; - -TEST_SUITE(FF_TEST_SUITE) { - TEST_CASE("make_training_tensor_group_with_attrs_from_group_and_attrs") { - TensorAttrs tensor_attrs = TensorAttrs{ - /*shape=*/TensorShape{ - /*dims=*/TensorDims{FFOrdered{ - 8_p, - 2_p, - 3_p, - }}, - /*data_type=*/DataType::FLOAT, - }, - /*create_grad=*/CreateGrad::YES, - }; - - forward_tensor_guid_t forward_tensor = 
forward_tensor_guid_t{3}; - gradient_tensor_guid_t gradient_tensor = gradient_tensor_guid_t{5}; - std::vector optimizer_tensors = { - optimizer_tensor_guid_t{8}, - optimizer_tensor_guid_t{3}, - }; - - TrainingTensorGroup training_tensor_group = TrainingTensorGroup{ - /*forward_tensor=*/forward_tensor, - /*gradient_tensor=*/gradient_tensor, - /*optimizer_tensors=*/optimizer_tensors, - }; - - TrainingTensorGroupWithAttrs result = - make_training_tensor_group_with_attrs_from_group_and_attrs( - training_tensor_group, tensor_attrs); - TrainingTensorGroupWithAttrs correct = TrainingTensorGroupWithAttrs{ - /*tensor_attrs=*/tensor_attrs, - /*forward_tensor=*/forward_tensor, - /*gradient_tensor=*/gradient_tensor, - /*optimizer_tensors=*/optimizer_tensors, - }; - - CHECK(result == correct); - } - - TEST_CASE("tensor_group_without_attrs") { - TensorAttrs tensor_attrs = TensorAttrs{ - /*shape=*/TensorShape{ - /*dims=*/TensorDims{FFOrdered{ - 8_p, - 2_p, - 3_p, - }}, - /*data_type=*/DataType::FLOAT, - }, - /*create_grad=*/CreateGrad::YES, - }; - - forward_tensor_guid_t forward_tensor = forward_tensor_guid_t{3}; - gradient_tensor_guid_t gradient_tensor = gradient_tensor_guid_t{5}; - std::vector optimizer_tensors = { - optimizer_tensor_guid_t{8}, - optimizer_tensor_guid_t{3}, - }; - - TrainingTensorGroupWithAttrs tensor_group_with_attrs = - TrainingTensorGroupWithAttrs{ - /*tensor_attrs=*/tensor_attrs, - /*forward_tensor=*/forward_tensor, - /*gradient_tensor=*/gradient_tensor, - /*optimizer_tensors=*/optimizer_tensors, - }; - - TrainingTensorGroup result = - tensor_group_without_attrs(tensor_group_with_attrs); - TrainingTensorGroup correct = TrainingTensorGroup{ - /*forward_tensor=*/forward_tensor, - /*gradient_tensor=*/gradient_tensor, - /*optimizer_tensors=*/optimizer_tensors, - }; - - CHECK(result == correct); - } -} diff --git a/lib/utils/benchmark/src/utils/graph/digraph/algorithms/random_dag.cc b/lib/utils/benchmark/src/internal/random_dag.cc similarity index 100% rename 
from lib/utils/benchmark/src/utils/graph/digraph/algorithms/random_dag.cc rename to lib/utils/benchmark/src/internal/random_dag.cc diff --git a/lib/utils/benchmark/src/utils/graph/digraph/algorithms/random_dag.h b/lib/utils/benchmark/src/internal/random_dag.h similarity index 100% rename from lib/utils/benchmark/src/utils/graph/digraph/algorithms/random_dag.h rename to lib/utils/benchmark/src/internal/random_dag.h diff --git a/lib/utils/benchmark/src/utils/graph/digraph/algorithms/transitive_closure.cc b/lib/utils/benchmark/src/utils/graph/digraph/algorithms/transitive_closure.cc index a22b41ee6c..80f393eccd 100644 --- a/lib/utils/benchmark/src/utils/graph/digraph/algorithms/transitive_closure.cc +++ b/lib/utils/benchmark/src/utils/graph/digraph/algorithms/transitive_closure.cc @@ -1,5 +1,5 @@ #include "utils/graph/digraph/algorithms/transitive_closure.h" -#include "./random_dag.h" +#include "internal/random_dag.h" #include using namespace ::FlexFlow; diff --git a/lib/utils/benchmark/src/utils/graph/digraph/algorithms/transitive_reduction.cc b/lib/utils/benchmark/src/utils/graph/digraph/algorithms/transitive_reduction.cc index bbb3d238a2..f72b0908ac 100644 --- a/lib/utils/benchmark/src/utils/graph/digraph/algorithms/transitive_reduction.cc +++ b/lib/utils/benchmark/src/utils/graph/digraph/algorithms/transitive_reduction.cc @@ -1,5 +1,5 @@ #include "utils/graph/digraph/algorithms/transitive_reduction.h" -#include "./random_dag.h" +#include "internal/random_dag.h" #include using namespace ::FlexFlow; diff --git a/lib/utils/include/utils/full_binary_tree/binary_tree_path_entry.dtg.toml b/lib/utils/include/utils/full_binary_tree/binary_tree_path_entry.dtg.toml index c4567a0e87..84f95c0d2e 100644 --- a/lib/utils/include/utils/full_binary_tree/binary_tree_path_entry.dtg.toml +++ b/lib/utils/include/utils/full_binary_tree/binary_tree_path_entry.dtg.toml @@ -10,8 +10,6 @@ features = [ [[values]] name = "LEFT_CHILD" -key = "left" [[values]] name = "RIGHT_CHILD" -key = 
"right" diff --git a/lib/utils/include/utils/full_binary_tree/full_binary_tree_node_type.dtg.toml b/lib/utils/include/utils/full_binary_tree/full_binary_tree_node_type.dtg.toml index dc49c0b696..e9148a6506 100644 --- a/lib/utils/include/utils/full_binary_tree/full_binary_tree_node_type.dtg.toml +++ b/lib/utils/include/utils/full_binary_tree/full_binary_tree_node_type.dtg.toml @@ -10,8 +10,6 @@ features = [ [[values]] name = "PARENT" -key = "parent" [[values]] name = "LEAF" -key = "leaf" diff --git a/lib/utils/src/utils/graph/open_kwarg_dataflow_graph/open_kwarg_dataflow_graph.cc b/lib/utils/src/utils/graph/open_kwarg_dataflow_graph/open_kwarg_dataflow_graph.cc new file mode 100644 index 0000000000..23908cf784 --- /dev/null +++ b/lib/utils/src/utils/graph/open_kwarg_dataflow_graph/open_kwarg_dataflow_graph.cc @@ -0,0 +1,11 @@ +#include "utils/graph/open_kwarg_dataflow_graph/open_kwarg_dataflow_graph.h" +#include "utils/archetypes/ordered_value_type.h" + +namespace FlexFlow { + +using GraphInputName = ordered_value_type<0>; +using SlotName = ordered_value_type<1>; + +template struct OpenKwargDataflowGraph; + +} // namespace FlexFlow