diff --git a/NEWS.md b/NEWS.md
index 14e7f9f..3b831d4 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,5 +1,11 @@
 # quickr (development version)
 
+- Added initial matrix/linear algebra handler support from base R, using the
+  same BLAS/LAPACK as R: `%*%`, `t()`, `crossprod()`, `tcrossprod()`,
+  `outer()` (with `FUN="*"`), `%o%`, `forwardsolve()`, and `backsolve()`.
+
+  The plan is to add more functions in the future (#77 @mns-nordicals)
+
 - On macOS, quickr will use LLVM flang (`flang-new`) for compilation when
   available (e.g. `brew install flang`). This is optional and can be disabled
   with `options(quickr.prefer_flang = FALSE)`.
diff --git a/R/aaa-utils.R b/R/aaa-utils.R
index e50f468..537dc7d 100644
--- a/R/aaa-utils.R
+++ b/R/aaa-utils.R
@@ -6,6 +6,43 @@ NULL
 
 `%||%` <- function(x, y) if (is.null(x)) y else x
 
+quickr_r_cmd <- function(
+  os_type = .Platform$OS.type,
+  r_home = R.home,
+  file_exists = file.exists
+) {
+  r_cmd <- r_home("bin/R")
+  if (identical(os_type, "windows") && !file_exists(r_cmd)) {
+    r_cmd <- paste0(r_cmd, ".exe")
+  }
+  r_cmd
+}
+
+quickr_r_cmd_config_value <- function(
+  name,
+  r_cmd = quickr_r_cmd(),
+  system2 = base::system2
+) {
+  out <- tryCatch(
+    suppressWarnings(system2(
+      r_cmd,
+      c("CMD", "config", name),
+      stdout = TRUE,
+      stderr = FALSE
+    )),
+    error = function(e) character()
+  )
+  status <- attr(out, "status")
+  if (!is.null(status)) {
+    return("")
+  }
+  value <- trimws(paste(out, collapse = " "))
+  if (!nzchar(value) || grepl("^ERROR:", value)) {
+    return("")
+  }
+  value
+}
+
 
 # @export
 # This will be exported by S7 next release.
diff --git a/R/compiler.R b/R/compiler.R
index 5bfd1f7..d4464ee 100644
--- a/R/compiler.R
+++ b/R/compiler.R
@@ -94,11 +94,13 @@ quickr_fcompiler_env <- function(
     quickr_env_is_true("QUICKR_PREFER_FLANG"),
   write_lines = writeLines,
   sysname = Sys.info()[["sysname"]],
-  use_openmp = FALSE
+  use_openmp = FALSE,
+  link_flags = character()
 ) {
   stopifnot(is.character(build_dir), length(build_dir) == 1L, nzchar(build_dir))
 
   use_openmp <- isTRUE(use_openmp)
+  link_flags <- link_flags[nzchar(link_flags)]
 
   flang <- ""
   flang_runtime <- character()
@@ -151,7 +153,10 @@ quickr_fcompiler_env <- function(
         }
       )
     },
-    if (use_openmp) openmp_makevars_lines()
+    if (use_openmp) openmp_makevars_lines(),
+    if (length(link_flags)) {
+      paste("PKG_LIBS +=", paste(link_flags, collapse = " "))
+    }
   )
   write_lines(
     makevars_lines,
diff --git a/R/manifest.R b/R/manifest.R
index 65c4609..0617abf 100644
--- a/R/manifest.R
+++ b/R/manifest.R
@@ -81,6 +81,9 @@ iso_c_binding_symbols <- function(
   if (grepl("\\b[0-9]+_c_int\\b", body_code)) {
     used_iso_bindings <- union(used_iso_bindings, "c_int")
   }
+  if (grepl("\\bc_int\\b", body_code)) {
+    used_iso_bindings <- union(used_iso_bindings, "c_int")
+  }
   if (grepl("\\b[0-9]+\\.[0-9]+_c_double\\b", body_code)) {
     used_iso_bindings <- union(used_iso_bindings, "c_double")
   }
diff --git a/R/parallel.R b/R/parallel.R
index d1770da..bcba755 100644
--- a/R/parallel.R
+++ b/R/parallel.R
@@ -31,6 +31,15 @@ mark_openmp_used <- function(scope) {
   invisible(root)
 }
 
+openmp_abort <- function(message, class = "quickr_openmp_error") {
+  stop(
+    structure(
+      list(message = message, call = sys.call(-1)),
+      class = c(class, "error", "condition")
+    )
+  )
+}
+
 is_parallel_decl_call <- function(e) {
   is.call(e) &&
     is.symbol(e[[1L]]) &&
@@ -126,26 +135,25 @@ openmp_directives <- function(parallel, private = NULL) {
   )
 }
 
-openmp_config_value <- function(name) {
-  out <- tryCatch(
-    suppressWarnings(system2(
-      R.home("bin/R"),
-      c("CMD", "config", name),
-      stdout = TRUE,
-      stderr = TRUE
-    )),
-    error = function(e) character()
-  )
-  status <- attr(out, "status")
-  if (!is.null(status)) {
-    return("")
-  }
-  value <- trimws(paste(out, collapse = " "))
-  if (!nzchar(value) || grepl("^ERROR:", value)) {
-    return("")
+openmp_config_value <- local({
+  cached <- NULL
+
+  function(name, config_value = quickr_r_cmd_config_value) {
+    if (is.null(cached)) {
+      cached <<- list()
+    }
+    cached_value <- cached[[name]]
+    if (!is.null(cached_value)) {
+      return(cached_value)
+    }
+    value <- config_value(name)
+    if (!nzchar(value)) {
+      value <- ""
+    }
+    cached[[name]] <<- value
+    value
   }
-  value
-}
+})
 
 openmp_fflags <- function() {
   env_flags <- trimws(Sys.getenv("QUICKR_OPENMP_FFLAGS", ""))
@@ -216,18 +224,24 @@ openmp_link_flags <- function(fflags = openmp_fflags()) {
 openmp_makevars_lines <- function() {
   fflags <- openmp_fflags()
   if (!nzchar(fflags)) {
-    stop(
-      "OpenMP was requested but no OpenMP flags were found for this toolchain.",
-      "\nSet QUICKR_OPENMP_FFLAGS to your compiler's OpenMP flags.",
-      call. = FALSE
+    openmp_abort(
+      paste(
+        "OpenMP was requested but no OpenMP flags were found for this toolchain.",
+        "Set QUICKR_OPENMP_FFLAGS to your compiler's OpenMP flags.",
+        sep = "\n"
+      ),
+      class = "quickr_openmp_unavailable"
     )
   }
   libs <- openmp_link_flags(fflags = fflags)
   if (!nzchar(libs)) {
-    stop(
-      "OpenMP was requested but no OpenMP linker flags were found.",
-      "\nSet QUICKR_OPENMP_LIBS to your linker OpenMP flags.",
-      call. = FALSE
+    openmp_abort(
+      paste(
+        "OpenMP was requested but no OpenMP linker flags were found.",
+        "Set QUICKR_OPENMP_LIBS to your linker OpenMP flags.",
+        sep = "\n"
+      ),
+      class = "quickr_openmp_unavailable"
     )
   }
   c(
diff --git a/R/quick.R b/R/quick.R
index a94ef45..0abc4af 100644
--- a/R/quick.R
+++ b/R/quick.R
@@ -208,30 +208,45 @@ compile <- function(fsub, build_dir = tempfile(paste0(fsub@name, "-build-"))) {
   writeLines(fsub, fsub_path)
   writeLines(c_wrapper, c_wrapper_path)
 
+  # Link against the same BLAS/LAPACK/Fortran libs as the running R
+  # to support generated calls to vendor BLAS (e.g., dgemm, dgesv).
+  cfg <- quickr_r_cmd_config_value
+  BLAS_LIBS <- strsplit(cfg("BLAS_LIBS"), "[[:space:]]+")[[1]]
+  LAPACK_LIBS <- strsplit(cfg("LAPACK_LIBS"), "[[:space:]]+")[[1]]
+  FLIBS <- strsplit(cfg("FLIBS"), "[[:space:]]+")[[1]]
+  BLAS_LIBS <- BLAS_LIBS[nzchar(BLAS_LIBS)]
+  LAPACK_LIBS <- LAPACK_LIBS[nzchar(LAPACK_LIBS)]
+  FLIBS <- FLIBS[nzchar(FLIBS)]
+  link_flags <- c(LAPACK_LIBS, BLAS_LIBS, FLIBS)
+
+  use_openmp <- isTRUE(attr(fsub@scope, "uses_openmp", exact = TRUE))
   suppressWarnings({
-    r_args <- c(
+    env <- quickr_fcompiler_env(
+      build_dir = build_dir,
+      use_openmp = use_openmp,
+      link_flags = link_flags
+    )
+    r_args_base <- c(
       "CMD SHLIB --use-LTO",
       "-o",
       dll_path,
       fsub_path,
       c_wrapper_path
     )
-    use_openmp <- isTRUE(attr(fsub@scope, "uses_openmp", exact = TRUE))
-    env <- quickr_fcompiler_env(
-      build_dir = build_dir,
-      use_openmp = use_openmp
-    )
+    r_args_libs <- c(r_args_base, link_flags)
+    is_windows <- identical(.Platform$OS.type, "windows")
+    r_args <- if (length(env) && !is_windows) r_args_base else r_args_libs
     result <- system2(
       R.home("bin/R"),
       r_args,
-      env = env,
       stdout = TRUE,
-      stderr = TRUE
+      stderr = TRUE,
+      env = env
     )
-    if (!is.null(attr(result, "status")) && length(env)) {
+    if (!is.null(attr(result, "status")) && length(env) && !use_openmp) {
       result2 <- system2(
         R.home("bin/R"),
-        r_args,
+        r_args_libs,
         stdout = TRUE,
         stderr = TRUE
       )
@@ -250,23 +265,152 @@ compile <- function(fsub, build_dir = tempfile(paste0(fsub@name, "-build-"))) {
       }
     }
   })
-  if (!is.null(status <- attr(result, "status"))) {
+
+  status <- attr(result, "status")
+  if (!is.null(status)) {
     # Adjust the compiler error so RStudio console formatter doesn't mangle
     # the actual error message https://github.com/rstudio/rstudio/issues/16365
     result <- gsub("Error: ", "Compiler Error: ", result, fixed = TRUE)
     writeLines(result, stderr())
     cat("---\nCompiler exit status:", status, "\n", file = stderr())
+    if (use_openmp) {
+      openmp_abort(
+        paste(
+          "OpenMP was requested but compilation with OpenMP flags failed.",
+          "quickr will not fall back to a non-OpenMP build.",
+          "Resolve the OpenMP toolchain or remove the parallel declarations.",
+          sep = "\n"
+        ),
+        class = "quickr_openmp_ignored"
+      )
+    }
     stop("Compilation Error", call. = FALSE)
   }
 
+  quickr_windows_add_dll_paths(link_flags)
+
   # tryCatch(dyn.unload(dll_path), error = identity)
-  dll <- dyn.load(dll_path)
+  dll <- tryCatch(
+    dyn.load(dll_path),
+    error = function(e) {
+      if (use_openmp) {
+        openmp_abort(
+          paste(
+            "OpenMP was requested but the compiled shared library failed to load.",
+            "This usually means the OpenMP runtime (libgomp/libomp) was not found.",
+            "Original error:",
+            conditionMessage(e),
+            sep = "\n"
+          ),
+          class = "quickr_openmp_load_failed"
+        )
+      }
+      stop(e)
+    }
+  )
   c_wrapper_name <- paste0(fsub@name, "_")
   ptr <- getNativeSymbolInfo(c_wrapper_name, dll)$address
 
   create_quick_closure(fsub@name, fsub@closure, native_symbol = ptr)
 }
 
+quickr_windows_add_dll_paths <- function(
+  flags,
+  os_type = .Platform$OS.type,
+  config_value = quickr_r_cmd_config_value,
+  which = Sys.which
+) {
+  if (!identical(os_type, "windows")) {
+    return(invisible(FALSE))
+  }
+  dirs <- flags[grepl("^-L", flags)]
+  dirs <- sub("^-L", "", dirs)
+  dirs <- dirs[nzchar(dirs)]
+
+  bin_siblings <- file.path(dirs, "..", "bin")
+
+  config_values <- c(
+    config_value("BINPREF"),
+    config_value("FC"),
+    config_value("F77"),
+    config_value("CC"),
+    config_value("CXX")
+  )
+  config_paths <- vapply(
+    config_values,
+    function(value) {
+      value <- trimws(value)
+      if (!nzchar(value)) {
+        return("")
+      }
+      value <- sub("^\"([^\"]+)\".*", "\\1", value)
+      value <- sub("^'([^']+)'.*", "\\1", value)
+      strsplit(value, "\\s+")[[1L]][[1L]]
+    },
+    character(1)
+  )
+  config_bins <- unique(dirname(config_paths[nzchar(config_paths)]))
+
+  r_bin <- R.home("bin")
+  r_bin_x64 <- file.path(r_bin, "x64")
+  r_bin_i386 <- file.path(r_bin, "i386")
+
+  rtools_roots <- Sys.getenv(c(
+    "RTOOLS45_HOME",
+    "RTOOLS44_HOME",
+    "RTOOLS43_HOME",
+    "RTOOLS42_HOME",
+    "RTOOLS40_HOME",
+    "RTOOLS_HOME"
+  ))
+  rtools_roots <- rtools_roots[nzchar(rtools_roots)]
+  rtools_bins <- unique(c(
+    file.path(rtools_roots, "usr", "bin"),
+    file.path(rtools_roots, "mingw64", "bin"),
+    file.path(rtools_roots, "ucrt64", "bin"),
+    file.path(rtools_roots, "x86_64-w64-mingw32.static.posix", "bin"),
+    file.path(rtools_roots, "x86_64-w64-mingw32.static", "bin"),
+    file.path(rtools_roots, "x86_64-w64-mingw32", "bin")
+  ))
+
+  compilers <- which(c("gfortran", "gcc", "clang", "flang"))
+  compilers <- compilers[nzchar(compilers)]
+  compiler_bins <- unique(dirname(compilers))
+
+  dirs <- unique(c(
+    dirs,
+    bin_siblings,
+    config_bins,
+    r_bin,
+    r_bin_x64,
+    r_bin_i386,
+    rtools_bins,
+    compiler_bins
+  ))
+  dirs <- dirs[nzchar(dirs)]
+  dirs <- dirs[dir.exists(dirs)]
+  if (!length(dirs)) {
+    return(invisible(FALSE))
+  }
+
+  path <- Sys.getenv("PATH", unset = "")
+  existing <- strsplit(path, ";", fixed = TRUE)[[1]]
+  existing <- existing[nzchar(existing)]
+  existing_norm <- tolower(normalizePath(
+    existing,
+    winslash = "\\",
+    mustWork = FALSE
+  ))
+  dirs_norm <- tolower(normalizePath(dirs, winslash = "\\", mustWork = FALSE))
+  to_add <- dirs[!dirs_norm %in% existing_norm]
+  if (length(to_add)) {
+    Sys.setenv(PATH = paste(c(to_add, existing), collapse = ";"))
+    return(invisible(TRUE))
+  }
+
+  invisible(FALSE)
+}
+
 
 create_quick_closure <- function(
   name,
diff --git a/R/r2f.R b/R/r2f.R
index 3b2d0bf..34a6487 100644
--- a/R/r2f.R
+++ b/R/r2f.R
@@ -313,6 +313,40 @@ get_r2f_handler <- function(name) {
     stop("Unsupported function: ", name, call. = FALSE)
 }
 
+dest_supported_for_call <- function(call) {
+  if (!is.call(call)) {
+    return(FALSE)
+  }
+  unwrapped <- call
+  while (is_call(unwrapped, "(") && length(unwrapped) == 2L) {
+    unwrapped <- unwrapped[[2L]]
+  }
+  if (!is.call(unwrapped) || !is.symbol(unwrapped[[1L]])) {
+    return(FALSE)
+  }
+  handler <- get0(as.character(unwrapped[[1L]]), r2f_handlers, inherits = FALSE)
+  isTRUE(attr(handler, "dest_supported", exact = TRUE))
+}
+
+dest_infer_for_call <- function(call, scope) {
+  if (!is.call(call)) {
+    return(NULL)
+  }
+  unwrapped <- call
+  while (is_call(unwrapped, "(") && length(unwrapped) == 2L) {
+    unwrapped <- unwrapped[[2L]]
+  }
+  if (!is.call(unwrapped) || !is.symbol(unwrapped[[1L]])) {
+    return(NULL)
+  }
+  handler <- get0(as.character(unwrapped[[1L]]), r2f_handlers, inherits = FALSE)
+  infer <- attr(handler, "dest_infer", exact = TRUE)
+  if (!is.function(infer)) {
+    return(NULL)
+  }
+  infer(as.list(unwrapped)[-1L], scope)
+}
+
 r2f_default_handler <- function(args, scope = NULL, ..., calls) {
   # stopifnot(is.call(e), is.symbol(e[[1L]]))
 
@@ -1445,15 +1479,15 @@ r2f_handlers[["<-"]] <- function(args, scope, ..., hoist = NULL) {
     )
   }
 
-  value <- r2f(rhs, scope, ..., hoist = hoist)
+  dest_allowed <- dest_supported_for_call(rhs)
 
-  # immutable / copy-on-modify usage of Variable()
+  # If target already exists (declared), thread destination hint to a single BLAS-capable child
   var <- get0(name, scope, inherits = FALSE)
-  if (is.null(var) || !inherits(var, Variable)) {
-    # The var does not exist -> this is a binding to a new symbol
-    # Create a fresh Variable carrying only mode/dims and a new name.
-    src <- value@value
-    var <- Variable(mode = src@mode, dims = src@dims)
+  existing_binding <- !is.null(var) && inherits(var, Variable)
+  inferred_var <- NULL
+  fortran_name <- NULL
+  if (!existing_binding && dest_allowed) {
+    inferred_var <- dest_infer_for_call(rhs, scope)
     fortran_name <- if (
       scope_is_closure(scope) && inherits(get0(name, scope), Variable)
     ) {
@@ -1461,6 +1495,39 @@ r2f_handlers[["<-"]] <- function(args, scope, ..., hoist = NULL) {
     } else {
       name
     }
+  }
+
+  if (existing_binding) {
+    value <- if (dest_allowed) {
+      r2f(rhs, scope, ..., hoist = hoist, dest = var)
+    } else {
+      r2f(rhs, scope, ..., hoist = hoist)
+    }
+  } else if (inherits(inferred_var, Variable)) {
+    var <- inferred_var
+    var@name <- fortran_name
+    value <- r2f(rhs, scope, ..., hoist = hoist, dest = var)
+  } else {
+    value <- r2f(rhs, scope, ..., hoist = hoist)
+  }
+
+  # immutable / copy-on-modify usage of Variable()
+  if (!existing_binding) {
+    # The var does not exist -> this is a binding to a new symbol
+    # Create a fresh Variable carrying only mode/dims and a new name.
+    if (!inherits(var, Variable)) {
+      src <- value@value
+      var <- Variable(mode = src@mode, dims = src@dims)
+    }
+    if (is.null(fortran_name)) {
+      fortran_name <- if (
+        scope_is_closure(scope) && inherits(get0(name, scope), Variable)
+      ) {
+        make_shadow_fortran_name(scope, name)
+      } else {
+        name
+      }
+    }
     var@name <- fortran_name
     # keep a reference to the R expression assigned, if available
     tryCatch(
@@ -1478,10 +1545,14 @@ r2f_handlers[["<-"]] <- function(args, scope, ..., hoist = NULL) {
     assign(name, var, scope)
   }
 
-  Fortran(glue("{var@name} = {value}"))
+  # If child consumed destination (e.g., BLAS wrote directly into LHS), skip assignment
+  if (isTRUE(attr(value, "writes_to_dest", TRUE))) {
+    Fortran("")
+  } else {
+    Fortran(glue("{var@name} = {value}"))
+  }
 }
 
-
 r2f_handlers[["[<-"]] <- function(args, scope = NULL, ...) {
   # TODO: handle logical subsetting here, which must become a where a construct like:
   #   x[lgl] <- val
diff --git a/R/sub-r2f-matrix.R b/R/sub-r2f-matrix.R
new file mode 100644
index 0000000..25d3ce4
--- /dev/null
+++ b/R/sub-r2f-matrix.R
@@ -0,0 +1,1072 @@
+# Matrix-specific r2f handlers and helpers
+
+# ---- matrix operation handlers ----
+
+# %*% handler with optional destination hint
+r2f_handlers[["%*%"]] <- function(args, scope, ..., hoist = NULL, dest = NULL) {
+  stopifnot(length(args) == 2L)
+  left_info <- unwrap_transpose_arg(args[[1L]], scope, ..., hoist = hoist)
+  right_info <- unwrap_transpose_arg(args[[2L]], scope, ..., hoist = hoist)
+  left <- left_info$value
+  right <- right_info$value
+  left_trans <- left_info$trans
+  right_trans <- right_info$trans
+
+  left_rank <- left@value@rank
+  right_rank <- right@value@rank
+
+  if (left_rank > 2 || right_rank > 2) {
+    stop("%*% only supports vectors/matrices (rank <= 2)")
+  }
+
+  left_dims <- matrix_dims(
+    left,
+    orientation = if (left_rank == 1) "rowvec" else "matrix"
+  )
+
+  right_dims <- matrix_dims(
+    right,
+    orientation = if (right_rank == 1) "colvec" else "matrix"
+  )
+
+  left_eff <- if (left_rank == 2) {
+    effective_dims(left_dims, left_trans)
+  } else {
+    left_dims
+  }
+  right_eff <- if (right_rank == 2) {
+    effective_dims(right_dims, right_trans)
+  } else {
+    right_dims
+  }
+
+  # Compute effective shapes
+  m <- left_eff$rows
+  k <- left_eff$cols
+  n <- right_eff$cols
+
+  # Leading dimensions
+  lda <- left_dims$rows
+  ldb <- right_dims$rows
+  ldc_expr <- m
+
+  # Matrix-Vector: use GEMV
+  if (left_rank == 2 && right_rank == 1) {
+    expected_len <- if (left_trans == "N") left_dims$cols else left_dims$rows
+    assert_conformable(expected_len, right_dims$rows, "%*%")
+    out_len <- if (left_trans == "N") left_dims$rows else left_dims$cols
+    return(gemv(
+      transA = left_trans,
+      A = left,
+      x = right,
+      m = left_dims$rows,
+      n = left_dims$cols,
+      lda = left_dims$rows,
+      out_dims = list(out_len, 1L),
+      scope = scope,
+      hoist = hoist,
+      dest = dest,
+      context = "%*%"
+    ))
+  }
+  # Vector-Matrix: use GEMV with transpose
+  if (left_rank == 1 && right_rank == 2) {
+    transA <- if (right_trans == "N") "T" else "N"
+    expected_len <- if (transA == "N") right_dims$cols else right_dims$rows
+    assert_conformable(left_dims$cols, expected_len, "%*%")
+    out_len <- if (transA == "N") right_dims$rows else right_dims$cols
+    return(gemv(
+      transA = transA,
+      A = right,
+      x = left,
+      m = right_dims$rows,
+      n = right_dims$cols,
+      lda = right_dims$rows,
+      out_dims = list(1L, out_len),
+      scope = scope,
+      hoist = hoist,
+      dest = dest,
+      context = "%*%"
+    ))
+  }
+
+  assert_conformable(k, right_eff$rows, "%*%")
+
+  # Matrix-Matrix
+  gemm(
+    opA = left_trans,
+    opB = right_trans,
+    left = left,
+    right = right,
+    m = m,
+    n = n,
+    k = k,
+    lda = lda,
+    ldb = ldb,
+    ldc_expr = ldc_expr,
+    scope = scope,
+    hoist = hoist,
+    dest = dest,
+    context = "%*%"
+  )
+}
+
+
+# t(x) handler: transpose 2D; 1D becomes a 1 x n row matrix
+r2f_handlers[["t"]] <- function(args, scope, ..., hoist = NULL) {
+  stopifnot(length(args) == 1L)
+  x <- r2f(args[[1L]], scope, ..., hoist = hoist)
+  x <- maybe_cast_double(x)
+  if (x@value@rank == 2) {
+    val <- Variable("double", list(x@value@dims[[2]], x@value@dims[[1]]))
+    return(Fortran(glue("transpose({x})"), val))
+  } else if (x@value@rank == 1) {
+    len <- x@value@dims[[1]]
+    val <- Variable("double", list(1L, len))
+    return(Fortran(glue("reshape({x}, [1, int({len})])"), val))
+  } else if (x@value@rank == 0) {
+    return(x)
+  } else {
+    stop("t() only supports rank 0-2 inputs")
+  }
+}
+
+
+# Handle crossprod(), using SYRK for single-arg and GEMM for two-arg forms.
+r2f_handlers[["crossprod"]] <- function(
+  args,
+  scope,
+  ...,
+  hoist = NULL,
+  dest = NULL
+) {
+  x_arg <- args[[1L]]
+  y_arg <- if (length(args) > 1L) args[[2L]] else NULL
+  crossprod_like(
+    x_arg = x_arg,
+    y_arg = y_arg,
+    scope = scope,
+    ...,
+    hoist = hoist,
+    dest = dest,
+    trans_single = "T",
+    opA = "T",
+    opB = "N",
+    context = "crossprod"
+  )
+}
+
+
+# Handle tcrossprod(), using SYRK for single-arg and GEMM for two-arg forms.
+r2f_handlers[["tcrossprod"]] <- function(
+  args,
+  scope,
+  ...,
+  hoist = NULL,
+  dest = NULL
+) {
+  x_arg <- args[[1L]]
+  y_arg <- if (length(args) > 1L) args[[2L]] else NULL
+  crossprod_like(
+    x_arg = x_arg,
+    y_arg = y_arg,
+    scope = scope,
+    ...,
+    hoist = hoist,
+    dest = dest,
+    trans_single = "N",
+    opA = "N",
+    opB = "T",
+    context = "tcrossprod"
+  )
+}
+
+# Handle outer() for FUN = "*" as BLAS outer product.
+r2f_handlers[["outer"]] <- function(
+  args,
+  scope,
+  ...,
+  hoist = NULL,
+  dest = NULL
+) {
+  x_arg <- args$X %||% args[[1L]]
+  y_arg <- args$Y %||% if (length(args) >= 2L) args[[2L]] else NULL
+  if (is.null(x_arg) || is.null(y_arg)) {
+    stop("outer() expects X and Y")
+  }
+
+  fun <- args$FUN %||% "*"
+  if (!identical(fun, "*")) {
+    stop("outer() only supports FUN = \"*\"")
+  }
+  x <- r2f(x_arg, scope, ..., hoist = hoist)
+  y <- r2f(y_arg, scope, ..., hoist = hoist)
+  outer_mul(
+    x,
+    y,
+    scope = scope,
+    hoist = hoist,
+    dest = dest,
+    context = "outer"
+  )
+}
+
+# Handle %o% for outer products via BLAS GER.
+r2f_handlers[["%o%"]] <- function(
+  args,
+  scope,
+  ...,
+  hoist = NULL,
+  dest = NULL
+) {
+  stopifnot(length(args) == 2L)
+  x <- r2f(args[[1L]], scope, ..., hoist = hoist)
+  y <- r2f(args[[2L]], scope, ..., hoist = hoist)
+  outer_mul(
+    x,
+    y,
+    scope = scope,
+    hoist = hoist,
+    dest = dest,
+    context = "%o%"
+  )
+}
+
+# Handle forwardsolve() via triangular BLAS routines.
+r2f_handlers[["forwardsolve"]] <- function(
+  args,
+  scope,
+  ...,
+  hoist = NULL,
+  dest = NULL
+) {
+  stopifnot(length(args) >= 2L)
+  if (!is.null(args$k)) {
+    stop("forwardsolve() does not support k yet")
+  }
+  upper_tri <- logical_arg_or_default(
+    args,
+    "upper.tri",
+    FALSE,
+    "forwardsolve()"
+  )
+  transpose <- logical_arg_or_default(
+    args,
+    "transpose",
+    FALSE,
+    "forwardsolve()"
+  )
+  diag_unit <- logical_arg_or_default(args, "diag", FALSE, "forwardsolve()")
+
+  A <- r2f(args[[1L]], scope, ..., hoist = hoist)
+  B <- r2f(args[[2L]], scope, ..., hoist = hoist)
+
+  triangular_solve(
+    A = A,
+    B = B,
+    uplo = if (upper_tri) "U" else "L",
+    trans = if (transpose) "T" else "N",
+    diag = if (diag_unit) "U" else "N",
+    scope = scope,
+    hoist = hoist,
+    dest = dest,
+    context = "forwardsolve"
+  )
+}
+
+# Handle backsolve() via triangular BLAS routines.
+r2f_handlers[["backsolve"]] <- function(
+  args,
+  scope,
+  ...,
+  hoist = NULL,
+  dest = NULL
+) {
+  stopifnot(length(args) >= 2L)
+  if (!is.null(args$k)) {
+    stop("backsolve() does not support k yet")
+  }
+  upper_tri <- logical_arg_or_default(args, "upper.tri", TRUE, "backsolve()")
+  transpose <- logical_arg_or_default(args, "transpose", FALSE, "backsolve()")
+  diag_unit <- logical_arg_or_default(args, "diag", FALSE, "backsolve()")
+
+  A <- r2f(args[[1L]], scope, ..., hoist = hoist)
+  B <- r2f(args[[2L]], scope, ..., hoist = hoist)
+
+  triangular_solve(
+    A = A,
+    B = B,
+    uplo = if (upper_tri) "U" else "L",
+    trans = if (transpose) "T" else "N",
+    diag = if (diag_unit) "U" else "N",
+    scope = scope,
+    hoist = hoist,
+    dest = dest,
+    context = "backsolve"
+  )
+}
+
+# ---- matrix helpers ----
+
+# Return the R symbol name if operand is a bare symbol; otherwise NULL.
+symbol_name_or_null <- function(x) {
+  stopifnot(inherits(x, Fortran))
+  r_expr <- x@r
+  if (is.symbol(r_expr)) {
+    return(as.character(r_expr))
+  }
+  if (length(x) == 1L && grepl("^[A-Za-z][A-Za-z0-9_]*$", x)) {
+    return(as.character(x))
+  }
+  NULL
+}
+
+# Return a dimension value for an axis, defaulting missing dims to 1L.
+dim_or_one_from <- function(dims, axis) {
+  stopifnot(is.numeric(axis), axis >= 1)
+  axis <- as.integer(axis)
+  if (is.null(dims)) {
+    return(1L)
+  }
+  if (axis <= length(dims) && !is.null(dims[[axis]])) {
+    dims[[axis]]
+  } else {
+    1L
+  }
+}
+
+# Return the requested axis length, defaulting scalars (or missing axes) to 1L.
+dim_or_one <- function(x, axis) {
+  stopifnot(inherits(x, Fortran))
+  dim_or_one_from(x@value@dims, axis)
+}
+
+# Return the requested axis length for a Variable, defaulting to 1L.
+var_dim_or_one <- function(var, axis) {
+  stopifnot(inherits(var, Variable))
+  dim_or_one_from(var@dims, axis)
+}
+
+# Compute matrix-style row/column dimensions from rank, dims, and orientation.
+matrix_dims_from <- function(
+  rank,
+  dims,
+  orientation = c("matrix", "rowvec", "colvec")
+) {
+  orientation <- match.arg(orientation)
+  rows <- dim_or_one_from(dims, 1L)
+  cols <- dim_or_one_from(dims, 2L)
+
+  if (rank == 0L) {
+    rows <- 1L
+    cols <- 1L
+  } else if (rank == 1L) {
+    if (orientation == "rowvec") {
+      rows <- 1L
+      cols <- dim_or_one_from(dims, 1L)
+    } else {
+      rows <- dim_or_one_from(dims, 1L)
+      cols <- 1L
+    }
+  }
+
+  list(rows = rows, cols = cols)
+}
+
+# Interpret a Fortran value as a matrix for BLAS calls. Scalars become 1x1
+# matrices, and vectors can be viewed as either row or column vectors.
+matrix_dims <- function(x, orientation = c("matrix", "rowvec", "colvec")) {
+  stopifnot(inherits(x, Fortran))
+  matrix_dims_from(x@value@rank, x@value@dims, orientation = orientation)
+}
+
+# Interpret a Variable value as a matrix for BLAS calls.
+matrix_dims_var <- function(
+  var,
+  orientation = c("matrix", "rowvec", "colvec")
+) {
+  stopifnot(inherits(var, Variable))
+  matrix_dims_from(var@rank, var@dims, orientation = orientation)
+}
+
+# Compute effective dimensions based on transpose flags.
+effective_dims <- function(dims, trans) {
+  if (identical(trans, "T")) {
+    list(rows = dims$cols, cols = dims$rows)
+  } else {
+    dims
+  }
+}
+
+# Validate conformability, warning when static checks are inconclusive.
+assert_conformable <- function(left, right, context) {
+  if (is_wholenumber(left) && is_wholenumber(right)) {
+    if (!identical(as.integer(left), as.integer(right))) {
+      stop("non-conformable arguments in ", context, call. = FALSE)
+    }
+    return(invisible(TRUE))
+  }
+  if (identical(left, right)) {
+    return(invisible(TRUE))
+  }
+
+  left_txt <- if (is.null(left)) "NULL" else deparse(left)
+  right_txt <- if (is.null(right)) "NULL" else deparse(right)
+  warning(
+    "cannot verify conformability in ",
+    context,
+    " at compile time: ",
+    left_txt,
+    " vs ",
+    right_txt,
+    call. = FALSE
+  )
+  invisible(FALSE)
+}
+
+# Unwrap t() calls to infer transpose flags and normalize scalars/vectors.
+unwrap_transpose_arg <- function(arg, scope, ..., hoist) {
+  if (is_call(arg, quote(t)) && length(arg) == 2L) {
+    inner <- r2f(arg[[2L]], scope, ..., hoist = hoist)
+    inner <- maybe_cast_double(inner)
+    if (inner@value@rank == 2L) {
+      return(list(value = inner, trans = "T"))
+    } else if (inner@value@rank == 1L) {
+      len <- inner@value@dims[[1L]]
+      val <- Variable("double", list(1L, len))
+      return(list(
+        value = Fortran(glue("reshape({inner}, [1, int({len})])"), val),
+        trans = "N"
+      ))
+    } else if (inner@value@rank == 0L) {
+      return(list(value = inner, trans = "N"))
+    } else {
+      stop("t() only supports rank 0-2 inputs")
+    }
+  }
+  value <- r2f(arg, scope, ..., hoist = hoist)
+  value <- maybe_cast_double(value)
+  list(value = value, trans = "N")
+}
+
+# Check that destination dimensions match expected output dimensions.
+assert_dest_dims_compatible <- function(dest, expected_dims, context) {
+  if (is.null(dest) || is.null(expected_dims)) {
+    return(invisible(TRUE))
+  }
+  expected_rank <- length(expected_dims)
+  if (dest@rank != expected_rank) {
+    stop("assignment target has incompatible rank for ", context, call. = FALSE)
+  }
+  for (i in seq_len(expected_rank)) {
+    dest_dim <- dest@dims[[i]]
+    expected_dim <- expected_dims[[i]]
+    if (is_wholenumber(dest_dim) && is_wholenumber(expected_dim)) {
+      if (!identical(as.integer(dest_dim), as.integer(expected_dim))) {
+        stop(
+          "assignment target has incompatible dimensions for ",
+          context,
+          call. = FALSE
+        )
+      }
+    }
+  }
+  invisible(TRUE)
+}
+
+# Determine if output can safely write into dest without aliasing.
+can_use_output <- function(dest, left, right, expected_dims = NULL, context) {
+  if (is.null(dest)) {
+    return(FALSE)
+  }
+  if (!identical(dest@mode, "double")) {
+    return(FALSE)
+  }
+  assert_dest_dims_compatible(dest, expected_dims, context)
+  output_name <- dest@name
+  # check output name is not the same as left or right
+  !identical(output_name, as.character(left)) &&
+    !identical(output_name, as.character(right))
+}
+
+# Ensure a BLAS operand is named, hoisting into a temp if needed.
+ensure_blas_operand_name <- function(x, hoist) {
+  name <- symbol_name_or_null(x)
+  if (!is.null(name)) {
+    return(name)
+  }
+  tmp <- hoist$declare_tmp(
+    mode = x@value@mode %||% "double",
+    dims = x@value@dims
+  )
+  hoist$emit(glue("{tmp@name} = {x}"))
+  tmp@name
+}
+
+# Extract a logical argument or use the provided default.
+logical_arg_or_default <- function(args, name, default, context) {
+  val <- args[[name]] %||% default
+  if (is.null(val)) {
+    return(default)
+  }
+  if (!is.logical(val) || length(val) != 1L || is.na(val)) {
+    stop(context, " only supports literal ", name, " = TRUE/FALSE")
+  }
+  val
+}
+
+# Wrap an expression as a BLAS int literal.
+blas_int <- function(x) {
+  glue("int({x}, kind=c_int)")
+}
+
+# Centralized GEMM emission with optional destination
+# gemm: centralized BLAS GEMM emission.
+# - 'hoist' is required and provided by r2f(); handlers thread it through so
+#   helpers can pre-emit temporary assignments and BLAS calls.
+gemm <- function(
+  opA,
+  opB,
+  left,
+  right,
+  m,
+  n,
+  k,
+  lda,
+  ldb,
+  ldc_expr,
+  scope,
+  hoist,
+  dest = NULL,
+  context = "gemm"
+) {
+  if (!inherits(hoist, "environment")) {
+    stop("internal: hoist must be a hoist environment")
+  }
+  A_name <- ensure_blas_operand_name(left, hoist)
+  B_name <- ensure_blas_operand_name(right, hoist)
+
+  if (
+    can_use_output(
+      dest,
+      left,
+      right,
+      expected_dims = list(m, n),
+      context = context
+    )
+  ) {
+    hoist$emit(glue(
+      "call dgemm('{opA}','{opB}', {blas_int(m)}, {blas_int(n)}, {blas_int(k)}, 1.0_c_double, {A_name}, {blas_int(lda)}, {B_name}, {blas_int(ldb)}, 0.0_c_double, {dest@name}, {blas_int(ldc_expr)})"
+    ))
+    out <- Fortran(dest@name, dest)
+    attr(out, "writes_to_dest") <- TRUE
+    return(out)
+  }
+
+  output_var <- hoist$declare_tmp(mode = "double", dims = list(m, n))
+  hoist$emit(glue(
+    "call dgemm('{opA}','{opB}', {blas_int(m)}, {blas_int(n)}, {blas_int(k)}, 1.0_c_double, {A_name}, {blas_int(lda)}, {B_name}, {blas_int(ldb)}, 0.0_c_double, {output_var@name}, {blas_int(ldc_expr)})"
+  ))
+  Fortran(output_var@name, output_var)
+}
+
+# Centralized GEMV emission with optional destination
+# gemv: centralized BLAS GEMV emission.
+# - 'hoist' is required and provided by r2f(); handlers thread it through so
+#   helpers can pre-emit temporary assignments and BLAS calls.
+gemv <- function(
+  transA,
+  A,
+  x,
+  m,
+  n,
+  lda,
+  out_dims,
+  scope,
+  hoist,
+  dest = NULL,
+  context = "gemv"
+) {
+  if (!inherits(hoist, "environment")) {
+    stop("internal: hoist must be a hoist environment")
+  }
+  A_name <- ensure_blas_operand_name(A, hoist)
+  x_name <- ensure_blas_operand_name(x, hoist)
+
+  if (
+    can_use_output(
+      dest,
+      A,
+      x,
+      expected_dims = out_dims,
+      context = context
+    )
+  ) {
+    # Assign output to output destination
+    hoist$emit(glue(
+      "call dgemv('{transA}', {blas_int(m)}, {blas_int(n)}, 1.0_c_double, {A_name}, {blas_int(lda)}, {x_name}, 1, 0.0_c_double, {dest@name}, 1)"
+    ))
+    out <- Fortran(dest@name, dest)
+    attr(out, "writes_to_dest") <- TRUE
+    return(out)
+  }
+  # Else assign to a temporary variable
+  output_var <- hoist$declare_tmp(mode = "double", dims = out_dims)
+  hoist$emit(glue(
+    "call dgemv('{transA}', {blas_int(m)}, {blas_int(n)}, 1.0_c_double, {A_name}, {blas_int(lda)}, {x_name}, 1, 0.0_c_double, {output_var@name}, 1)"
+  ))
+  Fortran(output_var@name, output_var)
+}
+
+# Centralized SYRK emission for symmetric rank-k update
+# Computes: C := alpha * op(A) * op(A)^T + beta * C
+# For crossprod(X):  C = t(X) %*% X  → trans = "T"
+# For tcrossprod(X): C = X %*% t(X)  → trans = "N"
+syrk <- function(
+  trans,
+  X,
+  scope,
+  hoist,
+  dest = NULL,
+  context = "syrk"
+) {
+  if (!inherits(hoist, "environment")) {
+    stop("internal: hoist must be a hoist environment")
+  }
+  X_name <- ensure_blas_operand_name(X, hoist)
+
+  x_dims <- matrix_dims(X)
+
+  # For trans = "T": C = t(X) %*% X, so C is k x k where k = ncol(X)
+  # For trans = "N": C = X %*% t(X), so C is n x n where n = nrow(X)
+  if (trans == "T") {
+    n <- x_dims$cols
+    k <- x_dims$rows
+  } else {
+    n <- x_dims$rows
+    k <- x_dims$cols
+  }
+  lda <- x_dims$rows
+
+  # Output is symmetric n x n matrix
+  if (
+    can_use_output(
+      dest,
+      X,
+      X,
+      expected_dims = list(n, n),
+      context = context
+    )
+  ) {
+    hoist$emit(glue(
+      "call dsyrk('U', '{trans}', {blas_int(n)}, {blas_int(k)}, 1.0_c_double, {X_name}, {blas_int(lda)}, 0.0_c_double, {dest@name}, {blas_int(n)})"
+    ))
+    # Fill lower triangle from upper
+    idx_i <- hoist$declare_tmp(mode = "integer", dims = list(1L))
+    idx_j <- hoist$declare_tmp(mode = "integer", dims = list(1L))
+    hoist$emit(glue(
+      "
+do {idx_j@name} = 1_c_int, {n} - 1_c_int
+  do {idx_i@name} = {idx_j@name} + 1_c_int, {n}
+    {dest@name}({idx_i@name}, {idx_j@name}) = {dest@name}({idx_j@name}, {idx_i@name})
+  end do
+end do"
+    ))
+    out <- Fortran(dest@name, dest)
+    attr(out, "writes_to_dest") <- TRUE
+    return(out)
+  }
+
+  output_var <- hoist$declare_tmp(mode = "double", dims = list(n, n))
+  hoist$emit(glue(
+    "call dsyrk('U', '{trans}', {blas_int(n)}, {blas_int(k)}, 1.0_c_double, {X_name}, {blas_int(lda)}, 0.0_c_double, {output_var@name}, {blas_int(n)})"
+  ))
+  # Fill lower triangle from upper
+  idx_i <- hoist$declare_tmp(mode = "integer", dims = list(1L))
+  idx_j <- hoist$declare_tmp(mode = "integer", dims = list(1L))
+  hoist$emit(glue(
+    "
+do {idx_j@name} = 1_c_int, {n} - 1_c_int
+  do {idx_i@name} = {idx_j@name} + 1_c_int, {n}
+    {output_var@name}({idx_i@name}, {idx_j@name}) = {output_var@name}({idx_j@name}, {idx_i@name})
+  end do
+end do"
+  ))
+  Fortran(output_var@name, output_var)
+}
+
+# Emit BLAS outer product for vectors or scalars with optional destination.
+outer_mul <- function(
+  x,
+  y,
+  scope,
+  hoist,
+  dest = NULL,
+  context = "outer"
+) {
+  if (!inherits(hoist, "environment")) {
+    stop("internal: hoist must be a hoist environment")
+  }
+
+  x <- maybe_cast_double(x)
+  y <- maybe_cast_double(y)
+
+  if (x@value@rank > 1L || y@value@rank > 1L) {
+    stop("outer() only supports vectors or scalars")
+  }
+
+  m <- dim_or_one(x, 1L)
+  n <- dim_or_one(y, 1L)
+
+  x_name <- ensure_blas_operand_name(x, hoist)
+  y_name <- ensure_blas_operand_name(y, hoist)
+
+  if (
+    can_use_output(
+      dest,
+      x,
+      y,
+      expected_dims = list(m, n),
+      context = context
+    )
+  ) {
+    hoist$emit(glue("{dest@name} = 0.0_c_double"))
+    hoist$emit(glue(
+      "call dger({blas_int(m)}, {blas_int(n)}, 1.0_c_double, {x_name}, 1, {y_name}, 1, {dest@name}, {blas_int(m)})"
+    ))
+    out <- Fortran(dest@name, dest)
+    attr(out, "writes_to_dest") <- TRUE
+    return(out)
+  }
+
+  output_var <- hoist$declare_tmp(mode = "double", dims = list(m, n))
+  hoist$emit(glue("{output_var@name} = 0.0_c_double"))
+  hoist$emit(glue(
+    "call dger({blas_int(m)}, {blas_int(n)}, 1.0_c_double, {x_name}, 1, {y_name}, 1, {output_var@name}, {blas_int(m)})"
+  ))
+  Fortran(output_var@name, output_var)
+}
+
+# Emit triangular solve (vector or matrix RHS) with optional destination.
+triangular_solve <- function(
+  A,
+  B,
+  uplo,
+  trans,
+  diag,
+  scope,
+  hoist,
+  dest = NULL,
+  context = "triangular solve"
+) {
+  if (!inherits(hoist, "environment")) {
+    stop("internal: hoist must be a hoist environment")
+  }
+
+  A <- maybe_cast_double(A)
+  B <- maybe_cast_double(B)
+
+  if (A@value@rank != 2L) {
+    stop("triangular solve expects a matrix")
+  }
+
+  a_dims <- matrix_dims(A)
+  assert_conformable(a_dims$rows, a_dims$cols, "triangular solve")
+  n <- a_dims$rows
+
+  b_rank <- B@value@rank
+  if (b_rank > 2L) {
+    stop("triangular solve only supports vector or matrix right-hand sides")
+  }
+  if (b_rank == 0L) {
+    stop("triangular solve expects a vector or matrix right-hand side")
+  } else if (b_rank == 1L) {
+    b_len <- dim_or_one(B, 1L)
+    assert_conformable(n, b_len, "triangular solve")
+  } else {
+    b_rows <- dim_or_one(B, 1L)
+    assert_conformable(n, b_rows, "triangular solve")
+  }
+
+  A_name <- ensure_blas_operand_name(A, hoist)
+
+  if (
+    can_use_output(
+      dest,
+      A,
+      B,
+      expected_dims = B@value@dims,
+      context = context
+    )
+  ) {
+    hoist$emit(glue("{dest@name} = {B}"))
+    B_name <- dest@name
+    out_var <- dest
+    writes_to_dest <- TRUE
+  } else {
+    out_var <- hoist$declare_tmp(
+      mode = B@value@mode %||% "double",
+      dims = B@value@dims
+    )
+    hoist$emit(glue("{out_var@name} = {B}"))
+    B_name <- out_var@name
+    writes_to_dest <- FALSE
+  }
+
+  if (b_rank <= 1L) {
+    hoist$emit(glue(
+      "call dtrsv('{uplo}', '{trans}', '{diag}', {blas_int(n)}, {A_name}, {blas_int(n)}, {B_name}, 1)"
+    ))
+  } else {
+    nrhs <- dim_or_one(B, 2L)
+    hoist$emit(glue(
+      "call dtrsm('L', '{uplo}', '{trans}', '{diag}', {blas_int(n)}, {blas_int(nrhs)}, 1.0_c_double, {A_name}, {blas_int(n)}, {B_name}, {blas_int(n)})"
+    ))
+  }
+
+  out <- Fortran(B_name, out_var)
+  if (writes_to_dest) {
+    attr(out, "writes_to_dest") <- TRUE
+  }
+  out
+}
+
+# Shared crossprod/tcrossprod logic for one- and two-argument forms.
+crossprod_like <- function(
+  x_arg,
+  y_arg,
+  scope,
+  ...,
+  hoist,
+  dest,
+  trans_single,
+  opA,
+  opB,
+  context
+) {
+  x <- r2f(x_arg, scope, ..., hoist = hoist)
+  x <- maybe_cast_double(x)
+
+  if (is.null(y_arg)) {
+    return(syrk(
+      trans = trans_single,
+      X = x,
+      scope = scope,
+      hoist = hoist,
+      dest = dest,
+      context = context
+    ))
+  }
+
+  y <- maybe_cast_double(r2f(y_arg, scope, ..., hoist = hoist))
+
+  x_dims <- matrix_dims(x)
+  y_dims <- matrix_dims(y)
+  x_eff <- effective_dims(x_dims, opA)
+  y_eff <- effective_dims(y_dims, opB)
+
+  assert_conformable(x_eff$cols, y_eff$rows, context)
+
+  m <- x_eff$rows
+  n <- y_eff$cols
+  k <- x_eff$cols
+
+  lda <- x_dims$rows
+  ldb <- y_dims$rows
+  ldc_expr <- m
+
+  gemm(
+    opA = opA,
+    opB = opB,
+    left = x,
+    right = y,
+    m = m,
+    n = n,
+    k = k,
+    lda = lda,
+    ldb = ldb,
+    ldc_expr = ldc_expr,
+    scope = scope,
+    hoist = hoist,
+    dest = dest,
+    context = context
+  )
+}
+
+# ---- matrix inference helpers ----
+
+# Infer a variable from a symbol in the current scope.
+infer_symbol_var <- function(arg, scope) {
+  if (!is.symbol(arg)) {
+    return(NULL)
+  }
+  var <- get0(as.character(arg), scope, inherits = FALSE)
+  if (inherits(var, Variable)) var else NULL
+}
+
+# Infer a matrix argument, handling t() and scalar/vector promotion.
+infer_matrix_arg <- function(arg, scope) {
+  if (is_call(arg, quote(t)) && length(arg) == 2L) {
+    inner <- infer_symbol_var(arg[[2L]], scope)
+    if (is.null(inner)) {
+      return(NULL)
+    }
+    if (inner@rank == 2L) {
+      return(list(var = inner, trans = "T"))
+    }
+    if (inner@rank == 1L) {
+      len <- inner@dims[[1L]]
+      if (is.null(len)) {
+        return(NULL)
+      }
+      val <- Variable("double", list(1L, len))
+      return(list(var = val, trans = "N"))
+    }
+    if (inner@rank == 0L) {
+      return(list(var = inner, trans = "N"))
+    }
+    return(NULL)
+  }
+  var <- infer_symbol_var(arg, scope)
+  if (is.null(var)) {
+    return(NULL)
+  }
+  list(var = var, trans = "N")
+}
+
+# Infer destination dimensions for %*% based on inputs.
+infer_dest_matmul <- function(args, scope) {
+  if (length(args) != 2L) {
+    return(NULL)
+  }
+  left_info <- infer_matrix_arg(args[[1L]], scope)
+  right_info <- infer_matrix_arg(args[[2L]], scope)
+  if (is.null(left_info) || is.null(right_info)) {
+    return(NULL)
+  }
+
+  left <- left_info$var
+  right <- right_info$var
+  left_trans <- left_info$trans
+  right_trans <- right_info$trans
+
+  left_rank <- left@rank
+  right_rank <- right@rank
+  if (left_rank > 2L || right_rank > 2L) {
+    return(NULL)
+  }
+
+  left_dims <- matrix_dims_var(
+    left,
+    orientation = if (left_rank == 1L) "rowvec" else "matrix"
+  )
+  right_dims <- matrix_dims_var(
+    right,
+    orientation = if (right_rank == 1L) "colvec" else "matrix"
+  )
+
+  left_eff <- if (left_rank == 2L) {
+    effective_dims(left_dims, left_trans)
+  } else {
+    left_dims
+  }
+  right_eff <- if (right_rank == 2L) {
+    effective_dims(right_dims, right_trans)
+  } else {
+    right_dims
+  }
+
+  if (left_rank == 2L && right_rank == 1L) {
+    out_len <- if (left_trans == "N") left_dims$rows else left_dims$cols
+    return(Variable("double", list(out_len, 1L)))
+  }
+  if (left_rank == 1L && right_rank == 2L) {
+    transA <- if (right_trans == "N") "T" else "N"
+    out_len <- if (transA == "N") right_dims$rows else right_dims$cols
+    return(Variable("double", list(1L, out_len)))
+  }
+
+  Variable("double", list(left_eff$rows, right_eff$cols))
+}
+
+# Shared inference for crossprod/tcrossprod destination sizes.
+infer_dest_crossprod_like <- function(args, scope, trans) {
+  x <- infer_symbol_var(args[[1L]], scope)
+  if (is.null(x)) {
+    return(NULL)
+  }
+  y <- if (length(args) > 1L) infer_symbol_var(args[[2L]], scope) else NULL
+  x_dims <- matrix_dims_var(x)
+  if (is.null(y)) {
+    n <- if (identical(trans, "T")) x_dims$cols else x_dims$rows
+    return(Variable("double", list(n, n)))
+  }
+  y_dims <- matrix_dims_var(y)
+  if (identical(trans, "T")) {
+    Variable("double", list(x_dims$cols, y_dims$cols))
+  } else {
+    Variable("double", list(x_dims$rows, y_dims$rows))
+  }
+}
+
+# Infer destination dimensions for crossprod().
+infer_dest_crossprod <- function(args, scope) {
+  infer_dest_crossprod_like(args, scope, trans = "T")
+}
+
+# Infer destination dimensions for tcrossprod().
+infer_dest_tcrossprod <- function(args, scope) {
+  infer_dest_crossprod_like(args, scope, trans = "N")
+}
+
+# Infer destination dimensions for outer() and %o%().
+infer_dest_outer <- function(args, scope) {
+  x_arg <- args$X %||% args[[1L]]
+  y_arg <- args$Y %||% if (length(args) >= 2L) args[[2L]] else NULL
+  x <- infer_symbol_var(x_arg, scope)
+  y <- infer_symbol_var(y_arg, scope)
+  if (is.null(x) || is.null(y)) {
+    return(NULL)
+  }
+  if (x@rank > 1L || y@rank > 1L) {
+    return(NULL)
+  }
+  m <- var_dim_or_one(x, 1L)
+  n <- var_dim_or_one(y, 1L)
+  Variable("double", list(m, n))
+}
+
+# Infer destination dimensions for forwardsolve() and backsolve().
+infer_dest_triangular <- function(args, scope) {
+  if (length(args) < 2L) {
+    return(NULL)
+  }
+  A <- infer_symbol_var(args[[1L]], scope)
+  B <- infer_symbol_var(args[[2L]], scope)
+  if (is.null(A) || is.null(B)) {
+    return(NULL)
+  }
+  if (A@rank != 2L || B@rank == 0L || B@rank > 2L) {
+    return(NULL)
+  }
+  if (is.null(B@dims)) {
+    return(NULL)
+  }
+  Variable("double", B@dims)
+}
+
+attr(r2f_handlers[["%*%"]], "dest_supported") <- TRUE
+attr(r2f_handlers[["crossprod"]], "dest_supported") <- TRUE
+attr(r2f_handlers[["tcrossprod"]], "dest_supported") <- TRUE
+attr(r2f_handlers[["outer"]], "dest_supported") <- TRUE
+attr(r2f_handlers[["%o%"]], "dest_supported") <- TRUE
+attr(r2f_handlers[["forwardsolve"]], "dest_supported") <- TRUE
+attr(r2f_handlers[["backsolve"]], "dest_supported") <- TRUE
+
+attr(r2f_handlers[["%*%"]], "dest_infer") <- infer_dest_matmul
+attr(r2f_handlers[["crossprod"]], "dest_infer") <- infer_dest_crossprod
+attr(r2f_handlers[["tcrossprod"]], "dest_infer") <- infer_dest_tcrossprod
+attr(r2f_handlers[["outer"]], "dest_infer") <- infer_dest_outer
+attr(r2f_handlers[["%o%"]], "dest_infer") <- infer_dest_outer
+attr(r2f_handlers[["forwardsolve"]], "dest_infer") <- infer_dest_triangular
+attr(r2f_handlers[["backsolve"]], "dest_infer") <- infer_dest_triangular
diff --git a/doc/matrix/baseR-blas-lapack-mapping.md b/doc/matrix/baseR-blas-lapack-mapping.md
new file mode 100644
index 0000000..ecc343a
--- /dev/null
+++ b/doc/matrix/baseR-blas-lapack-mapping.md
@@ -0,0 +1,733 @@
+# Base R (R 4.5) linear algebra → BLAS/LAPACK mapping (real/double)
+
+This document is intended as a **shared reference** for implementing an R→Fortran transpiler that lowers common base-R linear algebra operations to the **same BLAS/LAPACK** backend that R is using.
+
+Scope:
+- Focus on **real (double precision)** inputs (`double` in R; `D*` routines in BLAS/LAPACK).
+- Complex variants are omitted unless trivial.
+- Emphasis on:
+  1) A mapping of R functions/ops to BLAS/LAPACK routines  
+  2) Which routines **overwrite ("consume") inputs** (important for R copy-on-modify semantics)  
+  3) **Return value structures**: R list components ↔ LAPACK output parameters
+  4) BLAS/LAPACK naming + common calling patterns to drive helper/wrapper design  
+  5) **Workspace query patterns** and which routines require them
+  6) Notes on backend differences (OpenBLAS/MKL/Accelerate/Reference)
+  7) Quickr-specific constraints (what is implemented today vs planned)
+
+---
+
+## 1) BLAS levels and how they relate to R operations
+
+BLAS is traditionally grouped into "levels" that correlate strongly with performance characteristics:
+
+- **Level 1 (vector–vector):** `ddot`, `daxpy`, `dscal`, `dnrm2`, …  
+  - Low arithmetic intensity; often memory-bound.
+  - In-place behavior is common: `daxpy` overwrites `y`; `dscal` overwrites `x`.
+
+- **Level 2 (matrix–vector):** `dgemv`, `dtrsv`, `dsymv`, `dger`, …  
+  - `dger` is a rank-1 update (outer product update): `A := α*x*y' + A`.
+  - Often overwrites one argument (e.g., updates `A`).
+
+- **Level 3 (matrix–matrix):** `dgemm`, `dtrsm`, `dsyrk`, `dsymm`, …  
+  - Highest arithmetic intensity; typically best-optimized; the workhorse for `%*%`, `crossprod`, solves with multiple RHS, etc.
+  - Often overwrites the output buffer (`C` in `dgemm`, `B` in `dtrsm`).
+
+**Translation tip:** Prefer Level 3 routines whenever possible (R often does). Many seemingly different high-level operations reduce to `dgemm`, `dsyrk`, `dtrsm`.
+
+---
+
+## 2) High-level mapping table (base R functions / ops)
+
+Notes:
+- "Typical backend" reflects how base R commonly implements these operations. Some entries have multiple plausible routes; where base R behavior varies by options (e.g. `qr(LAPACK=TRUE)`), the table notes it explicitly.
+- "Consumes input?" refers to whether the routine overwrites its input arrays (you must copy if the original is needed later).
+- "R return type" indicates whether R returns a simple matrix/vector or a structured list.
+
+| Category | Base R function / op | What it does | Typical BLAS / LAPACK backend | Consumes input? | R return type |
+|---|---|---|---|---|---|
+| Creating | `matrix()`, `cbind()`, `rbind()` | Build/bind matrices | R internals (copies) | No | matrix |
+| Creating / special | `diag(n)` / `diag(v)` / `diag(X)` | Identity / diagonal / extract diag | R internals | No | matrix/vector |
+| Creating | `t(X)` | Transpose | Often a copy / stride change; transpose flags used in BLAS combos | No | matrix |
+| Creating | `outer(x,y,FUN="*")` | Outer product | **BLAS `dger`** (rank-1 update) for `FUN="*"`; otherwise loops | `dger` overwrites `A` | matrix |
+| Creating | `%o%` | Outer product | Typically `outer(x,y,"*")` → `dger`/loops | see above | matrix |
+| Creating | `kronecker(X,Y)` | Kronecker product | Usually R/C loops; may call BLAS in blocks in some impls | No | matrix |
+| Basic arithmetic | `+`, `-`, `*`, `/`, `^` | Elementwise ops | Intrinsics / loops | No | matrix |
+| Multiply | `%*%` | Matrix multiply | **BLAS `dgemm`** (general); possible opt: `dsymm` if symmetry exploited | No (reads A,B; writes C) | matrix |
+| Cross products | `crossprod(X,Y)` | `t(X) %*% Y` | **BLAS `dgemm`**; if `Y` missing: **`dsyrk`** | No (writes result) | matrix |
+| Cross products | `tcrossprod(X,Y)` | `X %*% t(Y)` | **BLAS `dgemm`**; if `Y` missing: **`dsyrk`** | No (writes result) | matrix |
+| Reductions | `rowSums/Means`, `colSums/Means` | Row/col reductions | Intrinsics / loops (worth supporting) | No | vector |
+| Determinant | `det(X)` / `determinant(X)` | Determinant (sign+modulus, log) | **LAPACK `dgetrf`** (LU) | Yes (LU overwrites A) | scalar / **list** |
+| Linear solve | `solve(A, b)` / `solve(A)` | Solve Ax=b / inverse | **LAPACK `dgesv`**; inverse via `dgetrf` + `dgetri` | Yes (overwrites A, and B) | matrix |
+| Triangular solve | `forwardsolve(L,b)` | Solve Lx=b | **BLAS `dtrsm`** (multi-RHS); `dtrsv` for vector RHS | Yes (overwrites RHS) | matrix/vector |
+| Triangular solve | `backsolve(U,b)` | Solve Ux=b | **BLAS `dtrsm`** / `dtrsv` | Yes (overwrites RHS) | matrix/vector |
+| Cholesky | `chol(A)` | Cholesky (SPD) | **LAPACK `dpotrf`** | Yes (overwrites A) | matrix (+attrs) |
+| Cholesky (pivoted) | `chol(A, pivot=TRUE)` | Pivoted Cholesky | **LAPACK `dpstrf`** | Yes | matrix (+attrs) |
+| Cholesky inverse | `chol2inv(R)` | Inverse from Cholesky factor | **LAPACK `dpotri`** | Yes (overwrites factor) | matrix |
+| Cholesky solve | *(via backsolve)* | Solve from Cholesky factor | **LAPACK `dpotrs`** or **BLAS `dtrsm`** ×2 | Yes (overwrites RHS) | matrix |
+| QR decomposition | `qr(X)` | QR factorization | Default **LINPACK `dqrdc2`**; if `LAPACK=TRUE`: **LAPACK `dgeqp3`** | Yes (overwrites A) | **list** (class "qr") |
+| QR solve | `qr.solve(X,b)` | Solve/LS via QR | QR (`dgeqp3`/`dgeqrf`) + apply Q (`dormqr`) + triangular solve | Yes | matrix |
+| Eigen (symmetric) | `eigen(A, symmetric=TRUE)` | Eigenvalues/vectors | **LAPACK `dsyevr`** (default); or `dsyevd`/`dsyev` | Yes (overwrites A) | **list** (class "eigen") |
+| Eigen (general) | `eigen(A, symmetric=FALSE)` | General eigenproblem | **LAPACK `dgeev`** | Yes (overwrites A) | **list** (class "eigen") |
+| SVD | `svd(X)` | Singular value decomposition | **LAPACK `dgesdd`** (fast) or `dgesvd` | Yes (destroys A) | **list** |
+| Norms | `norm(X, type)` | Matrix norms | **LAPACK `dlange`** for `1/I/F/M`; spectral norm uses SVD | `dlange` No; SVD Yes | scalar |
+| Cov/Cor | `cov(X)`, `cor(X)` | Covariance / correlation | Center/scale + `crossprod` (BLAS) + scalar ops | No (but may allocate) | matrix |
+| Conditioning | `rcond(A)`, `kappa(A)` | Reciprocal cond / cond number | `dlange` + `dgecon` (with LU) / `dtrcon` | Factorization consumes A | scalar |
+| Distances | `dist(X)` | Pairwise distances | R/C loops | No | dist object |
+| Triangular masks | `lower.tri(X)`, `upper.tri(X)` | Logical triangle masks | R internals | No | logical matrix |
+| Reshape/permute | `dim<-`, `aperm()` | Reshape / permute | R internals | No | array |
+
+---
+
+## 3) R return structures ↔ LAPACK output parameters
+
+R functions return user-friendly list structures, while LAPACK uses in-place overwriting with multiple output parameters. This section details the mapping.
+These are a **target interface**; quickr does not yet have list return values or `$` access lowering.
+When implemented, prefer name-mangling with explicit properties (e.g., `x__values__`, `x__vectors__`) and a `$` lowering rule that rewrites `x$values` → `x__values__`.
+
+### 3.1 `qr()` → DGEQP3 / DGEQRF / DQRDC2
+
+**R returns** a list of class `"qr"` with components:
+| Component | Description |
+|-----------|-------------|
+| `qr` | Matrix (same dims as input): upper triangle = R; lower triangle = Householder reflector info |
+| `rank` | Integer: numerical rank (LINPACK computes this; LAPACK always returns full rank) |
+| `pivot` | Integer vector of length `ncol(x)`: column permutation |
+| `qraux` | Numeric vector of length `ncol(x)`: scalar factors (τ) for reconstructing Q |
+
+With `LAPACK=TRUE`, the object has attribute `"useLAPACK"` = TRUE.
+
+**LAPACK DGEQP3 signature:**
+```fortran
+DGEQP3(M, N, A, LDA, JPVT, TAU, WORK, LWORK, INFO)
+```
+| Parameter | Direction | Maps to R |
+|-----------|-----------|-----------|
+| `A` | in/out | Input matrix → `qr$qr` (packed R + reflectors) |
+| `JPVT` | in/out | `qr$pivot` |
+| `TAU` | out | `qr$qraux` |
+| `INFO` | out | Error status (0 = success) |
+
+**LAPACK DGEQRF** (unpivoted) omits `JPVT`; R's LINPACK `dqrdc2` uses a modified tolerance-based pivoting strategy.
+
+---
+
+### 3.2 `eigen()` → DSYEVR / DGEEV
+
+**R returns** a list of class `"eigen"` with components:
+| Component | Description |
+|-----------|-------------|
+| `values` | Numeric vector: eigenvalues (sorted by decreasing absolute value) |
+| `vectors` | Matrix: columns are unit eigenvectors (NULL if `only.values=TRUE`) |
+
+For symmetric matrices, both are always real. For asymmetric matrices, complex conjugate pairs appear consecutively.
+
+**LAPACK DSYEVR signature** (symmetric, Relatively Robust Representations—fastest):
+```fortran
+DSYEVR(JOBZ, RANGE, UPLO, N, A, LDA, VL, VU, IL, IU, ABSTOL, 
+       M, W, Z, LDZ, ISUPPZ, WORK, LWORK, IWORK, LIWORK, INFO)
+```
+| Parameter | Direction | Maps to R |
+|-----------|-----------|-----------|
+| `W` | out | `eigen()$values` (first M elements, ascending order—R reverses) |
+| `Z` | out | `eigen()$vectors` (columns) |
+| `M` | out | Count of eigenvalues found |
+| `INFO` | out | Error status |
+
+**LAPACK DGEEV signature** (general non-symmetric):
+```fortran
+DGEEV(JOBVL, JOBVR, N, A, LDA, WR, WI, VL, LDVL, VR, LDVR, WORK, LWORK, INFO)
+```
+| Parameter | Direction | Maps to R |
+|-----------|-----------|-----------|
+| `WR`, `WI` | out | Real/imaginary parts → combined into `eigen()$values` (complex if needed) |
+| `VR` | out | Right eigenvectors → `eigen()$vectors` |
+| `INFO` | out | If >0: QR algorithm failed; eigenvalues INFO+1:N are valid |
+
+---
+
+### 3.3 `svd()` → DGESDD / DGESVD
+
+**R returns** a list with components:
+| Component | Description |
+|-----------|-------------|
+| `d` | Numeric vector: singular values in decreasing order, length min(n,p) |
+| `u` | Matrix (n × nu): left singular vectors as columns |
+| `v` | Matrix (p × nv): right singular vectors as columns |
+
+**LAPACK DGESDD signature** (divide-and-conquer, faster for large matrices):
+```fortran
+DGESDD(JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, IWORK, INFO)
+```
+| Parameter | Direction | Maps to R |
+|-----------|-----------|-----------|
+| `S` | out | `svd()$d` (singular values, descending) |
+| `U` | out | `svd()$u` |
+| `VT` | out | **Transpose** of `svd()$v` (R transposes this back) |
+| `IWORK` | workspace | Integer array of size 8×min(M,N)—**mandatory** |
+| `INFO` | out | If >0: DBDSDC did not converge |
+
+**LAPACK DGESVD** (QR-based, smaller workspace):
+```fortran
+DGESVD(JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT, WORK, LWORK, INFO)
+```
+
+---
+
+### 3.4 `chol()` → DPOTRF / DPSTRF
+
+**R returns** an upper triangular matrix R such that `t(R) %*% R == x`.
+
+With `pivot=TRUE`, R adds attributes:
+| Attribute | Description |
+|-----------|-------------|
+| `"pivot"` | Integer permutation vector |
+| `"rank"` | Numerical rank |
+
+Relationship: `t(R) %*% R == x[pivot, pivot]`
+
+**LAPACK DPOTRF signature:**
+```fortran
+DPOTRF(UPLO, N, A, LDA, INFO)
+```
+| Parameter | Direction | Notes |
+|-----------|-----------|-------|
+| `A` | in/out | Input SPD matrix → output factor in upper (UPLO='U') or lower triangle |
+| `INFO` | out | If >0: leading minor of order INFO not positive definite |
+
+**LAPACK DPSTRF signature** (pivoted):
+```fortran
+DPSTRF(UPLO, N, A, LDA, PIV, RANK, TOL, WORK, INFO)
+```
+| Parameter | Direction | Maps to R |
+|-----------|-----------|-----------|
+| `PIV` | out | `attr(result, "pivot")` |
+| `RANK` | out | `attr(result, "rank")` |
+| `WORK` | workspace | Fixed size 2×N—**no query needed** |
+
+---
+
+### 3.5 `determinant()` → DGETRF
+
+**R returns** a list of class `"det"` with components:
+| Component | Description |
+|-----------|-------------|
+| `modulus` | log|det| (with attribute `"logarithm"` = TRUE by default) |
+| `sign` | +1 or -1 |
+
+**LAPACK DGETRF signature:**
+```fortran
+DGETRF(M, N, A, LDA, IPIV, INFO)
+```
+
+The determinant is computed as: `sign = (-1)^(number of row swaps)`, `modulus = Σ log|U[i,i]|`
+
+| Parameter | Direction | Notes |
+|-----------|-----------|-------|
+| `A` | in/out | Stores L (unit lower, below diag) and U (upper, including diag) |
+| `IPIV` | out | Pivot indices for sign calculation |
+| `INFO` | out | If >0: U(INFO,INFO) = 0, matrix singular |
+
+---
+
+### 3.6 `solve()` → DGESV / DGETRF + DGETRI
+
+**R returns** a single matrix: the solution X to AX=B, or A⁻¹ if b is missing.
+
+**LAPACK DGESV signature** (combined factor + solve):
+```fortran
+DGESV(N, NRHS, A, LDA, IPIV, B, LDB, INFO)
+```
+| Parameter | Direction | Notes |
+|-----------|-----------|-------|
+| `A` | in/out | Overwritten with LU factors |
+| `B` | in/out | RHS on input → solution X on output |
+| `IPIV` | out | Pivot indices |
+| `INFO` | out | If >0: U(INFO,INFO) = 0, no solution computed |
+
+For inverse only: R uses `dgetrf` (factor) + `dgetri` (invert from factors).
+
+---
+
+## 4) Detailed mapping for the most common operations
+
+### 4.1 Matrix multiply: `%*%`
+**Backend:** `dgemm` (Level 3).  
+R typically computes:
+- `C = A %*% B` as a fresh allocation, then `dgemm` writes into `C`.
+- For transposed variants, R uses BLAS transpose flags rather than materializing `t(A)`.
+
+**Consumes input?** No (reads `A`, `B`; writes `C`).
+
+**Helper candidate:** `gemm(A, transA, B, transB) -> C` and `gemv()` fast path for vector cases.
+
+---
+
+### 4.2 `crossprod()` / `tcrossprod()`
+- `crossprod(X, Y)` = `t(X) %*% Y` → `dgemm` with `transA='T'`.
+- `crossprod(X)` = `t(X) %*% X` → `dsyrk` (symmetric rank-k update) is a common optimized path.
+- Similarly for `tcrossprod`.
+
+**Consumes input?** No.
+
+**Helper candidates:** `crossprod(X, Y?)`, `syrk(X)` returning symmetric output (choose triangle convention).
+
+---
+
+### 4.3 `solve(A,b)` (general dense)
+Typical route:
+1. **LU factorization**: `dgetrf(A, ipiv)`  
+2. **Solve**: `dgetrs(LU, ipiv, B)` or use driver `dgesv` which does both.
+
+**Consumes input?**
+- `dgetrf` overwrites `A` (stores `L` and `U` in-place).
+- `dgetrs` overwrites `B` with the solution.
+So yes: you must **copy A and B** unless you've proven they are dead afterwards.
+
+**Inverse (`solve(A)`):**
+- often `dgetrf` + `dgetri` (inversion from LU). Also consumes the factor storage and uses workspace.
+
+**Helper candidates:**
+- `lu_factor(A) -> (LU, ipiv)`
+- `lu_solve(LU, ipiv, B) -> X`
+- `invert_from_lu(LU, ipiv) -> Ainv`
+
+---
+
+### 4.4 Triangular solves: `forwardsolve`, `backsolve`
+**Backend:** `dtrsm` (Level 3) for multiple RHS; `dtrsv` (Level 2) for vector RHS.
+
+**Consumes input?** Yes: BLAS writes the solution into the RHS buffer (`B`).
+
+**Helper candidate:** `trsm(uplo, trans, diag, Atri, B) -> overwrites B`.
+
+---
+
+### 4.5 Cholesky: `chol`, `chol2inv`, and Cholesky-based solve
+- `chol(A)` → `dpotrf(uplo='U' by default in R)`.  
+- `chol(A, pivot=TRUE)` → `dpstrf` (pivoted Cholesky; returns rank/pivot).
+- `chol2inv(R)` → `dpotri` (inverts from Cholesky factor).
+
+**Cholesky-based solve (`dpotrs`):**
+For SPD systems, solving via Cholesky is faster than LU:
+```fortran
+DPOTRS(UPLO, N, NRHS, A, LDA, B, LDB, INFO)
+```
+- `A` contains the Cholesky factor from `dpotrf`
+- `B` is overwritten with solution X
+- Internally performs two triangular solves via `dtrsm`
+
+R doesn't expose `dpotrs` directly, but `backsolve(R, backsolve(R, b, transpose=TRUE))` achieves the same result. A transpiler can recognize this pattern and emit `dpotrs`.
+
+**Consumes input?** Yes: `dpotrf`, `dpstrf`, `dpotri`, and `dpotrs` all overwrite their input matrix/RHS storage.
+
+**Helper candidates:**
+- `chol_factor(A, uplo) -> R`
+- `chol_invert(R, uplo) -> Ainv`
+- `chol_solve(R, B, uplo) -> X` via `dpotrs` (faster than generic LU solve for SPD)
+
+---
+
+### 4.6 QR: `qr`, `qr.solve`, QR family helpers
+Important nuance:
+- `qr()` default in base R uses **LINPACK** (`dqrdc2`) unless `LAPACK=TRUE`.
+- With `LAPACK=TRUE`, typical LAPACK is **`dgeqp3`** (pivoted QR).
+- The LINPACK `dqrdc2` differs from standard `DQRDC` by using a tolerance-based pivoting strategy that moves near-zero 2-norm columns to the right edge—important for rank-deficient detection.
+
+Common LAPACK sequence:
+1) `dgeqp3` (or `dgeqrf` for unpivoted QR)  
+2) Apply/construct Q via `dormqr` / `dorgqr`  
+3) Solve triangular system in R via `dtrsm`/`dtrsv` (or `dtrtrs` LAPACK)
+
+**Key routines for Q manipulation:**
+- `dormqr`: Apply Q or Q^T to a matrix **without forming Q explicitly** (memory-efficient)
+- `dorgqr`: Explicitly construct the orthogonal matrix Q (requires more memory)
+
+Prefer `dormqr` when you only need Q^T * B or Q * B.
+
+**Consumes input?** Yes: QR routines overwrite `A` with `R` and reflector data.
+
+**Helper candidates:**
+- `qr_factor(A, pivoted=TRUE) -> (qr, tau, jpvt)`
+- `apply_qt(qr, tau, B) -> Q^T B` using `dormqr`
+- `apply_q(qr, tau, B) -> Q B` using `dormqr`
+- `form_q(qr, tau) -> Q` using `dorgqr` (only when explicit Q needed)
+- `tri_solve(R, B)`
+
+Also consider implementing:
+- `qr.Q`, `qr.R`, `qr.coef`, `qr.qty`, `qr.qy`, `qr.resid`, `qr.fitted`  
+These are common in regression pipelines and are built from the same primitives.
+
+---
+
+### 4.7 Eigen: `eigen`
+- **Symmetric real:** `dsyevr` is the **default and fastest** option (Relatively Robust Representations algorithm). Alternatives `dsyevd` (divide-and-conquer) and `dsyev` (classic QR) exist but are typically slower.
+- **General real:** `dgeev`.
+
+`dsyevr` benefits:
+- Supports computing eigenvalue subsets (by index range or value range)
+- Generally O(n²) for eigenvectors vs O(n³) for `dsyev`
+- Best performance with optimized BLAS
+
+**Consumes input?** Yes: these routines overwrite the input matrix during reduction steps.
+
+**Helper candidates:**
+- `sym_eigen(A, want_vectors, range?) -> (values, vectors)`
+- `gen_eigen(A, want_vectors) -> (values_real, values_imag, vectors)`
+
+---
+
+### 4.8 SVD: `svd`
+- Commonly `dgesdd` (divide-and-conquer) which is **6-7× faster** than `dgesvd` for large matrices when computing singular vectors.
+- `dgesvd` (QR-based) may be preferred for smaller workspace or when only singular values are needed.
+
+**Consumes input?** Yes: SVD routines overwrite/destroy `A`.
+
+**Helper candidates:**
+- `svd(A, jobu, jobvt) -> (s, U, VT)` with workspace query helper.
+
+---
+
+### 4.9 Norms: `norm`
+- `dlange` for `"1"`, `"I"`, `"F"`, `"M"` is straightforward and non-destructive.
+- `"2"` norm (spectral norm) usually computed via SVD (expensive and destructive).
+- Revisit implementation: `norm(x, "2")` maps to BLAS `dnrm2` for vector inputs, but to LAPACK `dlange` (or SVD for spectral norm) for matrix inputs. We should implement `norm()` with this vector vs matrix split.
+
+**Helper candidate:** `lange(A, norm_type) -> scalar`.
+
+---
+
+### 4.10 Covariance/correlation: `cov`, `cor`
+Typically implemented as:
+1) Center columns (subtract means), optional scaling to unit variance (for `cor`)
+2) Compute `crossprod(centered)` / divide by `(n-1)`  
+So: scalar ops + `crossprod` (BLAS).
+
+**Consumes input?** No, unless you perform centering in-place on shared objects (your transpiler can choose).
+
+---
+
+## 5) "Consumes input" rules of thumb (crucial for R semantics)
+
+R's visible behavior is "copy-on-modify": user variables are conceptually preserved across operations unless explicitly assigned.
+
+### 5.1 BLAS routines
+- **Most BLAS level-3 (`dgemm`, `dsyrk`)**: read inputs, write output buffer; safe if output is separate.
+- **Triangular solves (`dtrsm`, `dtrsv`)**: overwrite RHS (`B` or vector).
+- **Vector ops (`daxpy`, `dscal`)**: overwrite one operand (`y` or `x`).
+
+### 5.2 LAPACK routines
+Nearly all LAPACK "driver" and factorization routines **overwrite** their main matrix input:
+- LU: `dgetrf`, solve driver: `dgesv`
+- Cholesky: `dpotrf`, `dpstrf`, `dpotri`, `dpotrs`
+- QR: `dgeqp3`, `dgeqrf`, plus `dorgqr`/`dormqr` overwriting work buffers
+- Eigen: `dsyevr`, `dsyevd`, `dsyev`, `dgeev`
+- SVD: `dgesdd`, `dgesvd`
+
+### 5.3 Practical transpiler policy
+- Default: **copy** inputs to any LAPACK call that overwrites them.
+- If you implement def-use / liveness analysis, you can skip copies when the input is provably dead after the call.
+
+---
+
+## 6) Workspace query patterns
+
+Many LAPACK routines require workspace arrays whose optimal size depends on problem dimensions and algorithm internals. The **LWORK=-1 convention** triggers a workspace query.
+
+### 6.1 Routines requiring workspace queries
+
+| Routine | Query parameter(s) | Minimum workspace | Notes |
+|---------|-------------------|-------------------|-------|
+| `dgeqp3` | LWORK=-1 | 3×N + 1 | Optimal: 2×N + (N+1)×NB |
+| `dgeqrf` | LWORK=-1 | max(1, N) | Optimal: N×NB |
+| `dsyevr` | LWORK=-1, LIWORK=-1 | 26×N (work), 10×N (iwork) | Also needs ISUPPZ array |
+| `dsyevd` | LWORK=-1, LIWORK=-1 | 1 + 6×N + 2×N² (work) | Large workspace |
+| `dsyev` | LWORK=-1 | max(1, 3×N-1) | |
+| `dgeev` | LWORK=-1 | 3×N (no vectors), 4×N (with vectors) | |
+| `dgesdd` | LWORK=-1 | Complex; depends on JOBZ | Also needs IWORK(8×min(M,N)) |
+| `dgesvd` | LWORK=-1 | max(3×min(M,N)+max(M,N), 5×min(M,N)-4) | |
+| `dgetri` | LWORK=-1 | N | For matrix inversion |
+| `dorgqr` | LWORK=-1 | max(1, N) | For explicit Q formation |
+| `dormqr` | LWORK=-1 | max(1, N) | For Q application |
+
+### 6.2 Routines NOT requiring workspace queries
+
+These can be called directly without a query step:
+
+| Routine | Workspace | Notes |
+|---------|-----------|-------|
+| `dpotrf` | None | Cholesky factorization |
+| `dpstrf` | Fixed: 2×N | Pivoted Cholesky |
+| `dpotrs` | None | Cholesky solve |
+| `dpotri` | None | Cholesky inverse |
+| `dgetrf` | None | LU factorization |
+| `dgetrs` | None | LU solve |
+| `dgesv` | None | Combined LU factor + solve |
+| `dtrsm` | None | Triangular solve (BLAS) |
+| `dgemm` | None | Matrix multiply (BLAS) |
+| `dlange` | Conditional: N for '1' and 'I' norms | Matrix norms |
+
+### 6.3 Standard workspace query pattern
+
+```fortran
+! Step 1: Query optimal workspace
+LWORK = -1
+CALL DSYEVR(..., WORK, LWORK, IWORK, LIWORK, INFO)
+LWORK = INT(WORK(1))
+LIWORK = IWORK(1)
+
+! Step 2: Allocate workspace
+ALLOCATE(WORK(LWORK), IWORK(LIWORK))
+
+! Step 3: Actual computation
+CALL DSYEVR(..., WORK, LWORK, IWORK, LIWORK, INFO)
+```
+
+**Helper candidate:** `workspace_query(routine, args...) -> (lwork, liwork?)` that encapsulates this pattern.
+
+---
+
+## 7) INFO parameter conventions (error handling)
+
+All LAPACK routines return an `INFO` parameter with consistent semantics:
+
+| INFO value | Meaning |
+|------------|---------|
+| `= 0` | Success |
+| `< 0` | Argument `-INFO` had an illegal value |
+| `> 0` | Computational failure (routine-specific) |
+
+**Quickr note:** Today, quickr cannot propagate Fortran-side errors back to R. INFO handling is therefore a design target,
+and any `check_info()` helper is deferred until we have an error-bridge mechanism. Until then, the transpiler should either
+ignore INFO (documented) or surface it via explicit output variables.
+
+### 7.1 Routine-specific INFO > 0 meanings
+
+| Routine | INFO > 0 meaning |
+|---------|------------------|
+| `dpotrf` | Leading minor of order INFO not positive definite |
+| `dpstrf` | Matrix is rank deficient; RANK < N |
+| `dgetrf` | U(INFO,INFO) is exactly zero—matrix singular |
+| `dgesv` | U(INFO,INFO) is exactly zero—no solution computed |
+| `dgeev` | QR algorithm failed to converge; eigenvalues INFO+1:N are valid |
+| `dsyevr` | Internal error (rare with IEEE-754 arithmetic) |
+| `dgesdd` | DBDSDC (bidiagonal SVD) did not converge |
+| `dgesvd` | DBDSQR did not converge; INFO superdiagonals did not converge to zero |
+
+---
+
+## 8) BLAS/LAPACK naming conventions and call-patterns (for helper design)
+
+### 8.1 Naming
+Most routine names encode:
+- **Precision/type:** `d` = double real (`s` single real, `z` complex double, `c` complex float)
+- **Matrix type:**  
+  - `ge` general, `sy` symmetric, `po` SPD, `tr` triangular, …
+- **Operation:**  
+  - `mm` multiply (BLAS), `sv` solve (driver), `trf` factorize, `trs` triangular solve (from factors),  
+  - `qrf` QR factor, `qp3` QR with pivoting,  
+  - `ev` eigen, `evr` eigen (RRR), `evd` eigen (D&C),
+  - `svd` SVD, `sdd` SVD divide&conquer,  
+  - `con` condition estimate, `lan`/`lange` norm.
+
+Examples:
+- `dgemm` = double general matrix-matrix multiply
+- `dgesv` = double general solve driver (uses LU)
+- `dgetrf` = LU factorization
+- `dpotrf` = Cholesky factorization
+- `dpotrs` = Cholesky triangular solve
+- `dsyevr` = symmetric eigen (robust, fast driver—RRR algorithm)
+- `dgeev` = general eigen
+- `dgesdd` = SVD divide&conquer
+
+### 8.2 Common arguments (design these into wrappers)
+- Dimensions: `m,n,k`, `nrhs`
+- Leading dimensions: `lda`, `ldb`, `ldc` (for column-major arrays, typically `lda = nrow(A)` in R terms)
+- Option flags: `trans`, `uplo`, `side`, `diag`, `jobz`, `jobu`, `jobvt`, `range`
+- Pivot arrays: `ipiv` (LU), `jpvt` (pivoted QR), `piv` (pivoted Cholesky)
+- Workspace:
+  - Many LAPACK routines want `work`, `lwork` and sometimes `iwork`, `liwork`
+  - Standard pattern: **workspace query** with `lwork=-1`, then allocate (see Section 6)
+
+### 8.3 Column-major layout
+R matrices are stored column-major already, matching Fortran, which is great:
+- A direct lowering to Fortran calls can pass R's contiguous storage (subject to how you marshal memory between R and your compiled code).
+- For transposes, prefer BLAS `TRANS` flags rather than materializing `t(A)`.
+
+---
+
+## 9) Backend notes (OpenBLAS, MKL, Accelerate, reference)
+
+R is typically linked to:
+- **OpenBLAS** on many Linux distros,
+- **Reference BLAS/LAPACK** (or a bundled one) on some Windows builds,
+- **Apple Accelerate** on macOS (varies by build and user configuration),
+- **MKL** if configured by the user or via specific distributions.
+
+### 9.1 API portability
+The **BLAS/LAPACK API** is stable; your Fortran calls should work across backends as long as the symbols are available and you link exactly like R (same `libblas`/`liblapack` or unified library).
+
+### 9.2 Differences that may show up
+- **Threading behavior:** OpenBLAS/MKL can be multithreaded; Accelerate too
+- **Performance characteristics:** Vary significantly by backend
+- **Minor floating-point differences:** Due to reordering / SIMD / threading
+
+### 9.3 Thread safety considerations
+When R code uses parallel processing (e.g., `parallel` package, `foreach`), nested parallelism with threaded BLAS can cause:
+- Thread oversubscription (more threads than cores)
+- Contention and slowdown
+- In rare cases, hangs
+
+**Recommendations:**
+- Consider setting `OMP_NUM_THREADS=1` or `OPENBLAS_NUM_THREADS=1` in nested parallel contexts
+- MKL: use `MKL_NUM_THREADS` or `mkl_set_num_threads()`
+- Your transpiler may want to emit thread-count management around parallel regions
+
+### 9.4 Integer overflow considerations
+For very large matrices (n > ~46,340 for 32-bit integers), integer overflow can occur in:
+- Leading dimension calculations (LDA × N)
+- Workspace size calculations
+- Index computations within BLAS/LAPACK
+
+Most R installations use 32-bit BLAS integers. For matrices exceeding ~2 billion elements:
+- Check for 64-bit BLAS/LAPACK (ILP64 interface)
+- R itself has limits around `2^31 - 1` elements per vector/matrix
+
+**Recommendation:** Add overflow checks for `nrow * ncol` and `lda * n` before LAPACK calls when targeting large matrices.
+
+### 9.5 Matching R results
+To match R results as closely as possible, build and link against the **same BLAS/LAPACK** that R is using for that session. Check with:
+```r
+La_library()   # Shows LAPACK library path
+extSoftVersion()["BLAS"]  # Shows BLAS info
+```
+
+---
+
+## 10) Suggested helper/wrapper API for the transpiler
+
+A minimal set of Fortran-callable helpers covers most mapped R ops:
+
+### BLAS helpers
+- `gemm(transA, transB, A, B) -> C`
+- `syrk(trans, X) -> SymmetricC`
+- `trsm(side, uplo, trans, diag, Atri, B)  ! overwrites B`
+- `trsv(uplo, trans, diag, Atri, x)  ! overwrites x (vector case)`
+- `ger(x, y, A, alpha)  ! A := alpha*x*y' + A`
+
+### LAPACK helpers (factorizations)
+- `lu_factor(A) -> (LU, ipiv, info)`
+- `lu_solve(LU, ipiv, B) -> X  ! overwrites B buffer`
+- `lu_invert(LU, ipiv) -> Ainv`
+- `chol_factor(A, uplo) -> (R, info)`
+- `chol_solve(R, B, uplo) -> X  ! via dpotrs, overwrites B`
+- `chol_invert(R, uplo) -> Ainv`
+- `qr_factor_pivoted(A) -> (QR, tau, jpvt, info)`
+- `qr_factor_unpivoted(A) -> (QR, tau, info)`
+
+### LAPACK helpers (Q manipulation)
+- `apply_qt(QR, tau, B) -> Q^T B  ! via dormqr`
+- `apply_q(QR, tau, B) -> Q B  ! via dormqr`
+- `form_q(QR, tau) -> Q  ! via dorgqr (when explicit Q needed)`
+
+### LAPACK helpers (decompositions)
+- `sym_eigen(A, want_vectors, range?) -> (w, Z, info)`
+- `gen_eigen(A, want_vectors) -> (wr, wi, VR, info)` (or pack into complex)
+- `svd_dc(A, want_u, want_vt) -> (s, U, VT, info)  ! dgesdd`
+- `svd_qr(A, want_u, want_vt) -> (s, U, VT, info)  ! dgesvd`
+- `lange(A, type) -> norm`
+
+### Workspace helper
+- `workspace_query(routine, args...) -> (lwork, liwork?)` (patternized per routine family)
+
+### Error handling helper
+- `check_info(info, routine_name) -> raises error with meaningful message` (**planned**; blocked on Fortran→R error propagation)
+
+---
+
+## 11) Implementation checklist (translation rules)
+
+1. Determine if operation can be expressed as BLAS-3 (`dgemm`, `dsyrk`, `dtrsm`)  
+2. Otherwise pick LAPACK driver (solve/eigen/svd/qr/chol)  
+3. For LAPACK routines requiring workspace:
+   - Emit workspace query call (LWORK=-1)
+   - Allocate workspace
+   - Emit actual call
+4. Enforce R semantics:
+   - if the routine overwrites inputs: copy unless the variable is dead afterward
+5. Normalize to double precision (`real*8`) for R numerics
+6. Handle `NA/NaN/Inf` policy:
+   - base R sometimes checks for finite values before calling LAPACK; decide whether to emulate those checks
+7. Handle INFO return:
+   - Check for errors after each LAPACK call
+   - Map INFO codes to meaningful error messages
+   - **Quickr note:** defer error propagation until we have a Fortran→R error bridge; consider returning INFO as an explicit output in the interim
+8. Threading control:
+   - consider aligning to R's threading settings (OpenBLAS/MKL environment variables)
+   - manage thread counts in nested parallel contexts
+9. Large matrix safety:
+   - add overflow checks for matrices approaching 32-bit integer limits
+
+---
+
+## 12) Notes on "comprehensive coverage" vs "high value"
+
+Even though base R has many entry points, most linear-algebra-heavy code funnels through:
+- `%*%`, `crossprod`, `tcrossprod`
+- `solve`, `chol`, `qr`, `eigen`, `svd`
+- `forwardsolve`, `backsolve`
+- `norm`, `rcond`, `kappa`
+- `cov`, `cor` (built from centering + crossprod)
+
+Supporting these well (plus the QR helper family) typically covers the bulk of real-world usage.
+
+---
+
+## 13) Quickr implementation notes (current vs planned)
+
+- **List returns and `$` access:** Not implemented yet. Planned approach is name-mangling:
+  `eigen(x)` returns `x__values__`, `x__vectors__`, and `$` lowers to the mangled name.
+- **Transpose lowering:** Prefer BLAS `TRANS` flags for `t(A) %*% B` and `A %*% t(B)` to avoid materializing transposes.
+- **Conformability checks:** Emitted code should validate dimensions and stop with R-like errors when shapes do not align.
+
+## Appendix A: Quick reference — R function to LAPACK routine
+
+| R function | Primary LAPACK | Secondary/Alternative | Returns list? |
+|------------|---------------|----------------------|---------------|
+| `qr()` | `dqrdc2` (LINPACK) | `dgeqp3` (LAPACK=TRUE) | Yes |
+| `qr.solve()` | `dqrdc2`/`dgeqp3` + `dormqr` + `dtrsm` | | No |
+| `eigen(symmetric=TRUE)` | `dsyevr` | `dsyevd`, `dsyev` | Yes |
+| `eigen(symmetric=FALSE)` | `dgeev` | | Yes |
+| `svd()` | `dgesdd` | `dgesvd` | Yes |
+| `chol()` | `dpotrf` | `dpstrf` (pivot=TRUE) | No (+attrs) |
+| `chol2inv()` | `dpotri` | | No |
+| `solve(A,b)` | `dgesv` | `dgetrf` + `dgetrs` | No |
+| `solve(A)` | `dgetrf` + `dgetri` | | No |
+| `det()` / `determinant()` | `dgetrf` | | Scalar / Yes |
+| `norm()` | `dlange` | SVD for type="2" | Scalar |
+| `rcond()` | `dgetrf` + `dgecon` | | Scalar |
+| `forwardsolve()` | `dtrsm` | `dtrsv` | No |
+| `backsolve()` | `dtrsm` | `dtrsv` | No |
+| `crossprod(X)` | `dsyrk` | `dgemm` | No |
+| `crossprod(X,Y)` | `dgemm` | | No |
+| `%*%` | `dgemm` | `dgemv` for vec | No |
+
+---
+
+## Appendix B: Workspace requirements summary
+
+| Routine | Needs query? | Minimum workspace formula |
+|---------|-------------|---------------------------|
+| `dgeqp3` | Yes | 3N + 1 |
+| `dgeqrf` | Yes | max(1, N) |
+| `dsyevr` | Yes | WORK: 26N, IWORK: 10N |
+| `dsyevd` | Yes | WORK: 1+6N+2N², IWORK: 3+5N |
+| `dsyev` | Yes | 3N - 1 |
+| `dgeev` | Yes | 4N (with vectors) |
+| `dgesdd` | Yes | Complex; also IWORK: 8×min(M,N) |
+| `dgesvd` | Yes | max(3mn+mx, 5mn-4) where mn=min, mx=max |
+| `dgetri` | Yes | N |
+| `dpotrf` | No | — |
+| `dpstrf` | No | 2N (fixed) |
+| `dgetrf` | No | — |
+| `dgesv` | No | — |
+| `dpotrs` | No | — |
diff --git a/doc/matrix/infer-dest-process.md b/doc/matrix/infer-dest-process.md
new file mode 100644
index 0000000..87b7bb9
--- /dev/null
+++ b/doc/matrix/infer-dest-process.md
@@ -0,0 +1,143 @@
+# Destination Inference and Temporary Minimization
+
+This document explains how quickr avoids unnecessary temporaries in BLAS-backed
+matrix operations by inferring the output variable ahead of time. It starts with
+core concepts needed to understand the flow.
+
+## Prerequisites
+
+### The `<-` handler
+- All assignments in the compiled function flow through `r2f_handlers[["<-"]]` in
+  `R/r2f.R`.
+- This handler decides whether the RHS can write directly into the LHS (no extra
+  temporary), or whether a temporary result is needed and then assigned to the
+  LHS.
+
+### Scope
+- A scope is an ordered environment that stores declared variables (`Variable`
+  objects) and is threaded through the compiler.
+- When a symbol is added to the scope, it becomes a declared Fortran variable in
+  the manifest and is available for subsequent compilation steps.
+
+### `Variable` class
+- `Variable` objects carry:
+  - `mode` (type): `double`, `integer`, `logical`, etc.
+  - `dims`: list of dimensions (`list(m, n)` for matrices)
+  - `name`: the Fortran variable name
+- These are used for compile-time checks, Fortran declarations, and BLAS
+  decisions.
+
+### Manifest
+- The manifest is the Fortran declaration block built from all `Variable`
+  bindings in the scope.
+- It is generated after compilation of the body, so it reflects all variables
+  that ended up in scope.
+
+### Hoist and temporaries
+- Some expressions need pre-statements (e.g., assign a computed expression to a
+  temporary before calling BLAS). These are emitted via `hoist`.
+- The `hoist` object provides `declare_tmp()` and `emit()` to manage these
+  temporary variables and pre-statements.
+
+## Why destination inference exists
+
+BLAS calls (like `dgemm`) want a destination buffer. If the compiler can provide
+an existing output variable, BLAS can write directly into it. This avoids:
+- allocating a temporary output array
+- emitting a separate assignment `out = tmp_result`
+
+Without inference, the compiler does not know the output shape early enough to
+create and pass a destination variable into the RHS compilation.
+
+## The inference pipeline (summary)
+
+1) Assignment handler sees `out <- rhs`.
+2) If the RHS handler supports a destination, `dest_infer_for_call()` tries to
+   pre-compute the output shape as a `Variable`.
+3) If inference succeeds, that `Variable` is inserted into scope under the LHS
+   symbol. This makes it a declared Fortran variable.
+4) The RHS is compiled with `dest=var` so BLAS helpers can write into it.
+5) If the RHS wrote directly into `dest`, the assignment handler emits nothing.
+
+## The key functions and their roles
+
+### `dest_supported_for_call(call)`
+- Checks whether a handler supports `dest`.
+- Uses `attr(handler, "dest_supported")`.
+
+### `dest_infer_for_call(call, scope)`
+- Calls the handler's `dest_infer` function if it exists.
+- Returns a `Variable` or `NULL`.
+
+### Inference helpers for matrix ops
+- `infer_dest_matmul()`
+  - Computes output dims for `%*%` from declared input dims.
+- `infer_dest_crossprod_like()`
+  - Computes output dims for `crossprod()` and `tcrossprod()`.
+- `infer_dest_outer()`
+  - Computes output dims for `outer()` when both inputs are vectors.
+- `infer_dest_triangular()`
+  - Computes output dims for `forwardsolve()` / `backsolve()` from the RHS.
+
+These functions are conservative: they only infer if inputs are plain symbols
+already declared in scope.
+
+## How direct output writes are decided
+
+In BLAS helpers (e.g., `gemm()`):
+- If a `dest` is passed, `can_use_output()` decides whether it is safe.
+- Current rules:
+  - `dest` exists
+  - `dest@mode == "double"` (BLAS outputs are doubles)
+  - `dest` does not alias input names
+- If allowed, BLAS writes directly into `dest` and the returned `Fortran` result
+  is tagged with `writes_to_dest = TRUE`.
+
+The assignment handler then skips emitting `out = ...` because the output is
+already written by BLAS.
+
+## When temporaries are still created
+
+### Case: expression inputs
+If an operand is not a symbol (e.g. `a + b`), `ensure_blas_operand_name()` hoists
+it into a temporary variable before calling BLAS.
+
+### Case: inference fails
+If inference cannot determine output dims (non-symbol inputs, unknown dims), the
+compiler cannot predeclare the LHS and therefore cannot pass a destination.
+This forces:
+- a temporary BLAS output
+- a later assignment to the LHS
+
+## Example flow: `out <- a %*% b`
+
+1) `infer_dest_matmul()` computes dims and creates a `Variable`.
+2) The assignment handler inserts it into scope.
+3) `%*%` handler calls `gemm()` with `dest=out`.
+4) `gemm()` writes directly into `out` and returns `writes_to_dest = TRUE`.
+5) The assignment handler emits no extra assignment.
+
+## Example flow: `out <- (a + b) %*% c`
+
+1) Inference fails because the left operand is not a symbol.
+2) No destination is passed to `%*%`.
+3) `ensure_blas_operand_name()` hoists `a + b` into a temp.
+4) `gemm()` writes to a temp output.
+5) Assignment emits `out = tmp_result`.
+
+## Design notes and trade-offs
+
+- Inference is intentionally conservative to avoid guessing shapes.
+- Temporaries are minimized when operands are declared symbols.
+- BLAS outputs are treated as double-only, so destinations must be `double`.
+- The manifest is a result of scope contents; it is not used to decide whether
+  direct output writes are allowed.
+
+## Quick checklist for debugging temp creation
+
+- Are operands declared symbols in scope?
+- Does the handler have `dest_supported = TRUE`?
+- Does inference return a `Variable` (non-NULL)?
+- Is `dest@mode` double?
+- Does `can_use_output()` reject due to aliasing?
+
diff --git a/doc/matrix/matmul-examples.R b/doc/matrix/matmul-examples.R
new file mode 100644
index 0000000..a60d138
--- /dev/null
+++ b/doc/matrix/matmul-examples.R
@@ -0,0 +1,217 @@
+devtools::load_all()
+
+mm3 <- function(a, b, c) {
+  declare(
+    type(a = double(m, k)),
+    type(b = double(k, n)),
+    type(c = double(n, p))
+  )
+  ab <- a %*% b
+  out <- ab %*% c
+  out
+}
+
+xtx_scale <- function(x) {
+  declare(type(x = double(NA, NA)))
+  xtx <- crossprod(x)
+  half_xtx <- 0.5 * xtx
+  out <- xtx + half_xtx
+  out
+}
+
+atb_c <- function(a, b, c) {
+  declare(
+    type(a = double(m, k)),
+    type(b = double(n, k)),
+    type(c = double(n, p))
+  )
+  atb <- a %*% t(b)
+  out <- atb %*% c
+  out
+}
+
+sum_of_products <- function(a, b, c, d) {
+  declare(
+    type(a = double(m, k)),
+    type(b = double(k, n)),
+    type(c = double(m, r)),
+    type(d = double(r, n))
+  )
+  left <- a %*% b
+  right <- c %*% d
+  out <- left + right
+  out
+}
+
+chain_plus <- function(a, b, c, j) {
+  declare(
+    type(a = double(m, k)),
+    type(b = double(k, n)),
+    type(c = double(n, p)),
+    type(j = double(m, p))
+  )
+  q <- a %*% b %*% c + j
+  q
+}
+
+chain_mix <- function(a, b, c) {
+  declare(
+    type(a = double(m, k)),
+    type(b = double(n, k)),
+    type(c = double(n, n))
+  )
+  q <- (a %*% t(b)) %*% c + 0.25 * (a %*% t(b))
+  q
+}
+
+crossprod_plus <- function(x, y, j) {
+  declare(
+    type(x = double(m, k)),
+    type(y = double(k, n)),
+    type(j = double(k, n))
+  )
+  q <- crossprod(x) %*% y + j
+  q
+}
+
+sum_of_products_line <- function(a, b, c, d, j) {
+  declare(
+    type(a = double(m, k)),
+    type(b = double(k, n)),
+    type(c = double(m, r)),
+    type(d = double(r, n)),
+    type(j = double(m, n))
+  )
+  q <- a %*% b + c %*% d + j
+  q
+}
+
+bad_conformable <- function(a, b) {
+  declare(
+    type(a = double(m, k)),
+    type(b = double(n, p))
+  )
+  a %*% b
+}
+
+cat("=== r2f: mm3 ===\n")
+print(r2f(mm3))
+cat("\n=== r2f: xtx_scale ===\n")
+print(r2f(xtx_scale))
+cat("\n=== r2f: atb_c ===\n")
+print(r2f(atb_c))
+cat("\n=== r2f: sum_of_products ===\n")
+print(r2f(sum_of_products))
+cat("\n=== r2f: chain_plus ===\n")
+print(r2f(chain_plus))
+cat("\n=== r2f: chain_mix ===\n")
+print(r2f(chain_mix))
+cat("\n=== r2f: crossprod_plus ===\n")
+print(r2f(crossprod_plus))
+cat("\n=== r2f: sum_of_products_line ===\n")
+print(r2f(sum_of_products_line))
+cat("\n=== r2f: bad_conformable (expect warning) ===\n")
+print(r2f(bad_conformable))
+
+set.seed(1)
+m <- 80
+k <- 60
+n <- 40
+p <- 50
+
+a <- matrix(runif(m * k), m, k)
+b <- matrix(runif(k * n), k, n)
+c <- matrix(runif(n * p), n, p)
+j <- matrix(runif(m * p), m, p)
+
+x <- matrix(runif(120 * 80), 120, 80)
+
+a2 <- matrix(runif(90 * 30), 90, 30)
+b2 <- matrix(runif(70 * 30), 70, 30)
+c2 <- matrix(runif(70 * 40), 70, 40)
+
+c_left <- matrix(runif(80 * 20), 80, 20)
+d_right <- matrix(runif(20 * 50), 20, 50)
+a_sp <- matrix(runif(80 * 60), 80, 60)
+b_sp <- matrix(runif(60 * 50), 60, 50)
+c_sp <- matrix(runif(80 * 20), 80, 20)
+d_sp <- matrix(runif(20 * 50), 20, 50)
+
+b_chain <- matrix(runif(55 * 30), 55, 30)
+c_chain <- matrix(runif(55 * 55), 55, 55)
+a_chain <- matrix(runif(90 * 30), 90, 30)
+
+x_cp <- matrix(runif(200 * 70), 200, 70)
+y_cp <- matrix(runif(70 * 60), 70, 60)
+j_cp <- matrix(runif(70 * 60), 70, 60)
+
+a_sum <- matrix(runif(100 * 80), 100, 80)
+b_sum <- matrix(runif(80 * 60), 80, 60)
+c_sum <- matrix(runif(100 * 30), 100, 30)
+d_sum <- matrix(runif(30 * 60), 30, 60)
+j_sum <- matrix(runif(100 * 60), 100, 60)
+
+q_mm3 <- quick(mm3)
+q_xtx_scale <- quick(xtx_scale)
+q_atb_c <- quick(atb_c)
+q_sum_of_products <- quick(sum_of_products)
+q_chain_plus <- quick(chain_plus)
+q_chain_mix <- quick(chain_mix)
+q_crossprod_plus <- quick(crossprod_plus)
+q_sum_of_products_line <- quick(sum_of_products_line)
+
+cat("\n=== bench: mm3 ===\n")
+print(bench::mark(
+  mm3(a, b, c),
+  q_mm3(a, b, c),
+  check = TRUE
+))
+
+cat("\n=== bench: xtx_scale ===\n")
+print(bench::mark(
+  xtx_scale(x),
+  q_xtx_scale(x),
+  check = TRUE
+))
+
+cat("\n=== bench: atb_c ===\n")
+print(bench::mark(
+  atb_c(a2, b2, c2),
+  q_atb_c(a2, b2, c2),
+  check = TRUE
+))
+
+cat("\n=== bench: sum_of_products ===\n")
+print(bench::mark(
+  sum_of_products(a_sp, b_sp, c_sp, d_sp),
+  q_sum_of_products(a_sp, b_sp, c_sp, d_sp),
+  check = TRUE
+))
+
+cat("\n=== bench: chain_plus ===\n")
+print(bench::mark(
+  chain_plus(a, b, c, j),
+  q_chain_plus(a, b, c, j),
+  check = TRUE
+))
+
+cat("\n=== bench: chain_mix ===\n")
+print(bench::mark(
+  chain_mix(a_chain, b_chain, c_chain),
+  q_chain_mix(a_chain, b_chain, c_chain),
+  check = TRUE
+))
+
+cat("\n=== bench: crossprod_plus ===\n")
+print(bench::mark(
+  crossprod_plus(x_cp, y_cp, j_cp),
+  q_crossprod_plus(x_cp, y_cp, j_cp),
+  check = TRUE
+))
+
+cat("\n=== bench: sum_of_products_line ===\n")
+print(bench::mark(
+  sum_of_products_line(a_sum, b_sum, c_sum, d_sum, j_sum),
+  q_sum_of_products_line(a_sum, b_sum, c_sum, d_sum, j_sum),
+  check = TRUE
+))
diff --git a/tests/testthat/helper.R b/tests/testthat/helper.R
index 20ac197..ed7b23f 100644
--- a/tests/testthat/helper.R
+++ b/tests/testthat/helper.R
@@ -87,15 +87,11 @@ openmp_supported_or_skip <- local({
           })
           TRUE
         },
-        error = function(e) {
-          msg <- conditionMessage(e)
-          if (grepl("OpenMP", msg, fixed = TRUE)) {
-            return(FALSE)
-          }
-          stop(e)
-        }
+        quickr_openmp_unavailable = function(e) FALSE
       )
     }
-    skip_if(!isTRUE(supported), "OpenMP unavailable in this toolchain")
+    if (!isTRUE(supported)) {
+      skip("OpenMP toolchain not available")
+    }
   }
 })
diff --git a/tests/testthat/test-compiler.R b/tests/testthat/test-compiler.R
index 14965f7..030eb83 100644
--- a/tests/testthat/test-compiler.R
+++ b/tests/testthat/test-compiler.R
@@ -11,6 +11,53 @@ test_that("quickr_env_is_true recognizes common truthy values", {
   expect_true(quickr:::quickr_env_is_true("QUICKR_PREFER_FLANG"))
 })
 
+test_that("quickr_r_cmd_config_value captures only stdout", {
+  expect_identical(
+    deparse(formals(quickr:::quickr_r_cmd_config_value)$system2),
+    "base::system2"
+  )
+
+  observed_stdout <- NULL
+  observed_stderr <- NULL
+  system2_stub <- function(
+    command,
+    args,
+    stdout = "",
+    stderr = "",
+    ...
+  ) {
+    observed_stdout <<- stdout
+    observed_stderr <<- stderr
+    " value "
+  }
+
+  expect_identical(
+    quickr:::quickr_r_cmd_config_value(
+      "CC",
+      r_cmd = "R",
+      system2 = system2_stub
+    ),
+    "value"
+  )
+  expect_identical(observed_stdout, TRUE)
+  expect_identical(observed_stderr, FALSE)
+})
+
+test_that("quickr_r_cmd_config_value returns empty on command failure", {
+  system2_stub <- function(command, args, stdout = "", stderr = "", ...) {
+    structure(" value ", status = 1L)
+  }
+
+  expect_identical(
+    quickr:::quickr_r_cmd_config_value(
+      "CC",
+      r_cmd = "R",
+      system2 = system2_stub
+    ),
+    ""
+  )
+})
+
 test_that("quickr_flang_path and quickr_prefer_flang are deterministic with stubs", {
   which <- function(x) {
     if (x == "flang-new") {
@@ -74,3 +121,53 @@ test_that("quickr_fcompiler_env writes Makevars when flang is usable", {
   expect_true(startsWith(env, "R_MAKEVARS_USER="))
   expect_true(file.exists(sub("R_MAKEVARS_USER=", "", env, fixed = TRUE)))
 })
+
+test_that("compile cleans existing build directories and reports failures", {
+  fsub <- r2f(function(x) {
+    declare(type(x = double(1)))
+    x + 1
+  })
+
+  build_dir <- withr::local_tempdir()
+  file.create(file.path(build_dir, "stale.txt"))
+
+  calls <- 0L
+  system2_stub <- function(
+    command,
+    args,
+    stdout = TRUE,
+    stderr = TRUE,
+    env = character(),
+    ...
+  ) {
+    if (
+      length(args) >= 3L &&
+        identical(args[[1L]], "CMD") &&
+        identical(args[[2L]], "config")
+    ) {
+      return("")
+    }
+    calls <<- calls + 1L
+    if (calls == 1L) {
+      return(structure("flang fail", status = 1))
+    }
+    structure("fallback fail", status = 2)
+  }
+
+  local_mocked_bindings(
+    system2 = system2_stub,
+    .package = "base"
+  )
+  local_mocked_bindings(
+    quickr_fcompiler_env = function(...) "ENV=1",
+    .package = "quickr"
+  )
+
+  expect_error(
+    quickr:::compile(fsub, build_dir = build_dir),
+    "Compilation Error",
+    fixed = TRUE
+  )
+  expect_true(dir.exists(build_dir))
+  expect_false(file.exists(file.path(build_dir, "stale.txt")))
+})
diff --git a/tests/testthat/test-errors.R b/tests/testthat/test-errors.R
index 18d809e..0d02c1d 100644
--- a/tests/testthat/test-errors.R
+++ b/tests/testthat/test-errors.R
@@ -67,3 +67,34 @@ test_that("value-returning local closures can be called as statements", {
 
   expect_quick_identical(fn, list(1))
 })
+
+test_that("reserved or underscored names are rejected", {
+  expect_error(
+    quick(function(x) {
+      declare(type(x = integer(1)))
+      `_bad` <- x + 1L
+      `_bad`
+    }),
+    "symbols cannot start or end with '_'",
+    fixed = TRUE
+  )
+
+  expect_error(
+    quick(function(x) {
+      declare(type(x = integer(1)))
+      `bad_` <- x + 1L
+      `bad_`
+    }),
+    "symbols cannot start or end with '_'",
+    fixed = TRUE
+  )
+
+  expect_error(
+    quick(function(int) {
+      declare(type(int = integer(1)))
+      int
+    }),
+    "symbols cannot start or end with '_'",
+    fixed = TRUE
+  )
+})
diff --git a/tests/testthat/test-example-heat_diffusion.R b/tests/testthat/test-example-heat_diffusion.R
index 626e84a..0e49891 100644
--- a/tests/testthat/test-example-heat_diffusion.R
+++ b/tests/testthat/test-example-heat_diffusion.R
@@ -5,13 +5,13 @@ test_that("heat diffusion", {
   # 2D grid, explicit time-stepping, fixed boundaries
 
   # Parameters
-  nx <- 100L # Grid size in x
-  ny <- 100L # Grid size in y
+  nx <- 60L # Grid size in x
+  ny <- 60L # Grid size in y
   dx <- 1L # Grid spacing
   dy <- 1L
   dt <- 0.01 # Time step
   k <- 0.1 # Thermal diffusivity
-  steps <- 50L # Number of time steps
+  steps <- 25L # Number of time steps
 
   diffuse_heat <- function(nx, ny, dx, dy, dt, k, steps) {
     declare(
diff --git a/tests/testthat/test-matrix-inference.R b/tests/testthat/test-matrix-inference.R
new file mode 100644
index 0000000..47aa74d
--- /dev/null
+++ b/tests/testthat/test-matrix-inference.R
@@ -0,0 +1,168 @@
+test_that("matrix ops infer destination sizes for assignments", {
+  matmul_infer <- function(A, B) {
+    declare(type(A = double(2, 3)), type(B = double(3, 2)))
+    out <- A %*% B
+    out
+  }
+
+  matvec_infer <- function(A, x) {
+    declare(type(A = double(2, 3)), type(x = double(3)))
+    out <- A %*% x
+    out
+  }
+
+  vecmat_infer <- function(x, A) {
+    declare(type(x = double(2)), type(A = double(2, 3)))
+    out <- x %*% A
+    out
+  }
+
+  cross_infer <- function(x) {
+    declare(type(x = double(4, 3)))
+    out <- crossprod(x)
+    out
+  }
+
+  cross_infer2 <- function(x, y) {
+    declare(type(x = double(4, 3)), type(y = double(4, 2)))
+    out <- crossprod(x, y)
+    out
+  }
+
+  tcross_infer <- function(x) {
+    declare(type(x = double(4, 3)))
+    out <- tcrossprod(x)
+    out
+  }
+
+  tcross_infer2 <- function(x, y) {
+    declare(type(x = double(4, 3)), type(y = double(2, 3)))
+    out <- tcrossprod(x, y)
+    out
+  }
+
+  outer_infer <- function(x, y) {
+    declare(type(x = double(2)), type(y = double(3)))
+    out <- outer(x, y)
+    out
+  }
+
+  outer_op_infer <- function(x, y) {
+    declare(type(x = double(2)), type(y = double(3)))
+    out <- x %o% y
+    out
+  }
+
+  forward_infer <- function(L, b) {
+    declare(type(L = double(2, 2)), type(b = double(2, 2)))
+    out <- forwardsolve(L, b)
+    out
+  }
+
+  back_infer <- function(U, b) {
+    declare(type(U = double(2, 2)), type(b = double(2)))
+    out <- backsolve(U, b)
+    out
+  }
+
+  set.seed(99)
+  A <- matrix(rnorm(6), nrow = 2)
+  B <- matrix(rnorm(6), nrow = 3)
+  x2 <- rnorm(2)
+  x3 <- rnorm(3)
+  X <- matrix(rnorm(12), nrow = 4)
+  Yc <- matrix(rnorm(8), nrow = 4)
+  Yt <- matrix(rnorm(6), nrow = 2)
+  v2 <- rnorm(2)
+  v3 <- rnorm(3)
+
+  L <- matrix(c(2, 0, 1, 3), nrow = 2, byrow = TRUE)
+  U <- matrix(c(2, 1, 0, 3), nrow = 2, byrow = TRUE)
+  b_mat <- matrix(rnorm(4), nrow = 2)
+  b_vec <- rnorm(2)
+
+  expect_quick_equal(matmul_infer, list(A = A, B = B))
+  expect_quick_equal(matvec_infer, list(A = A, x = x3))
+  expect_quick_equal(vecmat_infer, list(x = x2, A = A))
+  expect_quick_equal(cross_infer, list(x = X))
+  expect_quick_equal(cross_infer2, list(x = X, y = Yc))
+  expect_quick_equal(tcross_infer, list(x = X))
+  expect_quick_equal(tcross_infer2, list(x = X, y = Yt))
+  expect_quick_equal(outer_infer, list(x = v2, y = v3))
+  expect_quick_equal(outer_op_infer, list(x = v2, y = v3))
+  expect_quick_equal(forward_infer, list(L = L, b = b_mat))
+  expect_quick_equal(back_infer, list(U = U, b = b_vec))
+})
+
+test_that("matrix helpers report unsupported inputs", {
+  matmul_bad_rank <- function(a, b) {
+    declare(type(a = double(2, 2, 2)), type(b = double(2, 2)))
+    a %*% b
+  }
+
+  transpose_bad_rank <- function(x) {
+    declare(type(x = double(2, 2, 2)))
+    t(x)
+  }
+
+  outer_bad_rank <- function(x, y) {
+    declare(type(x = double(2, 2)), type(y = double(2)))
+    outer(x, y)
+  }
+
+  outer_missing <- function(x) {
+    declare(type(x = double(2)))
+    outer(x)
+  }
+
+  forward_k <- function(L, b) {
+    declare(type(L = double(2, 2)), type(b = double(2)))
+    forwardsolve(L, b, k = 1)
+  }
+
+  back_bad_upper <- function(U, b, flag) {
+    declare(
+      type(U = double(2, 2)),
+      type(b = double(2)),
+      type(flag = logical(1))
+    )
+    backsolve(U, b, upper.tri = flag)
+  }
+
+  back_bad_A <- function(A, b) {
+    declare(type(A = double(2)), type(b = double(2)))
+    backsolve(A, b)
+  }
+
+  back_bad_B <- function(U, b) {
+    declare(type(U = double(2, 2)), type(b = double(2, 2, 2)))
+    backsolve(U, b)
+  }
+
+  expect_error(quick(matmul_bad_rank), "%\\*% only supports vectors/matrices")
+  expect_error(quick(transpose_bad_rank), "t\\(\\) only supports rank 0-2")
+  expect_error(quick(outer_bad_rank), "outer\\(\\) only supports vectors")
+  expect_error(quick(outer_missing), "outer\\(\\) expects X and Y")
+  expect_error(quick(forward_k), "forwardsolve\\(\\) does not support k")
+  expect_error(quick(back_bad_upper), "only supports literal upper\\.tri")
+  expect_error(quick(back_bad_A), "triangular solve expects a matrix")
+  expect_error(quick(back_bad_B), "triangular solve only supports vector")
+})
+
+test_that("matrix conformability warnings are surfaced", {
+  matmul_warn <- function(A, B, n, m, k) {
+    declare(
+      type(n = integer(1)),
+      type(m = integer(1)),
+      type(k = integer(1)),
+      type(A = double(n, m)),
+      type(B = double(k, n))
+    )
+    A %*% B
+  }
+
+  expect_warning(
+    quick(matmul_warn),
+    "cannot verify conformability in %\\*%"
+  )
+})
diff --git a/tests/testthat/test-matrix-internals.R b/tests/testthat/test-matrix-internals.R
new file mode 100644
index 0000000..fec5191
--- /dev/null
+++ b/tests/testthat/test-matrix-internals.R
@@ -0,0 +1,304 @@
+# Unit tests for matrix-specific internal helpers
+
+test_that("matrix helper dimensions handle scalars and defaults", {
+  expect_identical(quickr:::dim_or_one_from(NULL, 1L), 1L)
+  expect_identical(quickr:::dim_or_one_from(list(3L), 1L), 3L)
+  expect_identical(quickr:::dim_or_one_from(list(3L), 2L), 1L)
+
+  expect_identical(
+    quickr:::matrix_dims_from(0L, NULL, orientation = "matrix"),
+    list(rows = 1L, cols = 1L)
+  )
+  expect_identical(
+    quickr:::matrix_dims_from(1L, list(4L), orientation = "rowvec"),
+    list(rows = 1L, cols = 4L)
+  )
+  expect_identical(
+    quickr:::matrix_dims_from(1L, list(4L), orientation = "colvec"),
+    list(rows = 4L, cols = 1L)
+  )
+})
+
+test_that("symbol_name_or_null recognizes identifiers", {
+  var <- quickr:::Variable("double", list(1L, 1L), name = "x")
+  f_sym <- quickr:::Fortran("x", var, r = quote(x))
+  expect_identical(quickr:::symbol_name_or_null(f_sym), "x")
+
+  f_str <- quickr:::Fortran("x", var)
+  expect_identical(quickr:::symbol_name_or_null(f_str), "x")
+
+  f_expr <- quickr:::Fortran("x + 1", var, r = quote(x + 1))
+  expect_null(quickr:::symbol_name_or_null(f_expr))
+})
+
+test_that("logical_arg_or_default returns NULL defaults", {
+  expect_null(quickr:::logical_arg_or_default(list(), "upper.tri", NULL, "ctx"))
+})
+
+test_that("destination helpers handle NULL and mode mismatches", {
+  expect_invisible(
+    quickr:::assert_dest_dims_compatible(NULL, list(1L), "ctx")
+  )
+
+  dest <- quickr:::Variable("integer", list(1L), name = "out")
+  left <- quickr:::Fortran(
+    "x",
+    quickr:::Variable("double", list(1L), name = "x"),
+    r = quote(x)
+  )
+  right <- quickr:::Fortran(
+    "y",
+    quickr:::Variable("double", list(1L), name = "y"),
+    r = quote(y)
+  )
+
+  expect_false(
+    quickr:::can_use_output(
+      dest,
+      left,
+      right,
+      expected_dims = list(1L),
+      context = "ctx"
+    )
+  )
+})
+
+test_that("unwrap_transpose_arg handles scalar inputs and rank errors", {
+  scope <- quickr:::new_scope(NULL)
+  scope@assign("a", quickr:::Variable("double", name = "a"))
+  scope@assign(
+    "arr",
+    quickr:::Variable("double", list(2L, 2L, 2L), name = "arr")
+  )
+  hoist <- quickr:::new_hoist(scope)
+
+  info <- quickr:::unwrap_transpose_arg(quote(t(a)), scope, hoist = hoist)
+  expect_identical(info$trans, "N")
+  expect_identical(info$value@value@rank, 0L)
+
+  expect_error(
+    quickr:::unwrap_transpose_arg(quote(t(arr)), scope, hoist = hoist),
+    "t\\(\\) only supports rank 0-2 inputs"
+  )
+})
+
+test_that("blas helpers require a hoist environment", {
+  scope <- new.env(parent = emptyenv())
+  var_mat <- quickr:::Variable("double", list(1L, 1L), name = "A")
+  var_vec <- quickr:::Variable("double", list(1L), name = "x")
+  A <- quickr:::Fortran("A", var_mat, r = quote(A))
+  B <- quickr:::Fortran("B", var_mat, r = quote(B))
+  x <- quickr:::Fortran("x", var_vec, r = quote(x))
+
+  expect_error(
+    quickr:::gemm(
+      "N",
+      "N",
+      A,
+      B,
+      1L,
+      1L,
+      1L,
+      1L,
+      1L,
+      1L,
+      scope = scope,
+      hoist = NULL
+    ),
+    "hoist must be a hoist environment"
+  )
+  expect_error(
+    quickr:::gemv(
+      "N",
+      A,
+      x,
+      1L,
+      1L,
+      1L,
+      list(1L, 1L),
+      scope = scope,
+      hoist = NULL
+    ),
+    "hoist must be a hoist environment"
+  )
+  expect_error(
+    quickr:::syrk("N", A, scope = scope, hoist = NULL),
+    "hoist must be a hoist environment"
+  )
+  expect_error(
+    quickr:::outer_mul(x, x, scope = scope, hoist = NULL),
+    "hoist must be a hoist environment"
+  )
+  expect_error(
+    quickr:::triangular_solve(
+      A,
+      x,
+      "L",
+      "N",
+      "N",
+      scope = scope,
+      hoist = NULL
+    ),
+    "hoist must be a hoist environment"
+  )
+})
+
+test_that("blas helpers allocate temporaries when outputs are not reused", {
+  scope <- quickr:::new_scope(NULL)
+  hoist <- quickr:::new_hoist(scope)
+
+  A <- quickr:::Fortran(
+    "A",
+    quickr:::Variable("double", list(2L, 3L), name = "A"),
+    r = quote(A)
+  )
+  x <- quickr:::Fortran(
+    "x",
+    quickr:::Variable("double", list(3L), name = "x"),
+    r = quote(x)
+  )
+
+  out_gemv <- quickr:::gemv(
+    "N",
+    A,
+    x,
+    m = 2L,
+    n = 3L,
+    lda = 2L,
+    out_dims = list(2L, 1L),
+    scope = scope,
+    hoist = hoist
+  )
+  expect_identical(out_gemv@value@dims, list(2L, 1L))
+
+  out_syrk <- quickr:::syrk(
+    trans = "T",
+    X = A,
+    scope = scope,
+    hoist = hoist
+  )
+  expect_identical(out_syrk@value@dims, list(3L, 3L))
+
+  y <- quickr:::Fortran(
+    "y",
+    quickr:::Variable("double", list(4L), name = "y"),
+    r = quote(y)
+  )
+  out_outer <- quickr:::outer_mul(x, y, scope = scope, hoist = hoist)
+  expect_identical(out_outer@value@dims, list(3L, 4L))
+
+  A_tri <- quickr:::Fortran(
+    "L",
+    quickr:::Variable("double", list(2L, 2L), name = "L"),
+    r = quote(L)
+  )
+  b <- quickr:::Fortran(
+    "b",
+    quickr:::Variable("double", list(2L), name = "b"),
+    r = quote(b)
+  )
+  out_tri <- quickr:::triangular_solve(
+    A_tri,
+    b,
+    uplo = "L",
+    trans = "N",
+    diag = "N",
+    scope = scope,
+    hoist = hoist
+  )
+  expect_identical(out_tri@value@dims, list(2L))
+})
+
+test_that("triangular_solve rejects scalar right-hand sides", {
+  scope <- quickr:::new_scope(NULL)
+  hoist <- quickr:::new_hoist(scope)
+
+  A <- quickr:::Fortran(
+    "A",
+    quickr:::Variable("double", list(2L, 2L), name = "A"),
+    r = quote(A)
+  )
+  b_scalar <- quickr:::Fortran(
+    "b",
+    quickr:::Variable("double", name = "b"),
+    r = quote(b)
+  )
+
+  expect_error(
+    quickr:::triangular_solve(
+      A,
+      b_scalar,
+      uplo = "L",
+      trans = "N",
+      diag = "N",
+      scope = scope,
+      hoist = hoist
+    ),
+    "expects a vector or matrix right-hand side"
+  )
+})
+
+test_that("matrix argument inference handles transposes and ranks", {
+  scope <- quickr:::new_scope(NULL)
+  scope@assign("A", quickr:::Variable("double", list(2L, 3L), name = "A"))
+  scope@assign("B", quickr:::Variable("double", list(3L, 4L), name = "B"))
+  scope@assign("v", quickr:::Variable("double", list(3L), name = "v"))
+  scope@assign("s", quickr:::Variable("double", name = "s"))
+  scope@assign(
+    "arr",
+    quickr:::Variable("double", list(2L, 2L, 2L), name = "arr")
+  )
+
+  expect_identical(
+    quickr:::infer_symbol_var(quote(A), scope),
+    scope[["A"]]
+  )
+  expect_null(quickr:::infer_symbol_var(quote(A + 1), scope))
+
+  info_mat <- quickr:::infer_matrix_arg(quote(t(A)), scope)
+  expect_identical(info_mat$trans, "T")
+  expect_identical(info_mat$var, scope[["A"]])
+
+  info_vec <- quickr:::infer_matrix_arg(quote(t(v)), scope)
+  expect_identical(info_vec$trans, "N")
+  expect_identical(info_vec$var@dims, list(1L, 3L))
+
+  info_scalar <- quickr:::infer_matrix_arg(quote(t(s)), scope)
+  expect_identical(info_scalar$trans, "N")
+  expect_identical(info_scalar$var, scope[["s"]])
+
+  expect_null(quickr:::infer_matrix_arg(quote(t(arr)), scope))
+  expect_null(quickr:::infer_matrix_arg(quote(t(missing)), scope))
+  expect_null(quickr:::infer_matrix_arg(quote(missing), scope))
+
+  info_plain <- quickr:::infer_matrix_arg(quote(B), scope)
+  expect_identical(info_plain$trans, "N")
+  expect_identical(info_plain$var, scope[["B"]])
+})
+
+test_that("matrix destination inference covers common shapes", {
+  scope <- quickr:::new_scope(NULL)
+  scope@assign("A", quickr:::Variable("double", list(2L, 3L), name = "A"))
+  scope@assign("B", quickr:::Variable("double", list(3L, 4L), name = "B"))
+  scope@assign("v", quickr:::Variable("double", list(3L), name = "v"))
+  scope@assign(
+    "arr",
+    quickr:::Variable("double", list(2L, 2L, 2L), name = "arr")
+  )
+
+  expect_null(quickr:::infer_dest_matmul(list(quote(A)), scope))
+  expect_null(quickr:::infer_dest_matmul(list(quote(A), quote(missing)), scope))
+  expect_null(quickr:::infer_dest_matmul(list(quote(A), quote(arr)), scope))
+
+  out_mm <- quickr:::infer_dest_matmul(list(quote(A), quote(B)), scope)
+  expect_identical(out_mm@dims, list(2L, 4L))
+
+  out_mv <- quickr:::infer_dest_matmul(list(quote(A), quote(v)), scope)
+  expect_identical(out_mv@dims, list(2L, 1L))
+
+  out_vm <- quickr:::infer_dest_matmul(list(quote(v), quote(B)), scope)
+  expect_identical(out_vm@dims, list(1L, 4L))
+
+  out_t <- quickr:::infer_dest_matmul(list(quote(t(A)), quote(t(B))), scope)
+  expect_identical(out_t@dims, list(3L, 3L))
+})
diff --git a/tests/testthat/test-matrix-mul.R b/tests/testthat/test-matrix-mul.R
new file mode 100644
index 0000000..f2242a8
--- /dev/null
+++ b/tests/testthat/test-matrix-mul.R
@@ -0,0 +1,541 @@
+test_that("matrix multiplication matches R for common shapes", {
+  mat_mat <- function(mat_A, mat_B) {
+    declare(
+      type(mat_A = double(4, 3)),
+      type(mat_B = double(3, 5))
+    )
+    mat_A %*% mat_B
+  }
+
+  mat_mat_square <- function(mat_A, mat_B) {
+    declare(
+      type(mat_A = double(3, 3)),
+      type(mat_B = double(3, 3))
+    )
+    mat_A %*% mat_B
+  }
+
+  vec_mat <- function(vec, mat_A) {
+    declare(
+      type(vec = double(3)),
+      type(mat_A = double(3, 4))
+    )
+    vec %*% mat_A
+  }
+
+  mat_vec <- function(mat_A, vec) {
+    declare(
+      type(mat_A = double(3, 4)),
+      type(vec = double(4))
+    )
+    mat_A %*% vec
+  }
+
+  vec_vec <- function(vec_A, vec_B) {
+    declare(
+      type(vec_A = double(3)),
+      type(vec_B = double(3))
+    )
+    vec_A %*% vec_B
+  }
+
+  set.seed(1)
+  mat_A <- matrix(rnorm(4 * 3), nrow = 4)
+  mat_B <- matrix(rnorm(3 * 5), nrow = 3)
+  mat_sq_A <- matrix(rnorm(3 * 3), nrow = 3)
+  mat_sq_B <- matrix(rnorm(3 * 3), nrow = 3)
+  vec_3 <- rnorm(3)
+  mat_3x4 <- matrix(rnorm(3 * 4), nrow = 3)
+  vec_4 <- rnorm(4)
+
+  expect_quick_equal(mat_mat, list(mat_A = mat_A, mat_B = mat_B))
+  expect_quick_equal(
+    mat_mat_square,
+    list(mat_A = mat_sq_A, mat_B = mat_sq_B)
+  )
+  expect_quick_equal(vec_mat, list(vec = vec_3, mat_A = mat_3x4))
+  expect_quick_equal(mat_vec, list(mat_A = mat_3x4, vec = vec_4))
+  expect_quick_equal(vec_vec, list(vec_A = vec_3, vec_B = vec_3))
+})
+
+test_that("matrix multiplication handles transposed operands", {
+  matmul_t_left <- function(x, y) {
+    declare(
+      type(x = double(4, 3)),
+      type(y = double(4, 5))
+    )
+    t(x) %*% y
+  }
+
+  matmul_t_right <- function(x, y) {
+    declare(
+      type(x = double(4, 3)),
+      type(y = double(5, 3))
+    )
+    x %*% t(y)
+  }
+
+  matmul_t_both <- function(x, y) {
+    declare(
+      type(x = double(4, 3)),
+      type(y = double(5, 4))
+    )
+    t(x) %*% t(y)
+  }
+
+  set.seed(4)
+  x <- matrix(rnorm(4 * 3), nrow = 4)
+  y_left <- matrix(rnorm(4 * 5), nrow = 4)
+  y_right <- matrix(rnorm(5 * 3), nrow = 5)
+  y_both <- matrix(rnorm(5 * 4), nrow = 5)
+
+  expect_quick_equal(matmul_t_left, list(x = x, y = y_left))
+  expect_quick_equal(matmul_t_right, list(x = x, y = y_right))
+  expect_quick_equal(matmul_t_both, list(x = x, y = y_both))
+})
+
+test_that("matrix multiplication handles chained mixes", {
+  chain_mix <- function(a, b, c) {
+    declare(
+      type(a = double(4, 3)),
+      type(b = double(5, 3)),
+      type(c = double(5, 5))
+    )
+    (a %*% t(b)) %*% c + 0.25 * (a %*% t(b))
+  }
+
+  set.seed(7)
+  a <- matrix(rnorm(4 * 3), nrow = 4)
+  b <- matrix(rnorm(5 * 3), nrow = 5)
+  c <- matrix(rnorm(5 * 5), nrow = 5)
+
+  expect_quick_equal(chain_mix, list(a = a, b = b, c = c))
+})
+
+test_that("matrix multiplication handles 1x1 and 1xN/Nx1 shapes", {
+  mat_mat_1x1 <- function(a, b) {
+    declare(
+      type(a = double(1, 1)),
+      type(b = double(1, 1))
+    )
+    a %*% b
+  }
+
+  mat_row_col <- function(row, col) {
+    declare(
+      type(row = double(1, 4)),
+      type(col = double(4, 1))
+    )
+    row %*% col
+  }
+
+  mat_col_row <- function(col, row) {
+    declare(
+      type(col = double(4, 1)),
+      type(row = double(1, 4))
+    )
+    col %*% row
+  }
+
+  set.seed(5)
+  a <- matrix(rnorm(1), nrow = 1)
+  b <- matrix(rnorm(1), nrow = 1)
+  row <- matrix(rnorm(4), nrow = 1)
+  col <- matrix(rnorm(4), nrow = 4)
+
+  expect_quick_equal(mat_mat_1x1, list(a = a, b = b))
+  expect_quick_equal(mat_row_col, list(row = row, col = col))
+  expect_quick_equal(mat_col_row, list(col = col, row = row))
+})
+
+test_that("matrix multiplication handles t(vec) orientation", {
+  tvec_mat <- function(vec, mat_A) {
+    declare(
+      type(vec = double(3)),
+      type(mat_A = double(3, 4))
+    )
+    t(vec) %*% mat_A
+  }
+
+  mat_tvec <- function(mat_A, vec) {
+    declare(
+      type(mat_A = double(4, 1)),
+      type(vec = double(4))
+    )
+    mat_A %*% t(vec)
+  }
+
+  set.seed(6)
+  vec <- rnorm(3)
+  mat_A <- matrix(rnorm(3 * 4), nrow = 3)
+  vec_long <- rnorm(4)
+  mat_B <- matrix(rnorm(4), nrow = 4)
+
+  expect_quick_equal(tvec_mat, list(vec = vec, mat_A = mat_A))
+  expect_quick_equal(mat_tvec, list(mat_A = mat_B, vec = vec_long))
+})
+
+test_that("matrix multiplication handles transposed matrix in vector cases", {
+  vec_tmat <- function(vec, mat_A) {
+    declare(
+      type(vec = double(3)),
+      type(mat_A = double(4, 3))
+    )
+    vec %*% t(mat_A)
+  }
+
+  tmat_vec <- function(mat_A, vec) {
+    declare(
+      type(mat_A = double(4, 3)),
+      type(vec = double(4))
+    )
+    t(mat_A) %*% vec
+  }
+
+  set.seed(15)
+  vec_3 <- rnorm(3)
+  mat_4x3 <- matrix(rnorm(12), nrow = 4)
+  vec_4 <- rnorm(4)
+
+  expect_quick_equal(vec_tmat, list(vec = vec_3, mat_A = mat_4x3))
+  expect_quick_equal(tmat_vec, list(mat_A = mat_4x3, vec = vec_4))
+})
+
+test_that("matrix multiplication errors on non-conformable arguments", {
+  matmul_bad <- function(x, y) {
+    declare(
+      type(x = double(2, 3)),
+      type(y = double(2, 4))
+    )
+    x %*% y
+  }
+
+  x <- matrix(rnorm(2 * 3), nrow = 2)
+  y <- matrix(rnorm(2 * 4), nrow = 2)
+
+  expect_error(matmul_bad(x, y), "non-conformable")
+  expect_error(quick(matmul_bad), "non-conformable arguments in %*%")
+})
+
+test_that("matrix multiplication rejects incompatible destinations", {
+  dest_mismatch <- function() {
+    declare(type(x = double(2)))
+    a <- matrix(1.5, 2L, 2L)
+    x <- a %*% a
+    x
+  }
+
+  expect_error(quick(dest_mismatch), "incompatible rank for %\\*%")
+})
+
+test_that("crossprod and tcrossprod match R", {
+  cross_fun <- function(x, y) {
+    declare(
+      type(x = double(6, 4)),
+      type(y = double(6, 4))
+    )
+    crossprod(x, y)
+  }
+
+  tcross_fun <- function(x, y) {
+    declare(
+      type(x = double(6, 4)),
+      type(y = double(6, 4))
+    )
+    tcrossprod(x, y)
+  }
+
+  set.seed(2)
+  x <- matrix(rnorm(6 * 4), nrow = 6)
+  y <- matrix(rnorm(6 * 4), nrow = 6)
+
+  expect_quick_equal(cross_fun, list(x = x, y = y))
+  expect_quick_equal(tcross_fun, list(x = x, y = y))
+})
+
+test_that("single-argument crossprod/tcrossprod match R", {
+  cross_single <- function(x) {
+    declare(type(x = double(5, 4)))
+    crossprod(x)
+  }
+
+  tcross_single <- function(x) {
+    declare(type(x = double(5, 4)))
+    tcrossprod(x)
+  }
+
+  cross_vec <- function(x) {
+    declare(type(x = double(5)))
+    crossprod(x)
+  }
+
+  tcross_vec <- function(x) {
+    declare(type(x = double(5)))
+    tcrossprod(x)
+  }
+
+  set.seed(3)
+  x <- matrix(rnorm(5 * 4), nrow = 5)
+  v <- rnorm(5)
+
+  expect_quick_equal(cross_single, list(x = x))
+  expect_quick_equal(tcross_single, list(x = x))
+  expect_quick_equal(cross_vec, list(x = v))
+  expect_quick_equal(tcross_vec, list(x = v))
+})
+
+test_that("crossprod and tcrossprod handle vector inputs", {
+  cross_vec_mat <- function(x, y) {
+    declare(
+      type(x = double(4)),
+      type(y = double(4, 3))
+    )
+    crossprod(x, y)
+  }
+
+  cross_vec_vec <- function(x, y) {
+    declare(
+      type(x = double(4)),
+      type(y = double(4))
+    )
+    crossprod(x, y)
+  }
+
+  tcross_vec_vec <- function(x, y) {
+    declare(
+      type(x = double(4)),
+      type(y = double(5))
+    )
+    tcrossprod(x, y)
+  }
+
+  set.seed(13)
+  x <- rnorm(4)
+  y_cross <- matrix(rnorm(12), nrow = 4)
+  y_vec <- rnorm(4)
+  y_vec_long <- rnorm(5)
+
+  expect_quick_equal(cross_vec_mat, list(x = x, y = y_cross))
+  expect_quick_equal(cross_vec_vec, list(x = x, y = y_vec))
+  expect_quick_equal(tcross_vec_vec, list(x = x, y = y_vec_long))
+})
+
+test_that("crossprod rejects incompatible destination dimensions", {
+  crossprod_bad_dest <- function(x) {
+    declare(type(x = double(4, 3)), type(out = double(2, 2)))
+    out <- crossprod(x)
+    out
+  }
+
+  expect_error(
+    quick(crossprod_bad_dest),
+    "assignment target has incompatible dimensions"
+  )
+})
+
+test_that("outer supports multiplication and %o%", {
+  outer_default <- function(x, y) {
+    declare(
+      type(x = double(3)),
+      type(y = double(4))
+    )
+    outer(x, y)
+  }
+
+  outer_mul <- function(x, y) {
+    declare(
+      type(x = double(3)),
+      type(y = double(4))
+    )
+    outer(x, y, "*")
+  }
+
+  outer_op <- function(x, y) {
+    declare(
+      type(x = double(3)),
+      type(y = double(4))
+    )
+    x %o% y
+  }
+
+  set.seed(10)
+  x <- rnorm(3)
+  y <- rnorm(4)
+
+  expect_quick_equal(outer_default, list(x = x, y = y))
+  expect_quick_equal(outer_mul, list(x = x, y = y))
+  expect_quick_equal(outer_op, list(x = x, y = y))
+})
+
+test_that("outer supports scalar inputs", {
+  outer_scalar <- function(x, y) {
+    declare(
+      type(x = double(NA)),
+      type(y = double(3))
+    )
+    out <- outer(x, y)
+    out
+  }
+
+  outer_scalar_op <- function(x, y) {
+    declare(
+      type(x = double(3)),
+      type(y = double(NA))
+    )
+    out <- x %o% y
+    out
+  }
+
+  set.seed(16)
+  x <- 1.25
+  y <- rnorm(3)
+  x_vec <- rnorm(3)
+  y_scalar <- -0.5
+
+  expect_quick_equal(outer_scalar, list(x = x, y = y))
+  expect_quick_equal(outer_scalar_op, list(x = x_vec, y = y_scalar))
+})
+
+test_that("blas operations support preallocated outputs", {
+  matmul_out <- function(A, B) {
+    declare(
+      type(A = double(2, 3)),
+      type(B = double(3, 2)),
+      type(out = double(2, 2))
+    )
+    out <- A %*% B
+    out
+  }
+
+  crossprod_out <- function(x) {
+    declare(
+      type(x = double(4, 3)),
+      type(out = double(3, 3))
+    )
+    out <- crossprod(x)
+    out
+  }
+
+  outer_out <- function(x, y) {
+    declare(
+      type(x = double(2)),
+      type(y = double(3)),
+      type(out = double(2, 3))
+    )
+    out <- outer(x, y)
+    out
+  }
+
+  set.seed(12)
+  A <- matrix(rnorm(2 * 3), nrow = 2)
+  B <- matrix(rnorm(3 * 2), nrow = 3)
+  X <- matrix(rnorm(4 * 3), nrow = 4)
+  x <- rnorm(2)
+  y <- rnorm(3)
+
+  expect_quick_equal(matmul_out, list(A = A, B = B))
+  expect_quick_equal(crossprod_out, list(x = X))
+  expect_quick_equal(outer_out, list(x = x, y = y))
+})
+
+test_that("outer errors on unsupported FUN", {
+  outer_add <- function(x, y) {
+    declare(
+      type(x = double(3)),
+      type(y = double(4))
+    )
+    outer(x, y, "+")
+  }
+
+  set.seed(1)
+  x <- rnorm(3)
+  y <- rnorm(4)
+
+  expect_error(quick(outer_add), "outer\\(\\) only supports FUN = \"\\*\"")
+})
+
+test_that("forwardsolve and backsolve match R", {
+  forward_vec <- function(L, b) {
+    declare(
+      type(L = double(4, 4)),
+      type(b = double(4))
+    )
+    forwardsolve(L, b)
+  }
+
+  forward_mat <- function(L, b) {
+    declare(
+      type(L = double(4, 4)),
+      type(b = double(4, 2))
+    )
+    forwardsolve(L, b)
+  }
+
+  back_vec <- function(U, b) {
+    declare(
+      type(U = double(4, 4)),
+      type(b = double(4))
+    )
+    backsolve(U, b)
+  }
+
+  back_mat <- function(U, b) {
+    declare(
+      type(U = double(4, 4)),
+      type(b = double(4, 2))
+    )
+    backsolve(U, b)
+  }
+
+  back_transpose <- function(U, b) {
+    declare(
+      type(U = double(4, 4)),
+      type(b = double(4))
+    )
+    backsolve(U, b, transpose = TRUE)
+  }
+
+  forward_upper <- function(U, b) {
+    declare(
+      type(U = double(4, 4)),
+      type(b = double(4))
+    )
+    forwardsolve(U, b, upper.tri = TRUE)
+  }
+
+  forward_transpose <- function(L, b) {
+    declare(
+      type(L = double(4, 4)),
+      type(b = double(4))
+    )
+    forwardsolve(L, b, transpose = TRUE)
+  }
+
+  back_lower <- function(L, b) {
+    declare(
+      type(L = double(4, 4)),
+      type(b = double(4))
+    )
+    backsolve(L, b, upper.tri = FALSE)
+  }
+
+  set.seed(11)
+  base <- matrix(rnorm(16), nrow = 4)
+  L <- base
+  L[upper.tri(L)] <- 0
+  diag(L) <- diag(L) + 5
+  U <- base
+  U[lower.tri(U)] <- 0
+  diag(U) <- diag(U) + 5
+  b_vec <- rnorm(4)
+  b_mat <- matrix(rnorm(8), nrow = 4)
+
+  expect_quick_equal(forward_vec, list(L = L, b = b_vec))
+  expect_quick_equal(forward_mat, list(L = L, b = b_mat))
+  expect_quick_equal(back_vec, list(U = U, b = b_vec))
+  expect_quick_equal(back_mat, list(U = U, b = b_mat))
+  expect_quick_equal(back_transpose, list(U = U, b = b_vec))
+  expect_quick_equal(forward_upper, list(U = U, b = b_vec))
+  expect_quick_equal(forward_transpose, list(L = L, b = b_vec))
+  expect_quick_equal(back_lower, list(L = L, b = b_vec))
+})
diff --git a/tests/testthat/test-matrix.R b/tests/testthat/test-matrix.R
index 6f785d1..8f79c32 100644
--- a/tests/testthat/test-matrix.R
+++ b/tests/testthat/test-matrix.R
@@ -62,7 +62,7 @@ test_that("reuse implicit size", {
     cat(c_wrapper)
   })
 
-  n <- 1000
+  n <- 400
   a1 <- as.double(1:n)
   a2 <- matrix(runif(n), n, n)
 
diff --git a/tests/testthat/test-openmp-parallelization.R b/tests/testthat/test-openmp-parallelization.R
index 5f15a13..12283bf 100644
--- a/tests/testthat/test-openmp-parallelization.R
+++ b/tests/testthat/test-openmp-parallelization.R
@@ -161,7 +161,7 @@ test_that("parallel loops show parallelism without large slowdowns", {
   serial <- function(x, n) {
     declare(type(x = double(n)), type(n = integer(1)), type(out = double(n)))
     out <- double(n)
-    iters <- 20L
+    iters <- 12L
     for (i in seq_len(n)) {
       v <- x[i]
       for (k in seq_len(iters)) {
@@ -176,7 +176,7 @@ test_that("parallel loops show parallelism without large slowdowns", {
   parallel <- function(x, n) {
     declare(type(x = double(n)), type(n = integer(1)), type(out = double(n)))
     out <- double(n)
-    iters <- 20L
+    iters <- 12L
     declare(parallel())
     for (i in seq_len(n)) {
       v <- x[i]
@@ -189,7 +189,7 @@ test_that("parallel loops show parallelism without large slowdowns", {
     out
   }
 
-  n <- 2000000L
+  n <- 500000L
   set.seed(1)
   x <- runif(n)
   serial_q <- quick(serial)
@@ -198,7 +198,7 @@ test_that("parallel loops show parallelism without large slowdowns", {
   serial_q(x, n)
   gc()
 
-  reps <- 2L
+  reps <- 1L
   serial_time <- timed_run(serial_q, x, n, reps = reps)
   parallel_time <- withr::with_envvar(
     c(
@@ -234,13 +234,27 @@ test_that("parallel loops show parallelism without large slowdowns", {
     signif(parallel_time$cpu, 3)
   )
 
+  slowdown_factor <- if (identical(Sys.info()[["sysname"]], "Darwin")) {
+    2.5
+  } else {
+    1.5
+  }
+
   if (!anyNA(c(parallel_time$cpu, serial_time$cpu))) {
     cpu_increase <- parallel_time$cpu > serial_time$cpu * 1.1
     elapsed_improve <- parallel_time$elapsed < serial_time$elapsed * 0.95
     expect_true(cpu_increase || elapsed_improve, label = info)
-    expect_lt(parallel_time$elapsed, serial_time$elapsed * 1.5, label = info)
+    expect_lt(
+      parallel_time$elapsed,
+      serial_time$elapsed * slowdown_factor,
+      label = info
+    )
   } else {
-    expect_lt(parallel_time$elapsed, serial_time$elapsed * 1.5, label = info)
+    expect_lt(
+      parallel_time$elapsed,
+      serial_time$elapsed * slowdown_factor,
+      label = info
+    )
   }
 })
 
@@ -249,7 +263,7 @@ test_that("openmp responds to OMP_NUM_THREADS across sessions", {
 
   check_thread_scaling_subprocess(
     label = "iter-map",
-    n = 1000000L,
-    iters = 100L
+    n = 400000L,
+    iters = 80L
   )
 })
diff --git a/tests/testthat/test-openmp-utils.R b/tests/testthat/test-openmp-utils.R
new file mode 100644
index 0000000..5fd303e
--- /dev/null
+++ b/tests/testthat/test-openmp-utils.R
@@ -0,0 +1,63 @@
+test_that("openmp_makevars_lines uses explicit env flags", {
+  withr::local_envvar(c(
+    QUICKR_OPENMP_FFLAGS = "-fopenmp",
+    QUICKR_OPENMP_LIBS = "-fopenmp"
+  ))
+
+  expect_equal(
+    quickr:::openmp_makevars_lines(),
+    c("PKG_FFLAGS += -fopenmp", "PKG_LIBS += -fopenmp")
+  )
+})
+
+test_that("openmp_makevars_lines errors when flags are missing", {
+  local_mocked_bindings(
+    openmp_fflags = function() "",
+    .package = "quickr"
+  )
+
+  expect_error(
+    quickr:::openmp_makevars_lines(),
+    "OpenMP was requested but no OpenMP flags",
+    class = "quickr_openmp_unavailable"
+  )
+})
+
+test_that("openmp_makevars_lines errors when linker flags are missing", {
+  local_mocked_bindings(
+    openmp_fflags = function() "-fopenmp",
+    openmp_link_flags = function(...) "",
+    .package = "quickr"
+  )
+
+  expect_error(
+    quickr:::openmp_makevars_lines(),
+    "OpenMP was requested but no OpenMP linker flags",
+    class = "quickr_openmp_unavailable"
+  )
+})
+
+test_that("openmp_config_value caches toolchain lookups", {
+  cache_env <- environment(quickr:::openmp_config_value)
+  old_cache <- cache_env$cached
+  old_config <- cache_env$quickr_r_cmd_config_value
+  withr::defer(cache_env$cached <- old_cache)
+  withr::defer({
+    if (is.null(old_config)) {
+      rm(quickr_r_cmd_config_value, envir = cache_env)
+    } else {
+      cache_env$quickr_r_cmd_config_value <- old_config
+    }
+  })
+  cache_env$cached <- NULL
+
+  calls <- 0
+  cache_env$quickr_r_cmd_config_value <- function(...) {
+    calls <<- calls + 1
+    "value"
+  }
+
+  expect_equal(quickr:::openmp_config_value("QUICKR_TEST_CACHE"), "value")
+  expect_equal(quickr:::openmp_config_value("QUICKR_TEST_CACHE"), "value")
+  expect_equal(calls, 1)
+})
diff --git a/tests/testthat/test-parallel-declare.R b/tests/testthat/test-parallel-declare.R
index 5af6b69..767c1aa 100644
--- a/tests/testthat/test-parallel-declare.R
+++ b/tests/testthat/test-parallel-declare.R
@@ -161,3 +161,24 @@ test_that("parallel declarations do not cross control flow boundaries", {
     regexp = "parallel\\(\\)/omp\\(\\) must be followed by a for-loop or sapply\\(\\)"
   )
 })
+
+test_that("openmp functions that use BLAS load and run", {
+  openmp_supported_or_skip()
+
+  blas_parallel <- function(x, n) {
+    declare(
+      type(x = double(n, n)),
+      type(n = integer(1)),
+      type(out = double(n, n))
+    )
+    declare(parallel())
+    for (i in seq_len(1L)) {
+      out <- x %*% x
+    }
+    out
+  }
+
+  set.seed(123)
+  x <- matrix(runif(16), nrow = 4)
+  expect_quick_equal(blas_parallel, list(x, 4L))
+})
diff --git a/tests/testthat/test-quick-context.R b/tests/testthat/test-quick-context.R
new file mode 100644
index 0000000..593d10f
--- /dev/null
+++ b/tests/testthat/test-quick-context.R
@@ -0,0 +1,51 @@
+test_that("quick requires explicit names inside packages", {
+  pkg_call <- function() {
+    quick(function(x) {
+      declare(type(x = double(1)))
+      x + 1
+    })
+  }
+  environment(pkg_call) <- asNamespace("stats")
+
+  expect_error(pkg_call(), "must provide a unique `name`", fixed = TRUE)
+})
+
+test_that("quick returns a closure in package context when named", {
+  pkg_call <- function() {
+    quick("pkg_fun", function(x) {
+      declare(type(x = double(1)))
+      x + 1
+    })
+  }
+  environment(pkg_call) <- asNamespace("stats")
+
+  qfn <- pkg_call()
+  expect_true(is.function(qfn))
+})
+
+test_that("quick activates the collector during pkgload load_code", {
+  skip_if_not_installed("pkgload")
+
+  withr::local_envvar(DEVTOOLS_LOAD = "quickr-test")
+  quickr:::collector$get_collected()
+  withr::defer(quickr:::collector$get_collected())
+
+  local_mocked_bindings(
+    dump_collected = function() invisible(NULL),
+    .package = "quickr"
+  )
+  local_mocked_bindings(
+    load_code = function(code) code(),
+    .package = "pkgload"
+  )
+
+  qfn <- pkgload::load_code(function() {
+    quick(function(x) {
+      declare(type(x = double(1)))
+      x + 1
+    })
+  })
+
+  expect_true(is.function(qfn))
+  expect_true(quickr:::collector$is_active())
+})
diff --git a/tests/testthat/test-quick-windows-paths.R b/tests/testthat/test-quick-windows-paths.R
new file mode 100644
index 0000000..1b9f492
--- /dev/null
+++ b/tests/testthat/test-quick-windows-paths.R
@@ -0,0 +1,109 @@
+test_that("quickr_windows_add_dll_paths is a no-op off Windows", {
+  withr::local_envvar(PATH = "path-entry")
+
+  expect_false(
+    isTRUE(quickr:::quickr_windows_add_dll_paths(
+      flags = character(),
+      os_type = "unix"
+    ))
+  )
+  expect_identical(Sys.getenv("PATH"), "path-entry")
+})
+
+test_that("quickr_windows_add_dll_paths adds missing directories on Windows", {
+  temp <- withr::local_tempdir()
+  lib_dir <- file.path(temp, "lib")
+  bin_dir <- file.path(temp, "bin")
+  compiler_dir <- file.path(temp, "compiler")
+  dir.create(lib_dir)
+  dir.create(bin_dir)
+  dir.create(compiler_dir)
+
+  withr::local_envvar(c(
+    PATH = temp,
+    RTOOLS45_HOME = "",
+    RTOOLS44_HOME = "",
+    RTOOLS43_HOME = "",
+    RTOOLS42_HOME = "",
+    RTOOLS40_HOME = "",
+    RTOOLS_HOME = ""
+  ))
+
+  res <- quickr:::quickr_windows_add_dll_paths(
+    flags = c(paste0("-L", lib_dir)),
+    os_type = "windows",
+    config_value = function(...) "",
+    which = function(cmds) setNames(file.path(compiler_dir, cmds), cmds)
+  )
+
+  expect_true(isTRUE(res))
+  path <- Sys.getenv("PATH")
+  path_entries <- strsplit(path, ";", fixed = TRUE)[[1L]]
+  path_entries <- path_entries[nzchar(path_entries)]
+  path_norm <- tolower(normalizePath(
+    path_entries,
+    winslash = "\\",
+    mustWork = FALSE
+  ))
+  bin_sibling <- file.path(lib_dir, "..", "bin")
+  lib_dir_norm <- tolower(normalizePath(
+    lib_dir,
+    winslash = "\\",
+    mustWork = FALSE
+  ))
+  bin_sibling_norm <- tolower(normalizePath(
+    bin_sibling,
+    winslash = "\\",
+    mustWork = FALSE
+  ))
+  bin_dir_norm <- tolower(normalizePath(
+    bin_dir,
+    winslash = "\\",
+    mustWork = FALSE
+  ))
+
+  expect_true(lib_dir_norm %in% path_norm)
+  expect_true(
+    bin_sibling_norm %in% path_norm || bin_dir_norm %in% path_norm
+  )
+  compiler_dir_norm <- tolower(normalizePath(
+    compiler_dir,
+    winslash = "\\",
+    mustWork = FALSE
+  ))
+  expect_true(compiler_dir_norm %in% path_norm)
+})
+
+test_that("quickr_windows_add_dll_paths leaves PATH unchanged when complete", {
+  temp <- withr::local_tempdir()
+  lib_dir <- file.path(temp, "lib")
+  bin_dir <- file.path(temp, "bin")
+  compiler_dir <- file.path(temp, "compiler")
+  dir.create(lib_dir)
+  dir.create(bin_dir)
+  dir.create(compiler_dir)
+
+  base_path <- paste(
+    c(lib_dir, bin_dir, R.home("bin"), compiler_dir),
+    collapse = ";"
+  )
+  withr::local_envvar(c(
+    PATH = base_path,
+    RTOOLS45_HOME = "",
+    RTOOLS44_HOME = "",
+    RTOOLS43_HOME = "",
+    RTOOLS42_HOME = "",
+    RTOOLS40_HOME = "",
+    RTOOLS_HOME = ""
+  ))
+
+  res <- quickr:::quickr_windows_add_dll_paths(
+    flags = c(paste0("-L", lib_dir)),
+    os_type = "windows",
+    config_value = function(...) "",
+    which = function(cmds) setNames(file.path(compiler_dir, cmds), cmds)
+  )
+
+  expect_false(isTRUE(res))
+  expect_identical(Sys.getenv("PATH"), base_path)
+})