Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1018,23 +1018,24 @@ class TestCLIParsing {
"daffodil-cli/src/test/resources/org/apache/daffodil/cli/cli_schema_05.dfdl.xsd"
)

runCLI(args"parse -s $schema -I jdom -TinfosetWalkerSkipMin=0 -TinfosetWalkerSkipMax=0") {
cli =>
// this is not enough data for the schema, which leads to a parse error about insufficient bits
cli.sendBytes(Array[Byte](0, 0, 0, 1), inputDone = true)

// there was a bug in Daffodil that is most easily observed using the jdom infoset outputter
// with a non skipping infoset walker. With this setup, when an element fails to parse
// inside a choice dispatch (and no surrounding points of uncertainty) the infoset walker
// could walk into the failed element, which leads to an SDE when using the JDOM infoset
// outputter. This SDE prevents backtracking so we do not see a diagnostic about the
// choice dispatch branch failing. If the bug is fixed, we should never walk into the
// invalid element, we should not get an SDE, and we should get a diagnostic about choice
// dispatch.
cli.expectErr("Parse Error: Choice dispatch branch failed")

// this is the core failure diagnostic, which we see regardless of bug
cli.expectErr("Parse Error: Insufficient bits in data.")
// request the streaming infoset walker with walk skipping disabled (skip min and max of 0),
// which is the "non skipping infoset walker" setup described in the comment below
runCLI(
args"parse -s $schema -I jdom -TinfosetWalkerMode=streaming -TinfosetWalkerSkipMin=0 -TinfosetWalkerSkipMax=0"
) { cli =>
// this is not enough data for the schema, which leads to a parse error about insufficient bits
cli.sendBytes(Array[Byte](0, 0, 0, 1), inputDone = true)

// there was a bug in Daffodil that is most easily observed using the jdom infoset outputter
// with a non skipping infoset walker. With this setup, when an element fails to parse
// inside a choice dispatch (and no surrounding points of uncertainty) the infoset walker
// could walk into the failed element, which leads to an SDE when using the JDOM infoset
// outputter. This SDE prevents backtracking so we do not see a diagnostic about the
// choice dispatch branch failing. If the bug is fixed, we should never walk into the
// invalid element, we should not get an SDE, and we should get a diagnostic about choice
// dispatch.
cli.expectErr("Parse Error: Choice dispatch branch failed")

// this is the core failure diagnostic, which we see regardless of bug
cli.expectErr("Parse Error: Insufficient bits in data.")
}(ExitCode.ParseError)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,7 @@ class DaffodilDebugger(
private def infosetToString(ie: InfosetElement): String = {
val bos = new java.io.ByteArrayOutputStream()
val xml = new XMLTextInfosetOutputter(bos, pretty = true, minimal = true)
val iw = InfosetWalker(
val iw = StreamingInfosetWalker(
ie.asInstanceOf[DIElement],
xml,
walkHidden = !DebuggerConfig.removeHidden,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ import java.util.concurrent.atomic.AtomicInteger
import java.util.concurrent.atomic.AtomicLong
import scala.collection.mutable.ArrayBuffer

import org.apache.daffodil.api
import org.apache.daffodil.api.infoset.InfosetArray
import org.apache.daffodil.api.infoset.InfosetComplexElement
import org.apache.daffodil.api.infoset.InfosetDocument
import org.apache.daffodil.api.infoset.InfosetElement
import org.apache.daffodil.api.infoset.InfosetOutputter
import org.apache.daffodil.api.infoset.InfosetSimpleElement
import org.apache.daffodil.api.infoset.InfosetTypeException
import org.apache.daffodil.api.metadata.ComplexElementMetadata
Expand All @@ -48,6 +50,7 @@ import org.apache.daffodil.lib.equality.TypeEqual
import org.apache.daffodil.lib.equality.ViewEqual
import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.exceptions.ThinException
import org.apache.daffodil.lib.exceptions.ThrowsSDE
import org.apache.daffodil.lib.iapi.DaffodilTunables
import org.apache.daffodil.lib.iapi.Diagnostic
import org.apache.daffodil.lib.iapi.ThinDiagnostic
Expand Down Expand Up @@ -193,6 +196,33 @@ sealed trait DINode {
* Array or Complex exception.
*/
def requireFinal(): Unit

/**
* Eagerly walk the entire subtree rooted at this node, emitting
* start/end events to `outputter` in document order. Hidden nodes are
* skipped. The walk is complete and blocking — all events for this node
* and its descendants are emitted before the method returns.
*
* Used by [[NonStreamingInfosetWalker]] to project the whole infoset in
* one pass after parsing is finished.
*
* This is an alternative to stepping through the infoset incrementally with
* [[StreamingInfosetWalker]], and the two approaches are not compatible: a
* single parse must project its infoset with one or the other, never a
* combination of both.
*
* @param outputter the outputter that receives the start/end events for
*                  this node and its descendants
*/
def walk(outputter: api.infoset.InfosetOutputter): Unit
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a comment that documents that this is an alternative to using the InfosetWalker and that the two are not compatible? A single parse must call either InfosetWalker.walk or DINode.walk, but never both, since the two methods are incompatible.


/**
 * Evaluate a single outputter event and convert any exception it raises
 * into a Schema Definition Error reported through `context`.
 *
 * @param outputterFunc by-name outputter call; evaluated exactly once
 * @param desc          short description of the event, inserted into the
 *                      "Failed to ..." diagnostic message
 * @param context       receives the SDE if the outputter call throws
 */
protected def doOutputter(outputterFunc: => Unit, desc: String, context: ThrowsSDE): Unit =
  try outputterFunc
  catch {
    case ex: Exception =>
      // FIXME: DAFFODIL-2884 This escalates a parser data exception to an SDE
      // Which breaks if string-as-xml encounters a string that is malformed XML.
      // We get the error thrown by the xml parser here outside of parsing, which is
      // too late.
      //
      // Report the underlying cause when there is one, otherwise the exception itself.
      val reason = Option(ex.getCause).getOrElse(ex).toString
      context.SDE("Failed to %s: %s", desc, reason)
  }
}

/**
Expand Down Expand Up @@ -1313,6 +1343,16 @@ final class DIArray(
}
}
}

/**
 * Emit this array to `outputter`: a start-array event, then every entry
 * of `_contents` in order, then an end-array event. Nothing is emitted
 * when the array is hidden.
 */
override def walk(outputter: InfosetOutputter): Unit =
  if (!isHidden) {
    doOutputter(outputter.startArray(this), "start infoset array", erd)
    for (occurrence <- _contents) occurrence.walk(outputter)
    doOutputter(outputter.endArray(this), "end infoset array", erd)
  }
}

/**
Expand Down Expand Up @@ -1666,6 +1706,13 @@ sealed class DISimple(override val erd: ElementRuntimeData)
}

override def getObject: Object = getAnyRef

/**
 * Emit this simple element to `outputter` as a start-simple event followed
 * immediately by an end-simple event. Nothing is emitted when the element
 * is hidden.
 */
override def walk(outputter: api.infoset.InfosetOutputter): Unit =
  if (!isHidden) {
    // a simple element has no children, so the two events are back to back
    doOutputter(outputter.startSimple(this), "start infoset simple element", erd)
    doOutputter(outputter.endSimple(this), "end infoset simple element", erd)
  }
}

/**
Expand Down Expand Up @@ -1710,7 +1757,7 @@ sealed class DIComplex(override val erd: ElementRuntimeData)
if (!isFinal) throw nfe
}

private val childNodes = new ArrayBuffer[DINode]
protected val childNodes = new ArrayBuffer[DINode]

private lazy val nameToChildNodeLookup =
new java.util.HashMap[NamedQName, ArrayBuffer[DINode]]
Expand Down Expand Up @@ -2008,6 +2055,15 @@ sealed class DIComplex(override val erd: ElementRuntimeData)
}
}

/**
 * Emit this complex element to `outputter`: a start-complex event, then
 * each entry of `childNodes` in order (recursively), then an end-complex
 * event. Nothing is emitted, and no children are visited, when the
 * element is hidden.
 */
override def walk(outputter: InfosetOutputter): Unit =
  if (!isHidden) {
    doOutputter(outputter.startComplex(this), "start infoset complex element", erd)
    childNodes.foreach(_.walk(outputter))
    doOutputter(outputter.endComplex(this), "end infoset complex element", erd)
  }
}

/*
Expand All @@ -2022,6 +2078,14 @@ final class DIDocument(erd: ElementRuntimeData) extends DIComplex(erd) with Info
* a constant value
*/
var isCompileExprFalseRoot: Boolean = false

/**
 * Emit the whole document to `outputter`: a start-document event, then
 * each entry of `childNodes` in order, then an end-document event. Unlike
 * the element and array overrides, there is no hidden check here.
 */
override def walk(outputter: InfosetOutputter): Unit = {
  doOutputter(outputter.startDocument(), "start infoset document", erd)
  for (node <- childNodes) node.walk(outputter)
  doOutputter(outputter.endDocument(), "end infoset document", erd)
}
}

object Infoset {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,85 @@ import org.apache.daffodil.lib.exceptions.ThrowsSDE
import org.apache.daffodil.lib.util.MStackOf
import org.apache.daffodil.lib.util.MStackOfInt

object InfosetWalker {
/**
* Walks Daffodil's internal infoset representation (DINodes) and emits
* start/end events to an [[api.infoset.InfosetOutputter]], which projects the
* infoset to the caller's desired format (XML, JSON, SAX, etc.).
*
* Two concrete implementations exist, selectable via the `infosetWalkerMode`
* tunable:
*
* - [[StreamingInfosetWalker]] (`infosetWalkerMode = "streaming"`): emits events
* incrementally as elements are finalized during parsing. Keeps memory usage
* bounded for large or deeply-nested infosets, but incurs overhead from
* repeated speculative walk attempts.
*
* - [[NonStreamingInfosetWalker]] (`infosetWalkerMode = "nonStreaming"`, default):
* defers all output until the entire infoset is available, then walks it in
* one pass. Faster for schemas where the infoset fits comfortably in memory,
* because it avoids the overhead of incremental walk attempts.
*
* Callers invoke [[walk]] periodically during parsing. When `lastWalk = true`
* the walker must flush any remaining events before returning. [[isFinished]]
* returns `true` once the entire infoset has been walked.
*/
trait InfosetWalker {

/**
* The outputter to which events are written.
*/
def outputter: api.infoset.InfosetOutputter

/**
* Whether the walk is complete.
*
* @return `true` once the entire infoset has been walked and all events have
*         been emitted. Calling [[walk]] after this is an error.
*/
def isFinished: Boolean

/**
* Take zero or more steps in the infoset, emitting events to [[outputter]].
*
* Some implementations may return before the entire infoset has been walked
* (e.g. because parsing has not yet finalized the next element), so a single
* call is not guaranteed to emit everything. Callers should invoke this
* periodically and check [[isFinished]].
*
* @param lastWalk `true` if this is the final call; the walker must emit all
* remaining events before returning.
*/
def walk(lastWalk: Boolean = false): Unit
}

/**
 * An [[InfosetWalker]] that defers all output until the parse is complete,
 * then walks the entire infoset in a single pass when `walk(lastWalk = true)`
 * is called. Intermediate `walk()` calls are no-ops.
 *
 * This is the default walker (tunable `infosetWalkerMode = "nonStreaming"`).
 * It is faster than [[StreamingInfosetWalker]] for most schemas because it
 * avoids the overhead of repeated speculative walk attempts, at the cost of
 * holding the full infoset in memory until parsing finishes. For very large
 * infosets or memory-constrained environments, prefer [[StreamingInfosetWalker]].
 *
 * @param root The root [[DIElement]] of the infoset to walk.
 * @param outputter The [[api.infoset.InfosetOutputter]] that receives events.
 */
class NonStreamingInfosetWalker(root: DIElement, val outputter: api.infoset.InfosetOutputter)
  extends InfosetWalker {

  // Set once the single full walk has been performed.
  private var finished: Boolean = false

  override def isFinished: Boolean = finished

  /**
   * Does nothing unless `lastWalk` is `true`, in which case the whole infoset
   * rooted at `root` is emitted to [[outputter]] and the walker becomes
   * finished.
   *
   * It is a usage error to call this once [[isFinished]] is `true`. Without
   * that guard, a second `walk(lastWalk = true)` would emit every event to
   * the outputter again.
   *
   * @param lastWalk `true` when parsing is complete and the infoset should
   *                 be emitted
   */
  override def walk(lastWalk: Boolean = false): Unit = {
    // Same usage contract that StreamingInfosetWalker.walk enforces.
    Assert.usage(!finished)

    if (lastWalk) {
      root.walk(outputter)
      finished = true
    }
  }
}

object StreamingInfosetWalker {

/**
* Create an infoset walker starting with a specified DINode. If the caller
Expand Down Expand Up @@ -79,7 +157,7 @@ object InfosetWalker {
releaseUnneededInfoset: Boolean,
walkSkipMin: Int = 32,
walkSkipMax: Int = 2048
): InfosetWalker = {
): StreamingInfosetWalker = {

// Determine the container of the root node and the index in which it
// appears in that node
Expand All @@ -99,7 +177,7 @@ object InfosetWalker {
(container, container.indexOf(root))
}
}
new InfosetWalker(
new StreamingInfosetWalker(
startingContainerNode,
startingContainerIndex,
outputter,
Expand Down Expand Up @@ -173,7 +251,7 @@ object InfosetWalker {
* and increases the number of walk() calls to skip before trying again. This
* defines the maximum number of skipped calls, even as this number increases.
*/
class InfosetWalker private (
class StreamingInfosetWalker private (
startingContainerNode: DINode,
startingContainerIndex: Int,
val outputter: api.infoset.InfosetOutputter,
Expand All @@ -182,7 +260,7 @@ class InfosetWalker private (
releaseUnneededInfoset: Boolean,
walkSkipMin: Int,
walkSkipMax: Int
) {
) extends InfosetWalker {

/**
* These two pieces of mutable state are all that is needed to keep track of
Expand Down Expand Up @@ -227,10 +305,7 @@ class InfosetWalker private (

private var finished = false

/**
* Determine if the walker has finished walking.
*/
def isFinished = finished
// Reports whether the walk is complete. The result type is spelled out so this
// public member does not rely on inference.
override def isFinished: Boolean = finished

/**
* The following variables are used to determine when to skip the walk()
Expand Down Expand Up @@ -269,7 +344,7 @@ class InfosetWalker private (
* walk() will be called, the lastWalk parameter should be set to true, which
* will cause walk() to not skip any steps.
*/
def walk(lastWalk: Boolean = false): Unit = {
override def walk(lastWalk: Boolean = false): Unit = {
Assert.usage(!finished)

if (walkSkipRemaining > 0 && !lastWalk) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import org.apache.daffodil.lib.exceptions.Abort
import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.exceptions.ThrowsSDE
import org.apache.daffodil.lib.iapi.DaffodilTunables
import org.apache.daffodil.lib.iapi.InfosetWalkerMode
import org.apache.daffodil.lib.util.MStack
import org.apache.daffodil.lib.util.MStackOf
import org.apache.daffodil.lib.util.MStackOfInt
Expand All @@ -52,6 +53,8 @@ import org.apache.daffodil.runtime1.infoset.DISimpleState
import org.apache.daffodil.runtime1.infoset.DataValue.DataValuePrimitive
import org.apache.daffodil.runtime1.infoset.Infoset
import org.apache.daffodil.runtime1.infoset.InfosetWalker
import org.apache.daffodil.runtime1.infoset.NonStreamingInfosetWalker
import org.apache.daffodil.runtime1.infoset.StreamingInfosetWalker
import org.apache.daffodil.runtime1.processors.DataLoc
import org.apache.daffodil.runtime1.processors.DataProcessor
import org.apache.daffodil.runtime1.processors.ElementRuntimeData
Expand Down Expand Up @@ -750,15 +753,19 @@ object PState {
val diagnostics = Nil
val mutablePState = MPState()
val tunables = dataProc.tunables
val infosetWalker = InfosetWalker(
doc.asInstanceOf[DIElement],
output,
walkHidden = false,
ignoreBlocks = false,
releaseUnneededInfoset = !areDebugging && tunables.releaseUnneededInfoset,
walkSkipMin = tunables.infosetWalkerSkipMin,
walkSkipMax = tunables.infosetWalkerSkipMax
)
// Select the walker implementation from the infosetWalkerMode tunable: the
// streaming walker when explicitly requested, otherwise the walker that emits
// the whole infoset in one pass on the last walk.
val infosetWalker = tunables.infosetWalkerMode match {
  case InfosetWalkerMode.Streaming =>
    StreamingInfosetWalker(
      doc.asInstanceOf[DIElement],
      output,
      walkHidden = false,
      ignoreBlocks = false,
      releaseUnneededInfoset = !areDebugging && tunables.releaseUnneededInfoset,
      walkSkipMin = tunables.infosetWalkerSkipMin,
      walkSkipMax = tunables.infosetWalkerSkipMax
    )
  case _ =>
    new NonStreamingInfosetWalker(doc.asInstanceOf[DIElement], output)
}

dis.cst.setPriorBitOrder(root.defaultBitOrder)
val newState = new PState(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,6 @@ abstract class SequenceParserBase(
// should not increment the group index.
pstate.mpstate.moveOverOneGroupIndexOnly()
}

// we might have added a new instance to the array. Attempt to project it to an
// infoset if there are no PoU's or anything blocking it
pstate.walker.walk()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ object TestInfosetFree {

val compiler = Compiler()
.withTunable("releaseUnneededInfoset", "false")
.withTunable("infosetWalkerMode", "streaming")

val pf = compiler.compileNode(schema)
if (pf.isError) {
Expand Down Expand Up @@ -92,7 +93,7 @@ object TestInfosetFree {
val detailedOutputter =
new ScalaXMLInfosetOutputter(showFreedInfo = true)

val infosetWalker = InfosetWalker(
val infosetWalker = StreamingInfosetWalker(
doc,
detailedOutputter,
walkHidden = true, // let's ensure any hidden elements are free
Expand Down
Loading
Loading