Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1018,23 +1018,24 @@ class TestCLIParsing {
"daffodil-cli/src/test/resources/org/apache/daffodil/cli/cli_schema_05.dfdl.xsd"
)

runCLI(args"parse -s $schema -I jdom -TinfosetWalkerSkipMin=0 -TinfosetWalkerSkipMax=0") {
cli =>
// this is not enough data for the scema, which leads to a parse error about insufficient bits
cli.sendBytes(Array[Byte](0, 0, 0, 1), inputDone = true)

// there was a bug Daffodil that is most easily observed using the jdom infoset outputter
// with a non skipping infoset walker. With this setup, when an element fails to parse
// inside a choice dispatch (and no surrounding points of uncertainty) the infoset walker
// could walk into the failed element, which leads to an SDE when using the JDOM infoset
// outputter. This SDE prevents backtracking so we do not see a diagnostic about the
// choice dispatch branch failing. If the bug is fixed, we should never walk into the
// invalid element, we should not get an SDE, and we should get a diagnostic about choice
// dispatch.
cli.expectErr("Parse Error: Choice dispatch branch failed")

// this is the core failure diagnostic, which we see regardless of bug
cli.expectErr("Parse Error: Insufficient bits in data.")
runCLI(
args"parse -s $schema -I jdom -TinfosetWalkerMode=streaming -TinfosetWalkerSkipMin=0 -TinfosetWalkerSkipMax=0"
) { cli =>
// this is not enough data for the scema, which leads to a parse error about insufficient bits
cli.sendBytes(Array[Byte](0, 0, 0, 1), inputDone = true)

// there was a bug Daffodil that is most easily observed using the jdom infoset outputter
// with a non skipping infoset walker. With this setup, when an element fails to parse
// inside a choice dispatch (and no surrounding points of uncertainty) the infoset walker
// could walk into the failed element, which leads to an SDE when using the JDOM infoset
// outputter. This SDE prevents backtracking so we do not see a diagnostic about the
// choice dispatch branch failing. If the bug is fixed, we should never walk into the
// invalid element, we should not get an SDE, and we should get a diagnostic about choice
// dispatch.
cli.expectErr("Parse Error: Choice dispatch branch failed")

// this is the core failure diagnostic, which we see regardless of bug
cli.expectErr("Parse Error: Insufficient bits in data.")
}(ExitCode.ParseError)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@ import java.util.concurrent.atomic.AtomicInteger
import java.util.concurrent.atomic.AtomicLong
import scala.collection.mutable.ArrayBuffer

import org.apache.daffodil.api
import org.apache.daffodil.api.infoset.InfosetArray
import org.apache.daffodil.api.infoset.InfosetComplexElement
import org.apache.daffodil.api.infoset.InfosetDocument
import org.apache.daffodil.api.infoset.InfosetElement
import org.apache.daffodil.api.infoset.InfosetOutputter
import org.apache.daffodil.api.infoset.InfosetSimpleElement
import org.apache.daffodil.api.infoset.InfosetTypeException
import org.apache.daffodil.api.metadata.ComplexElementMetadata
Expand All @@ -48,6 +50,7 @@ import org.apache.daffodil.lib.equality.TypeEqual
import org.apache.daffodil.lib.equality.ViewEqual
import org.apache.daffodil.lib.exceptions.Assert
import org.apache.daffodil.lib.exceptions.ThinException
import org.apache.daffodil.lib.exceptions.ThrowsSDE
import org.apache.daffodil.lib.iapi.DaffodilTunables
import org.apache.daffodil.lib.iapi.Diagnostic
import org.apache.daffodil.lib.iapi.ThinDiagnostic
Expand Down Expand Up @@ -193,6 +196,24 @@ sealed trait DINode {
* Array or Complex exception.
*/
def requireFinal(): Unit

def walk(outputter: api.infoset.InfosetOutputter): Unit
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a comment that documents that this is an alternative to using the InfosetWalker and that the two are not compatible. A single parse must either call InfosetWalker.walk or DINode.walk, but never combined since the two methods are incompatible.


protected def doOutputter(outputterFunc: => Unit, desc: String, context: ThrowsSDE): Unit = {
try {
outputterFunc
} catch {
case e: Exception => {
// FIXME: DAFFODIL-2884 This escalates a parser data exception to an SDE
// Which breaks if string-as-xml encounters a string that is malformed XML.
// We get the error thrown by the xml parser here outside of parsing, which is
// too late.
val cause = e.getCause
val msg = if (cause == null) e.toString else cause.toString
context.SDE("Failed to %s: %s", desc, msg)
}
}
}
}

/**
Expand Down Expand Up @@ -1313,6 +1334,16 @@ final class DIArray(
}
}
}

override def walk(outputter: InfosetOutputter): Unit = {
if (!isHidden) {
doOutputter(outputter.startArray(this), "start infoset array", erd)
_contents.foreach { child =>
child.walk(outputter)
}
doOutputter(outputter.endArray(this), "end infoset array", erd)
}
}
}

/**
Expand Down Expand Up @@ -1666,6 +1697,13 @@ sealed class DISimple(override val erd: ElementRuntimeData)
}

override def getObject: Object = getAnyRef

override def walk(outputter: api.infoset.InfosetOutputter): Unit = {
if (!isHidden) {
doOutputter(outputter.startSimple(this), "start infoset simple element", erd)
doOutputter(outputter.endSimple(this), "end infoset simple element", erd)
}
}
}

/**
Expand Down Expand Up @@ -1710,7 +1748,7 @@ sealed class DIComplex(override val erd: ElementRuntimeData)
if (!isFinal) throw nfe
}

private val childNodes = new ArrayBuffer[DINode]
protected val childNodes = new ArrayBuffer[DINode]

private lazy val nameToChildNodeLookup =
new java.util.HashMap[NamedQName, ArrayBuffer[DINode]]
Expand Down Expand Up @@ -2008,6 +2046,15 @@ sealed class DIComplex(override val erd: ElementRuntimeData)
}
}

override def walk(outputter: InfosetOutputter): Unit = {
if (!isHidden) {
doOutputter(outputter.startComplex(this), "start infoset complex element", erd)
childNodes.foreach { child =>
child.walk(outputter)
}
doOutputter(outputter.endComplex(this), "end infoset complex element", erd)
}
}
}

/*
Expand All @@ -2022,6 +2069,14 @@ final class DIDocument(erd: ElementRuntimeData) extends DIComplex(erd) with Info
* a constant value
*/
var isCompileExprFalseRoot: Boolean = false

override def walk(outputter: InfosetOutputter): Unit = {
doOutputter(outputter.startDocument(), "start infoset document", erd)
childNodes.foreach { child =>
child.walk(outputter)
}
doOutputter(outputter.endDocument(), "end infoset document", erd)
}
}

object Infoset {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import org.apache.daffodil.api.validation.ValidatorInitializationException
import org.apache.daffodil.api.validation.Validators
import org.apache.daffodil.lib.equality.*
import org.apache.daffodil.lib.iapi.DaffodilTunables
import org.apache.daffodil.lib.iapi.InfosetWalkerMode
import org.apache.daffodil.lib.iapi.WithDiagnostics
import org.apache.daffodil.runtime1.dsom.*
import org.apache.daffodil.runtime1.iapi.DFDL
Expand Down Expand Up @@ -386,11 +387,15 @@ class DataProcessor(
state.setMaybeProcessor(Maybe(p))

if (state.processorStatus == Success) {
// At this point all infoset nodes have been set final, all PoUs
// resolved, and all infoset walker blocks released. Do one last walk
// to project any unwalked elements to the target infoset
state.walker.walk(lastWalk = true)
Assert.invariant(state.walker.isFinished)
if (tunables.infosetWalkerMode == InfosetWalkerMode.NonStreaming) {
state.infoset.walk(state.output)
} else {
// At this point all infoset nodes have been set final, all PoUs
// resolved, and all infoset walker blocks released. Do one last walk
// to project any unwalked elements to the target infoset
state.walker.walk(lastWalk = true)
Assert.invariant(state.walker.isFinished)
}
}
} catch {
// We will actually be handling all errors in the outer loop
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,6 @@ abstract class SequenceParserBase(
// should not increment the group index.
pstate.mpstate.moveOverOneGroupIndexOnly()
}

// we might have added a new instance to the array. Attempt to project it to an
// infoset if there are no PoU's or anything blocking it
pstate.walker.walk()

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we still want these walker.walk() calls, they should just be gated on the value of the new tunable being streaming. That way if streaming mode is enabled we still try walk the infoset in the middle of parsing to output whatever parses of the infoset are marked as final.

} // end while for each repeat
parser.endArray(pstate)
parser.arrayCompleteChecks(pstate, resultOfTry, priorResultOfTry)
Expand Down Expand Up @@ -312,10 +307,6 @@ abstract class SequenceParserBase(
} // end case scalarParser
} // end match case parser

// we finished parsing one whole thing (scalar element, entire array, etc). Attempt to
// project it to an infoset if there are no PoU's or anything blocking it
pstate.walker.walk()

scpIndex += 1

} // end while for each sequence child parser
Expand All @@ -330,10 +321,6 @@ abstract class SequenceParserBase(
// that we incremented above. This will allow the infoset walker to walk
// into the new children that are now in the correct order.
pstate.infoset.infosetWalkerBlockCount -= 1

// we've unblocked the unordered sequence, try walking to output
// everything we've created
pstate.walker.walk()
}

if (child ne null) child.sequenceCompleteChecks(pstate, resultOfTry, priorResultOfTry)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ object TestInfosetFree {

val compiler = Compiler()
.withTunable("releaseUnneededInfoset", "false")
.withTunable("infosetWalkerMode", "streaming")

val pf = compiler.compileNode(schema)
if (pf.isError) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,17 @@
</xs:restriction>
</xs:simpleType>
</xs:element>
<xs:element name="infosetWalkerMode" type="daf:TunableInfosetWalkerMode" default="nonStreaming" minOccurs="0">
<xs:annotation>
<xs:documentation>
Daffodil can periodically walk the internal infoset to send events to the configured
InfosetOutputter (streaming) or it can walk the internal infoset once at the end of
parsing (nonStreaming). The idea being that simple schemas would benefit from the
nonStreaming infoset walker, while more complex schemas with lots of points of
uncertaintly would benefit from the streaming infoset walker.
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think schemas with PoU's are actual also likely to benefit with non-streaming mode, this is because PoU's tend to make it so the infoset walker can't do any work because the walker can't walk into something if there's a PoU where we might backtrack and create a different infoset. So streaming with lots of PoUs is likely to just lead to to attempts to walk that don't do anything.

I think the main situation where someone would want to use streaming mode is when the infoset is likely to be very large or when memory is constrained.

Note that this is because the main benefit of streaming mode is that it allows parts of the internal infoset that we know won't be changed to be sent to the outputter and garbage collected in the middle of a parse, which frees up memory while parsing. But if there is no real memory pressure, I imagine in most cases non-streaming will be faster or the same. I don't think we need to mention these details, just giving some background.

</xs:documentation>
</xs:annotation>
</xs:element>
<xs:element name="infosetWalkerSkipMin" default="32" minOccurs="0">
<xs:annotation>
<xs:documentation>
Expand Down Expand Up @@ -780,6 +791,13 @@
</xs:list>
</xs:simpleType>

<xs:simpleType name="TunableInfosetWalkerMode">
<xs:restriction base="xs:string">
<xs:enumeration value="streaming" />
<xs:enumeration value="nonStreaming" />
</xs:restriction>
</xs:simpleType>

<xs:element name="dfdlConfig">
<xs:complexType>
<xs:sequence>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
-->
<tdml:defineConfig name="cfg_infosetWalker_01">
<daf:tunables>
<daf:infosetWalkerMode>streaming</daf:infosetWalkerMode>
<daf:infosetWalkerSkipMin>0</daf:infosetWalkerSkipMin>
</daf:tunables>
</tdml:defineConfig>
Expand All @@ -41,11 +42,19 @@
-->
<tdml:defineConfig name="cfg_infosetWalker_02">
<daf:tunables>
<daf:infosetWalkerMode>streaming</daf:infosetWalkerMode>
<daf:infosetWalkerSkipMin>0</daf:infosetWalkerSkipMin>
<daf:releaseUnneededInfoset>false</daf:releaseUnneededInfoset>
</daf:tunables>
</tdml:defineConfig>

<!--uses streaming walker -->
<tdml:defineConfig name="cfg_infosetWalker_03">
<daf:tunables>
<daf:infosetWalkerMode>streaming</daf:infosetWalkerMode>
</daf:tunables>
</tdml:defineConfig>


<tdml:defineSchema name="schema_01" elementFormDefault="unqualified">
<xs:include schemaLocation="/org/apache/daffodil/xsd/DFDLGeneralFormat.dfdl.xsd" />
Expand All @@ -65,7 +74,7 @@

</tdml:defineSchema>

<tdml:parserTestCase name="infosetWalker_01" model="schema_01">
<tdml:parserTestCase name="infosetWalker_01" model="schema_01" config="cfg_infosetWalker_01">
<tdml:document>
<tdml:documentPart type="text">|header;body1;body2;body3;|</tdml:documentPart>
</tdml:document>
Expand Down Expand Up @@ -113,5 +122,21 @@
</tdml:infoset>
</tdml:parserTestCase>

<!--uses non-streaming walker -->
<tdml:parserTestCase name="infosetWalker_04" model="schema_01">
<tdml:document>
<tdml:documentPart type="text">|header;body1;body2;body3;|</tdml:documentPart>
</tdml:document>
<tdml:infoset>
<tdml:dfdlInfoset>
<ex:root_01 xmlns:ex="http://example.com">
<first>header</first>
<field>body1</field>
<field>body2</field>
<field>body3</field>
</ex:root_01>
</tdml:dfdlInfoset>
</tdml:infoset>
</tdml:parserTestCase>

</tdml:testSuite>
Original file line number Diff line number Diff line change
Expand Up @@ -384,6 +384,7 @@

<tdml:defineConfig name="infosetWalkerNoSkip">
<daf:tunables>
<daf:infosetWalkerMode>streaming</daf:infosetWalkerMode>
<daf:infosetWalkerSkipMin>0</daf:infosetWalkerSkipMin>
</daf:tunables>
</tdml:defineConfig>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,6 @@ class TestInfosetWalker extends TdmlTests {
// DAFFODIL-2755
@Test def infosetWalker_02 = test
@Test def infosetWalker_03 = test
// DAFFODIL-3070
@Test def infosetWalker_04 = test
}
Loading