diff --git a/build.sbt b/build.sbt index 38d910eacb..b9f9801745 100644 --- a/build.sbt +++ b/build.sbt @@ -247,6 +247,10 @@ def buildJavacOptions() = { "-deprecation", "-Xlint:dep-ann", "-Xlint:unchecked", + // Preserve Java parameter names in bytecode so Daffodil's reflection-based + // layer parameter resolution can match method parameters to DFDL variables + // by name (otherwise they appear as arg0/arg1/...). + "-parameters", "--release", minSupportedJavaVersion ) diff --git a/daffodil-core/src/main/java/org/apache/daffodil/layers/runtime1/ConfigurableGZIPOutputStream.java b/daffodil-core/src/main/java/org/apache/daffodil/layers/runtime1/ConfigurableGZIPOutputStream.java new file mode 100644 index 0000000000..6b042fb645 --- /dev/null +++ b/daffodil-core/src/main/java/org/apache/daffodil/layers/runtime1/ConfigurableGZIPOutputStream.java @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.daffodil.layers.runtime1; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.zip.GZIPOutputStream; + +/** + * A {@link GZIPOutputStream} subclass that allows the caller to specify the + * compression level explicitly. 
The standard {@code GZIPOutputStream} uses + * the JDK's default level (which zlib resolves to level 6), with no + * mechanism to choose a different level. This subclass gives callers control + * over the speed-vs-compression-ratio trade-off that the gzip format supports. + * + *
According to the zlib documentation and the logic in {@code deflate.c}, + * compression levels range from 0 to 9: + *
As the compression level increases, encoding speed decreases. Some + * reports indicate that level 9 is approximately 10x slower than the default + * for only about 16% additional compression. + */ +public final class ConfigurableGZIPOutputStream extends GZIPOutputStream { + + /** + * Creates a {@code ConfigurableGZIPOutputStream} that writes compressed + * data to the given output stream at the specified compression level. + * + * @param out the output stream to write compressed data to + * @param level the compression level; an integer in the range 0-9, + * or {@link java.util.zip.Deflater#DEFAULT_COMPRESSION} + * ({@code -1}) for the default + * @throws IOException if an I/O error occurs + * @throws IllegalArgumentException if {@code level} is not a valid compression level + */ + public ConfigurableGZIPOutputStream(OutputStream out, int level) throws IOException { + super(out); + this.def.setLevel(level); + } +} \ No newline at end of file diff --git a/daffodil-core/src/main/java/org/apache/daffodil/layers/runtime1/GZipLayer.java b/daffodil-core/src/main/java/org/apache/daffodil/layers/runtime1/GZipLayer.java index df37f95ff9..bfab2b3373 100644 --- a/daffodil-core/src/main/java/org/apache/daffodil/layers/runtime1/GZipLayer.java +++ b/daffodil-core/src/main/java/org/apache/daffodil/layers/runtime1/GZipLayer.java @@ -22,130 +22,50 @@ import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.util.Objects; import java.util.zip.GZIPInputStream; -import java.util.zip.GZIPOutputStream; +import java.util.zip.Deflater; public final class GZipLayer extends Layer { - - private static Boolean fixNeeded = null; - - public static boolean fixIsNeeded() { - if (Objects.isNull(fixNeeded)) { - // prior to java 16 - String versionString = System.getProperty("java.version"); - - // Extract the major version using string manipulation - int majorVersion; - if (versionString.startsWith("1.8")) { - majorVersion = 8; - } else { - String[] parts = 
versionString.split("\\."); - assert (parts.length > 0); - majorVersion = Integer.parseInt(parts[0]); - } - fixNeeded = (majorVersion < 16); - } - return fixNeeded; - } - + + private int compressionLevel = Deflater.DEFAULT_COMPRESSION; + public GZipLayer() { super("gzip", "urn:org.apache.daffodil.layers.gzip"); // convert IOExceptions to Processing Errors setProcessingErrorException(IOException.class); } - @Override - public InputStream wrapLayerInput(InputStream jis) throws Exception { - return new GZIPInputStream(jis); - } - - @Override - public OutputStream wrapLayerOutput(OutputStream jos) throws Exception { - OutputStream fixedOS = fixIsNeeded() ? new GZIPFixedOutputStream(jos) : jos; - return new GZIPOutputStream(fixedOS); - } - -} - -/** - * Prior to Java 16, the java.util.zip.GZIPOutputStream wrote a value of zero for - * the OS field in the header (byte index 9). - * In Java 16, this was changed to a - * value of 255 to better abide by the GZIP specification. Unfortunately, this - * means unparsed data using a GZIP layer might have a single byte difference, - * depending on the Java version used. This can lead to inconsistent behavior of - * test failures that expect a certain byte value. - *
- * To resolve this issue, we create this GZIPFixedOutputStream. This should wrap - * the underlying OutputStream and be passed as the OutputStream to the - * GZIPOutputStream. When the GZIPOutputStream writes the 9th byte to this - * GZIPFixedOutputStream, this will always write a value of 255, making all Java - * versions prior to 16 consistent with Java 16+ behavior. - */ -class GZIPFixedOutputStream extends OutputStream { - - private final OutputStream os; - - public GZIPFixedOutputStream(OutputStream os) { - this.os = os; - } - /** - * The next byte position that byte will be written to. If this is negative, - * that means we have already fixed the output and everything should just - * pass straight through. + * Provides the layer's parameter variables to the layer. + * + *
The compression level controls the trade-off between encoding speed
+ * and output size when gzip-compressing data during unparsing. Higher
+ * levels produce smaller output but take longer to encode.
+ *
+ * @param compressionLevel an integer specifying the gzip compression level
+ * to use when unparsing. Valid values are 0 through
+ * 9, where 0 means no compression and 9 means maximum
+ * compression, or {@link Deflater#DEFAULT_COMPRESSION}
+ * ({@code -1}) to use the underlying zlib library's
+ * default (level 6).
*/
- private int bytePosition = 0;
-
- @Override
- public void close() throws IOException {
- os.close();
+  public void setLayerVariableParameters(int compressionLevel) {
+    // A level is acceptable when it is the Deflater "default" sentinel or an
+    // explicit level between 0 and 9 inclusive.
+    final boolean isDefaultSentinel = compressionLevel == Deflater.DEFAULT_COMPRESSION;
+    final boolean isExplicitLevel = 0 <= compressionLevel && compressionLevel <= 9;
+    if (!isDefaultSentinel && !isExplicitLevel) {
+      runtimeSchemaDefinitionError("Invalid compression level: " + compressionLevel);
+    }
+
+    // Remembered for wrapLayerOutput, which passes it to the gzip output stream.
+    this.compressionLevel = compressionLevel;
   }
-
+
@Override
- public void flush() throws IOException {
- os.flush();
+  public InputStream wrapLayerInput(InputStream jis) throws Exception {
+    // Reads from the returned stream go through GZIPInputStream, which is
+    // layered directly over the stream handed in by the caller.
+    final InputStream gzipInput = new GZIPInputStream(jis);
+    return gzipInput;
   }
@Override
- public void write(byte[] b, int off, int len) throws IOException {
- if (bytePosition < 0) {
- // The bad byte has been fixed, pass all writes directly through to the
- // underlying OutputStream. This may be more efficient than the default
- // OutputStream write() function, which writes the bytes from this array
- // one at a time
- os.write(b, off, len);
- } else {
- // The bad byte has not been fixed yet. Unless a newer version of Java
- // has made changes, the GZIPOutputStreamm will have passed in a 10 byte
- // array to this function that includes the bad byte. Let's just write
- // that array using the default write(array) method that writes these
- // bytes one at a time and will call the write(int) method that will fix
- // that byte. Calling write() one at a time is maybe inefficient but for
- // such a small array it should not have a noticeable effect.
- super.write(b, off, len);
- }
+  public OutputStream wrapLayerOutput(OutputStream jos) throws Exception {
+    // Use whatever level is currently stored on this layer: the value accepted
+    // by setLayerVariableParameters, or the field's initial default otherwise.
+    final int level = this.compressionLevel;
+    return new ConfigurableGZIPOutputStream(jos, level);
   }
- @Override
- public void write(int b) throws IOException {
- if (bytePosition < 0) {
- // The bad byte has already been fixed, simply pass this byte through to
- // the underlying OutputStream
- os.write(b);
- } else if (bytePosition < 9) {
- // The bad byte has not been fixed, and we haven't reached it yet, simply
- // pass this byte through and increment our byte position
- os.write(b);
- bytePosition += 1;
- } else if (bytePosition == 9) {
- // This is the bad byte, it is a 0 on some Java versions. Write 255
- // instead of to match Java 16+ behavior. Also, set bytePosition to -1 to
- // signify that we have fixed the bad byte and that all other writes
- // should just pass directly to the underlying OutputStream
- os.write(255);
- bytePosition = -1;
- }
- }
}
diff --git a/daffodil-core/src/main/resources/org/apache/daffodil/layers/xsd/gzipLayer.dfdl.xsd b/daffodil-core/src/main/resources/org/apache/daffodil/layers/xsd/gzipLayer.dfdl.xsd
index 30aca2ae04..d8a8a802fb 100644
--- a/daffodil-core/src/main/resources/org/apache/daffodil/layers/xsd/gzipLayer.dfdl.xsd
+++ b/daffodil-core/src/main/resources/org/apache/daffodil/layers/xsd/gzipLayer.dfdl.xsd
@@ -26,8 +26,8 @@