diff --git a/ci/test_files_touched.py b/ci/test_files_touched.py index 99fc4ec8..55c6d41d 100644 --- a/ci/test_files_touched.py +++ b/ci/test_files_touched.py @@ -98,17 +98,29 @@ 'tern report -f json -i photon:3.0', 'tern report -f spdxtagvalue -i photon:3.0', 'tern report -f spdxjson -i photon:3.0', + 'tern report -f spdxyaml -i photon:3.0', + 'tern report -f spdxxml -i photon:3.0', + 'tern report -f spdxrdf -i photon:3.0', 'tern report -d samples/alpine_python/Dockerfile', 'tern report -f html -i photon:3.0', 'tern report -f cyclonedxjson -i photon:3.0'], # tern/formats/spdx re.compile('tern/formats/spdx'): [ - 'tern report -f spdxtagvalue -i photon:3.0 -o spdx.spdx && ' \ - 'java -jar tools-java/target/tools-java-*-jar-with-dependencies.jar '\ + 'tern report -f spdxtagvalue -i photon:3.0 -o spdx.spdx && ' + 'java -jar tools-java/target/tools-java-*-jar-with-dependencies.jar ' 'Verify spdx.spdx', - 'tern report -f spdxjson -i photon:3.0 -o spdx.json && ' \ - 'java -jar tools-java/target/tools-java-*-jar-with-dependencies.jar '\ - 'Verify spdx.json'], + 'tern report -f spdxjson -i photon:3.0 -o spdx.json && ' + 'java -jar tools-java/target/tools-java-*-jar-with-dependencies.jar ' + 'Verify spdx.json', + 'tern report -f spdxyaml -i photon:3.0 -o spdx.yaml && ' + 'java -jar tools-java/target/tools-java-*-jar-with-dependencies.jar ' + 'Verify spdx.yaml', + 'tern report -f spdxxml -i photon:3.0 -o spdx.xml && ' + 'java -jar tools-java/target/tools-java-*-jar-with-dependencies.jar ' + 'Verify spdx.xml', + 'tern report -f spdxrdf -i photon:3.0 -o spdx.rdf.xml && ' + 'java -jar tools-java/target/tools-java-*-jar-with-dependencies.jar ' + 'Verify spdx.rdf.xml'], # tern/tools re.compile('tern/tools'): ['tern report -i golang:alpine'], diff --git a/requirements.txt b/requirements.txt index 25c81da2..1b13552e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,4 +18,5 @@ GitPython~=3.1 prettytable~=3.8 packageurl-python>=0.11.1 license-expression>=30.1 
+spdx-tools>=0.8.1 diff --git a/setup.cfg b/setup.cfg index e5132f68..8384af50 100644 --- a/setup.cfg +++ b/setup.cfg @@ -44,14 +44,19 @@ include_package_data = True [entry_points] tern.formats = default = tern.formats.default.generator:Default - spdxtagvalue = tern.formats.spdx.spdxtagvalue.generator:SpdxTagValue - spdxjson = tern.formats.spdx.spdxjson.generator:SpdxJSON - spdxjsonc = tern.formats.spdx.spdxjson.consumer:SpdxJSON json = tern.formats.json.generator:JSON jsonc = tern.formats.json.consumer:JSON yaml = tern.formats.yaml.generator:YAML html = tern.formats.html.generator:HTML cyclonedxjson = tern.formats.cyclonedx.cyclonedxjson.generator:CycloneDXJSON + spdxjson = tern.formats.spdx.spdxjson.generator:SpdxJSON + spdxyaml = tern.formats.spdx.spdxyaml.generator:SpdxYAML + spdxxml = tern.formats.spdx.spdxxml.generator:SpdxXML + spdxrdf = tern.formats.spdx.spdxrdf.generator:SpdxRDF + spdxtagvalue = tern.formats.spdx.spdxtagvalue.generator:SpdxTagValue + spdxtagvalue_legacy = tern.formats.spdx_legacy.spdxtagvalue.generator:SpdxTagValue + spdxjson_legacy = tern.formats.spdx_legacy.spdxjson.generator:SpdxJSON + spdxjsonc = tern.formats.spdx_legacy.spdxjson.consumer:SpdxJSON tern.extensions = cve_bin_tool = tern.extensions.cve_bin_tool.executor:CveBinTool scancode = tern.extensions.scancode.executor:Scancode diff --git a/tern/__main__.py b/tern/__main__.py index 0ef725ee..38e9b585 100755 --- a/tern/__main__.py +++ b/tern/__main__.py @@ -214,8 +214,13 @@ def main(): metavar='REPORT_FORMAT', help="Format the report using one of the " "available formats: " - "spdxtagvalue, spdxjson, cyclonedxjson, json, " - "yaml, html") + "spdxtagvalue, spdxjson, spdxyaml, spdxxml, spdxrdf, " + "cyclonedxjson, json, yaml, html") + parser_report.add_argument('-sv', '--spdx-version', + metavar='SPDX_VERSION', + help="Specify the version of the SPDX report format. " + "This will be ignored for all non-SPDX formats. " 
+ "Possible values are '2.2' (default) or '2.3'.") parser_report.add_argument('-o', '--output-file', default=None, metavar='FILE', help="Write the report to a file. " diff --git a/tern/formats/cyclonedx/cyclonedxjson/generator.py b/tern/formats/cyclonedx/cyclonedxjson/generator.py index 5585f456..ec9432f4 100644 --- a/tern/formats/cyclonedx/cyclonedxjson/generator.py +++ b/tern/formats/cyclonedx/cyclonedxjson/generator.py @@ -50,12 +50,14 @@ def get_document_dict(image_obj_list): class CycloneDXJSON(generator.Generate): - def generate(self, image_obj_list, print_inclusive=False): + def generate(self, image_obj_list, spdx_version, print_inclusive=False): ''' Generate a CycloneDX document The whole document should be stored in a dictionary which can be converted to JSON and dumped to a file using the write_report function in report.py. ''' logger.debug('Generating CycloneDX JSON document...') + if spdx_version is not None: + logger.warning("The SPDX version parameter is not supported for CycloneDX JSON.") report = get_document_dict(image_obj_list) diff --git a/tern/formats/default/generator.py b/tern/formats/default/generator.py index b1379bd3..4031446a 100644 --- a/tern/formats/default/generator.py +++ b/tern/formats/default/generator.py @@ -140,8 +140,11 @@ def print_licenses_only(image_obj_list): class Default(generator.Generate): - def generate(self, image_obj_list, print_inclusive=False): + def generate(self, image_obj_list, spdx_version: str, print_inclusive=False): '''Generate a default report''' + if spdx_version is not None: + logger.warning("The SPDX version parameter is not supported for the default format.") + report = formats.disclaimer.format( version_info=content.get_tool_version()) logger.debug('Creating a detailed report of components in image...') @@ -154,8 +157,11 @@ def generate(self, image_obj_list, print_inclusive=False): return report return report + print_licenses_only(image_obj_list) - def generate_layer(self, layer): + def 
generate_layer(self, layer, spdx_version: str): """Generate a default report for one layer object""" + if spdx_version is not None: + logger.warning("The SPDX version parameter is not supported for the default format.") + report = formats.disclaimer.format( version_info=content.get_tool_version()) logger.debug("Generating summary report for layer...") diff --git a/tern/formats/generator.py b/tern/formats/generator.py index 2f31a8c5..c476b498 100644 --- a/tern/formats/generator.py +++ b/tern/formats/generator.py @@ -10,6 +10,6 @@ class Generate(metaclass=ABCMeta): '''Base class for report plugins''' @abstractmethod - def generate(self, image_obj_list, print_inclusive=False): + def generate(self, image_obj_list, spdx_version, print_inclusive=False): '''Format the report according to the plugin style. Each subclass is responsible for their own formatting.''' diff --git a/tern/formats/html/generator.py b/tern/formats/html/generator.py index 1a7c7f96..f09fc1f3 100644 --- a/tern/formats/html/generator.py +++ b/tern/formats/html/generator.py @@ -321,16 +321,22 @@ def get_report_dict(image_obj_list): class HTML(generator.Generate): - def generate(self, image_obj_list, print_inclusive=False): + def generate(self, image_obj_list, spdx_version: str, print_inclusive=False): '''Given a list of image objects, create a html report for the images''' + if spdx_version is not None: + logger.warning("The SPDX version parameter is not supported for HTML.") + report_dict = get_report_dict(image_obj_list) report = create_html_report(report_dict, image_obj_list) return report - def generate_layer(self, layer): + def generate_layer(self, layer, spdx_version: str): """Given a layer object, create a html report for the layer""" logger.debug("Creating HTML report...") + if spdx_version is not None: + logger.warning("The SPDX version parameter is not supported for HTML.") + report = "" report = report + '\n' + head_layer % (css, get_tool_version()) report = report + '\n' + 
report_dict_to_html(layer.to_dict()) diff --git a/tern/formats/json/generator.py b/tern/formats/json/generator.py index 3b5df0c4..2cb58220 100644 --- a/tern/formats/json/generator.py +++ b/tern/formats/json/generator.py @@ -8,18 +8,28 @@ """ import json +import logging + from tern.formats import generator +from tern.utils import constants +logger = logging.getLogger(constants.logger_name) class JSON(generator.Generate): - def generate(self, image_obj_list, print_inclusive=False): + def generate(self, image_obj_list, spdx_version: str, print_inclusive=False): '''Given a list of image objects, create a json object string''' image_list = [] + if spdx_version is not None: + logger.warning("The SPDX version parameter is not supported for JSON.") + for image in image_obj_list: image_list.append({'image': image.to_dict()}) image_dict = {'images': image_list} return json.dumps(image_dict) - def generate_layer(self, layer): + def generate_layer(self, layer, spdx_version: str): """Create a json object for one layer""" + if spdx_version is not None: + logger.warning("The SPDX version parameter is not supported for JSON.") + return json.dumps(layer.to_dict()) diff --git a/tern/formats/spdx/__init__.py b/tern/formats/spdx/__init__.py index a048eda0..e69de29b 100644 --- a/tern/formats/spdx/__init__.py +++ b/tern/formats/spdx/__init__.py @@ -1,4 +0,0 @@ -# -*- coding: utf-8 -*- -# -# Copyright (c) 2019 VMware, Inc. All Rights Reserved. 
-# SPDX-License-Identifier: BSD-2-Clause diff --git a/tern/formats/spdx/constants.py b/tern/formats/spdx/constants.py new file mode 100644 index 00000000..1c8486e1 --- /dev/null +++ b/tern/formats/spdx/constants.py @@ -0,0 +1,15 @@ +from spdx_tools.spdx.model import Version + +DOCUMENT_ID = 'SPDXRef-DOCUMENT' +DOCUMENT_NAME = 'Tern report for {image_name}' +SPDX_VERSION = 'SPDX-2.2' +DATA_LICENSE = 'CC0-1.0' +DOCUMENT_COMMENT = 'This document was generated by ' \ + 'the Tern Project: https://github.com/tern-tools/tern' +DOCUMENT_NAMESPACE = 'https://spdx.org/spdxdocs/tern-' \ + 'report-{version}-{image}-{uuid}' +LICENSE_LIST_VERSION = Version(3, 20) +CREATOR_NAME = 'tern-{version}' +DOCUMENT_NAME_SNAPSHOT = 'Tern SPDX SBoM' +DOCUMENT_NAMESPACE_SNAPSHOT = 'https://spdx.org/spdxdocs/tern-report-' \ + '{timestamp}-{uuid}' diff --git a/tern/formats/spdx/file_helpers.py b/tern/formats/spdx/file_helpers.py new file mode 100644 index 00000000..8720a981 --- /dev/null +++ b/tern/formats/spdx/file_helpers.py @@ -0,0 +1,134 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 VMWare, Inc. All Rights Reserved. 
+# SPDX-License-Identifier: BSD-2-Clause + +""" +File level helpers for SPDX document generator +""" +import logging +from datetime import datetime +from typing import List + +from spdx_tools.spdx.model import File as SpdxFile, SpdxNone, SpdxNoAssertion, Checksum, ChecksumAlgorithm, FileType + +from tern.classes.file_data import FileData +from tern.classes.image import Image +from tern.classes.image_layer import ImageLayer +from tern.classes.template import Template +from tern.formats.spdx.layer_helpers import get_layer_checksum +from tern.formats.spdx.general_helpers import get_package_license_declared, get_file_spdxref +from tern.utils import constants + +logger = logging.getLogger(constants.logger_name) + + +def get_spdx_file_list_from_layer(layer_obj: ImageLayer, template: Template, timestamp: datetime, spdx_version: str) -> List[SpdxFile]: + """Given a layer object and the SPDX template mapping, return a list + of SPDX Files for each file in the layer""" + spdx_files: List[SpdxFile] = [] + file_refs = set() + for filedata in layer_obj.files: + # we do not know the layer's id, so we will use the timestamp instead + file_ref = get_file_spdxref(filedata, str(timestamp)) + if file_ref not in file_refs: + spdx_files.append(get_spdx_file_from_filedata(filedata, template, str(timestamp), spdx_version)) + file_refs.add(file_ref) + return spdx_files + + +def get_spdx_file_list_from_image(image_obj: Image, template: Template, spdx_version: str) -> List[SpdxFile]: + """Given an image_obj object, and the SPDX template mapping, return a list + of SPDX Files for each file in each layer of the image.""" + spdx_files: List[SpdxFile] = [] + + # use file refs to keep track of duplicate files that may be located + # in different places in the filesystem + file_refs = set() + for layer in image_obj.layers: + if layer.files_analyzed: + layer_checksum_value = get_layer_checksum(layer).value + for filedata in layer.files: + # we use the layer checksum as the layer id + file_ref = 
get_file_spdxref(filedata, layer_checksum_value) + if file_ref not in file_refs: + spdx_files.append(get_spdx_file_from_filedata(filedata, template, layer_checksum_value, spdx_version)) + file_refs.add(file_ref) + return spdx_files + + +def get_spdx_file_from_filedata(filedata: FileData, template: Template, layer_id: str, spdx_version: str) -> SpdxFile: + """Given a FileData object and its SPDX template mapping, return an + SPDX representation of the file. A layer_id is used to + distinguish copies of the same file occurring in different places in the + image""" + mapping = filedata.to_dict(template) + + if filedata.licenses: + # Add the license expression to the list if it is a valid SPDX + # identifier; otherwise, add the LicenseRef + license_info_in_file = [get_package_license_declared(lic) for lic in set(filedata.licenses)] + else: + license_info_in_file = [SpdxNone()] + + file_notice = get_file_notice(filedata) + file_comment = get_file_comment(filedata) + file_contributors = get_file_contributors(filedata) + + file_types = None + if mapping['FileType']: + file_types = [FileType[mapping['FileType'].upper()]] + + return SpdxFile( + spdx_id=get_file_spdxref(filedata, layer_id), + name=mapping['FileName'], + checksums=[get_file_checksum(filedata)], + license_concluded=SpdxNoAssertion() if spdx_version == "SPDX-2.2" else None, # we don't provide this + copyright_text=SpdxNoAssertion() if spdx_version == "SPDX-2.2" else None, # we don't know this + file_types=file_types, + license_info_in_file=license_info_in_file, + notice=file_notice if file_notice else None, + comment=file_comment if file_comment else None, + contributors=file_contributors if file_contributors else None, + ) + + +def get_file_checksum(filedata: FileData) -> Checksum: + """Given a FileData object, return the checksum required by SPDX. + Currently, the spec requires a SHA1 checksum""" + checksum = filedata.get_checksum('sha1') + if not checksum: + logger.error("No SHA1 checksum found in file. 
Resorting to empty file checksum.") + checksum = "da39a3ee5e6b4b0d3255bfef95601890afd80709" + return Checksum(ChecksumAlgorithm.SHA1, checksum) + + +def get_file_notice(filedata: FileData) -> str: + """Return a formatted string with all copyrights found in a file. Return + an empty string if there are no copyrights""" + notice = '' + for cp in filedata.copyrights: + notice = notice + cp + '\n' + return notice + + +def get_file_comment(filedata: FileData) -> str: + """Return a formatted comment string with all file level notices. Return + an empty string if no notices are present""" + comment = '' + for origin in filedata.origins.origins: + comment = comment + f'{origin.origin_str}:' + '\n' + for notice in origin.notices: + comment = comment + \ + f'{notice.level}: {notice.message}' + '\n' + return comment + + +def get_file_contributors(filedata: FileData) -> List[str]: + """The SPDX spec allows for an optional list of file contributors. + If there are any authors found in the file, return a list of authors. + If empty, return an empty list""" + contributors = [] + for author in filedata.authors: + contributors.append(author) + return contributors diff --git a/tern/formats/spdx/general_helpers.py b/tern/formats/spdx/general_helpers.py new file mode 100644 index 00000000..a90341d7 --- /dev/null +++ b/tern/formats/spdx/general_helpers.py @@ -0,0 +1,122 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 VMWare, Inc. All Rights Reserved. 
+# SPDX-License-Identifier: BSD-2-Clause + +""" +General helpers for SPDX document generator +""" +import hashlib +import re +import uuid +from datetime import datetime +from typing import Union, Tuple + +from license_expression import get_spdx_licensing, LicenseExpression +from spdx_tools.spdx.model import SpdxNone + +from tern.classes.file_data import FileData +from tern.classes.image import Image +from tern.classes.image_layer import ImageLayer +from tern.classes.package import Package + + +def get_uuid() -> str: + return str(uuid.uuid4()) + + +def get_current_timestamp() -> datetime: + return datetime.utcnow().replace(microsecond=0) + + +def get_string_id(string: str) -> str: + """Return a unique identifier for the given string""" + return hashlib.sha256(string.encode('utf-8')).hexdigest()[-7:] + + +def get_license_ref(license_string: str) -> str: + """For SPDX format, return a LicenseRef string""" + return 'LicenseRef-' + get_string_id(str(license_string)) + + +def replace_invalid_chars_in_license_expression(license_string: str) -> str: + """Given a license string, replace common invalid SPDX license characters.""" + not_allowed = [',', ';', '/', '&'] + if any(x in license_string for x in not_allowed): + # Try to replace common invalid license characters + license_string = license_string.replace(',', ' and') + license_string = license_string.replace('/', '-') + license_string = license_string.replace(';', '.') + license_string = license_string.replace('&', 'and') + return license_string + + +def is_valid_license_expression(license_string: str) -> bool: + """Given a license string, return True if the license expression is valid, + False otherwise.""" + licensing = get_spdx_licensing() + try: + return licensing.validate(license_string).errors == [] + # Catch any invalid license chars here + except AttributeError: + return False + + +def get_package_license_declared(package_license_declared: str) -> Union[LicenseExpression, SpdxNone]: + """After substituting 
common invalid SPDX license characters using + the replace_invalid_chars_in_license_expression() function, determines if the declared + license string for a package or file is a valid SPDX license expression. + If license expression is valid after substitutions, return the parsed license expression. + If not, return the LicenseRef of the original declared license expression + passed in to the function. If a blank string is passed in, return `NONE`.""" + if package_license_declared: + replaced_license = replace_invalid_chars_in_license_expression(package_license_declared) + if is_valid_license_expression(replaced_license): + return get_spdx_licensing().parse(replaced_license) + + return get_spdx_licensing().parse(get_license_ref(package_license_declared)) + return SpdxNone() + + +########################################################################################### +# central place for SPDXRef-generators to avoid circular imports as these are widely used # +########################################################################################### + +def get_image_spdxref(image_obj: Image) -> str: + """Given the image object, return an SPDX reference ID""" + # here we return the image name, tag and id + return f'SPDXRef-{image_obj.get_human_readable_id()}' + + +def get_package_spdxref(package_obj: Package) -> Tuple[str, str]: + """Given the package obj, return an SPDX reference ID for the binary + and source package, if available""" + pkg_ref = f"{package_obj.name}-{package_obj.version}" + src_ref = '' + if package_obj.src_name: + # differentiate between binary and source package refs + src_ver = package_obj.src_version + "-src" + src_ref = f"{package_obj.src_name}-{src_ver}" + # replace all the characters that SPDX doesn't like + # allowed characters are: letters, numbers, "." 
and "-" + clean_pkg_ref = re.sub(r'[:+~_/]', r'-', pkg_ref) + if src_ref: + clean_src_ref = re.sub(r'[:+~/]', r'-', src_ref) + return f'SPDXRef-{clean_pkg_ref}', f'SPDXRef-{clean_src_ref}' + return f'SPDXRef-{clean_pkg_ref}', '' + + +def get_layer_spdxref(layer_obj: ImageLayer) -> str: + """Given the layer object, return an SPDX reference ID""" + # here we return the shortened diff_id of the layer + return f'SPDXRef-{layer_obj.diff_id[:10]}' + + +def get_file_spdxref(filedata: FileData, layer_id: str) -> str: + """Given a FileData object, return a unique identifier for the SPDX + document. According to the spec, this should be of the form: SPDXRef- + We will use a combination of the file name, checksum and layer_id and + calculate a hash of this string""" + file_string = filedata.path + filedata.checksum[:7] + layer_id + fileid = get_string_id(file_string) + return f'SPDXRef-{fileid}' diff --git a/tern/formats/spdx/image_helpers.py b/tern/formats/spdx/image_helpers.py new file mode 100644 index 00000000..c90e93ef --- /dev/null +++ b/tern/formats/spdx/image_helpers.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 VMWare, Inc. All Rights Reserved. 
+# SPDX-License-Identifier: BSD-2-Clause + +""" +Image level helpers for SPDX document generator +Images for SPDX act like a Package +""" +from typing import List + +from spdx_tools.spdx.model import ExtractedLicensingInfo, Package as SpdxPackage, \ + SpdxNoAssertion + +from tern.classes.image import Image +from tern.classes.template import Template +from tern.formats.spdx.layer_helpers import get_layer_licenses +from tern.formats.spdx.general_helpers import get_license_ref, get_uuid, is_valid_license_expression, \ + get_image_spdxref +from tern.utils.general import get_git_rev_or_version + + +def get_image_extracted_licenses(image_obj: Image) -> List[ExtractedLicensingInfo]: + """Given an image_obj, return a unique list of ExtractedLicensingInfo + that contains all the file and package LicenseRef and their corresponding plain text.""" + + unique_licenses = set() + for layer in image_obj.layers: + # Get all of the unique file licenses, if they exist + unique_licenses.update(get_layer_licenses(layer)) + # Next, collect any package licenses not already accounted for + for package in layer.packages: + if package.pkg_license: + unique_licenses.add(package.pkg_license) + # Add debian licenses from copyright text as one license + if package.pkg_licenses: + unique_licenses.add(", ".join(package.pkg_licenses)) + extracted_licensing_info = [] + for lic in list(unique_licenses): + valid_spdx = is_valid_license_expression(lic) + if not valid_spdx: + extracted_licensing_info.append(ExtractedLicensingInfo(license_id=get_license_ref(lic), extracted_text=lic)) + + return extracted_licensing_info + + +def get_spdx_package_from_image(image_obj: Image, template: Template, spdx_version: str) -> SpdxPackage: + """Given an image object, the template object for SPDX and the SPDX version, return the + SPDX Package for the given image. 
If the version is not SPDX-2.2, we can omit NoAssertions in most cases.""" + mapping = image_obj.to_dict(template) + return SpdxPackage( + spdx_id=get_image_spdxref(image_obj), + name=mapping["PackageName"], + download_location=SpdxNoAssertion(), + version=mapping["PackageVersion"], + supplier=SpdxNoAssertion(), + files_analyzed=False, + license_concluded=SpdxNoAssertion() if spdx_version == "SPDX-2.2" else None, + license_declared=SpdxNoAssertion() if spdx_version == "SPDX-2.2" else None, + copyright_text=SpdxNoAssertion() if spdx_version == "SPDX-2.2" else None, + ) + + +def get_document_namespace(image_obj: Image) -> str: + """Given the image object, return a unique SPDX document uri. + This is a combination of the tool name and version, the image name + and the uuid""" + return f'https://spdx.org/spdxdocs/tern-report-{get_git_rev_or_version()[1]}-{image_obj.name}-{get_uuid()}' diff --git a/tern/formats/spdx/layer_helpers.py b/tern/formats/spdx/layer_helpers.py new file mode 100644 index 00000000..4718ebaf --- /dev/null +++ b/tern/formats/spdx/layer_helpers.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 VMWare, Inc. All Rights Reserved. 
+# SPDX-License-Identifier: BSD-2-Clause + +""" +Layer level helpers for SPDX document generator +Layers for SPDX act like a Package +""" +import hashlib +import logging +import os +from typing import List, Optional, Tuple + +from license_expression import Licensing + +from spdx_tools.spdx.model import Package as SpdxPackage, SpdxNoAssertion, SpdxNone, PackageVerificationCode, Checksum, \ + ChecksumAlgorithm, Relationship, RelationshipType, ExtractedLicensingInfo + +from tern.classes.image import Image +from tern.classes.image_layer import ImageLayer +from tern.formats.spdx.general_helpers import get_license_ref, is_valid_license_expression, \ + get_image_spdxref, get_package_spdxref, get_layer_spdxref, get_file_spdxref +from tern.formats.spdx.constants import DOCUMENT_ID +from tern.utils import constants +from tern.report import content + + +# global logger +logger = logging.getLogger(constants.logger_name) + + +def get_layer_extracted_licenses(layer_obj: ImageLayer) -> List[ExtractedLicensingInfo]: + """Given a layer_obj, return a unique list of ExtractedLicensingInfo + that contains all the file and package LicenseRef and the corresponding plain text.""" + + # Get all of the unique file licenses, if they exist + unique_licenses = set(get_layer_licenses(layer_obj)) + # Next, collect any package licenses not already accounted for + for package in layer_obj.packages: + if package.pkg_license: + unique_licenses.add(package.pkg_license) + extracted_texts = [] + for lic in list(unique_licenses): + valid_spdx = is_valid_license_expression(lic) + if not valid_spdx: + extracted_texts.append(ExtractedLicensingInfo(license_id=get_license_ref(lic), extracted_text=lic)) + return extracted_texts + + +def get_image_layer_relationships(image_obj: Image) -> List[Relationship]: + """Given an image object, return a list of Relationship objects describing the + relationship between each layer "package" and the image and packages + related to it.""" + layer_relationships = [] + image_ref = 
get_image_spdxref(image_obj) + + # Required - DOCUMENT_DESCRIBES relationship + layer_relationships.append(Relationship(DOCUMENT_ID, RelationshipType.DESCRIBES, image_ref)) + + for index, layer in enumerate(image_obj.layers): + layer_ref = get_layer_spdxref(layer) + # First, add the relationship of the layer to the image + layer_relationships.append(Relationship(image_ref, RelationshipType.CONTAINS, layer_ref)) + # Next, add the relationship of the layer to the previous layer + if index != 0: + prev_layer_ref = get_layer_spdxref(image_obj.layers[index - 1]) + layer_relationships.append(Relationship(prev_layer_ref, RelationshipType.HAS_PREREQUISITE, layer_ref)) + # Finally, add package relationships for the layer + if layer.packages: + for package in layer.packages: + pkg_ref, src_ref = get_package_spdxref(package) + layer_relationships.append(Relationship(layer_ref, RelationshipType.CONTAINS, pkg_ref)) + if src_ref: + layer_relationships.append(Relationship(pkg_ref, RelationshipType.GENERATED_FROM, src_ref)) + + return layer_relationships + + +def get_layer_file_data_list(layer_obj: ImageLayer) -> List[str]: + """Given a layer object return the SPDX list of file refs in the layer. 
+ Return an empty list if the files are not analyzed""" + file_refs = [] + if layer_obj.files_analyzed: + layer_checksum = get_layer_checksum(layer_obj) + file_refs = [get_file_spdxref(filedata, layer_checksum.value) for filedata in layer_obj.files] + # some files are located in different places in the filesystem + # we make sure they don't occur as duplicates in this list + return list(set(file_refs)) + + +def get_layer_package_comment(layer_obj: ImageLayer) -> str: + """Return a package comment string value for a list of NoticeOrigin + objects for the given layer object""" + comment = '' + if "headers" in layer_obj.extension_info.keys(): + for header in layer_obj.extension_info.get("headers"): + comment += header + comment += '\n' + if not layer_obj.origins.is_empty(): + for notice_origin in layer_obj.origins.origins: + comment += content.print_notices(notice_origin, '', '\t') + return comment + + +def get_spdx_package_from_layer(layer_obj: ImageLayer, spdx_version: str) -> Tuple[SpdxPackage, List[Relationship]]: + """Given a layer object and the SPDX version, return an SPDX Package representation + of the layer and the list of CONTAINS relationships to all files in that layer. + The analyzed files will go in a separate part of the document. 
+ If the version is not SPDX-2.2, we can omit NoAssertions in most cases.""" + + comment = get_layer_package_comment(layer_obj) + verification_code = get_layer_verification_code(layer_obj) if layer_obj.files_analyzed else None + + layer_licenses = get_layer_licenses(layer_obj) + license_info_from_files = [] + if layer_licenses: + # Use the layer LicenseRef in the list instead of license expression + for lic in layer_licenses: + license_info_from_files.append(get_license_ref(lic)) # TODO: potential bug here that converts valid expressions to LicenseRef- identifiers + license_info_from_files = [Licensing().parse(lic) for lic in license_info_from_files] + + layer_spdx_id = get_layer_spdxref(layer_obj) + package = SpdxPackage( + spdx_id=layer_spdx_id, + name=os.path.basename(layer_obj.tar_file), + version=layer_obj.layer_index, + supplier=SpdxNoAssertion(), + file_name=layer_obj.tar_file, + download_location=SpdxNone(), + files_analyzed=bool(layer_obj.files_analyzed), + verification_code=verification_code, + checksums=[get_layer_checksum(layer_obj)], + license_concluded=SpdxNoAssertion() if spdx_version == "SPDX-2.2" else None, + license_declared=SpdxNoAssertion() if spdx_version == "SPDX-2.2" else None, + copyright_text=SpdxNoAssertion() if spdx_version == "SPDX-2.2" else None, + comment=comment if comment else None, + license_info_from_files=license_info_from_files, + ) + + relationships = [ + Relationship(layer_spdx_id, RelationshipType.CONTAINS, file_ref) + for file_ref in get_layer_file_data_list(layer_obj) + ] + + return package, relationships + + +def get_layer_licenses(layer_obj: ImageLayer) -> List[str]: + """Return a list of unique licenses from the files analyzed + in the layer object. It is assumed that the files were analyzed and + there should be some license expressions. 
If there are not, an empty list + is returned""" + licenses = set() + for filedata in layer_obj.files: + # we will use the SPDX license expressions here as they will be + # valid SPDX license identifiers + if filedata.licenses: + for lic in list(set(filedata.licenses)): + licenses.add(lic) + return list(licenses) + + +def get_layer_verification_code(layer_obj: ImageLayer) -> Optional[PackageVerificationCode]: + """Calculate the verification code from the files in an image layer. This + assumes that layer_obj.files_analyzed is True. The implementation follows + the algorithm in the SPDX spec v 2.2 which requires SHA1 to be used to + calculate the checksums of the file and the final verification code""" + sha1_list = [] + for filedata in layer_obj.files: + filesha = filedata.get_checksum('sha1') + if not filesha: + # we cannot create a verification code, hence file generation + # is aborted + logger.critical( + 'File %s does not have a sha1 checksum. Failed to generate ' + 'an SPDX report', filedata.path) + return None + sha1_list.append(filesha) + sha1_list.sort() + sha1s = ''.join(sha1_list) + verification_code = hashlib.sha1(sha1s.encode('utf-8')).hexdigest() # nosec + return PackageVerificationCode(verification_code) + + +def get_layer_checksum(layer_obj: ImageLayer) -> Checksum: + return Checksum( + ChecksumAlgorithm[layer_obj.checksum_type.upper()], + layer_obj.checksum + ) diff --git a/tern/formats/spdx/make_spdx_model.py b/tern/formats/spdx/make_spdx_model.py new file mode 100644 index 00000000..a54b10fd --- /dev/null +++ b/tern/formats/spdx/make_spdx_model.py @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 VMWare, Inc. All Rights Reserved. 
# SPDX-License-Identifier: BSD-2-Clause

"""
Functions to create an SPDX model instance from a list of Images or an ImageLayer
"""

from typing import List

from spdx_tools.spdx.model import Document, CreationInfo, Actor, ActorType, Relationship, RelationshipType, \
    PackagePurpose

from tern.classes.image_layer import ImageLayer
from tern.classes.template import Template
from tern.formats.spdx.constants import DOCUMENT_ID, DOCUMENT_NAME, DATA_LICENSE, DOCUMENT_COMMENT, \
    LICENSE_LIST_VERSION, CREATOR_NAME, DOCUMENT_NAME_SNAPSHOT, DOCUMENT_NAMESPACE_SNAPSHOT
from tern.formats.spdx.file_helpers import get_spdx_file_list_from_layer
from tern.formats.spdx.general_helpers import get_current_timestamp, get_uuid
from tern.classes.image import Image
from tern.formats.spdx.spdx_template import SPDX
from tern.formats.spdx.file_helpers import get_spdx_file_list_from_image
from tern.formats.spdx.image_helpers import get_image_extracted_licenses, \
    get_spdx_package_from_image, get_document_namespace
from tern.formats.spdx.layer_helpers import get_spdx_package_from_layer, get_image_layer_relationships, get_layer_extracted_licenses
from tern.formats.spdx.package_helpers import get_spdx_package_list_from_image, get_layer_packages_list

from tern.utils.general import get_git_rev_or_version


def _get_tool_creators() -> List[Actor]:
    """Return the document creator list: a single TOOL actor naming this tool.

    Both document builders go through this helper so that CREATOR_NAME is
    always filled through the ``version`` keyword. The snapshot builder used
    to pass the version positionally, which cannot fill a named field.
    NOTE(review): assumes CREATOR_NAME uses a named ``{version}`` placeholder,
    as the image builder's keyword call implies -- confirm in constants.
    """
    tool_version = get_git_rev_or_version()[1]
    return [Actor(actor_type=ActorType.TOOL, name=CREATOR_NAME.format(version=tool_version))]


def make_spdx_model(image_obj_list: List[Image], spdx_version: str) -> Document:
    """Given a list of tern Images, return a complete SPDX document generated from them.

    Only the first image of the list is used (an empty list raises
    IndexError). spdx_version is the full SPDX version string stored in the
    document; when it is "SPDX-2.3" the image package is additionally marked
    with the CONTAINER primary package purpose."""
    template = SPDX()
    # we still don't know how SPDX documents could represent multiple
    # images. Hence, we will assume only one image is analyzed and the
    # input is a list of length 1
    image_obj = image_obj_list[0]

    creation_info = CreationInfo(
        spdx_version=spdx_version,
        spdx_id=DOCUMENT_ID,
        name=DOCUMENT_NAME.format(image_name=image_obj.name),
        document_namespace=get_document_namespace(image_obj),
        creators=_get_tool_creators(),
        created=get_current_timestamp(),
        license_list_version=LICENSE_LIST_VERSION,
        data_license=DATA_LICENSE,
        document_comment=DOCUMENT_COMMENT,
    )
    container_package = get_spdx_package_from_image(image_obj, template, spdx_version)
    if spdx_version == "SPDX-2.3":
        container_package.primary_package_purpose = PackagePurpose.CONTAINER

    # Package order in the document: image, then one package per layer,
    # then the packages found inside the layers
    packages = [container_package]
    image_layer_relationships = get_image_layer_relationships(image_obj)

    layer_file_relationships = []
    for layer in image_obj.layers:
        package, relationships = get_spdx_package_from_layer(layer, spdx_version)
        packages.append(package)
        layer_file_relationships.extend(relationships)

    packages.extend(get_spdx_package_list_from_image(image_obj, template, spdx_version))
    files = get_spdx_file_list_from_image(image_obj, template, spdx_version)
    extracted_licensing_info = get_image_extracted_licenses(image_obj)

    return Document(
        creation_info=creation_info,
        packages=packages,
        files=files,
        relationships=image_layer_relationships + layer_file_relationships,
        extracted_licensing_info=extracted_licensing_info
    )


def make_spdx_model_snapshot(layer_obj: ImageLayer, template: Template, spdx_version: str) -> Document:
    """This returns the SPDX document containing just the packages found at
    container build time.

    The same timestamp is used for the creation date, the document namespace
    and the file list. Every package of the layer is linked to the document
    with a DESCRIBES relationship."""
    timestamp = get_current_timestamp()

    creation_info = CreationInfo(
        spdx_version=spdx_version,
        spdx_id=DOCUMENT_ID,
        name=DOCUMENT_NAME_SNAPSHOT,
        document_namespace=DOCUMENT_NAMESPACE_SNAPSHOT.format(timestamp=timestamp, uuid=get_uuid()),
        creators=_get_tool_creators(),
        created=timestamp,
        license_list_version=LICENSE_LIST_VERSION,
        data_license=DATA_LICENSE,
        document_comment=DOCUMENT_COMMENT,
    )

    # Add the layer's packages, if they exist
    packages = get_layer_packages_list(layer_obj, template, spdx_version)
    describes_relationships = [
        Relationship(DOCUMENT_ID, RelationshipType.DESCRIBES, package.spdx_id)
        for package in packages
    ]

    # Add the layer's files, if they exist
    files = get_spdx_file_list_from_layer(layer_obj, template, timestamp, spdx_version)

    # Add package and file extracted license texts, if they exist
    extracted_licensing_info = get_layer_extracted_licenses(layer_obj)

    return Document(
        creation_info=creation_info,
        packages=packages,
        files=files,
        relationships=describes_relationships,
        extracted_licensing_info=extracted_licensing_info
    )
# SPDX-License-Identifier: BSD-2-Clause

"""
Package level helpers for SPDX document generator
"""
from typing import List

from packageurl import PackageURL
from spdx_tools.spdx.model import Package as SpdxPackage, SpdxNoAssertion, SpdxNone, Actor, ActorType, \
    ExternalPackageRef, ExternalPackageRefCategory

from tern.classes.image import Image
from tern.classes.image_layer import ImageLayer
from tern.classes.package import Package
from tern.classes.template import Template
from tern.formats.spdx.general_helpers import get_package_license_declared, get_package_spdxref
from tern.report import content


SOURCE_PACKAGE_COMMENT = 'This package refers to a source package associated' \
    ' with one or more binary packages installed in this container. ' \
    'This source package is NOT installed in the container but may be useful' \
    ' for CVE lookups.'


def _get_declared_license(package: Package, mapping: dict) -> str:
    """Return the raw declared license string for a package.

    The mapped 'PackageLicenseDeclared' value is used, except for debian
    packages where the licenses from the copyright text are joined into one
    license string."""
    declared_lic = mapping['PackageLicenseDeclared']
    if package.pkg_format == 'deb':
        declared_lic = ', '.join(package.pkg_licenses)
    return declared_lic


def get_layer_packages_list(layer: ImageLayer, template: Template, spdx_version: str) -> List[SpdxPackage]:
    """Given a layer object, an SPDX template object and the SPDX version,
    return a list of SPDX Packages, one for each uniquely referenced package
    in the layer. Source packages are not included."""
    spdx_packages = []
    seen_refs = set()
    for package in layer.packages:
        # get_package_spdxref returns a (package ref, source ref) pair; only
        # the package ref becomes the SPDX ID, so that is what must be unique
        pkg_ref, _ = get_package_spdxref(package)
        if pkg_ref not in seen_refs:
            spdx_packages.append(get_spdx_package_from_tern_package(package, template, spdx_version))
            seen_refs.add(pkg_ref)
    return spdx_packages


def get_package_comment(package: Package) -> str:
    """Given a package object, return a PackageComment string for a list of
    NoticeOrigin objects. An empty string is returned if there are none."""
    notice_origins = package.origins.origins
    if not notice_origins:
        return ''
    return ''.join(content.print_notices(notice_origin, '', '\t')
                   for notice_origin in notice_origins)


def get_spdx_package_from_source_package(package: Package, template: Template, spdx_version: str) -> SpdxPackage:
    """Given a package object, its SPDX template mapping and the SPDX version,
    return an SPDX Package of the associated source package.
    The analyzed files will go in a separate part of the SPDX document."""
    mapping = package.to_dict(template)

    _, src_ref = get_package_spdxref(package)
    declared_lic = _get_declared_license(package, mapping)

    return SpdxPackage(
        spdx_id=src_ref,
        name=mapping['SourcePackageName'],
        version=mapping['SourcePackageVersion'] if mapping['SourcePackageVersion'] else 'NOASSERTION',
        download_location=mapping['PackageDownloadLocation'] if mapping['PackageDownloadLocation'] else SpdxNoAssertion(),
        files_analyzed=False,
        # the concluded license is only written out for SPDX 2.2 documents
        license_concluded=SpdxNoAssertion() if spdx_version == "SPDX-2.2" else None,
        license_declared=get_package_license_declared(declared_lic),
        copyright_text=mapping['PackageCopyrightText'] if mapping['PackageCopyrightText'] else SpdxNone(),
        comment=SOURCE_PACKAGE_COMMENT,
    )


def get_spdx_package_from_tern_package(package: Package, template: Template, spdx_version: str) -> SpdxPackage:
    """Given a package object, its SPDX template mapping and the SPDX version,
    return an SPDX Package. The analyzed files will go in a separate part of the SPDX document."""
    mapping = package.to_dict(template)

    if mapping['PackageSupplier']:
        supplier = Actor(ActorType.ORGANIZATION, mapping['PackageSupplier'])
    else:
        supplier = SpdxNoAssertion()

    # Build the purl once; an empty string means no purl could be created
    external_ref = []
    purl = get_purl(package)
    if purl:
        external_ref.append(ExternalPackageRef(
            ExternalPackageRefCategory.PACKAGE_MANAGER,
            "purl",
            purl
        ))

    pkg_ref, _ = get_package_spdxref(package)
    declared_lic = _get_declared_license(package, mapping)

    return SpdxPackage(
        spdx_id=pkg_ref,
        name=mapping['PackageName'],
        version=mapping['PackageVersion'] if mapping['PackageVersion'] else 'NOASSERTION',
        supplier=supplier,
        download_location=mapping['PackageDownloadLocation'] if mapping['PackageDownloadLocation'] else SpdxNoAssertion(),
        files_analyzed=False,
        # the concluded license is only written out for SPDX 2.2 documents
        license_concluded=SpdxNoAssertion() if spdx_version == "SPDX-2.2" else None,
        license_declared=get_package_license_declared(declared_lic),
        copyright_text=mapping['PackageCopyrightText'] if mapping['PackageCopyrightText'] else SpdxNone(),
        external_references=external_ref,
        comment=get_package_comment(package),
    )


def get_spdx_package_list_from_image(image_obj: Image, template: Template, spdx_version: str) -> List[SpdxPackage]:
    """Given an image object and the template object for SPDX, return a list
    of SPDX Packages for each of the packages in the image.
    The SPDX spec for packages requires:
        name
        versionInfo
        downloadLocation"""
    packages = []
    package_refs = set()

    for layer in image_obj.layers:
        for package in layer.packages:
            # Each package object can contribute two SPDX Packages: the
            # package itself and its source package. Both kinds of refs
            # share one set so every SPDX ID is emitted only once
            pkg_ref, src_ref = get_package_spdxref(package)
            if pkg_ref not in package_refs and package.name:
                packages.append(get_spdx_package_from_tern_package(package, template, spdx_version))
                package_refs.add(pkg_ref)
            if src_ref and src_ref not in package_refs:
                packages.append(get_spdx_package_from_source_package(
                    package, template, spdx_version))
                package_refs.add(src_ref)
    return packages


purl_types_with_namespaces = [
    'deb',
    'rpm',
    'apk',
    'alpm'
]


def get_purl(package_obj: Package) -> str:
    """Return a purl string for a given package.

    An empty string is returned when the package has no name or when
    PackageURL rejects the values with a ValueError."""
    if not package_obj.name:
        # a purl cannot be built without a name; bail out before .lower()
        return ''
    purl_type = package_obj.pkg_format
    purl_namespace = ''
    if purl_type in purl_types_with_namespaces and package_obj.pkg_supplier:
        # https://github.com/package-url/purl-spec/pull/214
        supplier_words = package_obj.pkg_supplier.split(' ')
        if supplier_words[0] == "VMware" and len(supplier_words) > 1:
            # for VMware the word following the vendor name is the namespace
            purl_namespace = supplier_words[1].lower()
        else:
            # also covers a supplier that is just "VMware", which has no
            # second word to take
            purl_namespace = supplier_words[0].lower()
    try:
        # TODO- this might need adjusting for alpm. Currently can't test on M1
        purl = PackageURL(purl_type, purl_namespace, package_obj.name.lower(), package_obj.version,
                          qualifiers={'arch': package_obj.arch if package_obj.arch else ''})
        return purl.to_string()
    except ValueError:
        return ''
# SPDX-License-Identifier: BSD-2-Clause

"""
Handle imports and logging for different SPDX formats
"""
import io
import logging
from typing import Callable, IO, List, Optional

from spdx_tools.spdx.model import Document
from spdx_tools.spdx.writer.json.json_writer import write_document_to_stream as json_writer
from spdx_tools.spdx.writer.yaml.yaml_writer import write_document_to_stream as yaml_writer
from spdx_tools.spdx.writer.xml.xml_writer import write_document_to_stream as xml_writer
from spdx_tools.spdx.writer.tagvalue.tagvalue_writer import write_document_to_stream as tv_writer
from spdx_tools.spdx.writer.rdf.rdf_writer import write_document_to_stream as rdf_writer

from tern.classes.image import Image
from tern.classes.image_layer import ImageLayer
from tern.formats.spdx.spdx_template import SPDX
from tern.formats.spdx.make_spdx_model import make_spdx_model, make_spdx_model_snapshot
from tern.utils import constants

logger = logging.getLogger(constants.logger_name)

SPDX_VERSION_MAPPING = {
    "2.2": "SPDX-2.2",
    "2.3": "SPDX-2.3",
}

# version used when the caller does not request one
_DEFAULT_SPDX_VERSION = "2.2"


def _resolve_spdx_version(spdx_version: Optional[str]) -> str:
    """Return the full SPDX version string for a short version like "2.3".

    None selects the default version. A version that is not a key of
    SPDX_VERSION_MAPPING raises ValueError."""
    if spdx_version is None:
        spdx_version = _DEFAULT_SPDX_VERSION

    if spdx_version not in SPDX_VERSION_MAPPING:
        raise ValueError(f"SPDX version {spdx_version} is not supported by tern.")

    return SPDX_VERSION_MAPPING[spdx_version]


def get_spdx_from_image_list(image_obj_list: List[Image], spdx_format: str, spdx_version: Optional[str]) -> str:
    """Given a list of image objects and an SPDX format and version,
    return the serialized string of the SPDX document representation in that format and version
    generated from the image objects.

    WARNING: This assumes that the list consists of one image or the base
    image and a stub image, in which case, the information in the stub
    image is not applicable in the SPDX case as it is an empty image
    object with no metadata as nothing got built.

    For the sake of SPDX, an image is a 'Package' which 'CONTAINS' each
    layer which is also a 'Package' which 'CONTAINS' the real Packages

    Raises ValueError for an unsupported version or an unknown format."""
    logger.debug("Generating SPDX %s document...", spdx_format)

    spdx_document: Document = make_spdx_model(image_obj_list, _resolve_spdx_version(spdx_version))

    return convert_document_to_serialized_string(spdx_document, spdx_format)


def get_spdx_from_layer(layer: ImageLayer, spdx_format: str, spdx_version: Optional[str]) -> str:
    """Given an Image layer and an SPDX format and version,
    returns the serialized string of the SPDX document containing package and file information
    at container build time.

    Raises ValueError for an unsupported version or an unknown format."""
    logger.debug("Generating SPDX %s snapshot document...", spdx_format)

    full_version = _resolve_spdx_version(spdx_version)
    template = SPDX()
    spdx_document: Document = make_spdx_model_snapshot(layer, template, full_version)

    return convert_document_to_serialized_string(spdx_document, spdx_format)


def convert_document_to_serialized_string(spdx_document: Document, spdx_format: str) -> str:
    """Given an SPDX document and a format, return the serialized string of the
    representation of that document in the specified format.

    Known formats are "JSON", "YAML", "XML", "RDF-XML" and "Tag-Value";
    anything else raises ValueError."""
    if spdx_format == "RDF-XML":
        # the RDF writer is handed a byte stream, unlike the text writers
        return get_serialized_rdf_document_string(spdx_document)

    text_writers = {
        "JSON": json_writer,
        "YAML": yaml_writer,
        "XML": xml_writer,
        "Tag-Value": tv_writer,
    }
    if spdx_format in text_writers:
        return get_serialized_document_string(spdx_document, text_writers[spdx_format])

    raise ValueError(f"{spdx_format} is not a known SPDX format.")


def get_serialized_document_string(spdx_document: Document, writer_function: Callable[[Document, IO[str]], str]) -> str:
    """Run writer_function on an in-memory text stream and return what it
    wrote. The writer is called with validate=False and its own return value
    is ignored."""
    with io.StringIO() as stream:
        writer_function(spdx_document, stream, validate=False)
        return stream.getvalue()


def get_serialized_rdf_document_string(spdx_document: Document) -> str:
    """Run the RDF writer on an in-memory byte stream and return what it
    wrote, decoded as UTF-8. The writer is called with validate=False."""
    with io.BytesIO() as stream:
        rdf_writer(spdx_document, stream, validate=False)
        return stream.getvalue().decode("UTF-8")
# SPDX-License-Identifier: BSD-2-Clause

"""
SPDX JSON document generator
"""
from typing import List

from tern.classes.image import Image
from tern.classes.image_layer import ImageLayer
from tern.formats import generator
from tern.formats.spdx.spdx_formats_helper import get_spdx_from_image_list, get_spdx_from_layer


class SpdxJSON(generator.Generate):
    """Report generator that serializes SPDX documents as JSON."""

    def generate(self, image_obj_list: List[Image], spdx_version: str, print_inclusive=False) -> str:
        """Return the SPDX JSON document string for the given image list.

        spdx_version is passed through to the SPDX helper unchanged.
        print_inclusive is accepted but not used here."""
        return get_spdx_from_image_list(image_obj_list, "JSON", spdx_version)

    def generate_layer(self, layer: ImageLayer, spdx_version: str) -> str:
        """Return the SPDX JSON snapshot document string for a single layer,
        containing package and file information at container build time."""
        return get_spdx_from_layer(layer, "JSON", spdx_version)
# SPDX-License-Identifier: BSD-2-Clause

"""
SPDX RDF-XML document generator
"""
from typing import List

from tern.classes.image import Image
from tern.classes.image_layer import ImageLayer
from tern.formats import generator
from tern.formats.spdx.spdx_formats_helper import get_spdx_from_image_list, get_spdx_from_layer


class SpdxRDF(generator.Generate):
    """Report generator that serializes SPDX documents as RDF-XML."""

    def generate(self, image_obj_list: List[Image], spdx_version: str, print_inclusive=False) -> str:
        """Return the SPDX RDF-XML document string for the given image list.

        print_inclusive is accepted but not used here."""
        return get_spdx_from_image_list(
            image_obj_list,
            spdx_format="RDF-XML",
            spdx_version=spdx_version,
        )

    def generate_layer(self, layer: ImageLayer, spdx_version: str) -> str:
        """Return the SPDX RDF-XML snapshot document string for one layer."""
        return get_spdx_from_layer(
            layer,
            spdx_format="RDF-XML",
            spdx_version=spdx_version,
        )
# SPDX-License-Identifier: BSD-2-Clause

"""
SPDX Tag-Value document generator
"""
from typing import List

from tern.classes.image import Image
from tern.classes.image_layer import ImageLayer
from tern.formats import generator
from tern.formats.spdx.spdx_formats_helper import get_spdx_from_image_list, get_spdx_from_layer


class SpdxTagValue(generator.Generate):
    """Report generator that serializes SPDX documents as Tag-Value text."""

    def generate(self, image_obj_list: List[Image], spdx_version: str, print_inclusive=False) -> str:
        """Return the SPDX Tag-Value document string for the given image list.

        print_inclusive is accepted but not used here."""
        return get_spdx_from_image_list(
            image_obj_list,
            spdx_format="Tag-Value",
            spdx_version=spdx_version,
        )

    def generate_layer(self, layer: ImageLayer, spdx_version: str) -> str:
        """Return the SPDX Tag-Value snapshot document string for one layer."""
        return get_spdx_from_layer(
            layer,
            spdx_format="Tag-Value",
            spdx_version=spdx_version,
        )
# SPDX-License-Identifier: BSD-2-Clause

"""
SPDX XML document generator
"""
from typing import List

from tern.classes.image import Image
from tern.classes.image_layer import ImageLayer
from tern.formats import generator
from tern.formats.spdx.spdx_formats_helper import get_spdx_from_image_list, get_spdx_from_layer


class SpdxXML(generator.Generate):
    """Report generator that serializes SPDX documents as XML."""

    def generate(self, image_obj_list: List[Image], spdx_version: str, print_inclusive=False) -> str:
        """Return the SPDX XML document string for the given image list.

        print_inclusive is accepted but not used here."""
        return get_spdx_from_image_list(
            image_obj_list,
            spdx_format="XML",
            spdx_version=spdx_version,
        )

    def generate_layer(self, layer: ImageLayer, spdx_version: str) -> str:
        """Return the SPDX XML snapshot document string for one layer."""
        return get_spdx_from_layer(
            layer,
            spdx_format="XML",
            spdx_version=spdx_version,
        )
# SPDX-License-Identifier: BSD-2-Clause

"""
SPDX YAML document generator
"""
from typing import List

from tern.classes.image import Image
from tern.classes.image_layer import ImageLayer
from tern.formats import generator
from tern.formats.spdx.spdx_formats_helper import get_spdx_from_image_list, get_spdx_from_layer


class SpdxYAML(generator.Generate):
    """Report generator that serializes SPDX documents as YAML."""

    def generate(self, image_obj_list: List[Image], spdx_version: str, print_inclusive=False) -> str:
        """Return the SPDX YAML document string for the given image list.

        print_inclusive is accepted but not used here."""
        return get_spdx_from_image_list(
            image_obj_list,
            spdx_format="YAML",
            spdx_version=spdx_version,
        )

    def generate_layer(self, layer: ImageLayer, spdx_version: str) -> str:
        """Return the SPDX YAML snapshot document string for one layer."""
        return get_spdx_from_layer(
            layer,
            spdx_format="YAML",
            spdx_version=spdx_version,
        )
# SPDX-License-Identifier: BSD-2-Clause

from tern.classes.template import Template


class SPDX(Template):
    '''SPDX Template class.
    Each method returns a mapping from a tern property name to the SPDX
    tag-value document key used for it'''

    def file_data(self):
        '''Mapping for file data properties'''
        return {
            'path': 'FileName',
            'short_file_type': 'FileType',
        }

    def package(self):
        '''Mapping for package properties, including the source package'''
        return {
            'name': 'PackageName',
            'version': 'PackageVersion',
            'pkg_license': 'PackageLicenseDeclared',
            'copyright': 'PackageCopyrightText',
            'download_url': 'PackageDownloadLocation',
            'src_name': 'SourcePackageName',
            'src_version': 'SourcePackageVersion',
            'pkg_supplier': 'PackageSupplier',
        }

    def image_layer(self):
        '''Mapping for image layer properties'''
        return {
            'tar_file': 'PackageFileName',
        }

    def image(self):
        '''Mapping for image properties'''
        return {
            'name': 'PackageName',
            'tag': 'PackageVersion',
        }
+# SPDX-License-Identifier: BSD-2-Clause diff --git a/tern/formats/spdx/spdxjson/consumer.py b/tern/formats/spdx_legacy/spdxjson/consumer.py similarity index 100% rename from tern/formats/spdx/spdxjson/consumer.py rename to tern/formats/spdx_legacy/spdxjson/consumer.py diff --git a/tern/formats/spdx/spdxjson/file_helpers.py b/tern/formats/spdx_legacy/spdxjson/file_helpers.py similarity index 98% rename from tern/formats/spdx/spdxjson/file_helpers.py rename to tern/formats/spdx_legacy/spdxjson/file_helpers.py index af50532d..897e2def 100644 --- a/tern/formats/spdx/spdxjson/file_helpers.py +++ b/tern/formats/spdx_legacy/spdxjson/file_helpers.py @@ -7,7 +7,7 @@ File level helpers for SPDX JSON document generator """ -from tern.formats.spdx import spdx_common +from tern.formats.spdx_legacy import spdx_common def get_file_contributors(filedata): diff --git a/tern/formats/spdx/spdxjson/formats.py b/tern/formats/spdx_legacy/spdxjson/formats.py similarity index 100% rename from tern/formats/spdx/spdxjson/formats.py rename to tern/formats/spdx_legacy/spdxjson/formats.py diff --git a/tern/formats/spdx_legacy/spdxjson/generator.py b/tern/formats/spdx_legacy/spdxjson/generator.py new file mode 100644 index 00000000..9436c894 --- /dev/null +++ b/tern/formats/spdx_legacy/spdxjson/generator.py @@ -0,0 +1,173 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 VMware, Inc. All Rights Reserved. 
+# SPDX-License-Identifier: BSD-2-Clause + +""" +SPDX JSON document generator +""" + +import json +import logging + +from tern.formats.spdx_legacy.spdx import SPDX +from tern.formats.spdx_legacy import spdx_common +from tern.utils.general import get_git_rev_or_version +from tern.utils import constants +from tern.formats.spdx_legacy.spdxjson import formats as json_formats +from tern.formats.spdx_legacy.spdxjson import image_helpers as mhelpers +from tern.formats.spdx_legacy.spdxjson import file_helpers as fhelpers +from tern.formats.spdx_legacy.spdxjson import layer_helpers as lhelpers +from tern.formats.spdx_legacy.spdxjson import package_helpers as phelpers +from tern.formats import generator + + +# global logger +logger = logging.getLogger(constants.logger_name) + + +def get_document_namespace(image_obj): + '''Given the image object, return a unique SPDX document uri. + This is a combination of the tool name and version, the image name + and the uuid''' + return json_formats.document_namespace.format( + version=get_git_rev_or_version()[1], image=image_obj.name, + uuid=spdx_common.get_uuid()) + + +def get_document_namespace_snapshot(timestamp): + """Get the document namespace for the container image snapshot. 
We pass + the timestamp so we have a common timestamp across the whole document""" + return json_formats.document_namespace_snapshot.format( + timestamp=timestamp, uuid=spdx_common.get_uuid()) + + +def get_document_dict(image_obj, template): + '''Return document info as a dictionary''' + docu_dict = { + 'SPDXID': json_formats.spdx_id, + 'spdxVersion': json_formats.spdx_version, + 'creationInfo': { + 'created': json_formats.created.format( + timestamp=spdx_common.get_timestamp()), + 'creators': [json_formats.creator.format( + version=get_git_rev_or_version()[1])], + 'licenseListVersion': json_formats.license_list_version, + }, + 'name': json_formats.document_name.format(image_name=image_obj.name), + 'dataLicense': json_formats.data_license, + 'comment': json_formats.document_comment, + 'documentNamespace': get_document_namespace(image_obj), + 'documentDescribes': [spdx_common.get_image_spdxref(image_obj)], + 'packages': [ + # image dict will be a single dictionary + # we'll add the layer and package dicts later if available + mhelpers.get_image_dict(image_obj, template)], + 'relationships': lhelpers.get_image_layer_relationships(image_obj) + } + + # Add list of layer dictionaries to packages list + docu_dict['packages'] += lhelpers.get_layers_list(image_obj) + + # Add list of package dictionaries to packages list, if they exist + pkgs_dict_list = phelpers.get_packages_list(image_obj, template) + if pkgs_dict_list: + docu_dict['packages'] += pkgs_dict_list + + # Add list of file dictionaries, if they exist + files = fhelpers.get_files_list(image_obj, template) + if files: + docu_dict['files'] = files + + # Add package and file extracted license texts, if they exist + extracted_texts = mhelpers.get_image_extracted_licenses(image_obj) + if extracted_texts: + docu_dict['hasExtractedLicensingInfos'] = extracted_texts + + return docu_dict + + +def get_document_dict_snapshot(layer_obj, template): + """This is the SPDX document containing just the packages found at + 
container build time""" + timestamp = spdx_common.get_timestamp() + docu_dict = { + 'SPDXID': json_formats.spdx_id, + 'spdxVersion': json_formats.spdx_version, + 'creationInfo': { + 'created': json_formats.created.format( + timestamp=timestamp), + 'creators': [json_formats.creator.format( # SPDX expects a list of creator strings, as in get_document_dict + version=get_git_rev_or_version()[1])], + 'licenseListVersion': json_formats.license_list_version, + }, + 'name': json_formats.document_name_snapshot, + 'dataLicense': json_formats.data_license, + 'comment': json_formats.document_comment, + 'documentNamespace': get_document_namespace_snapshot(timestamp), + # we will list all the unique package SPDXRefs here later + 'documentDescribes': [], + # these will contain just the packages as there is no layer + # package at the time of this document's generation + 'packages': [], + # we will fill in document to package ref relationships later + 'relationships': [] + } + + # Add list of package dictionaries to packages list, if they exist + pkgs_dict_list, package_refs = phelpers.get_layer_packages_list( + layer_obj, template) + if pkgs_dict_list: + docu_dict['packages'] = pkgs_dict_list + docu_dict['documentDescribes'] = package_refs + + # add the package relationships to the document + for ref in package_refs: + docu_dict['relationships'].append(json_formats.get_relationship_dict( + json_formats.spdx_id, ref, 'DESCRIBES')) + + # Add list of file dictionaries, if they exist + files = fhelpers.get_layer_files_list(layer_obj, template, timestamp) + if files: + docu_dict['files'] = files + + # Add package and file extracted license texts, if they exist + extracted_texts = lhelpers.get_layer_extracted_licenses(layer_obj) + if extracted_texts: + docu_dict['hasExtractedLicensingInfos'] = extracted_texts + + return docu_dict + + +class SpdxJSON(generator.Generate): + def generate(self, image_obj_list, spdx_version: str, print_inclusive=False): + '''Generate an SPDX document + WARNING: This assumes that the list consists of one image or the base + 
image and a stub image, in which case, the information in the stub + image is not applicable in the SPDX case as it is an empty image + object with no metadata as nothing got built. + The whole document should be stored in a dictionary which can be + converted to JSON and dumped to a file using the write_report function + in report.py. + + For the sake of SPDX, an image is a 'Package' which 'CONTAINS' each + layer which is also a 'Package' which 'CONTAINS' the real Packages''' + logger.debug("Generating SPDX JSON document...") + # the legacy generator cannot honour a requested SPDX version; say so instead of silently ignoring it + if spdx_version is not None: + logger.warning("The SPDX version parameter is not supported in this implementation." + " Please use the new SPDX implementation.") + + # we still don't know how SPDX documents could represent multiple + # images. Hence we will assume only one image is analyzed and the + # input is a list of length 1 + image_obj = image_obj_list[0] + + template = SPDX() + report = get_document_dict(image_obj, template) + + return json.dumps(report) + + def generate_layer(self, layer, spdx_version: str): # pylint: disable=unused-argument + """Generate an SPDX document containing package and file information + at container build time""" + logger.debug("Generating SPDX JSON document...") + template = SPDX() + report = get_document_dict_snapshot(layer, template) + return json.dumps(report) diff --git a/tern/formats/spdx/spdxjson/image_helpers.py b/tern/formats/spdx_legacy/spdxjson/image_helpers.py similarity index 96% rename from tern/formats/spdx/spdxjson/image_helpers.py rename to tern/formats/spdx_legacy/spdxjson/image_helpers.py index d95fca4f..a5c92f29 100644 --- a/tern/formats/spdx/spdxjson/image_helpers.py +++ b/tern/formats/spdx_legacy/spdxjson/image_helpers.py @@ -7,8 +7,8 @@ Helper functions for image level JSON SPDX document dictionaries Images for SPDX act like a Package """ -from tern.formats.spdx import spdx_common -from tern.formats.spdx.spdxjson import formats as json_formats +from tern.formats.spdx_legacy import spdx_common +from tern.formats.spdx_legacy.spdxjson import formats as json_formats def get_image_extracted_licenses(image_obj): diff --git 
a/tern/formats/spdx/spdxjson/layer_helpers.py b/tern/formats/spdx_legacy/spdxjson/layer_helpers.py similarity index 98% rename from tern/formats/spdx/spdxjson/layer_helpers.py rename to tern/formats/spdx_legacy/spdxjson/layer_helpers.py index b57d87fe..a0999c21 100644 --- a/tern/formats/spdx/spdxjson/layer_helpers.py +++ b/tern/formats/spdx_legacy/spdxjson/layer_helpers.py @@ -11,8 +11,8 @@ import os from tern.utils import constants -from tern.formats.spdx import spdx_common -from tern.formats.spdx.spdxjson import formats as json_formats +from tern.formats.spdx_legacy import spdx_common +from tern.formats.spdx_legacy.spdxjson import formats as json_formats from tern.report import content diff --git a/tern/formats/spdx/spdxjson/package_helpers.py b/tern/formats/spdx_legacy/spdxjson/package_helpers.py similarity index 97% rename from tern/formats/spdx/spdxjson/package_helpers.py rename to tern/formats/spdx_legacy/spdxjson/package_helpers.py index 1f96af95..5fb66415 100644 --- a/tern/formats/spdx/spdxjson/package_helpers.py +++ b/tern/formats/spdx_legacy/spdxjson/package_helpers.py @@ -8,8 +8,8 @@ """ from tern.report import content -from tern.formats.spdx import spdx_common -from tern.formats.spdx.spdxjson import formats as json_formats +from tern.formats.spdx_legacy import spdx_common +from tern.formats.spdx_legacy.spdxjson import formats as json_formats def get_package_comment(package): diff --git a/tern/formats/spdx_legacy/spdxtagvalue/__init__.py b/tern/formats/spdx_legacy/spdxtagvalue/__init__.py new file mode 100644 index 00000000..a048eda0 --- /dev/null +++ b/tern/formats/spdx_legacy/spdxtagvalue/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019 VMware, Inc. All Rights Reserved. 
+# SPDX-License-Identifier: BSD-2-Clause diff --git a/tern/formats/spdx/spdxtagvalue/file_helpers.py b/tern/formats/spdx_legacy/spdxtagvalue/file_helpers.py similarity index 98% rename from tern/formats/spdx/spdxtagvalue/file_helpers.py rename to tern/formats/spdx_legacy/spdxtagvalue/file_helpers.py index 7f0d06a0..e4e86492 100644 --- a/tern/formats/spdx/spdxtagvalue/file_helpers.py +++ b/tern/formats/spdx_legacy/spdxtagvalue/file_helpers.py @@ -7,7 +7,7 @@ File level helpers for SPDX tag-value document generator """ -from tern.formats.spdx import spdx_common +from tern.formats.spdx_legacy import spdx_common def get_file_comment(filedata): diff --git a/tern/formats/spdx/spdxtagvalue/formats.py b/tern/formats/spdx_legacy/spdxtagvalue/formats.py similarity index 100% rename from tern/formats/spdx/spdxtagvalue/formats.py rename to tern/formats/spdx_legacy/spdxtagvalue/formats.py diff --git a/tern/formats/spdx_legacy/spdxtagvalue/generator.py b/tern/formats/spdx_legacy/spdxtagvalue/generator.py new file mode 100755 index 00000000..11ccc5ea --- /dev/null +++ b/tern/formats/spdx_legacy/spdxtagvalue/generator.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +# +# Copyright (c) 2019-2020 VMware, Inc. All Rights Reserved. +# SPDX-License-Identifier: BSD-2-Clause + +""" +SPDX document generator +""" + +import datetime +import logging + +from tern.formats.spdx_legacy.spdx import SPDX +from tern.formats.spdx_legacy import spdx_common +from tern.utils.general import get_git_rev_or_version +from tern.utils import constants +from tern.formats.spdx_legacy.spdxtagvalue import formats as spdx_formats +from tern.formats.spdx_legacy.spdxtagvalue import image_helpers as mhelpers +from tern.formats import generator + + +# global logger +logger = logging.getLogger(constants.logger_name) + + +def get_document_namespace(image_obj): + '''Given the image object, return a unique SPDX document uri. 
+ This is a combination of the tool name and version, the image name + and the uuid''' + return spdx_formats.document_namespace.format( + version=get_git_rev_or_version()[1], image=image_obj.name, + uuid=spdx_common.get_uuid()) + + +def get_document_block(image_obj): + '''Return document related SPDX tag-values''' + block = spdx_formats.spdx_version + '\n' + block = block + spdx_formats.data_license + '\n' + block = block + spdx_formats.spdx_id + '\n' + block = block + spdx_formats.document_name.format( + image_name=image_obj.name) + '\n' + block = block + get_document_namespace(image_obj) + '\n' + block = block + spdx_formats.license_list_version + '\n' + block = block + spdx_formats.creator.format( + version=get_git_rev_or_version()[1]) + '\n' + block = block + spdx_formats.created.format( # timezone-aware UTC "now"; utcnow() is deprecated since Python 3.12 + timestamp=datetime.datetime.now(datetime.timezone.utc).strftime( + "%Y-%m-%dT%H:%M:%SZ")) + '\n' + block = block + spdx_formats.document_comment + '\n' + return block + + +class SpdxTagValue(generator.Generate): + def generate(self, image_obj_list, spdx_version: str, print_inclusive=False): + '''Generate an SPDX document + WARNING: This assumes that the list consists of one image or the base + image and a stub image, in which case, the information in the stub + image is not applicable in the SPDX case as it is an empty image + object with no metadata as nothing got built. + The whole document should be stored in a string which can be written + to a file using the write_report function in report.py + First convert the image object into a dictionary. The dictionary + should be in this form: + image:{ + origins: [...] 
+ layers: [ + {origins: [...], + packages: [ + {name: package1,..origins: [...]}, + {name: package2,..origins: [...]},..], + files: [ + {name: file1,..origins: [...]}, + {name: file2,..origins: [...]},..]} + ...]} + Then convert this into a flat format starting from top to bottom + So: + ## image + List all the tag-values here + make a PackageComment: + + ## relationships + spdx-ref CONTAINS layer1 + spdx-ref CONTAINS layer2 + ... + + ## layer1 + List all the tag-values here + make a PackageComment here + + ## if layer1 has files analyzed + ### extra package info here + ### file level information here + + ## if not then package relationships + spdx-ref CONTAINS package1 + spdx-ref CONTAINS package2 + .... + + # layer2 + tag-values + PackageComment + + # relationships + spdx-ref HAS_PREREQUISITE layer1 + spdx-ref CONTAINS package3 + spdx-ref CONTAINS package4 + + .... + + # package1 + tag-values + PackageComment + + # package2 + + # package3 + + # package4 + + + Everything in Origins can be in a tag-value format as + PackageComment: + + For the sake of SPDX, an image is a 'Package' which 'CONTAINS' each + layer which is also a 'Package' which 'CONTAINS' the real Package''' + logger.debug("Generating SPDX document...") + if spdx_version is not None: + logger.warning("The SPDX version parameter is not supported in this implementation." + " Please use the new SPDX implementation.") + report = '' + + # we still don't know how SPDX documents could represent multiple + # images. 
Hence we will assume only one image is analyzed and the + # input is a list of length 1 + image_obj = image_obj_list[0] + template = SPDX() + + # first part is the document tag-value + # this doesn't change at all + report += get_document_block(image_obj) + '\n' + + # this is the image part + # this will bring in layer and package information + report += mhelpers.get_image_block(image_obj, template) + '\n' + + return report + + def generate_layer(self, layer_obj, spdx_version: str): # pylint: disable=unused-argument + """Currently Unsupported. Provide debug statement""" + logger.critical("Generating SPDX tag-value documents at container " + "build time is currently unsupported") diff --git a/tern/formats/spdx/spdxtagvalue/image_helpers.py b/tern/formats/spdx_legacy/spdxtagvalue/image_helpers.py similarity index 95% rename from tern/formats/spdx/spdxtagvalue/image_helpers.py rename to tern/formats/spdx_legacy/spdxtagvalue/image_helpers.py index 8e4164d0..c59b50a9 100644 --- a/tern/formats/spdx/spdxtagvalue/image_helpers.py +++ b/tern/formats/spdx_legacy/spdxtagvalue/image_helpers.py @@ -7,10 +7,10 @@ Helper functions for image level SPDX document blocks Images for SPDX act like a Package """ -from tern.formats.spdx.spdxtagvalue import formats as spdx_formats -from tern.formats.spdx.spdxtagvalue import layer_helpers as lhelpers -from tern.formats.spdx.spdxtagvalue import package_helpers as phelpers -from tern.formats.spdx import spdx_common +from tern.formats.spdx_legacy.spdxtagvalue import formats as spdx_formats +from tern.formats.spdx_legacy.spdxtagvalue import layer_helpers as lhelpers +from tern.formats.spdx_legacy.spdxtagvalue import package_helpers as phelpers +from tern.formats.spdx_legacy import spdx_common def get_image_layer_relationships(image_obj): diff --git a/tern/formats/spdx/spdxtagvalue/layer_helpers.py b/tern/formats/spdx_legacy/spdxtagvalue/layer_helpers.py similarity index 96% rename from tern/formats/spdx/spdxtagvalue/layer_helpers.py rename to 
tern/formats/spdx_legacy/spdxtagvalue/layer_helpers.py index abfc2ae8..3ff2ff4f 100644 --- a/tern/formats/spdx/spdxtagvalue/layer_helpers.py +++ b/tern/formats/spdx_legacy/spdxtagvalue/layer_helpers.py @@ -10,10 +10,10 @@ import logging import os -from tern.formats.spdx import spdx_common -from tern.formats.spdx.spdxtagvalue import formats as spdx_formats +from tern.formats.spdx_legacy import spdx_common +from tern.formats.spdx_legacy.spdxtagvalue import formats as spdx_formats from tern.utils import constants -from tern.formats.spdx.spdxtagvalue import file_helpers as fhelpers +from tern.formats.spdx_legacy.spdxtagvalue import file_helpers as fhelpers from tern.report import content # global logger diff --git a/tern/formats/spdx/spdxtagvalue/package_helpers.py b/tern/formats/spdx_legacy/spdxtagvalue/package_helpers.py similarity index 97% rename from tern/formats/spdx/spdxtagvalue/package_helpers.py rename to tern/formats/spdx_legacy/spdxtagvalue/package_helpers.py index 503bb9f4..d2523bfa 100644 --- a/tern/formats/spdx/spdxtagvalue/package_helpers.py +++ b/tern/formats/spdx_legacy/spdxtagvalue/package_helpers.py @@ -7,8 +7,8 @@ Helper functions for packages in SPDX document """ -from tern.formats.spdx.spdxtagvalue import formats as spdx_formats -from tern.formats.spdx import spdx_common +from tern.formats.spdx_legacy.spdxtagvalue import formats as spdx_formats +from tern.formats.spdx_legacy import spdx_common from tern.report import content diff --git a/tern/formats/yaml/generator.py b/tern/formats/yaml/generator.py index e75ebdec..a4c7204e 100644 --- a/tern/formats/yaml/generator.py +++ b/tern/formats/yaml/generator.py @@ -6,12 +6,15 @@ """ YAML document generator """ +import logging import yaml from tern.report import formats +from tern.utils import constants from tern.utils.general import get_git_rev_or_version from tern.formats import generator +logger = logging.getLogger(constants.logger_name) def print_yaml_report(image): '''Given an image object, create a 
yaml report''' @@ -21,14 +24,20 @@ def print_yaml_report(image): class YAML(generator.Generate): - def generate(self, image_obj_list, print_inclusive=False): + def generate(self, image_obj_list, spdx_version: str, print_inclusive=False): '''Generate a yaml report''' + if spdx_version is not None: + logger.warning("The SPDX version parameter is not supported for YAML.") + report = formats.disclaimer_yaml.format( version_info=get_git_rev_or_version()) for image in image_obj_list: report = report + print_yaml_report(image) return report - def generate_layer(self, layer): + def generate_layer(self, layer, spdx_version: str): """Generate a yaml report for the given layer object""" + if spdx_version is not None: + logger.warning("The SPDX version parameter is not supported for YAML.") + return yaml.dump(layer.to_dict(), default_flow_style=False) diff --git a/tern/report/report.py b/tern/report/report.py index 9e84eedc..1a175794 100644 --- a/tern/report/report.py +++ b/tern/report/report.py @@ -39,11 +39,11 @@ def generate_report(args, *images): '''Generate a report based on the command line options''' if args.report_format: return generate_format( - images, args.report_format, args.print_inclusive) - return generate_format(images, 'default', args.print_inclusive) + images, args.report_format, args.spdx_version, args.print_inclusive) + return generate_format(images, 'default', args.spdx_version, args.print_inclusive) -def generate_format(images, format_string, print_inclusive): +def generate_format(images, format_string, spdx_version, print_inclusive): '''Generate a report in the format of format_string given one or more image objects. 
Here we will load the required module and run the generate function to get back a report''' @@ -53,12 +53,12 @@ def generate_format(images, format_string, print_inclusive): name=format_string, invoke_on_load=True, ) - return mgr.driver.generate(images, print_inclusive) + return mgr.driver.generate(images, spdx_version, print_inclusive) except NoMatches: return None -def generate_format_layer(layer, format_string): +def generate_format_layer(layer, format_string, spdx_version): """Generate a report in the format of format_string given one layer object. This is similar to the generate_format function""" try: @@ -67,7 +67,7 @@ def generate_format_layer(layer, format_string): name=format_string, invoke_on_load=True, ) - return mgr.driver.generate_layer(layer) + return mgr.driver.generate_layer(layer, spdx_version) except NoMatches: return None @@ -96,9 +96,9 @@ def report_layer(layer, args): """Generate a report for one layer object""" layer_report = "" if args.report_format: - layer_report = generate_format_layer(layer, args.report_format) + layer_report = generate_format_layer(layer, args.report_format, args.spdx_version) else: - layer_report = generate_format_layer(layer, 'default') + layer_report = generate_format_layer(layer, 'default', args.spdx_version) if not layer_report: logger.error( "Unable to generate %s report type for layer", args.report_format)