diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..adb9f61 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +cli/scai-venv diff --git a/README.md b/README.md new file mode 100644 index 0000000..22cca8a --- /dev/null +++ b/README.md @@ -0,0 +1,70 @@ +# Software Supply Chain Attribute Integrity + +The Software Supply Chain Attribute Integrity, or SCAI (pronounced "sky"), specification proposes a data +format for capturing functional attribute and integrity information about software artifacts and their supply +chain. SCAI data can be associated with executable binaries, statically- or dynamically-linked libraries, +software packages, container images, software toolchains, and compute environments. + +As such, SCAI is intended to be implemented as part of an existing software supply chain attestation +framework by software development tools or services (e.g., builders, CI/CD pipelines, software analysis tools) +seeking to capture more granular information about the attributes and behavior of the software artifacts they +produce. That is, SCAI assumes that implementers will have appropriate processes and tooling in place for +capturing other types of software supply chain metadata, which can be extended to add support for SCAI. + +For more details and examples, see the full [specification document](). + +## Schema + +SCAI provides pluggable [schema](https://github.com/intel-sandbox/mmelara.supply-chain-attribute-integrity/tree/main/schema) to be used in conjunction +with existing software supply chain metadata schema. + +Currently supported frameworks: +* [in-toto attestation](https://github.com/in-toto/attestation/tree/main/spec) + +## Documentation + +All documentation can be found under [docs/](https://github.com/intel-sandbox/mmelara.supply-chain-attribute-integrity/tree/main/docs). + +## Usage + +The general flow is to first generate one or more Attribute +Assertions and then generate a SCAI Report. 
The +[examples](https://github.com/intel-sandbox/mmelara.supply-chain-attribute-integrity/tree/main/examples) show +how SCAI metadata is generated in a few different use cases. + +Note that the CLI tools do not currently generate **signed** +SCAI Reports. + +#### CLI Environment Setup + +To run the SCAI CLI tools and examples, the following packages +are required on a minimal Ubuntu system. We assume Ubuntu 20.04. + +``` +sudo apt install git python3 python3-dev python3-venv virtualenv build-essential +``` + +Then, set up the Python virtualenv for the SCAI CLI tools. + +``` +make -C cli +``` + +#### Basic CLI Invocation + +``` +cd cli +source scai-venv/bin/activate +``` + +To generate a basic attribute assertion: +``` +./scai-attr-assertion -a ATTRIBUTE -o OUTFILE [-e EVIDENCE_FILE] [-c CONDITIONS_JSON] +``` + +To generate a basic SCAI Report with in-toto Link metadata: +``` +./scai-report -i INPUT_ARTIFACT [...] -a ASSERTION_FILE [...] -c COMMAND +``` + +For a full list of CLI tool options, invoke with the `-h` option. \ No newline at end of file diff --git a/cli/Makefile b/cli/Makefile new file mode 100644 index 0000000..1ae7437 --- /dev/null +++ b/cli/Makefile @@ -0,0 +1,43 @@ +# Copyright 2020 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +DSTDIR=scai-venv + +PY_VERSION=${shell python3 --version | sed 's/Python \(3\.[0-9]\).*/\1/'} +PYTHON_DIR=$(DSTDIR)/lib/python$(PY_VERSION)/site-packages/ + +all : environment + +$(PYTHON_DIR) : + @echo INSTALL SCAI API + python3 -m venv $(DSTDIR) + . 
$(abspath $(DSTDIR)/bin/activate) && pip install --upgrade pip + . $(abspath $(DSTDIR)/bin/activate) && pip install --upgrade wheel + . $(abspath $(DSTDIR)/bin/activate) && pip install --upgrade in-toto + . $(abspath $(DSTDIR)/bin/activate) && pip install ../python + +$(DSTDIR) : + @echo CREATE SCAI VENV DIRECTORY $(DSTDIR) + mkdir -p $(DSTDIR) + +environment: $(DSTDIR) $(PYTHON_DIR) + +clean: + @echo REMOVE SCAI VENV AND PYTHON LIB DIRS + @rm -rf $(DSTDIR) __pycache__ + @cd ../python; rm -rf build dist *.egg-info + +.phony : all +.phony : clean +.phony : environment diff --git a/cli/scai-attr-assertion b/cli/scai-attr-assertion new file mode 100755 index 0000000..8a537e7 --- /dev/null +++ b/cli/scai-attr-assertion @@ -0,0 +1,116 @@ +#!/usr/bin/env python +# Copyright 2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +""" + + scai-attr-assertion + + + Marcela Melara + + + See LICENSE for licensing information. + + + Command-line interface for generating ITE-9 SCAI Attribute Assertions. 
import argparse
import subprocess
import json
import os
import attr

from scai.attribute_assertion import AttributeAssertion
from scai.object_reference import ObjectReference
from scai.utility import load_json_file

from securesystemslib.util import get_file_hashes


def Main():
    """Build one SCAI Attribute Assertion from CLI options and write it as JSON.

    The assertion always carries the attribute keyword (-a). Conditions (-c),
    an evidence reference (-e) and a target reference (-t) are added only when
    the corresponding option is given. The evidence and target dictionaries
    are passed through ObjectReference.read() and the final dictionary through
    AttributeAssertion.read() before anything is written, so a malformed
    assertion is rejected instead of being stored.

    The result is written to OUT_DIR/OUTFILE; '.json' is appended to OUTFILE
    when it is missing and OUT_DIR defaults to the current directory.
    """
    parser = argparse.ArgumentParser(allow_abbrev=False)

    parser.add_argument('-a', '--attribute', help='Attribute keyword', type=str, required=True)
    parser.add_argument('-c', '--conditions', help='Conditions string (arbitrary JSON encoding)', type=json.loads)
    parser.add_argument('-e', '--evidence', help='Filename of json-encoded evidence descriptor', type=str)
    parser.add_argument('-t', '--target', help='Filename of target artifact', type=str)
    parser.add_argument('--target-name', help='Name for target artifact', type=str)
    parser.add_argument('--target-type', help='Type of target artifact', type=str)
    parser.add_argument('--target-location', help='Location URI of target artifact', type=str)
    parser.add_argument('-o', '--outfile', help='Filename to write out this assertion object', type=str, required=True)
    parser.add_argument('--target-dir', help='Directory for searching target files', type=str)
    parser.add_argument('--evidence-dir', help='Directory for searching evidence files', type=str)
    parser.add_argument('--conditions-dir', help='Directory for searching conditions files', type=str)
    parser.add_argument('--out-dir', help='Directory for storing generated files', type=str)
    parser.add_argument('--pretty-print', help='Flag to pretty-print all json before storing', action='store_true')

    options = parser.parse_args()

    # Create assertion dict
    assertion_dict = {"attribute": options.attribute}

    # Read conditions (already decoded from JSON by argparse)
    if options.conditions:
        assertion_dict['conditions'] = options.conditions  # FIXME: this should be a file

    # Read evidence
    if options.evidence:
        evidence_dict = load_json_file(options.evidence, search_path=options.evidence_dir)

        # this validates the obj ref format
        evidence = ObjectReference.read(evidence_dict)
        assertion_dict['evidence'] = evidence.to_dict()

    # Create target reference (optional)
    if options.target:
        # argparse stores --target-dir as `target_dir`; the original read the
        # non-existent `options.target_dict`, which raised AttributeError
        # whenever -t was used.
        target_dir = options.target_dir or '.'
        target_file = target_dir + '/' + options.target

        target_dict = {
            'name': options.target_name or options.target,
            'digest': get_file_hashes(target_file),
        }

        if options.target_location:
            # ObjectReference reads the `locationUri` keyword; any other
            # spelling of the key is ignored and the location is lost.
            target_dict['locationUri'] = options.target_location

        if options.target_type:
            target_dict['objectType'] = options.target_type

        # this validates the obj ref format
        target = ObjectReference.read(target_dict)
        assertion_dict['target'] = target.to_dict()

    # this validates the assertion format
    assertion = AttributeAssertion.read(assertion_dict)
    assertion_dict = assertion.to_dict()

    # Write out the assertions file
    out_dir = options.out_dir or '.'

    outfile = options.outfile
    if not outfile.endswith('.json'):
        outfile += '.json'

    # Compact JSON unless pretty-printing was requested; this matches what
    # scai-report emits for its statements.
    indent = 4 if options.pretty_print else None

    assertion_file = out_dir + '/' + outfile
    with open(assertion_file, 'w+') as afile:
        afile.write(json.dumps(assertion_dict, indent=indent))

    print('Wrote attribute assertion to %s' % assertion_file)


if __name__ == "__main__":
    Main()
import argparse
import subprocess
import json
import os
import shlex
import attr

from scai.report import Report, SCAI_REPORT_TYPE
from scai.attribute_assertion import AttributeAssertion
from scai.object_reference import ObjectReference
from scai.utility import load_json_file, parse_pem_file

from in_toto.runlib import record_artifacts_as_dict, in_toto_run


def _write_statement(path, statement, pretty_print):
    """Serialize `statement` as JSON into `path` (4-space indent if pretty_print)."""
    with open(path, 'w+') as rfile:
        rfile.write(json.dumps(statement, indent=4 if pretty_print else None))


def Main():
    """Run a command under in-toto and emit SCAI and Link in-toto statements.

    Steps:
      1. Execute the -c command through in_toto_run(), recording the given
         input/output artifacts, and convert the resulting Link to a dict.
      2. Load and validate the producer (-p) and subject (-a) attribute
         assertion files from the metadata directory and build a SCAI Report.
      3. For every product recorded in the Link, write a single-subject
         statement with the SCAI Report as predicate to
         METADATA_DIR/PRODUCT-scai.st.
      4. Write one statement covering all products, with the Link as
         predicate, to METADATA_DIR/invocation-intoto.st.

    The statements are unsigned.
    """
    parser = argparse.ArgumentParser(allow_abbrev=False)

    parser.add_argument('-i', '--input-artifacts', type=str, help='Filenames of input artifacts', nargs='+', required=True)
    parser.add_argument('-o', '--output-artifacts', type=str, help='Filenames of output artifacts', nargs='+')
    parser.add_argument('-a', '--attribute-assertions', help='Filename of JSON files for output artifact attribute assertions', nargs='+', type=str, required=True)
    parser.add_argument('-p', '--producer-attributes', help='Filename of JSON files for producer attribute assertions', nargs='+', type=str)
    parser.add_argument('-c', '--command', help='Command to invoke the tool', type=str, required=True)
    parser.add_argument('--metadata-dir', help='Directory for searching/storing metadata files', type=str)
    parser.add_argument('--artifact-dir', help='Directory for searching/storing input/output artifacts', type=str)
    parser.add_argument('--pretty-print', help='Flag to pretty-print all json before storing', action='store_true')

    options = parser.parse_args()

    print('Generating SCAI Attribute Report for operation: %s' % options.command)

    # assume all SCAI verifiable objects are in the same location
    metadata_dir = options.metadata_dir or '.'

    # Execute the command and get the operation metadata
    # in in-toto Link format.
    # shlex.split keeps quoted arguments together and does not produce empty
    # arguments for repeated spaces, unlike a plain split(' ').
    command_list = [
        os.path.expanduser(arg) if arg.startswith('~') else arg
        for arg in shlex.split(options.command)
    ]

    intoto_link = in_toto_run('test', options.input_artifacts, options.output_artifacts, command_list, record_streams=True, base_path=options.artifact_dir)
    intoto_link_dict = attr.asdict(intoto_link.signed)
    intoto_link_dict['_type'] = 'https://in-toto.io/Link/v0.2'

    # Generate the producer metadata for the SCAI Report
    producer_metadata = {}

    # Load the producer attribute assertions
    if options.producer_attributes:
        producer_assertions_list = []
        for a in options.producer_attributes:
            assertion_dict = load_json_file(a, metadata_dir)
            AttributeAssertion.read(assertion_dict)  # validation only
            # Files written by scai-attr-assertion carry no '_type' key
            # (AttributeAssertion.to_dict() filters it out), so a plain
            # `del` would raise KeyError on them.
            assertion_dict.pop('_type', None)
            producer_assertions_list.append(assertion_dict)

        producer_metadata['attributes'] = producer_assertions_list

    # Load the subject attribute assertions
    subject_assertions_list = []
    for a in options.attribute_assertions:
        assertion_dict = load_json_file(a, metadata_dir)
        AttributeAssertion.read(assertion_dict)  # validation only
        subject_assertions_list.append(assertion_dict)

    scai_report = Report(subjectAttributes=subject_assertions_list, producer=producer_metadata)
    scai_report_dict = attr.asdict(scai_report)
    del scai_report_dict['_type']

    # The products become statement subjects and are not repeated inside
    # the Link predicate.
    products = intoto_link_dict.pop('products')

    # Write the SCAI metadata to file in in-toto attestation format.
    # Though in-toto supports bundling multiple subjects per statement,
    # we want single-subject statements: one file per product.
    # NOTE(review): the loop extent was reconstructed from that comment;
    # confirm against the original indentation.
    for pname, pdigest in products.items():
        statement = {
            '_type': 'https://in-toto.io/Statement/v0.1',
            'subject': [{'name': pname, 'digest': pdigest}],
            'predicateType': SCAI_REPORT_TYPE,
            'predicate': scai_report_dict,
        }

        report_file = metadata_dir + '/' + pname + '-scai.st'
        _write_statement(report_file, statement, options.pretty_print)

        print('Wrote in-toto statement for SCAI predicate: %s' % report_file)

    # Generate the ITE-9 in-toto Link statement for the command invocation
    link_type = intoto_link_dict.pop('_type')
    statement = {
        '_type': 'https://in-toto.io/Statement/v0.1',
        'subject': [{'name': pname, 'digest': pdigest} for pname, pdigest in products.items()],
        'predicateType': link_type,
        'predicate': intoto_link_dict,
    }

    report_file = metadata_dir + '/invocation-intoto.st'
    _write_statement(report_file, statement, options.pretty_print)

    print('Wrote in-toto statement for Link predicate: %s' % report_file)


if __name__ == "__main__":
    Main()
+|0----------25-----------50----------75---------100| +**************************************************** +[STATUS] Computing callgraph +|0----------25-----------50----------75---------100| +**************************************************** +[STATUS] Topologically sorting 43 functions +|0----------25-----------50----------75---------100| +**************************************************** +[STATUS] Preparing for source code analysis +|0----------25-----------50----------75---------100| +**************************************************** +[STATUS] Running Sigma analysis +[STATUS] Computing node costs +|0----------25-----------50----------75---------100| +**************************************************** +[STATUS] Running analysis +|0----------25-----------50----------75---------100| +**************************************************** +[STATUS] Exporting summaries +|0----------25-----------50----------75---------100| +**************************************************** +Analysis summary report: +------------------------ +Files analyzed : 10 Total + Python 3 : 6 + Text : 4 +Total LoC input to cov-analyze : 345 +Functions analyzed : 43 +Paths analyzed : 109 +Time taken by analysis : 00:00:14 +Defect occurrences found : 0 + diff --git a/docs/obj-reference.md b/docs/obj-reference.md new file mode 100644 index 0000000..d0e129f --- /dev/null +++ b/docs/obj-reference.md @@ -0,0 +1,61 @@ +# Object Reference + +## Motivation + +An Object Reference is designed to be a size-efficient representation of any object, artifact or metadata, +that may be included in any SW supply chain metadata. The Object Reference must allow both humans and automated +verifier programs to easily parse, identify and locate the referenced objects. + +For more details, see Section 3 of the [SCAI v0.1 specification](). + +## Schema + +``` +{ + "name": "", + "digest": { "": "VALUE", "...": "..." 
}, + "locationURI": "", + "objectType": "" +} + +``` + +`name` _string, required_ + +> Human-readable identifier to distinguish the referenced object. +> +> The semantics are up to the producer and consumer. Because consumers may evaluate +> the name against a policy, the name SHOULD be stable between Attestations. +> +> Use: Object lookups. + +`digest` _object ([DigestSet](https://github.com/in-toto/Attestation/blob/main/spec/field_types.md#DigestSet)), required_ + +> Collection of one or more cryptographic digests of the referenced object. +> +> Two DigestSets are considered matching if ANY of the fields match. The +> producer and consumer must agree on acceptable algorithms. If there are no +> overlapping algorithms, the object is considered not matching. +> +> Use: Integrity checks and policy evaluation. + +`locationURI` _string ([ResourceURI](https://github.com/in-toto/Attestation/blob/main/spec/field_types.md#ResourceURI)), optional_ + +> URI to the location of the referenced object. +> +> Acceptable locations (web server, local, git etc.) are up to the producer +> and consumer. To enable a consumer to automatically validate the +> referenced object, the locationURI SHOULD resolve to the object. +> +> Use: Locating and downloading the object matching the `digest`. + +`objectType` _string, optional_ + +> Indicates the type of referenced object. +> +> Acceptable object type formats are up to the producer +> and consumer. Typically, the objectType for an artifact will be its file type. +> The objectType for a metadata object will commonly be a +> data format or schema identifier. +> +> Use: Provide hint about object type, enable type-specific validation. \ No newline at end of file diff --git a/examples/README.md b/examples/README.md new file mode 100644 index 0000000..e81fc57 --- /dev/null +++ b/examples/README.md @@ -0,0 +1,9 @@ +#SCAI Examples + +This directory contains examples of a few basic use cases for SCAI metadata. 
+ +## gcc Hello World +This example shows the SCAI metadata produced when building +a simple executable binary with gcc. + +Go to [gcc-helloworld](./gcc-helloworld) \ No newline at end of file diff --git a/examples/gcc-helloworld/.gitignore b/examples/gcc-helloworld/.gitignore new file mode 100644 index 0000000..9bee72d --- /dev/null +++ b/examples/gcc-helloworld/.gitignore @@ -0,0 +1,2 @@ +hello-world +metadata/* diff --git a/examples/gcc-helloworld/README.md b/examples/gcc-helloworld/README.md new file mode 100644 index 0000000..b8c1315 --- /dev/null +++ b/examples/gcc-helloworld/README.md @@ -0,0 +1,15 @@ +# GCC Hello World Compilation Example + +This example generates a basic SCAI Attribute Assertion +about binaries built with gcc, and a SCAI Report with +in-toto Link metadata for the gcc compiler invocation on a +hello-world.c source file. + +Assuming you have followed the general setup [instructions](../../README.md#Usage), use the script to +run the example: + +``` +./run-example.sh +``` + +The resulting metadata will be stored in the `metadata/` directory. 
\ No newline at end of file diff --git a/examples/gcc-helloworld/hello-world.c b/examples/gcc-helloworld/hello-world.c new file mode 100644 index 0000000..9f9068d --- /dev/null +++ b/examples/gcc-helloworld/hello-world.c @@ -0,0 +1,6 @@ +#include + +int main(int argc, char *argv[]) { + printf("Hello, world!"); + return 0; +} diff --git a/examples/gcc-helloworld/run-example.sh b/examples/gcc-helloworld/run-example.sh new file mode 100755 index 0000000..59649ab --- /dev/null +++ b/examples/gcc-helloworld/run-example.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +# Copyright 2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +SCAI_DIR=~/supply-chain-attribute-integrity +CLI_DIR=${SCAI_DIR}/cli +EXAMPLE_DIR=${SCAI_DIR}/examples/gcc-helloworld + +# ----------------------------------------------------------------- +# Run gcc hello-world example +# ----------------------------------------------------------------- + +mkdir -p ${EXAMPLE_DIR}/metadata + +source ${CLI_DIR}/scai-venv/bin/activate + +echo GENERATE STACK PROTECTION SCAI ATTRIBUTE ASSERTION + +${CLI_DIR}/scai-attr-assertion -a "WITH_STACK_PROTECTION" -c '{"flags": "-fstack-protector*"}' -o stack-protection-assertion.json --out-dir ${EXAMPLE_DIR}/metadata --pretty-print + +echo GENERATE SCAI REPORT FOR GCC COMPILATION + +GCC_CMD="gcc -fstack-protector -o ${EXAMPLE_DIR}/hello-world ${EXAMPLE_DIR}/hello-world.c" + +${CLI_DIR}/scai-report -i hello-world.c -o hello-world --artifact-dir ${EXAMPLE_DIR} -a stack-protection-assertion.json --metadata-dir ${EXAMPLE_DIR}/metadata --pretty-print -c "${GCC_CMD}" diff --git a/fuzzing/.coverage b/fuzzing/.coverage new file mode 100644 index 0000000..03b5ed6 Binary files /dev/null and b/fuzzing/.coverage differ diff --git a/fuzzing/fuzz-output.txt b/fuzzing/fuzz-output.txt new file mode 100644 index 0000000..12174bd --- /dev/null +++ b/fuzzing/fuzz-output.txt @@ -0,0 +1,41 @@ +INFO: Instrumenting scai.report +INFO: Instrumenting attr +INFO: Instrumenting attr.converters +INFO: 
Instrumenting attr._compat +INFO: Instrumenting attr._make +INFO: Instrumenting attr._config +INFO: Instrumenting attr.setters +INFO: Instrumenting attr.exceptions +INFO: Instrumenting attr.filters +INFO: Instrumenting attr.validators +INFO: Instrumenting attr._cmp +INFO: Instrumenting attr._funcs +INFO: Instrumenting attr._version_info +INFO: Instrumenting attr._next_gen +INFO: Instrumenting securesystemslib.formats +INFO: Instrumenting calendar +INFO: Instrumenting securesystemslib.schema +INFO: Instrumenting in_toto +INFO: Instrumenting in_toto.log +INFO: Instrumenting in_toto.settings +INFO: Instrumenting in_toto.models +INFO: Instrumenting in_toto.models.common +INFO: Instrumenting scai.attribute_assertion +INFO: Instrumenting scai.object_reference +INFO: Using built-in libfuzzer +WARNING: Failed to find function "__sanitizer_acquire_crash_state". +WARNING: Failed to find function "__sanitizer_print_stack_trace". +WARNING: Failed to find function "__sanitizer_set_death_callback". +INFO: Running with entropic power schedule (0xFF, 100). +INFO: Seed: 4033311005 +INFO: A corpus is not provided, starting from an empty corpus +#2 INITED exec/s: 0 rss: 40Mb +WARNING: no interesting inputs were found so far. Is the code instrumented for coverage? 
+This may also happen if the target rejected all inputs we tried so far +#524288 pulse corp: 1/1b lim: 5212 exec/s: 174762 rss: 40Mb +#1048576 pulse corp: 1/1b lim: 8192 exec/s: 104857 rss: 40Mb +#2097152 pulse corp: 1/1b lim: 8192 exec/s: 87381 rss: 40Mb +#3904947 NEW cov: 48 ft: 48 corp: 2/4b lim: 8192 exec/s: 86776 rss: 40Mb L: 3/3 MS: 5 ChangeByte-ChangeBit-CopyPart-CopyPart-InsertByte- +#4194304 pulse cov: 48 ft: 48 corp: 2/4b lim: 8192 exec/s: 82241 rss: 40Mb +#8388608 pulse cov: 48 ft: 48 corp: 2/4b lim: 8192 exec/s: 64527 rss: 40Mb +Done 10000000 in 161 second(s) \ No newline at end of file diff --git a/fuzzing/run-fuzz.sh b/fuzzing/run-fuzz.sh new file mode 100755 index 0000000..05f881a --- /dev/null +++ b/fuzzing/run-fuzz.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# Copyright 2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +SCAI_DIR=~/supply-chain-attribute-integrity +VENV=${SCAI_DIR}/cli/scai-venv/bin/activate + +if [ ! -f "$VENV" ]; then + echo "Need to setup SCAI virtualenv first. Please run `make -C cli` from the SCAI root directory first." 
#!/usr/bin/env python
# Copyright 2022 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import sys
import atheris
import json
from collections.abc import Mapping
import securesystemslib.exceptions

with atheris.instrument_imports():
    from scai.report import Report
    from scai.attribute_assertion import AttributeAssertion
    from scai.object_reference import ObjectReference


def TestOneInput(input_bytes):
    """Fuzz entry point: feed one JSON object to every SCAI `read` parser.

    Inputs that are not valid JSON, or whose top-level value is not a
    mapping, are discarded. A FormatError is the documented way for a parser
    to reject bad data, so it is swallowed; any other exception propagates
    to the fuzzer as a finding.
    """
    fdp = atheris.FuzzedDataProvider(input_bytes)
    data = fdp.ConsumeUnicode(sys.maxsize)

    try:
        json_dict = json.loads(data)
    except json.JSONDecodeError:
        return

    if not isinstance(json_dict, Mapping):
        return

    # Each parser gets its own try block. With a single shared block, a
    # FormatError from ObjectReference.read() prevented the remaining two
    # parsers from ever seeing the input.
    for read in (ObjectReference.read, AttributeAssertion.read, Report.read):
        try:
            read(json_dict)
        except securesystemslib.exceptions.FormatError:
            pass


atheris.Setup(sys.argv, TestOneInput)
atheris.Fuzz()
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__import__('pkg_resources').declare_namespace('cdi') diff --git a/python/scai/attribute_assertion.py b/python/scai/attribute_assertion.py new file mode 100644 index 0000000..9dc25e4 --- /dev/null +++ b/python/scai/attribute_assertion.py @@ -0,0 +1,158 @@ +#!/usr/bin/env python +# Copyright 2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +""" + + attribute_assertion.py + + + Marcela Melara + + + See LICENSE for licensing information. + + + Provides a class for SCAI attribute assertions. +""" + +import attr +import securesystemslib.formats as SECSYS_FORMATS +import securesystemslib.exceptions +import securesystemslib.schema as SCHEMA +from in_toto.models.common import Signable +from scai.object_reference import SCAI_OBJ_REF_SCHEMA + + +SCAI_ATTRIBUTE_ASSERTION_TYPE = "scai/attribute-assertion/v0.1" + +SCAI_ATTRIBUTE_ASSERTION_SCHEMA = SCHEMA.Object( + object_name = 'SCAI_ATTRIBUTE_ASSERTION_SCHEMA', + attribute = SCHEMA.AnyNonemptyString(), + target = SCHEMA.Optional(SCAI_OBJ_REF_SCHEMA), + conditions = SCHEMA.Optional(SCHEMA.Object()), + evidence = SCHEMA.Optional(SCAI_OBJ_REF_SCHEMA)) + + +@attr.s(repr=False, init=False) +class AttributeAssertion(Signable): + """Attribute Assertions describe functional attributes of a software artifact and its + supply chain, capable of covering the full software stack of the toolchain that + produced the artifact down to the hardware platform. 
Assertions include information + about the conditions under which certain functional attributes arise, as well as + (authenticated) evidence for the asserted attributes. Together, this information + provides a high-integrity description of the functionality of a software artifact and + the integrity of its supply chain, which enables a human or program to determine the + trustworthiness of a given artifact based on specific attributes. + + Fields: + attribute: A string used to succinctly identify a specific attribute of an artifact. + + target: A SCAI-compliant Object Reference dictionary that describes a target + artifact or metadata object to which the asserted attribute applies, i.e.:: + + { + "objectType": "", + "name": "", + "digest": { "": "", ... }, + "locationURI": "" + } + + conditions: An opaque dictionary that describes specific conditions under + which the associated attribute arises. Acceptable conditions formats are up + to the producer and consumer. + + evidence: A SCAI-compliant Object Reference dictionary that describes evidence + for the asserted attribute. Though not required, providing a hint via the objectType + filed of the Reference is highly recommended to facilitate verification. + + """ + _type = attr.ib() + attribute = attr.ib() + target = attr.ib() + conditions = attr.ib() + evidence = attr.ib() + + def __init__(self, **kwargs): + super(AttributeAssertion, self).__init__() + + self._type = SCAI_ATTRIBUTE_ASSERTION_TYPE + self.attribute = kwargs.get("attribute") + self.target = kwargs.get("target", None) + self.conditions = kwargs.get("conditions", None) + self.evidence = kwargs.get("evidence", None) + + self.validate() + + + @property + def type_(self): + """The string SCAI_ATTRIBUTE_ASSERTION_TYPE to identify the SCAI Attribute Assertion type.""" + # NOTE: We expose the type_ attribute in the API documentation instead of + # _type to protect it against modification. 
+ # NOTE: Trailing underscore is used by convention (pep8) to avoid conflict + # with Python's type keyword. + return self._type + + @staticmethod + def read(data): + """Creates a AttributeAssertion object from its dictionary representation. + Arguments: + data: A dictionary with SCAI Attribute Assertion metadata fields. + Raises: + securesystemslib.exceptions.FormatError: Passed data is invalid. + Returns: + The created AttributeAssertion object. + """ + return AttributeAssertion(**data) + + def to_dict(self): + """Creates a dictionary representation from the Attribute Assertion object. + Returns: + The created dictionary with non-null attribute assertion metadata fields. + """ + return attr.asdict(self, filter=lambda attr, value: attr.name != '_type' and value != None) + + def _validate_type(self): + """Private method to check that `_type` is set to SCAI_ATTRIBUTE_ASSERTION_TYPE.""" + if self._type != SCAI_ATTRIBUTE_ASSERTION_TYPE: + raise securesystemslib.exceptions.FormatError( + "Invalid Report: field `_type` must be set to 'scai/attribute-assertion/v0.1', got: {}" + .format(self._type)) + + def _validate_attribute(self): + """Private method to check that `attribute` is a `str`.""" + if not isinstance(self.attribute, str): + raise securesystemslib.exceptions.FormatError( + "Invalid AttributeAssertion: field `attribute` must be of type str, got: {}" + .format(type(self.attribute))) + + def _validate_target(self): + """Private method to check that `target` is a `dict`.""" + if self.target: + if not isinstance(self.target, dict): + raise securesystemslib.exceptions.FormatError( + "Invalid AttributeAssertion: field `target` must be of type dict, got: {}" + .format(type(self.target))) + + SCAI_OBJ_REF_SCHEMA.check_match(self.target) + + def _validate_conditions(self): + """Private method to check that `conditions` is a `dict`.""" + if self.conditions: + if not isinstance(self.conditions, dict): + raise securesystemslib.exceptions.FormatError( + "Invalid 
AttributeAssertion: field `conditions` must be of type dict, got: {}" + .format(type(self.conditions))) + + def _validate_evidence(self): + """Private method to check that `evidence` is a `dict`.""" + if self.evidence: + if not isinstance(self.evidence, dict): + raise securesystemslib.exceptions.FormatError( + "Invalid AttributeAssertion: field `evidence` must be of type dict, got: {}" + .format(type(self.evidence))) + + SCAI_OBJ_REF_SCHEMA.check_match(self.evidence) + + diff --git a/python/scai/object_reference.py b/python/scai/object_reference.py new file mode 100644 index 0000000..d00ae66 --- /dev/null +++ b/python/scai/object_reference.py @@ -0,0 +1,123 @@ +#!/usr/bin/env python + +# Copyright 2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +""" + + object_reference.py + + + Marcela Melara + + + Oct 3, 2022 + + + See LICENSE for licensing information. + + + Provides a class for object references according to the SCAI specification. + A reference is a 4-tuple data structure that describes the name, cryptographic + digest set, resolvable location and type of an object, enabling consumers to + locate and validate the described object. +""" + +import attr +import securesystemslib.formats as SECSYS_FORMATS +import securesystemslib.exceptions +import securesystemslib.schema as SCHEMA +from in_toto.models.common import Signable + + +SCAI_OBJ_REF_SCHEMA = SCHEMA.Object( + object_name = 'SCAI_OBJ_REF_SCHEMA', + name = SECSYS_FORMATS.NAME_SCHEMA, + digest = SECSYS_FORMATS.HASHDICT_SCHEMA, + locationUri = SCHEMA.Optional(SECSYS_FORMATS.URL_SCHEMA), + objectType = SCHEMA.Optional(SCHEMA.AnyString())) + + +@attr.s(repr=False, init=False) +class ObjectReference(Signable): + """An Object Reference is designed to be a size-efficient representation of any + object, artifact or metadata, that may be included in any metadata object. 
The Object + Reference must allow both humans and automated verifier programs to easily parse, + identify and locate the referenced objects. + + Attributes: + name: A string that succinctly identifies the referenced object. + + digest: A dictionary of cryptographic digests of the object (checked against HASHDICT_SCHEMA). + + locationUri: Optional URI string pointing to the location of the object. + + objectType: Optional string indicating the type of referenced object. + """ + name = attr.ib() + digest = attr.ib() + locationUri = attr.ib() + objectType = attr.ib() + + + def __init__(self, **kwargs): + super(ObjectReference, self).__init__() + + self.name = kwargs.get("name") + self.digest = kwargs.get("digest", {}) + self.locationUri = kwargs.get("locationUri", None) + self.objectType = kwargs.get("objectType", None) + + self.validate() + + @staticmethod + def read(data): + """Creates an Object Reference object from its dictionary representation. + Arguments: + data: A dictionary with object reference metadata fields. + Raises: + securesystemslib.exceptions.FormatError: Passed data is invalid. + Returns: + The created ObjectReference object. + """ + return ObjectReference(**data) + + def to_dict(self): + """Creates a dictionary representation from the Object Reference object. + Returns: + The created dictionary with non-null object reference metadata fields. 
+ """ + return attr.asdict(self, filter=lambda attr, value: value != None) + + def _validate_name(self): + """Private method to check that `name` is a `str`.""" + if not isinstance(self.name, str): + raise securesystemslib.exceptions.FormatError( + "Invalid ObjectReference: field `name` must be of type str, got: {}" + .format(type(self.name))) + + def _validate_digest(self): + """Private method to check that `digest` is a `HASHDICT`.""" + if not isinstance(self.digest, dict): + raise securesystemslib.exceptions.FormatError( + "Invalid ObjectReference: field `digest` must be of type dict, got: {}" + .format(type(self.digest))) + SECSYS_FORMATS.HASHDICT_SCHEMA.check_match(self.digest) + + def _validate_locationUri(self): + """Private method to check that `locationUri` is a `URL str`.""" + if self.locationUri: + if not isinstance(self.locationUri, str): + raise securesystemslib.exceptions.FormatError( + "Invalid ObjectReference: field `locationUri` must be of type str, got: {}" + .format(type(self.locationUri))) + + SECSYS_FORMATS.URL_SCHEMA.check_match(self.locationUri) + + def _validate_objectType(self): + """Private method to check that `objectType` is a `str`.""" + if self.objectType: + if not isinstance(self.objectType, str): + raise securesystemslib.exceptions.FormatError( + "Invalid ObjectReference: field `objectType` must be of type str, got: {}" + .format(type(self.objectType))) diff --git a/python/scai/report.py b/python/scai/report.py new file mode 100644 index 0000000..160e01d --- /dev/null +++ b/python/scai/report.py @@ -0,0 +1,141 @@ +#!/usr/bin/env python + +# Copyright 2022 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +""" + + scai.py + + + Marcela Melara + + + Sep 13, 2022 + + + See LICENSE for licensing information. + + + Provides a class for SCAI reports, + which describe a set of assertions about fine-grained attributes of + the subject artifact and its supply chain. 
SCAI Reports are composable with + other supply chain metadata formats such as SLSA. +""" + +import attr +import securesystemslib.formats as SECSYS_FORMATS +import securesystemslib.exceptions +import securesystemslib.schema as SCHEMA +from in_toto.models.common import Signable +from scai.attribute_assertion import SCAI_ATTRIBUTE_ASSERTION_SCHEMA + + +SCAI_REPORT_TYPE = "scai/attribute-report/v0.1" + +SCAI_REPORT_PRODUCER_SCHEMA = SCHEMA.Object( + object_name = "SCAI_REPORT_PRODUCER_SCHEMA", + attributes = SCHEMA.Optional(SCHEMA.ListOf(SCAI_ATTRIBUTE_ASSERTION_SCHEMA)) +) + + +@attr.s(repr=False, init=False) +class Report(Signable): + """A list of SCAI AttributeAssertions about fine-grained attributes of an artifact or + a producer (i.e. its compute environment or platform) in a step of the supply chain. + + Fields: + subjectAttributes: A list of dictionaries of SCAI Attribute Assertions *asserted* about a + subject artifact by the step, i.e.:: + + [{ + "attribute": "", + + "target": A SCAI Object Reference dictionary that describes a target + artifact or metadata object to which the asserted attribute applies. + + "conditions": An opaque dictionary that describes specific conditions under + which the associated attribute arises. Acceptable conditions formats are up + to the producer and consumer. + + "evidence": An opaque dictionary that describes evidence metadata, or references + another metadata object (via a SCAI Object Reference), for the asserted attribute. + It should have at least the "type" (str) entry. + }, + ... ] + producer: + "attributes": A list of dictionaries of SCAI Attribute Assertions *asserted* about the producer of the step. 
+ "operation": + """ + _type = attr.ib() + subjectAttributes = attr.ib() + producer = attr.ib() + + def __init__(self, **kwargs): + super(Report, self).__init__() + + self._type = SCAI_REPORT_TYPE + self.subjectAttributes = kwargs.get("subjectAttributes", []) + self.producer = kwargs.get("producer", None) + + self.validate() + + @property + def type_(self): + """The string "scai/attribute-report/v0.1" to identify the SCAI Report type.""" + # NOTE: We expose the type_ attribute in the API documentation instead of + # _type to protect it against modification. + # NOTE: Trailing underscore is used by convention (pep8) to avoid conflict + # with Python's type keyword. + return self._type + + @staticmethod + def read(data): + """Creates a Report object from its dictionary representation. + + Arguments: + data: A dictionary with SCAI Report metadata fields. + + Raises: + securesystemslib.exceptions.FormatError: Passed data is invalid. + + Returns: + The created Report object. + + """ + return Report(**data) + + def to_dict(self): + """Creates a dictionary representation from the Report object. + Returns: + The created dictionary with non-null report metadata fields. 
+ """ + return attr.asdict(self, filter=lambda attr, value: attr.name != '_type' and value != None) + + def _validate_type(self): + """Private method to check that `_type` is set to SCAI_REPORT_TYPE.""" + if self._type != SCAI_REPORT_TYPE: + raise securesystemslib.exceptions.FormatError( + "Invalid Report: field `_type` must be set to 'scai/attribute-report/v0.1', got: {}" + .format(self._type)) + + def _validate_subjectAttributes(self): + """Private method to check that `subjectAttributes` is a `list`.""" + if not isinstance(self.subjectAttributes, list): + raise securesystemslib.exceptions.FormatError( + "Invalid Report: field `subjectAttributes` must be of type list, got: {}" + .format(type(self.subjectAttributes))) + + for assertion in list(self.subjectAttributes): + SCAI_ATTRIBUTE_ASSERTION_SCHEMA.check_match(assertion) + + def _validate_producer(self): + """Private method to check that `producer` is a `dict`.""" + if self.producer: + if not isinstance(self.producer, dict): + raise securesystemslib.exceptions.FormatError( + "Invalid Report: field `producer` must be of type dict, got: {}" + .format(type(self.producer))) + + SCAI_REPORT_PRODUCER_SCHEMA.check_match(self.producer) + diff --git a/python/scai/utility.py b/python/scai/utility.py new file mode 100644 index 0000000..1acc2be --- /dev/null +++ b/python/scai/utility.py @@ -0,0 +1,60 @@ +#!/usr/bin/env python +# Copyright 2022 Intel Corporation + +import json +import os +import errno + +# ----------------------------------------------------------------- +# ----------------------------------------------------------------- +def find_file_in_path(filename, search_path='.') : + """general utility to search for a file name in a path + :param str filename: name of the file to locate, absolute path ignores search_path + :param list(str) search_path: list of directores where the files may be located + """ + + # os.path.abspath only works for full paths, not relative paths + # this check should catch './abc' + if 
os.path.split(filename)[0] : + if os.path.isfile(filename) : + return filename + raise FileNotFoundError(errno.ENOENT, "file does not exist", filename) + + if search_path: + full_filename = os.path.join(search_path, filename) + if os.path.isfile(full_filename) : + return full_filename + + raise FileNotFoundError(errno.ENOENT, "unable to locate file in search path", filename) + +def load_json_file(filename, search_path): + full_file = find_file_in_path(filename, search_path) + with open(full_file, "r") as rfile : + contents = rfile.read() + contents = contents.rstrip('\0') + return json.loads(contents) + +def load_text_file(filename, search_path): + full_file = find_file_in_path(filename, search_path) + with open(full_file, "r") as rfile : + contents = rfile.read() + contents = contents.rstrip('\0') + return contents + +# read private .pem key file +def parse_pem_file(key_file, search_path): + full_file = find_file_in_path(key_file, search_path) + with open(full_file, 'r') as k: + key = k.read() + assert key.startswith('-----BEGIN EC PRIVATE KEY-----\n') and key.endswith('\n-----END EC PRIVATE KEY-----\n'), "Malformed .pem key" + + return key + +# read public .pem key file +def parse_public_pem_file(key_file, search_path): + full_file = find_file_in_path(key_file, search_path) + with open(full_file, 'r') as k: + key = k.read() + assert key.startswith('-----BEGIN PUBLIC KEY-----\n') and key.endswith('\n-----END PUBLIC KEY-----\n'), "Malformed .pem key" + + return key \ No newline at end of file diff --git a/python/setup.py b/python/setup.py new file mode 100644 index 0000000..8256ba7 --- /dev/null +++ b/python/setup.py @@ -0,0 +1,37 @@ +#!/usr/bin/env python +# Copyright 2022 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re + +import os +import shutil + +# this should only be run with python3 +import sys +if sys.version_info[0] < 3: + print('ERROR: must run with python3') + sys.exit(1) + +from setuptools import setup, find_packages, Extension + +setup(name='scai', + version='0.1', + description='Data objects library for SCAI', + author='Intel Corporation', + packages=find_packages(), + install_requires=[], + namespace_packages=['scai'], + py_modules=['report', 'attribute_assertion', 'object_reference', 'utility', 'reportlib'] +) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..a1b8612 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +# scai library python requirements +in-toto diff --git a/scail-bandit-output.txt b/scail-bandit-output.txt new file mode 100644 index 0000000..0a23be6 --- /dev/null +++ b/scail-bandit-output.txt @@ -0,0 +1,27 @@ +[main] INFO profile include tests: B405,B406,B309,B320,B308,B407,B401,B409,B310,B410,B302,B317,B304,B411,B324,B412,B316,B413,B319,B306,B305,B403,B325,B408,B314,B311,B301,B402,B318,B404,B315,B313,B303,B321,B323,B312 +[main] INFO profile exclude tests: B703,B601,B102,B108,B702,B606,B502,B504,B104,B101,B610,B609,B112,B201,B505,B603,B701,B103,B106,B611,B105,B501,B605,B607,B107,B506,B503,B507,B604,B110,B608,B602 +[main] INFO cli include tests: None +[main] INFO cli exclude tests: None +[main] INFO using config: ../ipas-bandit-config/ipas_default.config +[main] INFO running on Python 3.6.9 +Run started:2022-11-14 16:17:20.588204 + +Test results: + No issues identified. 
+ +Code scanned: + Total lines of code: 731 + Total lines skipped (#nosec): 0 + +Run metrics: + Total issues (by severity): + Undefined: 0.0 + Low: 0.0 + Medium: 0.0 + High: 0.0 + Total issues (by confidence): + Undefined: 0.0 + Low: 0.0 + Medium: 0.0 + High: 0.0 +Files skipped (0): diff --git a/schema/attribute-assertion.schema.json b/schema/attribute-assertion.schema.json new file mode 100644 index 0000000..08ac51a --- /dev/null +++ b/schema/attribute-assertion.schema.json @@ -0,0 +1,11 @@ +{ + "title": "SCAI Attribute Assertion", + "description": "Fine-grained attribute metadata about the SW supply chain.", + + "properties": { + "attribute": "", + "target": { "$ref": "object-reference.schema.json" }, // optional + "conditions": { /*object */ }, // optional + "evidence": { "$ref": "object-reference.schema.json" } // optional + } +} diff --git a/schema/in-toto-predicate.schema.json b/schema/in-toto-predicate.schema.json new file mode 100644 index 0000000..c511ca0 --- /dev/null +++ b/schema/in-toto-predicate.schema.json @@ -0,0 +1,41 @@ +{ + "title": "SCAI Predicate for in-toto Attestation", + "description": "Functional attribute and behavior metadata about a SW artifact or its supply chain.", + + // Standard attestation fields + "_type": "https://in-toto.io/Statement/v0.1", + "subject": [{ + "name": "", + "digest": { "": "" }, + "locationURI": "" // recommended + }], + + // info about subject's attributes + "predicateType": "scai/attribute-assertion/v0.1", + "predicate": { + "subjectAttributes": [{ + "attribute": "", + "target": { // optional + "name": "", + "digest": { "": "" }, + "locationURI": "", // recommended + "objectType": "" // optional + }, // optional + "conditions": { /*object */ }, // optional + "evidence": { // optional + "name": "", + "digest": { "": "" }, + "locationURI": "", // recommended + "objectType": "" // optional + } + }, ... 
], + "producer": { + "attributes": [{ + "attribute": "", + "target": { /* object */ }, // optional + "conditions": { /* object */ }, // optional + "evidence": { /* object */ } // optional + }] + } + } +} diff --git a/schema/object-reference.schema.json b/schema/object-reference.schema.json new file mode 100644 index 0000000..abd6f34 --- /dev/null +++ b/schema/object-reference.schema.json @@ -0,0 +1,11 @@ +{ + "title": "Object Reference", + "description": "Generic (resolvable) reference to an object (SW artifact or metadata).", + + "properties": { + "name": "", + "digest": { "": "VALUE", "...": "..." }, + "locationURI": "", // resolvable URI, optional + "objectType": "" // optional + } +} \ No newline at end of file diff --git a/schema/scai-report.schema.json b/schema/scai-report.schema.json new file mode 100644 index 0000000..33f4124 --- /dev/null +++ b/schema/scai-report.schema.json @@ -0,0 +1,21 @@ +{ + "title": "SCAI Report for in-toto Attestation", + "description": "SCAI attribute metadata about an operation/step in the SW supply chain.", + + // Standard attestation fields + "_type": "https://in-toto.io/Statement/v0.1", + "subject": [{ + "name": "", + "digest": { "": "" }, + "locationURI": "" // recommended + }], + + // info about subject's attributes + "predicateType": "scai/attribute-assertion/v0.1", + "predicate": { + "subjectAttributes": [{ "$ref": "attribute-assertion.schema.json" }, ... ], + "producer": { + "attributes": [{ "$ref": "attribute-assertion.schema.json" }, ... ] // optional + } + } +}