From a84c1a5669716f6c597a656c1fc02d42f60248ee Mon Sep 17 00:00:00 2001 From: Matt Davis Date: Mon, 29 Mar 2021 09:47:15 -0700 Subject: [PATCH] add --offline option to galaxy collection verify (#74040) * --offline allows in-place verify for installed collections with manifests * manifest hash, collection name, version, and path are now always displayed * test updates --- changelogs/fragments/galaxy_verify_local.yml | 2 + lib/ansible/cli/galaxy.py | 5 + lib/ansible/galaxy/collection/__init__.py | 164 ++++++++++-------- .../collection/concrete_artifact_manager.py | 50 +++--- .../tasks/verify.yml | 51 +++++- test/units/galaxy/test_collection_install.py | 2 +- 6 files changed, 181 insertions(+), 93 deletions(-) create mode 100644 changelogs/fragments/galaxy_verify_local.yml diff --git a/changelogs/fragments/galaxy_verify_local.yml b/changelogs/fragments/galaxy_verify_local.yml new file mode 100644 index 00000000000..e3ef77f06dc --- /dev/null +++ b/changelogs/fragments/galaxy_verify_local.yml @@ -0,0 +1,2 @@ +minor_changes: +- ansible-galaxy CLI - ``collection verify`` command now supports a ``--offline`` option for local-only verification diff --git a/lib/ansible/cli/galaxy.py b/lib/ansible/cli/galaxy.py index d34544e37b0..071fdc25345 100644 --- a/lib/ansible/cli/galaxy.py +++ b/lib/ansible/cli/galaxy.py @@ -363,6 +363,9 @@ class GalaxyCLI(CLI): 'path/url to a tar.gz collection artifact. This is mutually exclusive with --requirements-file.') verify_parser.add_argument('-i', '--ignore-errors', dest='ignore_errors', action='store_true', default=False, help='Ignore errors during verification and continue with the next specified collection.') + verify_parser.add_argument('--offline', dest='offline', action='store_true', default=False, + help='Validate collection integrity locally without contacting server for ' + 'canonical manifest hash.') verify_parser.add_argument('-r', '--requirements-file', dest='requirements', help='A file containing a list of collections to be verified.') @@ -1078,6 +1081,7 @@ class GalaxyCLI(CLI): collections = context.CLIARGS['args'] search_paths = context.CLIARGS['collections_path'] ignore_errors = context.CLIARGS['ignore_errors'] + local_verify_only = context.CLIARGS['offline'] requirements_file = context.CLIARGS['requirements'] requirements = self._require_one_of_collections_requirements( @@ -1090,6 +1094,7 @@ class GalaxyCLI(CLI): verify_collections( requirements, resolved_paths, self.api_servers, ignore_errors, + local_verify_only=local_verify_only, artifacts_manager=artifacts_manager, ) diff --git a/lib/ansible/galaxy/collection/__init__.py b/lib/ansible/galaxy/collection/__init__.py index 56bcc62f599..ed0512927d9 100644 --- a/lib/ansible/galaxy/collection/__init__.py +++ b/lib/ansible/galaxy/collection/__init__.py @@ -8,6 +8,7 @@ __metaclass__ = type import errno import fnmatch +import functools import json import os import shutil @@ -99,6 +100,7 @@ from ansible.galaxy import get_collections_galaxy_meta_info from ansible.galaxy.collection.concrete_artifact_manager import ( _consume_file, _download_file, + _get_json_from_installed_dir, _get_meta_from_src_dir, _tarfile_extract, ) @@ -124,6 +126,7 @@ from ansible.utils.version import SemanticVersion display = Display() MANIFEST_FORMAT = 1 +MANIFEST_FILENAME = 'MANIFEST.json' ModifiedContent = namedtuple('ModifiedContent', ['filename', 'expected', 'installed']) @@ -131,52 +134,69 @@ ModifiedContent = namedtuple('ModifiedContent', ['filename', 'expected', 'instal def verify_local_collection( local_collection, remote_collection, artifacts_manager, -): # type: (Candidate, Candidate, ConcreteArtifactsManager) -> None +): # type: (Candidate, Optional[Candidate], ConcreteArtifactsManager) -> None """Verify integrity of the locally installed collection. :param local_collection: Collection being checked. - :param remote_collection: Correct collection. + :param remote_collection: Upstream collection (optional, if None, only verify local artifact) :param artifacts_manager: Artifacts manager. """ - b_temp_tar_path = ( # NOTE: AnsibleError is raised on URLError - artifacts_manager.get_artifact_path - if remote_collection.is_concrete_artifact - else artifacts_manager.get_galaxy_artifact_path - )(remote_collection) - b_collection_path = to_bytes( local_collection.src, errors='surrogate_or_strict', ) - display.vvv("Verifying '{coll!s}'.".format(coll=local_collection)) - display.vvv( + display.display("Verifying '{coll!s}'.".format(coll=local_collection)) + display.display( u"Installed collection found at '{path!s}'". format(path=to_text(local_collection.src)), ) - display.vvv( - u"Remote collection cached as '{path!s}'". - format(path=to_text(b_temp_tar_path)), - ) - - # Compare installed version versus requirement version - if local_collection.ver != remote_collection.ver: - err = ( - "{local_fqcn!s} has the version '{local_ver!s}' but " - "is being compared to '{remote_ver!s}'".format( - local_fqcn=local_collection.fqcn, - local_ver=local_collection.ver, - remote_ver=remote_collection.ver, - ) - ) - display.display(err) - return modified_content = [] # type: List[ModifiedContent] - # Verify the manifest hash matches before verifying the file manifest - expected_hash = _get_tar_file_hash(b_temp_tar_path, 'MANIFEST.json') - _verify_file_hash(b_collection_path, 'MANIFEST.json', expected_hash, modified_content) - manifest = _get_json_from_tar_file(b_temp_tar_path, 'MANIFEST.json') + verify_local_only = remote_collection is None + if verify_local_only: + # partial away the local FS detail so we can just ask generically during validation + get_json_from_validation_source = functools.partial(_get_json_from_installed_dir, b_collection_path) + get_hash_from_validation_source = functools.partial(_get_file_hash, b_collection_path) + + # since we're not downloading this, just seed it with the value from disk + manifest_hash = get_hash_from_validation_source(MANIFEST_FILENAME) + else: + # fetch remote + b_temp_tar_path = ( # NOTE: AnsibleError is raised on URLError + artifacts_manager.get_artifact_path + if remote_collection.is_concrete_artifact + else artifacts_manager.get_galaxy_artifact_path + )(remote_collection) + + display.vvv( + u"Remote collection cached as '{path!s}'".format(path=to_text(b_temp_tar_path)) + ) + + # partial away the tarball details so we can just ask generically during validation + get_json_from_validation_source = functools.partial(_get_json_from_tar_file, b_temp_tar_path) + get_hash_from_validation_source = functools.partial(_get_tar_file_hash, b_temp_tar_path) + + # Compare installed version versus requirement version + if local_collection.ver != remote_collection.ver: + err = ( + "{local_fqcn!s} has the version '{local_ver!s}' but " + "is being compared to '{remote_ver!s}'".format( + local_fqcn=local_collection.fqcn, + local_ver=local_collection.ver, + remote_ver=remote_collection.ver, + ) + ) + display.display(err) + return + + # Verify the downloaded manifest hash matches the installed copy before verifying the file manifest + manifest_hash = get_hash_from_validation_source(MANIFEST_FILENAME) + _verify_file_hash(b_collection_path, MANIFEST_FILENAME, manifest_hash, modified_content) + + display.display('MANIFEST.json hash: {manifest_hash}'.format(manifest_hash=manifest_hash)) + + manifest = get_json_from_validation_source(MANIFEST_FILENAME) # Use the manifest to verify the file manifest checksum file_manifest_data = manifest['file_manifest_file'] @@ -185,7 +205,7 @@ def verify_local_collection( # Verify the file manifest before using it to verify individual files _verify_file_hash(b_collection_path, file_manifest_filename, expected_hash, modified_content) - file_manifest = _get_json_from_tar_file(b_temp_tar_path, file_manifest_filename) + file_manifest = get_json_from_validation_source(file_manifest_filename) # Use the file manifest to verify individual file checksums for manifest_data in file_manifest['files']: @@ -199,17 +219,14 @@ def verify_local_collection( 'in the following files:'. format(fqcn=to_text(local_collection.fqcn)), ) - display.display(to_text(local_collection.fqcn)) - display.vvv(to_text(local_collection.src)) for content_change in modified_content: display.display(' %s' % content_change.filename) - display.vvv(" Expected: %s\n Found: %s" % (content_change.expected, content_change.installed)) - # FIXME: Why doesn't this raise a failed return code? + display.v(" Expected: %s\n Found: %s" % (content_change.expected, content_change.installed)) else: - display.vvv( - "Successfully verified that checksums for '{coll!s}' " - 'match the remote collection'. - format(coll=local_collection), + what = "are internally consistent with its manifest" if verify_local_only else "match the remote collection" + display.display( + "Successfully verified that checksums for '{coll!s}' {what!s}.". + format(coll=local_collection, what=what), ) @@ -297,7 +314,7 @@ def download_collections( ): for fqcn, concrete_coll_pin in dep_map.copy().items(): # FIXME: move into the provider if concrete_coll_pin.is_virtual: - display.v( + display.display( 'Virtual collection {coll!s} is not downloadable'. format(coll=to_text(concrete_coll_pin)), ) @@ -555,6 +572,7 @@ def verify_collections( search_paths, # type: Iterable[str] apis, # type: Iterable[GalaxyAPI] ignore_errors, # type: bool + local_verify_only, # type: bool artifacts_manager, # type: ConcreteArtifactsManager ): # type: (...) -> None r"""Verify the integrity of locally installed collections. @@ -563,6 +581,7 @@ def verify_collections( :param search_paths: Locations for the local collection lookup. :param apis: A list of GalaxyAPIs to query when searching for a collection. :param ignore_errors: Whether to ignore any errors when verifying the collection. + :param local_verify_only: When True, skip downloads and only verify local manifests. :param artifacts_manager: Artifacts manager. """ api_proxy = MultiGalaxyAPIProxy(apis, artifacts_manager) @@ -606,33 +625,36 @@ def verify_collections( else: raise AnsibleError(message=default_err) - remote_collection = Candidate( - collection.fqcn, - collection.ver if collection.ver != '*' - else local_collection.ver, - None, 'galaxy', - ) + if local_verify_only: + remote_collection = None + else: + remote_collection = Candidate( + collection.fqcn, + collection.ver if collection.ver != '*' + else local_collection.ver, + None, 'galaxy', + ) - # Download collection on a galaxy server for comparison - try: - # NOTE: Trigger the lookup. If found, it'll cache - # NOTE: download URL and token in artifact manager. - api_proxy.get_collection_version_metadata( - remote_collection, - ) - except AnsibleError as e: # FIXME: does this actually emit any errors? - # FIXME: extract the actual message and adjust this: - expected_error_msg = ( - 'Failed to find collection {coll.fqcn!s}:{coll.ver!s}'. - format(coll=collection) - ) - if e.message == expected_error_msg: - raise AnsibleError( - 'Failed to find remote collection ' - "'{coll!s}' on any of the galaxy servers". + # Download collection on a galaxy server for comparison + try: + # NOTE: Trigger the lookup. If found, it'll cache + # NOTE: download URL and token in artifact manager. + api_proxy.get_collection_version_metadata( + remote_collection, + ) + except AnsibleError as e: # FIXME: does this actually emit any errors? + # FIXME: extract the actual message and adjust this: + expected_error_msg = ( + 'Failed to find collection {coll.fqcn!s}:{coll.ver!s}'. format(coll=collection) ) - raise + if e.message == expected_error_msg: + raise AnsibleError( + 'Failed to find remote collection ' + "'{coll!s}' on any of the galaxy servers". + format(coll=collection) + ) + raise verify_local_collection( local_collection, remote_collection, @@ -875,7 +897,7 @@ def _build_collection_tar( with tarfile.open(b_tar_filepath, mode='w:gz') as tar_file: # Add the MANIFEST.json and FILES.json file to the archive - for name, b in [('MANIFEST.json', collection_manifest_json), ('FILES.json', files_manifest_json)]: + for name, b in [(MANIFEST_FILENAME, collection_manifest_json), ('FILES.json', files_manifest_json)]: b_io = BytesIO(b) tar_info = tarfile.TarInfo(name) tar_info.size = len(b) @@ -941,7 +963,7 @@ def _build_collection_dir(b_collection_path, b_collection_output, collection_man collection_manifest_json = to_bytes(json.dumps(collection_manifest, indent=True), errors='surrogate_or_strict') # Write contents to the files - for name, b in [('MANIFEST.json', collection_manifest_json), ('FILES.json', files_manifest_json)]: + for name, b in [(MANIFEST_FILENAME, collection_manifest_json), ('FILES.json', files_manifest_json)]: b_path = os.path.join(b_collection_output, to_bytes(name, errors='surrogate_or_strict')) with open(b_path, 'wb') as file_obj, BytesIO(b) as b_io: shutil.copyfileobj(b_io, file_obj) @@ -1056,7 +1078,7 @@ def install_artifact(b_coll_targz_path, b_collection_path, b_temp_path): with _tarfile_extract(collection_tar, files_member_obj) as (dummy, files_obj): files = json.loads(to_text(files_obj.read(), errors='surrogate_or_strict')) - _extract_tar_file(collection_tar, 'MANIFEST.json', b_collection_path, b_temp_path) + _extract_tar_file(collection_tar, MANIFEST_FILENAME, b_collection_path, b_temp_path) _extract_tar_file(collection_tar, 'FILES.json', b_collection_path, b_temp_path) for file_info in files['files']: @@ -1243,6 +1265,12 @@ def _get_tar_file_hash(b_path, filename): return _consume_file(tar_obj) +def _get_file_hash(b_path, filename): # type: (bytes, str) -> str + filepath = os.path.join(b_path, to_bytes(filename, errors='surrogate_or_strict')) + with open(filepath, 'rb') as fp: + return _consume_file(fp) + + def _is_child_path(path, parent_path, link_name=None): """ Checks that path is a path within the parent_path specified. """ b_path = to_bytes(path, errors='surrogate_or_strict') diff --git a/lib/ansible/galaxy/collection/concrete_artifact_manager.py b/lib/ansible/galaxy/collection/concrete_artifact_manager.py index 33f5129dc78..b2550abdc03 100644 --- a/lib/ansible/galaxy/collection/concrete_artifact_manager.py +++ b/lib/ansible/galaxy/collection/concrete_artifact_manager.py @@ -49,6 +49,8 @@ import yaml display = Display() +MANIFEST_FILENAME = 'MANIFEST.json' + class ConcreteArtifactsManager: """Manager for on-disk collection artifacts. @@ -539,26 +541,26 @@ def _get_meta_from_src_dir( return _normalize_galaxy_yml_manifest(manifest, galaxy_yml) -def _get_meta_from_installed_dir( +def _get_json_from_installed_dir( b_path, # type: bytes -): # type: (...) -> Dict[str, Optional[Union[str, List[str], Dict[str, str]]]] - n_manifest_json = 'MANIFEST.json' - b_manifest_json = to_bytes(n_manifest_json) - b_manifest_json_path = os.path.join(b_path, b_manifest_json) + filename, # type: str +): # type: (...) -> Dict + + b_json_filepath = os.path.join(b_path, to_bytes(filename, errors='surrogate_or_strict')) try: - with open(b_manifest_json_path, 'rb') as manifest_fd: - b_manifest_txt = manifest_fd.read() + with open(b_json_filepath, 'rb') as manifest_fd: + b_json_text = manifest_fd.read() except (IOError, OSError): raise LookupError( "The collection {manifest!s} path '{path!s}' does not exist.". format( - manifest=n_manifest_json, - path=to_native(b_manifest_json_path), + manifest=filename, + path=to_native(b_json_filepath), ) ) - manifest_txt = to_text(b_manifest_txt, errors='surrogate_or_strict') + manifest_txt = to_text(b_json_text, errors='surrogate_or_strict') try: manifest = json.loads(manifest_txt) @@ -566,18 +568,26 @@ def _get_meta_from_installed_dir( raise AnsibleError( 'Collection tar file member {member!s} does not ' 'contain a valid json string.'. - format(member=n_manifest_json), + format(member=filename), ) - else: - collection_info = manifest['collection_info'] + + return manifest + + +def _get_meta_from_installed_dir( + b_path, # type: bytes +): # type: (...) -> Dict[str, Optional[Union[str, List[str], Dict[str, str]]]] + manifest = _get_json_from_installed_dir(b_path, MANIFEST_FILENAME) + collection_info = manifest['collection_info'] version = collection_info.get('version') if not version: raise AnsibleError( - u'Collection metadata file at `{meta_file!s}` is expected ' + u'Collection metadata file `{manifest_filename!s}` at `{meta_file!s}` is expected ' u'to have a valid SemVer version value but got {version!s}'. format( - meta_file=to_text(b_manifest_json_path), + manifest_filename=MANIFEST_FILENAME, + meta_file=to_text(b_path), version=to_text(repr(version)), ), ) @@ -594,18 +604,16 @@ def _get_meta_from_tar( format(path=to_native(b_path)), ) - n_manifest_json = 'MANIFEST.json' - with tarfile.open(b_path, mode='r') as collection_tar: # type: tarfile.TarFile try: - member = collection_tar.getmember(n_manifest_json) + member = collection_tar.getmember(MANIFEST_FILENAME) except KeyError: raise AnsibleError( "Collection at '{path!s}' does not contain the " 'required file {manifest_file!s}.'. format( path=to_native(b_path), - manifest_file=n_manifest_json, + manifest_file=MANIFEST_FILENAME, ), ) @@ -613,7 +621,7 @@ def _get_meta_from_tar( if member_obj is None: raise AnsibleError( 'Collection tar file does not contain ' - 'member {member!s}'.format(member=n_manifest_json), + 'member {member!s}'.format(member=MANIFEST_FILENAME), ) text_content = to_text( @@ -627,7 +635,7 @@ def _get_meta_from_tar( raise AnsibleError( 'Collection tar file member {member!s} does not ' 'contain a valid json string.'. - format(member=n_manifest_json), + format(member=MANIFEST_FILENAME), ) return manifest['collection_info'] diff --git a/test/integration/targets/ansible-galaxy-collection/tasks/verify.yml b/test/integration/targets/ansible-galaxy-collection/tasks/verify.yml index 0047ca01d51..f951c654cd6 100644 --- a/test/integration/targets/ansible-galaxy-collection/tasks/verify.yml +++ b/test/integration/targets/ansible-galaxy-collection/tasks/verify.yml @@ -150,7 +150,7 @@ - assert: that: - - "'Collection ansible_test.verify contains modified content in the following files:\nansible_test.verify\n plugins/modules/test_module.py' in verify.stdout" + - "'Collection ansible_test.verify contains modified content in the following files:\n plugins/modules/test_module.py' in verify.stdout" - name: modify the FILES.json to match the new checksum lineinfile: @@ -168,7 +168,7 @@ - assert: that: - - "'Collection ansible_test.verify contains modified content in the following files:\nansible_test.verify\n FILES.json' in verify.stdout" + - "'Collection ansible_test.verify contains modified content in the following files:\n FILES.json' in verify.stdout" - name: get the checksum of the FILES.json stat: @@ -190,7 +190,7 @@ - assert: that: - - "'Collection ansible_test.verify contains modified content in the following files:\nansible_test.verify\n MANIFEST.json' in verify.stdout" + - "'Collection ansible_test.verify contains modified content in the following files:\n MANIFEST.json' in verify.stdout" - name: remove the artifact metadata to test verifying a collection without it file: @@ -221,3 +221,48 @@ that: - verify.failed - "'Collection ansible_test.verify does not have a MANIFEST.json' in verify.stderr" + +- name: update the collection version to something not present on the server + lineinfile: + regexp: "version: .*" + line: "version: '3.0.0'" + path: '{{ galaxy_dir }}/scratch/ansible_test/verify/galaxy.yml' + +- name: build the new version + command: ansible-galaxy collection build scratch/ansible_test/verify + args: + chdir: '{{ galaxy_dir }}' + +- name: force-install from local artifact + command: ansible-galaxy collection install '{{ galaxy_dir }}/ansible_test-verify-3.0.0.tar.gz' --force + environment: + ANSIBLE_COLLECTIONS_PATH: '{{ galaxy_dir }}' + +- name: verify locally only, no download or server manifest hash check + command: ansible-galaxy collection verify --offline ansible_test.verify + environment: + ANSIBLE_COLLECTIONS_PATH: '{{ galaxy_dir }}' + register: verify + +- assert: + that: + - >- + "Verifying 'ansible_test.verify:3.0.0'." in verify.stdout + - '"MANIFEST.json hash: " in verify.stdout' + - >- + "Successfully verified that checksums for 'ansible_test.verify:3.0.0' are internally consistent with its manifest." in verify.stdout + +- name: append a newline to a module to modify the checksum + shell: "echo '' >> {{ module_path }}" + +- name: verify modified collection locally-only (should fail) + command: ansible-galaxy collection verify --offline ansible_test.verify + register: verify + environment: + ANSIBLE_COLLECTIONS_PATH: '{{ galaxy_dir }}' + ignore_errors: yes + +- assert: + that: + - "'Collection ansible_test.verify contains modified content in the following files:' in verify.stdout" + - "'plugins/modules/test_module.py' in verify.stdout" diff --git a/test/units/galaxy/test_collection_install.py b/test/units/galaxy/test_collection_install.py index 23c35a703eb..37e099701be 100644 --- a/test/units/galaxy/test_collection_install.py +++ b/test/units/galaxy/test_collection_install.py @@ -243,7 +243,7 @@ def test_build_artifact_from_path_no_version(collection_artifact, monkeypatch): concrete_artifact_cm = collection.concrete_artifact_manager.ConcreteArtifactsManager(tmp_path, validate_certs=False) expected = ( - '^Collection metadata file at `.*` is expected to have a valid SemVer ' + '^Collection metadata file `.*` at `.*` is expected to have a valid SemVer ' 'version value but got {empty_unicode_string!r}$'. format(empty_unicode_string=u'') )