From 89d5dd004e6b9158f133d74b66333679455215d2 Mon Sep 17 00:00:00 2001 From: CyrusNajmabadi Date: Wed, 15 Aug 2018 22:44:55 -0700 Subject: [PATCH] Do not add the same file multiple times to a zip or tar file. (#1783) --- pkg/resource/asset.go | 26 ++++++++++++++++++++++---- pkg/resource/asset_test.go | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 4 deletions(-) diff --git a/pkg/resource/asset.go b/pkg/resource/asset.go index 49faaef01..ffc91c71e 100644 --- a/pkg/resource/asset.go +++ b/pkg/resource/asset.go @@ -896,13 +896,21 @@ func (a *Archive) Archive(format ArchiveFormat, w io.Writer) error { // addNextFileToTar adds the next file in the given archive to the given tar file. Returns io.EOF if the archive // contains no more files. -func addNextFileToTar(r ArchiveReader, tw *tar.Writer) error { +func addNextFileToTar(r ArchiveReader, tw *tar.Writer, seenFiles map[string]bool) error { file, data, err := r.Next() if err != nil { return err } defer contract.IgnoreClose(data) + // It's possible to run into the same file multiple times in the list of archives we're passed. + // For example, if there is an archive pointing to foo/bar and an archive pointing to + // foo/bar/baz/quux. Because of this only include the file the first time we see it. + if _, has := seenFiles[file]; has { + return nil + } + seenFiles[file] = true + sz := data.Size() if err = tw.WriteHeader(&tar.Header{ Name: file, @@ -928,8 +936,9 @@ func (a *Archive) archiveTar(w io.Writer) error { // Now actually emit the contents, file by file. tw := tar.NewWriter(w) + seenFiles := make(map[string]bool) for err == nil { - err = addNextFileToTar(reader, tw) + err = addNextFileToTar(reader, tw, seenFiles) } if err != io.EOF { return err @@ -945,13 +954,21 @@ func (a *Archive) archiveTarGZIP(w io.Writer) error { // addNextFileToZIP adds the next file in the given archive to the given ZIP file. Returns io.EOF if the archive // contains no more files. -func addNextFileToZIP(r ArchiveReader, zw *zip.Writer) error { +func addNextFileToZIP(r ArchiveReader, zw *zip.Writer, seenFiles map[string]bool) error { file, data, err := r.Next() if err != nil { return err } defer contract.IgnoreClose(data) + // It's possible to run into the same file multiple times in the list of archives we're passed. + // For example, if there is an archive pointing to foo/bar and an archive pointing to + // foo/bar/baz/quux. Because of this only include the file the first time we see it. + if _, has := seenFiles[file]; has { + return nil + } + seenFiles[file] = true + fh := &zip.FileHeader{ // These are the two fields set by zw.Create() Name: file, @@ -983,8 +1000,9 @@ func (a *Archive) archiveZIP(w io.Writer) error { // Now actually emit the contents, file by file. zw := zip.NewWriter(w) + seenFiles := make(map[string]bool) for err == nil { - err = addNextFileToZIP(reader, zw) + err = addNextFileToZIP(reader, zw, seenFiles) } if err != io.EOF { return err diff --git a/pkg/resource/asset_test.go b/pkg/resource/asset_test.go index 756268aa7..0766ddb1f 100644 --- a/pkg/resource/asset_test.go +++ b/pkg/resource/asset_test.go @@ -371,6 +371,41 @@ func TestNestedArchive(t *testing.T) { assert.Equal(t, "fake.txt", files[2].Name) } +func TestFileReferencedThroughMultiplePaths(t *testing.T) { + // Create temp dir and place some files. + dirName, err := ioutil.TempDir("", "") + assert.Nil(t, err) + assert.NoError(t, os.MkdirAll(filepath.Join(dirName, "foo", "bar"), 0777)) + assert.NoError(t, ioutil.WriteFile(filepath.Join(dirName, "foo", "bar", "b.txt"), []byte("b"), 0777)) + + // Construct an AssetArchive with a nested PathArchive. + outerArch, err := NewPathArchive(filepath.Join(dirName, "./foo")) + assert.Nil(t, err) + innerArch, err := NewPathArchive(filepath.Join(dirName, "./foo/bar")) + assert.Nil(t, err) + arch, err := NewAssetArchive(map[string]interface{}{ + "./foo": outerArch, + "./foo/bar": innerArch, + }) + assert.Nil(t, err) + + // Write a ZIP of the AssetArchive to disk. + tmpFile, err := ioutil.TempFile("", "") + fileName := tmpFile.Name() + assert.Nil(t, err) + err = arch.Archive(ZIPArchive, tmpFile) + assert.Nil(t, err) + tmpFile.Close() + + // Read the ZIP back into memory, and validate its contents. + zipReader, err := zip.OpenReader(fileName) + defer contract.IgnoreClose(zipReader) + assert.Nil(t, err) + files := zipReader.File + assert.Len(t, files, 1) + assert.Equal(t, "foo/bar/b.txt", files[0].Name) +} + func validateTestDirArchive(t *testing.T, arch *Archive) { r, err := arch.Open() assert.Nil(t, err)