Do not add the same file multiple times to a zip or tar file. (#1783)

This commit is contained in:
CyrusNajmabadi 2018-08-15 22:44:55 -07:00 committed by GitHub
parent fc38078102
commit 89d5dd004e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 57 additions and 4 deletions

View file

@ -896,13 +896,21 @@ func (a *Archive) Archive(format ArchiveFormat, w io.Writer) error {
// addNextFileToTar adds the next file in the given archive to the given tar file. Returns io.EOF if the archive
// contains no more files.
func addNextFileToTar(r ArchiveReader, tw *tar.Writer) error {
func addNextFileToTar(r ArchiveReader, tw *tar.Writer, seenFiles map[string]bool) error {
file, data, err := r.Next()
if err != nil {
return err
}
defer contract.IgnoreClose(data)
// It's possible to run into the same file multiple times in the list of archives we're passed.
// For example, this happens when one archive points to foo/bar and another archive points to
// foo/bar/baz/quux. Because of this, only include the file the first time we see it.
if _, has := seenFiles[file]; has {
return nil
}
seenFiles[file] = true
sz := data.Size()
if err = tw.WriteHeader(&tar.Header{
Name: file,
@ -928,8 +936,9 @@ func (a *Archive) archiveTar(w io.Writer) error {
// Now actually emit the contents, file by file.
tw := tar.NewWriter(w)
seenFiles := make(map[string]bool)
for err == nil {
err = addNextFileToTar(reader, tw)
err = addNextFileToTar(reader, tw, seenFiles)
}
if err != io.EOF {
return err
@ -945,13 +954,21 @@ func (a *Archive) archiveTarGZIP(w io.Writer) error {
// addNextFileToZIP adds the next file in the given archive to the given ZIP file. Returns io.EOF if the archive
// contains no more files.
func addNextFileToZIP(r ArchiveReader, zw *zip.Writer) error {
func addNextFileToZIP(r ArchiveReader, zw *zip.Writer, seenFiles map[string]bool) error {
file, data, err := r.Next()
if err != nil {
return err
}
defer contract.IgnoreClose(data)
// It's possible to run into the same file multiple times in the list of archives we're passed.
// For example, this happens when one archive points to foo/bar and another archive points to
// foo/bar/baz/quux. Because of this, only include the file the first time we see it.
if _, has := seenFiles[file]; has {
return nil
}
seenFiles[file] = true
fh := &zip.FileHeader{
// These are the two fields set by zw.Create()
Name: file,
@ -983,8 +1000,9 @@ func (a *Archive) archiveZIP(w io.Writer) error {
// Now actually emit the contents, file by file.
zw := zip.NewWriter(w)
seenFiles := make(map[string]bool)
for err == nil {
err = addNextFileToZIP(reader, zw)
err = addNextFileToZIP(reader, zw, seenFiles)
}
if err != io.EOF {
return err

View file

@ -371,6 +371,41 @@ func TestNestedArchive(t *testing.T) {
assert.Equal(t, "fake.txt", files[2].Name)
}
// TestFileReferencedThroughMultiplePaths checks that a file reachable through two overlapping
// archive entries ("./foo" and "./foo/bar") is written to the resulting ZIP exactly once.
func TestFileReferencedThroughMultiplePaths(t *testing.T) {
	// Create temp dir and place some files.
	dirName, err := ioutil.TempDir("", "")
	if !assert.NoError(t, err) {
		return
	}
	// Clean up the scratch directory so repeated runs do not leak files.
	defer func() { assert.NoError(t, os.RemoveAll(dirName)) }()

	if !assert.NoError(t, os.MkdirAll(filepath.Join(dirName, "foo", "bar"), 0777)) {
		return
	}
	if !assert.NoError(t, ioutil.WriteFile(filepath.Join(dirName, "foo", "bar", "b.txt"), []byte("b"), 0777)) {
		return
	}

	// Construct an AssetArchive whose two entries overlap: the outer PathArchive already
	// contains everything the inner PathArchive points at.
	outerArch, err := NewPathArchive(filepath.Join(dirName, "./foo"))
	if !assert.NoError(t, err) {
		return
	}
	innerArch, err := NewPathArchive(filepath.Join(dirName, "./foo/bar"))
	if !assert.NoError(t, err) {
		return
	}
	arch, err := NewAssetArchive(map[string]interface{}{
		"./foo":     outerArch,
		"./foo/bar": innerArch,
	})
	if !assert.NoError(t, err) {
		return
	}

	// Write a ZIP of the AssetArchive to disk. The error is checked before touching tmpFile,
	// because tmpFile is nil when TempFile fails.
	tmpFile, err := ioutil.TempFile("", "")
	if !assert.NoError(t, err) {
		return
	}
	fileName := tmpFile.Name()
	// Registered before the reader's deferred close below, so (LIFO) the reader is closed
	// before the file is removed.
	defer func() { assert.NoError(t, os.Remove(fileName)) }()

	archiveErr := arch.Archive(ZIPArchive, tmpFile)
	// Always close the file, even if archiving failed, and surface both errors.
	closeErr := tmpFile.Close()
	assert.NoError(t, archiveErr)
	assert.NoError(t, closeErr)
	if archiveErr != nil || closeErr != nil {
		return
	}

	// Read the ZIP back into memory, and validate its contents.
	zipReader, err := zip.OpenReader(fileName)
	if !assert.NoError(t, err) {
		return
	}
	defer contract.IgnoreClose(zipReader)

	files := zipReader.File
	// Stop here on a length mismatch so the index below cannot panic.
	if !assert.Len(t, files, 1) {
		return
	}
	assert.Equal(t, "foo/bar/b.txt", files[0].Name)
}
func validateTestDirArchive(t *testing.T, arch *Archive) {
r, err := arch.Open()
assert.Nil(t, err)