pulumi/pkg/resource/asset.go
joeduffy 3d3f778c3d Fix asset bugs; write more tests
This change fixes a couple bugs with assets:

* We weren't recursing into subdirectories in the new "path as
  archive" feature, which meant we missed most of the files.

* We need to make paths relative to the root of the archive
  directory itself, otherwise paths end up redundantly including
  the asset's root folder path.

* We need to clean the file paths before adding them to the
  archive asset map, otherwise they are inconsistent between the
  path, tar, tgz, and zip cases.

* Ignore directories when traversing zips, since they aren't
  included in the other formats.

* Tolerate io.EOF errors when reading the ZIP contents into blobs.

* Add test cases for the four different archive kinds.

This fixes pulumi/pulumi-aws#50.
2017-10-24 09:00:11 -07:00
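
For illustration, here is a minimal sketch of the fixed "path as archive" behavior. It assumes a hypothetical local directory ./app containing index.js and lib/util.js; reading the archive now recurses into subdirectories, and the resulting map is keyed by cleaned paths relative to the archive root:

	package main

	import (
		"fmt"

		"github.com/pulumi/pulumi/pkg/resource"
	)

	func main() {
		// NewPathArchive hashes the directory's contents (via a tar stream) up front.
		arch, err := resource.NewPathArchive("./app")
		if err != nil {
			panic(err)
		}

		// Read expands the directory recursively; keys are relative to ./app, so we
		// see "index.js" and "lib/util.js", not "app/index.js" or "./app/lib/util.js".
		blobs, err := arch.Read()
		if err != nil {
			panic(err)
		}
		for name, blob := range blobs {
			fmt.Printf("%s (%d bytes)\n", name, blob.Size())
			_ = blob.Close()
		}
	}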

// Copyright 2016-2017, Pulumi Corporation. All rights reserved.

package resource

import (
	"archive/tar"
	"archive/zip"
	"bytes"
	"compress/gzip"
	"crypto/sha256"
	"encoding/hex"
	"io"
	"io/ioutil"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	"reflect"
	"sort"
	"strings"

	"github.com/pkg/errors"

	"github.com/pulumi/pulumi/pkg/util/contract"
	"github.com/pulumi/pulumi/pkg/workspace"
)

// Asset is a serialized asset reference. It is a union: thus, only one of its fields will be non-nil. Several helper
// routines exist as members in order to easily interact with the assets referenced by an instance of this type.
//nolint: lll
type Asset struct {
	Sig  string `json:"4dabf18193072939515e22adb298388d" yaml:"4dabf18193072939515e22adb298388d"` // the unique asset signature (see properties.go).
	Hash string `json:"hash,omitempty" yaml:"hash,omitempty"` // the SHA256 hash of the asset contents.
	Text string `json:"text,omitempty" yaml:"text,omitempty"` // a textual asset.
	Path string `json:"path,omitempty" yaml:"path,omitempty"` // a file on the current filesystem.
	URI  string `json:"uri,omitempty" yaml:"uri,omitempty"`   // a URI (file://, http://, https://, or custom).
}

const (
	AssetSig          = "c44067f5952c0a294b673a41bacd8c17" // a randomly assigned type hash for assets.
	AssetHashProperty = "hash"                             // the dynamic property for an asset's hash.
	AssetTextProperty = "text"                             // the dynamic property for an asset's text.
	AssetPathProperty = "path"                             // the dynamic property for an asset's path.
	AssetURIProperty  = "uri"                              // the dynamic property for an asset's URI.
)
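
// For reference, a serialized text asset takes roughly the following JSON shape; the odd-looking key is
// SigKey (see properties.go), whose value carries AssetSig, and "hash" is an elided SHA256 hex digest:
//
//     {
//         "4dabf18193072939515e22adb298388d": "c44067f5952c0a294b673a41bacd8c17",
//         "hash": "<sha256 hex digest>",
//         "text": "hello, world"
//     }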

// NewTextAsset produces a new asset and its corresponding SHA256 hash from the given text.
func NewTextAsset(text string) (*Asset, error) {
	a := &Asset{Sig: AssetSig, Text: text}
	err := a.EnsureHash()
	return a, err
}

// NewPathAsset produces a new asset and its corresponding SHA256 hash from the given filesystem path.
func NewPathAsset(path string) (*Asset, error) {
	a := &Asset{Sig: AssetSig, Path: path}
	err := a.EnsureHash()
	return a, err
}

// NewURIAsset produces a new asset and its corresponding SHA256 hash from the given network URI.
func NewURIAsset(uri string) (*Asset, error) {
	a := &Asset{Sig: AssetSig, URI: uri}
	err := a.EnsureHash()
	return a, err
}

func (a *Asset) IsText() bool { return a.Text != "" }
func (a *Asset) IsPath() bool { return a.Path != "" }
func (a *Asset) IsURI() bool { return a.URI != "" }

func (a *Asset) GetText() (string, bool) {
	if a.IsText() {
		return a.Text, true
	}
	return "", false
}

func (a *Asset) GetPath() (string, bool) {
	if a.IsPath() {
		return a.Path, true
	}
	return "", false
}

func (a *Asset) GetURI() (string, bool) {
	if a.IsURI() {
		return a.URI, true
	}
	return "", false
}

// GetURIURL returns the underlying URI as a parsed URL, provided it is one. If there was an error parsing the URI, it
// will be returned as a non-nil error object.
func (a *Asset) GetURIURL() (*url.URL, bool, error) {
	if uri, isuri := a.GetURI(); isuri {
		url, err := url.Parse(uri)
		if err != nil {
			return nil, true, err
		}
		return url, true, nil
	}
	return nil, false, nil
}

// Equals returns true if a is value-equal to other.
func (a *Asset) Equals(other *Asset) bool {
	if a == nil {
		return other == nil
	} else if other == nil {
		return false
	}
	return a.Hash == other.Hash && a.Text == other.Text && a.Path == other.Path && a.URI == other.URI
}

// Serialize returns a weakly typed map that contains the right signature for serialization purposes.
func (a *Asset) Serialize() map[string]interface{} {
	result := map[string]interface{}{
		string(SigKey): AssetSig,
	}
	if a.Hash != "" {
		result[AssetHashProperty] = a.Hash
	}
	if a.Text != "" {
		result[AssetTextProperty] = a.Text
	}
	if a.Path != "" {
		result[AssetPathProperty] = a.Path
	}
	if a.URI != "" {
		result[AssetURIProperty] = a.URI
	}
	return result
}

// DeserializeAsset checks to see if the map contains an asset, using its signature, and if so deserializes it.
func DeserializeAsset(obj map[string]interface{}) (*Asset, bool, error) {
	// If not an asset, return false immediately.
	if obj[string(SigKey)] != AssetSig {
		return &Asset{}, false, nil
	}

	// Else, deserialize the possible fields.
	var hash string
	if v, has := obj[AssetHashProperty]; has {
		hash = v.(string)
	}
	var text string
	if v, has := obj[AssetTextProperty]; has {
		text = v.(string)
	}
	var path string
	if v, has := obj[AssetPathProperty]; has {
		path = v.(string)
	}
	var uri string
	if v, has := obj[AssetURIProperty]; has {
		uri = v.(string)
	}
	if text == "" && path == "" && uri == "" {
		return &Asset{}, false, errors.New("asset is missing one of text, path, or URI")
	}
	return &Asset{Hash: hash, Text: text, Path: path, URI: uri}, true, nil
}

// Read reads an asset's contents into memory.
func (a *Asset) Read() (*Blob, error) {
	if a.IsText() {
		return a.readText()
	} else if a.IsPath() {
		return a.readPath()
	} else if a.IsURI() {
		return a.readURI()
	}
	contract.Failf("Invalid asset; one of Text, Path, or URI must be non-nil")
	return nil, nil
}

func (a *Asset) readText() (*Blob, error) {
	text, istext := a.GetText()
	contract.Assertf(istext, "Expected a text-based asset")
	return NewByteBlob([]byte(text)), nil
}

func (a *Asset) readPath() (*Blob, error) {
	path, ispath := a.GetPath()
	contract.Assertf(ispath, "Expected a path-based asset")

	// Do a quick check to make sure it's a file, so we can fail gracefully if someone passes a directory.
	info, err := os.Stat(path)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to open asset file '%v'", path)
	} else if info.IsDir() {
		return nil, errors.Errorf("asset path '%v' is a directory; try using an archive", path)
	}

	byts, err := ioutil.ReadFile(path)
	if err != nil {
		return nil, errors.Wrapf(err, "failed to read asset file '%v'", path)
	}
	return NewByteBlob(byts), nil
}

func (a *Asset) readURI() (*Blob, error) {
	url, isurl, err := a.GetURIURL()
	if err != nil {
		return nil, err
	}
	contract.Assertf(isurl, "Expected a URI-based asset")
	switch s := url.Scheme; s {
	case "http", "https":
		resp, err := http.Get(url.String())
		if err != nil {
			return nil, err
		}
		return NewReadCloserBlob(resp.Body)
	case "file":
		contract.Assert(url.User == nil)
		contract.Assert(url.RawQuery == "")
		contract.Assert(url.Fragment == "")
		if url.Host != "" && url.Host != "localhost" {
			return nil, errors.Errorf("file:// host '%v' not supported (only localhost)", url.Host)
		}
		f, err := os.Open(url.Path)
		if err != nil {
			return nil, err
		}
		return NewFileBlob(f)
	default:
		return nil, errors.Errorf("Unrecognized or unsupported URI scheme: %v", s)
	}
}

// EnsureHash computes the SHA256 hash of the asset's contents and stores it on the object.
func (a *Asset) EnsureHash() error {
	if a.Hash == "" {
		blob, err := a.Read()
		if err != nil {
			return err
		}
		defer contract.IgnoreClose(blob)

		hash := sha256.New()
		_, err = io.Copy(hash, blob)
		if err != nil {
			return err
		}
		a.Hash = hex.EncodeToString(hash.Sum(nil))
	}
	return nil
}

// SeekableReadCloser combines Read, Close, and Seek functionality into one interface.
type SeekableReadCloser interface {
	io.Seeker
	io.ReadCloser
}

// Blob is a blob that implements ReadCloser, Seek, and offers Len functionality.
type Blob struct {
	rd SeekableReadCloser // an underlying reader.
	sz int64              // the size of the blob.
}

func (blob *Blob) Close() error { return blob.rd.Close() }
func (blob *Blob) Read(p []byte) (int, error) { return blob.rd.Read(p) }
func (blob *Blob) Reader() SeekableReadCloser { return blob.rd }
func (blob *Blob) Seek(offset int64, whence int) (int64, error) { return blob.rd.Seek(offset, whence) }
func (blob *Blob) Size() int64 { return blob.sz }
// NewByteBlob creates a new byte blob.
func NewByteBlob(data []byte) *Blob {
	return &Blob{
		rd: newBytesReader(data),
		sz: int64(len(data)),
	}
}

// NewFileBlob creates a new asset blob whose size is known thanks to stat.
func NewFileBlob(f *os.File) (*Blob, error) {
	stat, err := f.Stat()
	if err != nil {
		return nil, err
	}
	return &Blob{
		rd: f,
		sz: stat.Size(),
	}, nil
}

// NewReadCloserBlob turns any old ReadCloser into a Blob, usually by making a copy.
func NewReadCloserBlob(r io.ReadCloser) (*Blob, error) {
	if f, isf := r.(*os.File); isf {
		// If it's a file, we can "fast path" the asset creation without making a copy.
		return NewFileBlob(f)
	}
	// Otherwise, read it all in, and create a blob out of that.
	defer contract.IgnoreClose(r)
	data, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, err
	}
	return NewByteBlob(data), nil
}

// bytesReader turns a *bytes.Reader into a SeekableReadCloser by adding an empty Close method.
type bytesReader struct {
	*bytes.Reader
}

func newBytesReader(b []byte) SeekableReadCloser {
	return bytesReader{
		Reader: bytes.NewReader(b),
	}
}

func (b bytesReader) Close() error {
	return nil // intentionally blank
}

// Archive is a serialized archive reference. It is a union: thus, only one of its fields will be non-nil. Several
// helper routines exist as members in order to easily interact with archives of different kinds.
//nolint: lll
type Archive struct {
	Sig    string                 `json:"4dabf18193072939515e22adb298388d" yaml:"4dabf18193072939515e22adb298388d"` // the unique archive signature (see properties.go).
	Hash   string                 `json:"hash,omitempty" yaml:"hash,omitempty"`     // the SHA256 hash of the archive contents.
	Assets map[string]interface{} `json:"assets,omitempty" yaml:"assets,omitempty"` // a collection of other assets/archives.
	Path   string                 `json:"path,omitempty" yaml:"path,omitempty"`     // a file on the current filesystem.
	URI    string                 `json:"uri,omitempty" yaml:"uri,omitempty"`       // a remote URI (file://, http://, https://, etc).
}

const (
	ArchiveSig            = "0def7320c3a5731c473e5ecbe6d01bc7" // a randomly assigned archive type signature.
	ArchiveHashProperty   = "hash"                             // the dynamic property for an archive's hash.
	ArchiveAssetsProperty = "assets"                           // the dynamic property for an archive's assets.
	ArchivePathProperty   = "path"                             // the dynamic property for an archive's path.
	ArchiveURIProperty    = "uri"                              // the dynamic property for an archive's URI.
)

// NewAssetArchive produces a new archive and its corresponding SHA256 hash from a map of named assets and/or archives.
func NewAssetArchive(assets map[string]interface{}) (*Archive, error) {
	// Ensure all elements are either assets or archives.
	for _, asset := range assets {
		switch t := asset.(type) {
		case *Asset, *Archive:
			// ok
		default:
			return &Archive{}, errors.Errorf("type %v is not a valid archive element", t)
		}
	}
	a := &Archive{Sig: ArchiveSig, Assets: assets}
	err := a.EnsureHash()
	return a, err
}

// NewPathArchive produces a new archive and its corresponding SHA256 hash from the given filesystem path.
func NewPathArchive(path string) (*Archive, error) {
	a := &Archive{Sig: ArchiveSig, Path: path}
	err := a.EnsureHash()
	return a, err
}

// NewURIArchive produces a new archive and its corresponding SHA256 hash from the given network URI.
func NewURIArchive(uri string) (*Archive, error) {
	a := &Archive{Sig: ArchiveSig, URI: uri}
	err := a.EnsureHash()
	return a, err
}

func (a *Archive) IsAssets() bool { return a.Assets != nil }
func (a *Archive) IsPath() bool { return a.Path != "" }
func (a *Archive) IsURI() bool { return a.URI != "" }

func (a *Archive) GetAssets() (map[string]interface{}, bool) {
	if a.IsAssets() {
		return a.Assets, true
	}
	return nil, false
}

func (a *Archive) GetPath() (string, bool) {
	if a.IsPath() {
		return a.Path, true
	}
	return "", false
}

func (a *Archive) GetURI() (string, bool) {
	if a.IsURI() {
		return a.URI, true
	}
	return "", false
}

// GetURIURL returns the underlying URI as a parsed URL, provided it is one. If there was an error parsing the URI, it
// will be returned as a non-nil error object.
func (a *Archive) GetURIURL() (*url.URL, bool, error) {
	if uri, isuri := a.GetURI(); isuri {
		url, err := url.Parse(uri)
		if err != nil {
			return nil, true, err
		}
		return url, true, nil
	}
	return nil, false, nil
}

// Equals returns true if a is value-equal to other.
func (a *Archive) Equals(other *Archive) bool {
	if a == nil {
		return other == nil
	} else if other == nil {
		return false
	}
	if a.Assets != nil {
		if other.Assets == nil {
			return false
		}
		if len(a.Assets) != len(other.Assets) {
			return false
		}
		for key, value := range a.Assets {
			otherv := other.Assets[key]
			switch valuet := value.(type) {
			case *Asset:
				if othera, isAsset := otherv.(*Asset); isAsset {
					if !valuet.Equals(othera) {
						return false
					}
				} else {
					return false
				}
			case *Archive:
				if othera, isArchive := otherv.(*Archive); isArchive {
					if !valuet.Equals(othera) {
						return false
					}
				} else {
					return false
				}
			default:
				return false
			}
		}
	} else if other.Assets != nil {
		return false
	}
	return a.Hash == other.Hash && a.Path == other.Path && a.URI == other.URI
}

// Serialize returns a weakly typed map that contains the right signature for serialization purposes.
func (a *Archive) Serialize() map[string]interface{} {
	result := map[string]interface{}{
		string(SigKey): ArchiveSig,
	}
	if a.Hash != "" {
		result[ArchiveHashProperty] = a.Hash
	}
	if a.Assets != nil {
		assets := make(map[string]interface{})
		for k, v := range a.Assets {
			switch t := v.(type) {
			case *Asset:
				assets[k] = t.Serialize()
			case *Archive:
				assets[k] = t.Serialize()
			default:
				contract.Failf("Unrecognized asset map type %v", reflect.TypeOf(t))
			}
		}
		result[ArchiveAssetsProperty] = assets
	}
	if a.Path != "" {
		result[ArchivePathProperty] = a.Path
	}
	if a.URI != "" {
		result[ArchiveURIProperty] = a.URI
	}
	return result
}

// DeserializeArchive checks to see if the map contains an archive, using its signature, and if so deserializes it.
func DeserializeArchive(obj map[string]interface{}) (*Archive, bool, error) {
	// If not an archive, return false immediately.
	if obj[string(SigKey)] != ArchiveSig {
		return &Archive{}, false, nil
	}

	var hash string
	if v, has := obj[ArchiveHashProperty]; has {
		hash = v.(string)
	}
	var assets map[string]interface{}
	if v, has := obj[ArchiveAssetsProperty]; has {
		assets = make(map[string]interface{})
		if v != nil {
			for k, elem := range v.(map[string]interface{}) {
				switch t := elem.(type) {
				case *Asset:
					assets[k] = t
				case *Archive:
					assets[k] = t
				case map[string]interface{}:
					a, isa, err := DeserializeAsset(t)
					if err != nil {
						return &Archive{}, false, err
					} else if isa {
						assets[k] = a
					} else {
						arch, isarch, err := DeserializeArchive(t)
						if err != nil {
							return &Archive{}, false, err
						} else if !isarch {
							return &Archive{}, false, errors.Errorf("archive member '%v' is not an asset or archive", k)
						}
						assets[k] = arch
					}
				default:
					return &Archive{}, false, nil
				}
			}
		}
	}
	var path string
	if v, has := obj[ArchivePathProperty]; has {
		path = v.(string)
	}
	var uri string
	if v, has := obj[ArchiveURIProperty]; has {
		uri = v.(string)
	}
	if assets == nil && path == "" && uri == "" {
		return &Archive{}, false, errors.New("archive is missing one of assets, path, or URI")
	}
	return &Archive{Hash: hash, Assets: assets, Path: path, URI: uri}, true, nil
}

// Read returns a map of asset name to its associated reader object (which can be used to perform reads/IO).
func (a *Archive) Read() (map[string]*Blob, error) {
	if a.IsAssets() {
		return a.readAssets()
	} else if a.IsPath() {
		return a.readPath()
	} else if a.IsURI() {
		return a.readURI()
	}
	contract.Failf("Invalid archive; one of Assets, Path, or URI must be non-nil")
	return nil, nil
}

func (a *Archive) readAssets() (map[string]*Blob, error) {
	// To read a map-based archive, just produce a map from each asset to its associated reader.
	m, isassets := a.GetAssets()
	contract.Assertf(isassets, "Expected an asset map-based archive")

	result := map[string]*Blob{}
	for name, asset := range m {
		switch t := asset.(type) {
		case *Asset:
			// An asset can be added directly to the result.
			blob, err := t.Read()
			if err != nil {
				return nil, errors.Wrapf(err, "failed to expand archive asset '%v'", name)
			}
			result[name] = blob
		case *Archive:
			// An archive must be recursively walked in order to turn it into a flat result map.
			subs, err := t.Read()
			if err != nil {
				return nil, errors.Wrapf(err, "failed to expand sub-archive '%v'", name)
			}
			for sub, blob := range subs {
				result[filepath.Join(name, sub)] = blob
			}
		}
	}
	return result, nil
}

func (a *Archive) readPath() (map[string]*Blob, error) {
	// To read a path-based archive, read that file and use its extension to ascertain what format to use.
	path, ispath := a.GetPath()
	contract.Assertf(ispath, "Expected a path-based asset")

	format := detectArchiveFormat(path)
	if format == NotArchive {
		// If not an archive, it could be a directory; if so, simply expand it out uncompressed as an archive.
		info, err := os.Stat(path)
		if err != nil {
			return nil, errors.Wrapf(err, "couldn't read archive path '%v'", path)
		} else if !info.IsDir() {
			// Note: err is nil in this branch, so we report a fresh error rather than wrapping it.
			return nil, errors.Errorf("'%v' is neither a recognized archive type nor a directory", path)
		}

		results := make(map[string]*Blob)
		if walkerr := filepath.Walk(path, func(filePath string, f os.FileInfo, fileerr error) error {
			// If there was an error, exit.
			if fileerr != nil {
				return fileerr
			}

			// If this was a directory, skip it; if it was the .pulumi directory, skip its entire subtree.
			// TODO[pulumi/pulumi#122]: when we support .pulumiignore, this will be customizable.
			if f.IsDir() {
				if f.Name() == workspace.Dir {
					return filepath.SkipDir
				}
				return nil
			}

			// Skip symlinks as well.
			if f.Mode()&os.ModeSymlink != 0 {
				return nil
			}

			// Otherwise, add this asset to the list of blobs, and keep going.
			blob, err := (&Asset{Path: filePath}).Read()
			if err != nil {
				return err
			}

			// Crop the filePath so that it is relative to the root path, and put the blob into the map.
			filePath, err = filepath.Rel(path, filePath)
			if err != nil {
				return err
			}
			filePath = filepath.Clean(filePath)
			contract.Assertf(results[filePath] == nil,
				"Unexpected duplicate blob in map: path=%v filePath=%v", path, filePath)
			results[filePath] = blob
			return nil
		}); walkerr != nil {
			return nil, walkerr
		}
		return results, nil
	}

	// Otherwise, it's an archive file, and we will go ahead and open it up and read it.
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	return readArchive(file, format)
}

func (a *Archive) readURI() (map[string]*Blob, error) {
	// To read a URI-based archive, fetch the contents remotely and use the extension to pick the format to use.
	url, isurl, err := a.GetURIURL()
	if err != nil {
		return nil, err
	}
	contract.Assertf(isurl, "Expected a URI-based asset")

	format := detectArchiveFormat(url.Path)
	if format == NotArchive {
		// IDEA: support (a) hints and (b) custom providers that default to certain formats.
		return nil, errors.Errorf("file at URL '%v' is not a recognized archive format", url)
	}
	ar, err := a.openURLStream(url)
	if err != nil {
		return nil, err
	}
	return readArchive(ar, format)
}

func (a *Archive) openURLStream(url *url.URL) (io.ReadCloser, error) {
	switch s := url.Scheme; s {
	case "http", "https":
		resp, err := http.Get(url.String())
		if err != nil {
			return nil, err
		}
		return resp.Body, nil
	case "file":
		contract.Assert(url.Host == "")
		contract.Assert(url.User == nil)
		contract.Assert(url.RawQuery == "")
		contract.Assert(url.Fragment == "")
		return os.Open(url.Path)
	default:
		return nil, errors.Errorf("Unrecognized or unsupported URI scheme: %v", s)
	}
}

// Bytes fetches the archive contents as a byte slice. This is almost certainly the least efficient way to deal with
// the underlying streaming capabilities offered by assets and archives, but can be used in a pinch to interact with
// APIs that demand []byte.
func (a *Archive) Bytes(format ArchiveFormat) ([]byte, error) {
	var data bytes.Buffer
	if err := a.Archive(format, &data); err != nil {
		return nil, err
	}
	return data.Bytes(), nil
}
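
// For example, a caller handing a ZIP payload to an API that only accepts raw bytes might write
// data, err := a.Bytes(ZIPArchive); streaming consumers should prefer Archive(format, w) instead,
// which avoids buffering the entire archive in memory.
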
// Archive produces a single archive stream in the desired format. It prefers to return the archive with as little
// copying as is feasible; however, if the desired format differs from the source, it will need to translate.
func (a *Archive) Archive(format ArchiveFormat, w io.Writer) error {
	// If the source format is the same, just return that.
	if sf, ss, err := a.ReadSourceArchive(); sf != NotArchive && sf == format {
		if err != nil {
			return err
		}
		_, err := io.Copy(w, ss)
		return err
	}

	switch format {
	case TarArchive:
		return a.archiveTar(w)
	case TarGZIPArchive:
		return a.archiveTarGZIP(w)
	case ZIPArchive:
		return a.archiveZIP(w)
	default:
		contract.Failf("Illegal archive type: %v", format)
		return nil
	}
}

func (a *Archive) archiveTar(w io.Writer) error {
	// Read the archive.
	arch, err := a.Read()
	if err != nil {
		return err
	}
	defer func() {
		// Ensure we close all files before exiting this function, no matter the outcome.
		for _, blob := range arch {
			contract.IgnoreClose(blob)
		}
	}()

	// Sort the file names so we emit in a deterministic order.
	var files []string
	for file := range arch {
		files = append(files, file)
	}
	sort.Strings(files)

	// Now actually emit the contents, file by file.
	tw := tar.NewWriter(w)
	for _, file := range files {
		data := arch[file]
		sz := data.Size()
		if err := tw.WriteHeader(&tar.Header{
			Name: file,
			Mode: 0600,
			Size: sz,
		}); err != nil {
			return err
		}
		n, err := io.Copy(tw, data)
		if err != nil {
			return err
		}
		contract.Assert(n == sz)
	}
	return tw.Close()
}

func (a *Archive) archiveTarGZIP(w io.Writer) error {
	z := gzip.NewWriter(w)
	if err := a.archiveTar(z); err != nil {
		return err
	}
	// Close the gzip writer so its buffered data and stream footer are flushed to w.
	return z.Close()
}

func (a *Archive) archiveZIP(w io.Writer) error {
	// Read the archive.
	arch, err := a.Read()
	if err != nil {
		return err
	}
	defer func() {
		// Ensure we close all files before exiting this function, no matter the outcome.
		for _, blob := range arch {
			contract.IgnoreClose(blob)
		}
	}()

	// Sort the file names so we emit in a deterministic order.
	var files []string
	for file := range arch {
		files = append(files, file)
	}
	sort.Strings(files)

	// Now actually emit the contents, file by file.
	zw := zip.NewWriter(w)
	for _, file := range files {
		fw, err := zw.Create(file)
		if err != nil {
			return err
		}
		if _, err = io.Copy(fw, arch[file]); err != nil {
			return err
		}
	}
	return zw.Close()
}

// ReadSourceArchive returns a stream to the underlying archive, if there is one.
func (a *Archive) ReadSourceArchive() (ArchiveFormat, io.ReadCloser, error) {
	if path, ispath := a.GetPath(); ispath {
		if format := detectArchiveFormat(path); format != NotArchive {
			f, err := os.Open(path)
			return format, f, err
		}
	} else if url, isurl, urlerr := a.GetURIURL(); urlerr == nil && isurl {
		if format := detectArchiveFormat(url.Path); format != NotArchive {
			s, err := a.openURLStream(url)
			return format, s, err
		}
	}
	return NotArchive, nil, nil
}

// EnsureHash computes the SHA256 hash of the archive's contents and stores it on the object.
func (a *Archive) EnsureHash() error {
	if a.Hash == "" {
		hash := sha256.New()

		// Attempt to compute the hash in the most efficient way. First try to open the archive directly and copy it
		// to the hash. This avoids traversing any of the contents and just treats it as a byte stream.
		f, r, err := a.ReadSourceArchive()
		if err != nil {
			return err
		}
		if f != NotArchive && r != nil {
			defer contract.IgnoreClose(r)
			_, err = io.Copy(hash, r)
			if err != nil {
				return err
			}
		} else {
			// Otherwise, it's not an archive; we'll need to transform it into one. Pick tar since it avoids
			// any superfluous compression which doesn't actually help us in this situation.
			err := a.Archive(TarArchive, hash)
			if err != nil {
				return err
			}
		}

		// Finally, encode the resulting hash as a string and we're done.
		a.Hash = hex.EncodeToString(hash.Sum(nil))
	}
	return nil
}

// ArchiveFormat indicates what archive and/or compression format an archive uses.
type ArchiveFormat int

const (
	NotArchive     ArchiveFormat = iota // not an archive.
	TarArchive                          // a POSIX tar archive.
	TarGZIPArchive                      // a POSIX tar archive that has been subsequently compressed using GZip.
	ZIPArchive                          // a multi-file ZIP archive.
)

// ArchiveExts maps from a file extension to its associated archive and/or compression format.
var ArchiveExts = map[string]ArchiveFormat{
	".tar":    TarArchive,
	".tgz":    TarGZIPArchive,
	".tar.gz": TarGZIPArchive,
	".zip":    ZIPArchive,
}

// detectArchiveFormat takes a path and infers its archive format based on the file extension: for example,
// "app.tar" yields TarArchive, "app.tgz" and "app.tar.gz" yield TarGZIPArchive, and "app.txt" yields NotArchive.
func detectArchiveFormat(path string) ArchiveFormat {
	ext := filepath.Ext(path)
	format, has := ArchiveExts[ext]
	if !has {
		// Fall back to a compound extension; this ensures we detect ".tar.gz" correctly.
		if moreext := filepath.Ext(strings.TrimSuffix(path, ext)); moreext != "" {
			format, has = ArchiveExts[moreext+ext]
		}
	}
	if !has {
		return NotArchive
	}
	return format
}

// readArchive takes a stream to an existing archive and returns a map of names to readers for the inner assets.
// The routine returns an error if something goes wrong and, no matter what, closes the stream before returning.
func readArchive(ar io.ReadCloser, format ArchiveFormat) (map[string]*Blob, error) {
	defer contract.IgnoreClose(ar) // consume the input stream
	switch format {
	case TarArchive:
		return readTarArchive(ar)
	case TarGZIPArchive:
		return readTarGZIPArchive(ar)
	case ZIPArchive:
		// Unfortunately, the ZIP archive reader requires ReaderAt functionality. If it's a file, we can recover this
		// with a simple stat. Otherwise, we will need to go ahead and make a copy in memory.
		var ra io.ReaderAt
		var sz int64
		if f, isf := ar.(*os.File); isf {
			stat, err := f.Stat()
			if err != nil {
				return nil, err
			}
			ra = f
			sz = stat.Size()
		} else if data, err := ioutil.ReadAll(ar); err != nil {
			return nil, err
		} else {
			ra = bytes.NewReader(data)
			sz = int64(len(data))
		}
		return readZIPArchive(ra, sz)
	default:
		contract.Failf("Illegal archive type: %v", format)
		return nil, nil
	}
}

func readTarArchive(ar io.ReadCloser) (map[string]*Blob, error) {
	defer contract.IgnoreClose(ar) // consume the input stream

	// Create a tar reader and walk through each file, adding each one to the map.
	assets := make(map[string]*Blob)
	tr := tar.NewReader(ar)
	for {
		file, err := tr.Next()
		if err == io.EOF {
			break
		} else if err != nil {
			return nil, err
		}
		switch file.Typeflag {
		case tar.TypeDir:
			continue // skip directories
		case tar.TypeReg:
			data := make([]byte, file.Size)
			// Use io.ReadFull, since a single Read is not guaranteed to fill the buffer.
			n, err := io.ReadFull(tr, data)
			if err != nil {
				return nil, err
			}
			contract.Assert(int64(n) == file.Size)
			name := filepath.Clean(file.Name)
			assets[name] = NewByteBlob(data)
		default:
			contract.Failf("Unrecognized tar header typeflag: %v", file.Typeflag)
		}
	}
	return assets, nil
}

func readTarGZIPArchive(ar io.ReadCloser) (map[string]*Blob, error) {
	defer contract.IgnoreClose(ar) // consume the input stream

	// First decompress the GZIP stream.
	gz, err := gzip.NewReader(ar)
	if err != nil {
		return nil, err
	}

	// Now read the tarfile.
	return readTarArchive(gz)
}

func readZIPArchive(ar io.ReaderAt, size int64) (map[string]*Blob, error) {
	// Create a ZIP reader and iterate over the files inside of it, adding each one.
	assets := make(map[string]*Blob)
	z, err := zip.NewReader(ar, size)
	if err != nil {
		return nil, errors.Wrap(err, "failed to read ZIP")
	}
	for _, file := range z.File {
		// Skip directories, since they aren't included in TAR and other archives above.
		if file.FileInfo().IsDir() {
			continue
		}
		body, err := file.Open()
		if err != nil {
			return nil, errors.Wrapf(err, "failed to read ZIP inner file %v", file.Name)
		}
		defer contract.IgnoreClose(body)
		size := file.UncompressedSize64
		data := make([]byte, size)
		n, err := body.Read(data)
		if err != nil && err != io.EOF {
			return nil, errors.Wrapf(err, "unexpected early ZIP termination %v", file.Name)
		}
		contract.Assert(uint64(n) == size)
		name := filepath.Clean(file.Name)
		assets[name] = NewByteBlob(data)
	}
	return assets, nil
}
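
As a companion to the tests mentioned in the commit message, the following sketch (hypothetical, not the repository's actual test file; it lives in package resource so it can reach the unexported readArchive helper) round-trips an asset-map archive through the tar, tgz, and zip writers:

	package resource

	import (
		"bytes"
		"io/ioutil"
		"testing"
	)

	func TestArchiveRoundTrip(t *testing.T) {
		// Build an asset-map archive containing a nested sub-archive.
		text, err := NewTextAsset("hello, world")
		if err != nil {
			t.Fatal(err)
		}
		sub, err := NewAssetArchive(map[string]interface{}{"inner.txt": text})
		if err != nil {
			t.Fatal(err)
		}
		arch, err := NewAssetArchive(map[string]interface{}{
			"outer.txt": text,
			"sub":       sub,
		})
		if err != nil {
			t.Fatal(err)
		}

		// Serialize to each format and read the results back.
		for _, format := range []ArchiveFormat{TarArchive, TarGZIPArchive, ZIPArchive} {
			data, err := arch.Bytes(format)
			if err != nil {
				t.Fatal(err)
			}
			blobs, err := readArchive(ioutil.NopCloser(bytes.NewReader(data)), format)
			if err != nil {
				t.Fatal(err)
			}
			// Every format should surface the same cleaned, root-relative names
			// (assuming a Unix-style path separator).
			for _, name := range []string{"outer.txt", "sub/inner.txt"} {
				if blobs[name] == nil {
					t.Errorf("format %v: missing %v", format, name)
				}
			}
		}
	}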