// Copyright 2016-2017, Pulumi Corporation. All rights reserved.

package resource

import (
	"archive/tar"
	"archive/zip"
	"bytes"
	"compress/gzip"
	"io"
	"io/ioutil"
	"net/http"
	"net/url"
	"os"
	"path/filepath"
	"sort"
	"strings"

	"github.com/pkg/errors"

	"github.com/pulumi/pulumi-fabric/pkg/compiler/types/predef"
	"github.com/pulumi/pulumi-fabric/pkg/eval/rt"
	"github.com/pulumi/pulumi-fabric/pkg/util/contract"
)

// Asset is a serialized asset reference. It is a union: thus, only one of its fields will be non-nil. Several helper
// routines exist as members in order to easily interact with the assets referenced by an instance of this type.
type Asset struct {
	Sig  string `json:"4dabf18193072939515e22adb298388d"` // the unique asset signature (see properties.go).
	Text string `json:"text,omitempty"`                    // a textual asset.
	Path string `json:"path,omitempty"`                    // a file on the current filesystem.
	URI  string `json:"uri,omitempty"`                     // a URI (file://, http://, https://, or custom).
}

const (
	AssetSig          = "c44067f5952c0a294b673a41bacd8c17" // a randomly assigned type hash for assets.
	AssetTextProperty = "text"                             // the dynamic property for an asset's text.
	AssetPathProperty = "path"                             // the dynamic property for an asset's path.
	AssetURIProperty  = "uri"                              // the dynamic property for an asset's URI.
)

func NewTextAsset(text string) Asset { return Asset{Sig: AssetSig, Text: text} }
func NewPathAsset(path string) Asset { return Asset{Sig: AssetSig, Path: path} }
func NewURIAsset(uri string) Asset   { return Asset{Sig: AssetSig, URI: uri} }
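
// The three constructors above are the supported ways to create an Asset by hand. A minimal usage sketch, with
// purely illustrative literal values (none of them come from this package):
//
//	text := NewTextAsset("hello, world")                  // constant, in-memory contents
//	file := NewPathAsset("/tmp/handler/index.js")         // a file on the local filesystem
//	remote := NewURIAsset("https://example.com/data.js")  // file://, http://, https://, or a custom scheme such as s3://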

func NewAssetFromObject(obj *rt.Object) Asset {
	contract.Assert(predef.IsResourceAssetType(obj.Type()))
	props := obj.Properties()
	var text string
	if prop, has := props.TryGet(AssetTextProperty); has {
		text = prop.StringValue()
	}
	var path string
	if prop, has := props.TryGet(AssetPathProperty); has {
		path = prop.StringValue()
	}
	var uri string
	if prop, has := props.TryGet(AssetURIProperty); has {
		uri = prop.StringValue()
	}
	return Asset{Text: text, Path: path, URI: uri}
}

func (a Asset) IsText() bool { return a.Text != "" }
func (a Asset) IsPath() bool { return a.Path != "" }
func (a Asset) IsURI() bool  { return a.URI != "" }

func (a Asset) GetText() (string, bool) {
	if a.IsText() {
		return a.Text, true
	}
	return "", false
}

func (a Asset) GetPath() (string, bool) {
	if a.IsPath() {
		return a.Path, true
	}
	return "", false
}

func (a Asset) GetURI() (string, bool) {
	if a.IsURI() {
		return a.URI, true
	}
	return "", false
}

// GetURIURL returns the underlying URI as a parsed URL, provided it is one. If there was an error parsing the URI, it
// will be returned as a non-nil error object.
func (a Asset) GetURIURL() (*url.URL, bool, error) {
	if uri, isuri := a.GetURI(); isuri {
		url, err := url.Parse(uri)
		if err != nil {
			return nil, true, err
		}
		return url, true, nil
	}
	return nil, false, nil
}

// Equals returns true if a is value-equal to other.
func (a Asset) Equals(other Asset) bool {
	return a.Text == other.Text && a.Path == other.Path && a.URI == other.URI
}

// Serialize returns a weakly typed map that contains the right signature for serialization purposes.
func (a Asset) Serialize() map[string]interface{} {
	return map[string]interface{}{
		string(SigKey):    AssetSig,
		AssetTextProperty: a.Text,
		AssetPathProperty: a.Path,
		AssetURIProperty:  a.URI,
	}
}

// DeserializeAsset checks to see if the map contains an asset, using its signature, and if so deserializes it.
func DeserializeAsset(obj map[string]interface{}) (Asset, bool) {
	if obj[string(SigKey)] != AssetSig {
		return Asset{}, false
	}
	var text string
	if v, has := obj[AssetTextProperty]; has {
		text = v.(string)
	}
	var path string
	if v, has := obj[AssetPathProperty]; has {
		path = v.(string)
	}
	var uri string
	if v, has := obj[AssetURIProperty]; has {
		uri = v.(string)
	}
	return Asset{
		Text: text,
		Path: path,
		URI:  uri,
	}, true
}
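
// Serialize and DeserializeAsset are intended to round-trip through the weakly typed map form. A quick illustrative
// sketch of the expected behavior:
//
//	m := NewTextAsset("hello").Serialize() // a map carrying the asset signature plus text/path/uri entries
//	a, ok := DeserializeAsset(m)           // ok == true, a.Text == "hello"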

// Read reads an asset's contents into memory.
func (a Asset) Read() (*Blob, error) {
	if a.IsText() {
		return a.readText()
	} else if a.IsPath() {
		return a.readPath()
	} else if a.IsURI() {
		return a.readURI()
	}
	contract.Failf("Invalid asset; one of Text, Path, or URI must be non-nil")
	return nil, nil
}

func (a Asset) readText() (*Blob, error) {
	text, istext := a.GetText()
	contract.Assertf(istext, "Expected a text-based asset")
	return NewByteBlob([]byte(text)), nil
}

func (a Asset) readPath() (*Blob, error) {
	path, ispath := a.GetPath()
	contract.Assertf(ispath, "Expected a path-based asset")
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	return NewFileBlob(f)
}

func (a Asset) readURI() (*Blob, error) {
	url, isurl, err := a.GetURIURL()
	if err != nil {
		return nil, err
	}
	contract.Assertf(isurl, "Expected a URI-based asset")
	switch s := url.Scheme; s {
	case "http", "https":
		resp, err := http.Get(url.String())
		if err != nil {
			return nil, err
		}
		return NewReadCloserBlob(resp.Body)
	case "file":
		contract.Assert(url.Host == "")
		contract.Assert(url.User == nil)
		contract.Assert(url.RawQuery == "")
		contract.Assert(url.Fragment == "")
		f, err := os.Open(url.Path)
		if err != nil {
			return nil, err
		}
		return NewFileBlob(f)
	default:
		return nil, errors.Errorf("Unrecognized or unsupported URI scheme: %v", s)
	}
}
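
// Whichever union field is set, callers read contents through Read and get back a *Blob. A minimal sketch (the path
// and the surrounding error handling are illustrative):
//
//	blob, err := NewPathAsset("/tmp/handler/index.js").Read()
//	if err != nil {
//		return err
//	}
//	defer contract.IgnoreClose(blob)
//	data, err := ioutil.ReadAll(blob) // Blob implements io.Reader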

// SeekableReadCloser combines Read, Close, and Seek functionality into one interface.
type SeekableReadCloser interface {
	io.Seeker
	io.ReadCloser
}

// Blob is a blob that implements ReadCloser, Seek, and offers Size functionality.
type Blob struct {
	rd SeekableReadCloser // an underlying reader.
	sz int64              // the size of the blob.
}

func (blob *Blob) Close() error                                 { return blob.rd.Close() }
func (blob *Blob) Read(p []byte) (int, error)                   { return blob.rd.Read(p) }
func (blob *Blob) Reader() SeekableReadCloser                   { return blob.rd }
func (blob *Blob) Seek(offset int64, whence int) (int64, error) { return blob.rd.Seek(offset, whence) }
func (blob *Blob) Size() int64                                  { return blob.sz }
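
// Blob exists so that callers get one wrapper that is readable, closable, seekable, and knows its size up front,
// which is what seek-hungry SDKs (such as the AWS ones) tend to require. A tiny illustrative sketch:
//
//	blob := NewByteBlob([]byte("hello"))
//	_, _ = blob.Seek(0, io.SeekStart) // rewind before handing the blob to an uploader
//	size := blob.Size()               // 5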

// NewByteBlob creates a new byte blob.
func NewByteBlob(data []byte) *Blob {
	return &Blob{
		rd: newBytesReader(data),
		sz: int64(len(data)),
	}
}

// NewFileBlob creates a new asset blob whose size is known thanks to stat.
func NewFileBlob(f *os.File) (*Blob, error) {
	stat, err := f.Stat()
	if err != nil {
		return nil, err
	}
	return &Blob{
		rd: f,
		sz: stat.Size(),
	}, nil
}

// NewReadCloserBlob turns any old ReadCloser into a Blob, usually by making a copy.
func NewReadCloserBlob(r io.ReadCloser) (*Blob, error) {
	if f, isf := r.(*os.File); isf {
		// If it's a file, we can "fast path" the asset creation without making a copy.
		return NewFileBlob(f)
	}
	// Otherwise, read it all in, and create a blob out of that.
	defer contract.IgnoreClose(r)
	data, err := ioutil.ReadAll(r)
	if err != nil {
		return nil, err
	}
	return NewByteBlob(data), nil
}

// bytesReader turns a *bytes.Reader into a SeekableReadCloser by adding an empty Close method.
type bytesReader struct {
	*bytes.Reader
}

func newBytesReader(b []byte) SeekableReadCloser {
	return bytesReader{
		Reader: bytes.NewReader(b),
	}
}

func (b bytesReader) Close() error {
	return nil // intentionally blank
}

// Archive is a serialized archive reference. It is a union: thus, only one of its fields will be non-nil. Several
// helper routines exist as members in order to easily interact with archives of different kinds.
type Archive struct {
	Sig    string           `json:"4dabf18193072939515e22adb298388d"` // the unique archive signature (see properties.go).
	Assets map[string]Asset `json:"assets,omitempty"`                 // a collection of other assets.
	Path   string           `json:"path,omitempty"`                   // a file on the current filesystem.
	URI    string           `json:"uri,omitempty"`                    // a remote URI (file://, http://, https://, etc).
}

const (
	ArchiveSig            = "0def7320c3a5731c473e5ecbe6d01bc7" // a randomly assigned archive type signature.
	ArchiveAssetsProperty = "assets"                           // the dynamic property for an archive's assets.
	ArchivePathProperty   = "path"                             // the dynamic property for an archive's path.
	ArchiveURIProperty    = "uri"                              // the dynamic property for an archive's URI.
)

func NewAssetArchive(assets map[string]Asset) Archive { return Archive{Sig: ArchiveSig, Assets: assets} }
func NewPathArchive(path string) Archive              { return Archive{Sig: ArchiveSig, Path: path} }
func NewURIArchive(uri string) Archive                { return Archive{Sig: ArchiveSig, URI: uri} }
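
// An asset-map archive simply bundles named Assets together; how it gets materialized (zipfile, tarball, etc.) is
// up to whoever consumes it. A minimal sketch with illustrative names and paths:
//
//	archive := NewAssetArchive(map[string]Asset{
//		"index.js":     NewPathAsset("./handler/index.js"),
//		"package.json": NewTextAsset("{}"),
//	})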

func NewArchiveFromObject(obj *rt.Object) Archive {
	contract.Assert(predef.IsResourceArchiveType(obj.Type()))
	props := obj.Properties()
	var assets map[string]Asset
	if prop, has := props.TryGet(ArchiveAssetsProperty); has {
		assets = make(map[string]Asset)
		mapprops := prop.Properties()
		for _, k := range mapprops.Stable() {
			assets[string(k)] = NewAssetFromObject(mapprops.Get(k))
		}
	}
	var path string
	if prop, has := props.TryGet(ArchivePathProperty); has {
		path = prop.StringValue()
	}
	var uri string
	if prop, has := props.TryGet(ArchiveURIProperty); has {
		uri = prop.StringValue()
	}
	return Archive{Assets: assets, Path: path, URI: uri}
}

func (a Archive) IsAssets() bool { return a.Assets != nil }
func (a Archive) IsPath() bool   { return a.Path != "" }
func (a Archive) IsURI() bool    { return a.URI != "" }

func (a Archive) GetAssets() (map[string]Asset, bool) {
	if a.IsAssets() {
		return a.Assets, true
	}
	return nil, false
}

func (a Archive) GetPath() (string, bool) {
	if a.IsPath() {
		return a.Path, true
	}
	return "", false
}

func (a Archive) GetURI() (string, bool) {
	if a.IsURI() {
		return a.URI, true
	}
	return "", false
}

// GetURIURL returns the underlying URI as a parsed URL, provided it is one. If there was an error parsing the URI, it
// will be returned as a non-nil error object.
func (a Archive) GetURIURL() (*url.URL, bool, error) {
	if uri, isuri := a.GetURI(); isuri {
		url, err := url.Parse(uri)
		if err != nil {
			return nil, true, err
		}
		return url, true, nil
	}
	return nil, false, nil
}

// Equals returns true if a is value-equal to other.
func (a Archive) Equals(other Archive) bool {
	if a.Assets != nil {
		if other.Assets == nil {
			return false
		}
		if len(a.Assets) != len(other.Assets) {
			return false
		}
		for key, value := range a.Assets {
			if other.Assets[key] != value {
				return false
			}
		}
	} else if other.Assets != nil {
		return false
	}
	return a.Path == other.Path && a.URI == other.URI
}

// Serialize returns a weakly typed map that contains the right signature for serialization purposes.
func (a Archive) Serialize() map[string]interface{} {
	var assets map[string]interface{}
	if a.Assets != nil {
		assets = make(map[string]interface{})
		for k, v := range a.Assets {
			assets[k] = v.Serialize()
		}
	}
	return map[string]interface{}{
		string(SigKey):        ArchiveSig,
		ArchiveAssetsProperty: assets,
		ArchivePathProperty:   a.Path,
		ArchiveURIProperty:    a.URI,
	}
}

// DeserializeArchive checks to see if the map contains an archive, using its signature, and if so deserializes it.
func DeserializeArchive(obj map[string]interface{}) (Archive, bool) {
	if obj[string(SigKey)] != ArchiveSig {
		return Archive{}, false
	}
	var assets map[string]Asset
	if v, has := obj[ArchiveAssetsProperty]; has {
		assets = make(map[string]Asset)
		for k, v := range v.(map[string]interface{}) {
			switch t := v.(type) {
			case Asset:
				assets[k] = t
			case map[string]interface{}:
				a, isa := DeserializeAsset(t)
				if !isa {
					return Archive{}, false
				}
				assets[k] = a
			default:
				return Archive{}, false
			}
		}
	}
	var path string
	if v, has := obj[ArchivePathProperty]; has {
		path = v.(string)
	}
	var uri string
	if v, has := obj[ArchiveURIProperty]; has {
		uri = v.(string)
	}
	return Archive{
		Assets: assets,
		Path:   path,
		URI:    uri,
	}, true
}

// Read returns a map of asset name to its associated reader object (which can be used to perform reads/IO).
func (a Archive) Read() (map[string]*Blob, error) {
	if a.IsAssets() {
		return a.readAssets()
	} else if a.IsPath() {
		return a.readPath()
	} else if a.IsURI() {
		return a.readURI()
	}
	contract.Failf("Invalid archive; one of Assets, Path, or URI must be non-nil")
	return nil, nil
}
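
// As with Asset.Read, callers always get back a flat name-to-blob map regardless of which union field is set. A
// minimal sketch (the path is illustrative, and fmt is only used here for demonstration):
//
//	blobs, err := NewPathArchive("./app.zip").Read()
//	if err != nil {
//		return err
//	}
//	for name, blob := range blobs {
//		fmt.Printf("%v: %v bytes\n", name, blob.Size())
//	}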
|
|
|
|
|
2017-07-17 19:38:57 +02:00
|
|
|
func (a Archive) readAssets() (map[string]*Blob, error) {
|
Implement archives
Our initial implementation of assets was intentionally naive, because
they were limited to single-file assets. However, it turns out that for
real scenarios (like lambdas), we want to support multi-file assets.
In this change, we introduce the concept of an Archive. An archive is
what the term classically means: a collection of files, addressed as one.
For now, we support three kinds: tarfile archives (*.tar), gzip-compressed
tarfile archives (*.tgz, *.tar), and normal zipfile archives (*.zip).
There is a fair bit of library support for manipulating Archives as a
logical collection of Assets. I've gone to great length to avoid making
copies, however, sometimes it is unavoidable (for example, when sizes
are required in order to emit offsets). This is also complicated by the
fact that the AWS libraries often want seekable streams, if not actual
raw contiguous []byte slices.
2017-04-30 21:37:24 +02:00
|
|
|
// To read a map-based archive, just produce a map from each asset to its associated reader.
|
2017-07-17 19:38:57 +02:00
|
|
|
m, isassets := a.GetAssets()
|
|
|
|
contract.Assertf(isassets, "Expected an asset map-based archive")
|
2017-05-23 07:57:55 +02:00
|
|
|
result := map[string]*Blob{}
|
Implement archives
Our initial implementation of assets was intentionally naive, because
they were limited to single-file assets. However, it turns out that for
real scenarios (like lambdas), we want to support multi-file assets.
In this change, we introduce the concept of an Archive. An archive is
what the term classically means: a collection of files, addressed as one.
For now, we support three kinds: tarfile archives (*.tar), gzip-compressed
tarfile archives (*.tgz, *.tar), and normal zipfile archives (*.zip).
There is a fair bit of library support for manipulating Archives as a
logical collection of Assets. I've gone to great length to avoid making
copies, however, sometimes it is unavoidable (for example, when sizes
are required in order to emit offsets). This is also complicated by the
fact that the AWS libraries often want seekable streams, if not actual
raw contiguous []byte slices.
2017-04-30 21:37:24 +02:00
|
|
|
for name, asset := range m {
|
2017-08-02 18:25:22 +02:00
|
|
|
// TODO[pulumi/pulumi-fabric#240]: It would be better to treat folders as a first class concept intead
|
2017-06-12 19:15:20 +02:00
|
|
|
// of reusing a path Asset for this purpose.
|
|
|
|
path, isPath := asset.GetPath()
|
|
|
|
if isPath {
|
|
|
|
if fi, err := os.Stat(path); err == nil && fi.IsDir() {
|
|
|
|
// Asset is a folder, expand it
|
2017-06-21 22:24:35 +02:00
|
|
|
if walkerr := filepath.Walk(path, func(filePath string, f os.FileInfo, fileerr error) error {
|
|
|
|
if fileerr != nil || f.IsDir() || f.Mode()&os.ModeSymlink != 0 {
|
|
|
|
return fileerr
|
2017-06-12 19:15:20 +02:00
|
|
|
}
|
2017-06-21 22:24:35 +02:00
|
|
|
|
|
|
|
var err error
|
|
|
|
result[filePath], err = NewPathAsset(filePath).Read()
|
|
|
|
return err
|
|
|
|
}); walkerr != nil {
|
|
|
|
return nil, walkerr
|
2017-06-12 19:15:20 +02:00
|
|
|
}
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
2017-06-21 22:24:35 +02:00
|
|
|
var err error
|
Implement archives
Our initial implementation of assets was intentionally naive, because
they were limited to single-file assets. However, it turns out that for
real scenarios (like lambdas), we want to support multi-file assets.
In this change, we introduce the concept of an Archive. An archive is
what the term classically means: a collection of files, addressed as one.
For now, we support three kinds: tarfile archives (*.tar), gzip-compressed
tarfile archives (*.tgz, *.tar), and normal zipfile archives (*.zip).
There is a fair bit of library support for manipulating Archives as a
logical collection of Assets. I've gone to great length to avoid making
copies, however, sometimes it is unavoidable (for example, when sizes
are required in order to emit offsets). This is also complicated by the
fact that the AWS libraries often want seekable streams, if not actual
raw contiguous []byte slices.
2017-04-30 21:37:24 +02:00
|
|
|
if result[name], err = asset.Read(); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return result, nil
|
|
|
|
}
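
// To illustrate the folder expansion above (a sketch; the directory layout and map entry are hypothetical), an
// asset map entry that points at a directory
//
//	"code": NewPathAsset("handler")   // handler/index.js and handler/util.js exist on disk
//
// does not yield a single "code" blob. Instead, the directory is walked and the resulting map is keyed by the
// walked file paths, e.g. "handler/index.js" and "handler/util.js", with sub-directories and symlinks skipped.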

func (a Archive) readPath() (map[string]*Blob, error) {
	// To read a path-based archive, read that file and use its extension to ascertain what format to use.
	path, ispath := a.GetPath()
	contract.Assertf(ispath, "Expected a path-based asset")

	format, err := detectArchiveFormat(path)
	if err != nil {
		return nil, err
	}

	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}

	return readArchive(file, format)
}

func (a Archive) readURI() (map[string]*Blob, error) {
	// To read a URI-based archive, fetch the contents remotely and use the extension to pick the format to use.
	url, isurl, err := a.GetURIURL()
	if err != nil {
		return nil, err
	}
	contract.Assertf(isurl, "Expected a URI-based asset")

	format, err := detectArchiveFormat(url.Path)
	if err != nil {
		// IDEA: support (a) hints and (b) custom providers that default to certain formats.
		return nil, err
	}

	ar, err := a.openURLStream(url)
	if err != nil {
		return nil, err
	}
	return readArchive(ar, format)
}

func (a Archive) openURLStream(url *url.URL) (io.ReadCloser, error) {
	switch s := url.Scheme; s {
	case "http", "https":
		resp, err := http.Get(url.String())
		if err != nil {
			return nil, err
		}
		return resp.Body, nil
	case "file":
		contract.Assert(url.Host == "")
		contract.Assert(url.User == nil)
		contract.Assert(url.RawQuery == "")
		contract.Assert(url.Fragment == "")
		return os.Open(url.Path)
	default:
		return nil, errors.Errorf("Unrecognized or unsupported URI scheme: %v", s)
	}
}
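
// For illustration, the hypothetical URLs below show how each supported scheme is resolved by openURLStream:
//
//	https://example.com/code/handler.zip  -> streamed via http.Get
//	file:///tmp/build/handler.tar.gz      -> opened directly from the local filesystem
//	s3://bucket/handler.zip               -> rejected here with an "Unrecognized or unsupported URI scheme" error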

// Bytes fetches the archive contents as a single byte slice. This is almost certainly the least efficient way to
// deal with the underlying streaming capabilities offered by assets and archives, but can be used in a pinch to
// interact with APIs that demand []byte.
func (a Archive) Bytes(format ArchiveFormat) ([]byte, error) {
	var data bytes.Buffer
	if err := a.Archive(format, &data); err != nil {
		return nil, err
	}
	return data.Bytes(), nil
}
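
// A typical use (a sketch; the archive variable is hypothetical) is feeding an entire archive to an API that
// insists on a contiguous buffer:
//
//	pkg, err := codeArchive.Bytes(ZIPArchive)
//	if err != nil {
//		return err
//	}
//	// hand pkg to the SDK call that requires a []byte payload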

// Archive produces a single archive stream in the desired format. It prefers to return the archive with as little
// copying as is feasible; however, if the desired format differs from the source, it will need to translate.
func (a Archive) Archive(format ArchiveFormat, w io.Writer) error {
	// If the source format is the same, just return that.
	if sf, ss, err := a.ReadSourceArchive(); sf != NotArchive && sf == format {
		if err != nil {
			return err
		}
		_, err := io.Copy(w, ss)
		return err
	}

	switch format {
	case TarArchive:
		return a.archiveTar(w)
	case TarGZIPArchive:
		return a.archiveTarGZIP(w)
	case ZIPArchive:
		return a.archiveZIP(w)
	default:
		contract.Failf("Illegal archive type: %v", format)
		return nil
	}
}
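
// For example (a sketch; the output file and archive variable are hypothetical), an archive whose source happens
// to be a ZIP file can still be emitted as a gzip-compressed tarball, with the translation handled here:
//
//	out, err := os.Create("/tmp/app.tgz")
//	if err != nil {
//		return err
//	}
//	defer out.Close()
//	err = appArchive.Archive(TarGZIPArchive, out)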

func (a Archive) archiveTar(w io.Writer) error {
	// Read the archive.
	arch, err := a.Read()
	if err != nil {
		return err
	}
	defer (func() {
		// Ensure we close all files before exiting this function, no matter the outcome.
		for _, blob := range arch {
			contract.IgnoreClose(blob)
		}
	})()

	// Sort the file names so we emit in a deterministic order.
	var files []string
	for file := range arch {
		files = append(files, file)
	}
	sort.Strings(files)

	// Now actually emit the contents, file by file.
	tw := tar.NewWriter(w)
	for _, file := range files {
		data := arch[file]
		sz := data.Size()
		if err := tw.WriteHeader(&tar.Header{
			Name: file,
			Mode: 0600,
			Size: sz,
		}); err != nil {
			return err
		}
		n, err := io.Copy(tw, data)
		if err != nil {
			return err
		}
		contract.Assert(n == sz)
	}

	return tw.Close()
}
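
// Note on the loop above: the tar format needs each entry's size in its header before any content bytes are
// written, which is why the blob's Size() is consulted up front and the io.Copy byte count is asserted against it.
// A minimal stand-alone illustration using only the standard library:
//
//	var buf bytes.Buffer
//	tw := tar.NewWriter(&buf)
//	payload := []byte("hello")
//	_ = tw.WriteHeader(&tar.Header{Name: "hello.txt", Mode: 0600, Size: int64(len(payload))})
//	_, _ = tw.Write(payload)
//	_ = tw.Close()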

func (a Archive) archiveTarGZIP(w io.Writer) error {
	z := gzip.NewWriter(w)
	if err := a.archiveTar(z); err != nil {
		return err
	}
	// Close the gzip writer so that its footer is flushed to w; otherwise the stream would be truncated.
	return z.Close()
}

func (a Archive) archiveZIP(w io.Writer) error {
	// Read the archive.
	arch, err := a.Read()
	if err != nil {
		return err
	}
	defer (func() {
		// Ensure we close all files before exiting this function, no matter the outcome.
		for _, blob := range arch {
			contract.IgnoreClose(blob)
		}
	})()

	// Sort the file names so we emit in a deterministic order.
	var files []string
	for file := range arch {
		files = append(files, file)
	}
	sort.Strings(files)

	// Now actually emit the contents, file by file.
	zw := zip.NewWriter(w)
	for _, file := range files {
		fw, err := zw.Create(file)
		if err != nil {
			return err
		}
		if _, err = io.Copy(fw, arch[file]); err != nil {
			return err
		}
	}

	return zw.Close()
}

// ReadSourceArchive returns a stream to the underlying archive, if there is one.
func (a Archive) ReadSourceArchive() (ArchiveFormat, io.ReadCloser, error) {
	if path, ispath := a.GetPath(); ispath {
		if format, archerr := detectArchiveFormat(path); archerr == nil && format != NotArchive {
			f, err := os.Open(path)
			return format, f, err
		}
	} else if url, isurl, urlerr := a.GetURIURL(); urlerr == nil && isurl {
		if format, archerr := detectArchiveFormat(url.Path); archerr == nil && format != NotArchive {
			s, err := a.openURLStream(url)
			return format, s, err
		}
	}
	return NotArchive, nil, nil
}

// ArchiveFormat indicates what archive and/or compression format an archive uses.
type ArchiveFormat int

const (
	NotArchive     = iota // not an archive.
	TarArchive            // a POSIX tar archive.
	TarGZIPArchive        // a POSIX tar archive that has been subsequently compressed using GZip.
	ZIPArchive            // a multi-file ZIP archive.
)

// ArchiveExts maps from a file extension to its associated archive and/or compression format.
var ArchiveExts = map[string]ArchiveFormat{
	".tar":    TarArchive,
	".tgz":    TarGZIPArchive,
	".tar.gz": TarGZIPArchive,
	".zip":    ZIPArchive,
}

// detectArchiveFormat takes a path and infers its archive format based on the file extension.
func detectArchiveFormat(path string) (ArchiveFormat, error) {
	ext := filepath.Ext(path)
	if moreext := filepath.Ext(strings.TrimRight(path, ext)); moreext != "" {
		ext = moreext + ext // this ensures we detect ".tar.gz" correctly.
	}
	format, has := ArchiveExts[ext]
	if !has {
		return NotArchive, errors.Errorf("unrecognized archive format '%v'", ext)
	}
	return format, nil
}
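
// For example (illustrative paths), the second filepath.Ext pass above is what lets a double extension such as
// ".tar.gz" resolve correctly rather than failing on the unknown ".gz" suffix:
//
//	format, err := detectArchiveFormat("build/layers.tar.gz") // format == TarGZIPArchive, err == nil
//	format, err = detectArchiveFormat("build/layers.txt")     // format == NotArchive, err != nil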

// readArchive takes a stream to an existing archive and returns a map of names to readers for the inner assets.
// The routine returns an error if something goes wrong and, no matter what, closes the stream before returning.
func readArchive(ar io.ReadCloser, format ArchiveFormat) (map[string]*Blob, error) {
	defer contract.IgnoreClose(ar) // consume the input stream

	switch format {
	case TarArchive:
		return readTarArchive(ar)
	case TarGZIPArchive:
		return readTarGZIPArchive(ar)
	case ZIPArchive:
		// Unfortunately, the ZIP archive reader requires ReaderAt functionality. If it's a file, we can recover this
		// with a simple stat. Otherwise, we will need to go ahead and make a copy in memory.
		var ra io.ReaderAt
		var sz int64
		if f, isf := ar.(*os.File); isf {
			stat, err := f.Stat()
			if err != nil {
				return nil, err
			}
			ra = f
			sz = stat.Size()
		} else if data, err := ioutil.ReadAll(ar); err != nil {
			return nil, err
		} else {
			ra = bytes.NewReader(data)
			sz = int64(len(data))
		}
		return readZIPArchive(ra, sz)
	default:
		contract.Failf("Illegal archive type: %v", format)
		return nil, nil
	}
}
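
// The in-memory fallback above is the usual way to hand a non-seekable stream to archive/zip, since zip.NewReader
// needs random access plus the total size. A minimal sketch (the response body is hypothetical):
//
//	data, err := ioutil.ReadAll(resp.Body)
//	if err != nil {
//		return nil, err
//	}
//	zr, err := zip.NewReader(bytes.NewReader(data), int64(len(data)))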

func readTarArchive(ar io.ReadCloser) (map[string]*Blob, error) {
	defer contract.IgnoreClose(ar) // consume the input stream

	// Create a tar reader and walk through each file, adding each one to the map.
	assets := make(map[string]*Blob)
	tr := tar.NewReader(ar)
	for {
		file, err := tr.Next()
		if err == io.EOF {
			break
		} else if err != nil {
			return nil, err
		}

		switch file.Typeflag {
		case tar.TypeDir:
			continue // skip directories
		case tar.TypeReg:
			data := make([]byte, file.Size)
			n, err := io.ReadFull(tr, data) // a lone Read may stop short, so drain the entire entry
			if err != nil {
				return nil, err
			}
			contract.Assert(int64(n) == file.Size)
			assets[file.Name] = NewByteBlob(data)
		default:
			contract.Failf("Unrecognized tar header typeflag: %v", file.Typeflag)
		}
	}

	return assets, nil
}

func readTarGZIPArchive(ar io.ReadCloser) (map[string]*Blob, error) {
	defer contract.IgnoreClose(ar) // consume the input stream

	// First decompress the GZIP stream.
	gz, err := gzip.NewReader(ar)
	if err != nil {
		return nil, err
	}

	// Now read the tarfile.
	return readTarArchive(gz)
}

func readZIPArchive(ar io.ReaderAt, size int64) (map[string]*Blob, error) {
	// Create a ZIP reader and iterate over the files inside of it, adding each one.
	assets := make(map[string]*Blob)
	z, err := zip.NewReader(ar, size)
	if err != nil {
		return nil, err
	}
	for _, file := range z.File {
		body, err := file.Open()
		if err != nil {
			return nil, err
		}
		size := file.UncompressedSize64
		data := make([]byte, size)
		n, err := io.ReadFull(body, data) // a lone Read may stop short, so drain the entire entry
		contract.IgnoreClose(body)        // release the entry's reader regardless of the outcome
		if err != nil {
			return nil, err
		}
		contract.Assert(uint64(n) == size)
		assets[file.Name] = NewByteBlob(data)
	}
	return assets, nil
}