pulumi/sdk/nodejs/runtime/closure.ts
CyrusNajmabadi 36a692390d
Properly capture 'arguments' when creating our serialization closure. (#569)
* Simplify how we capture 'this' in our serialization logic.
* Properly capture 'arguments'

* add tests for 'arguments' capture.

* Properly serialize out 'arguments'
* Invert 'with' and function closure.
2017-11-15 11:31:17 -08:00

854 lines
35 KiB
TypeScript

// Copyright 2016-2017, Pulumi Corporation. All rights reserved.
import * as crypto from "crypto";
import { relative as pathRelative } from "path";
import * as ts from "typescript";
import * as log from "../log";
import { debuggablePromise } from "./debuggable";
const nativeruntime = require("./native/build/Release/nativeruntime.node");
/**
 * Closure represents the serialized form of a JavaScript serverless function: the text of the function, the
 * runtime it targets, and the already-resolved values of the variables it captured.
 */
export interface Closure {
code: string; // the function's source code, as text.
runtime: string; // the language runtime required to run the code; serializeJavaScriptText only accepts "nodejs".
environment: Environment; // captured variable name -> serialized value, if any.
}
/**
 * Environment is the captured lexical environment for a closure: a dictionary from the name of each captured
 * variable to the entry describing that variable's serialized value.
 */
export type Environment = {[key: string]: EnvironmentEntry};
/**
 * EnvironmentEntry is the environment slot for a named lexically captured variable. flattenEnvironmentEntry fills
 * in one of the fields below for each entry it produces, depending on what kind of value was captured.
 */
export interface EnvironmentEntry {
json?: any; // a value which can be safely json serialized (primitives, including null and undefined).
closure?: Closure; // a captured function, itself serialized as a closure we are dependent on.
obj?: Environment; // an object, property by property; values may contain nested closures.
arr?: EnvironmentEntry[]; // an array, element by element; elements may contain nested closures.
module?: string; // a module name or path that is emitted as a `require(...)` call.
}
/**
 * serializeClosure turns a function, together with the environment it closes over, into a plain structure that can
 * be persisted as simple JSON. As with toString, the result carries the full source text of the function; unlike
 * toString, it also records the values of the variables the function captured.
 *
 * @param func The function to serialize.
 * @returns A promise for the flattened, promise-free closure.
 */
export function serializeClosure(func: Function): Promise<Closure> {
    // Build the asynchronous form first. The whole dataflow graph has to exist before anything is awaited,
    // because captured values may reference each other cyclically and only resolve once the graph is complete.
    const asyncForm: AsyncClosure = serializeClosureAsync(func);

    // Flatten the graph into an ordinary closure. The cache is private to this call and lets entries that are
    // reached more than once (including through cycles) share a single flattened object.
    return flattenClosure(asyncForm, new Map<Promise<AsyncEnvironmentEntry>, EnvironmentEntry>());
}
/**
 * flattenClosure converts an AsyncClosure into a Closure by copying its code and runtime and awaiting the
 * flattened form of its environment.
 *
 * @param closure The asynchronous closure to flatten.
 * @param flatCache Entries that have already been (or are being) flattened, keyed by their promise.
 */
async function flattenClosure(
    closure: AsyncClosure,
    flatCache: Map<Promise<AsyncEnvironmentEntry>, EnvironmentEntry>): Promise<Closure> {

    const { code, runtime } = closure;
    const environment = await flattenEnvironment(closure.environment, flatCache);
    return { code, runtime, environment };
}
/**
 * flattenEnvironment flattens every slot of an asynchronous environment, one after another, and returns the
 * resulting name -> entry dictionary.
 *
 * @param env The asynchronous environment to flatten.
 * @param flatCache Entries that have already been (or are being) flattened, keyed by their promise.
 */
async function flattenEnvironment(
    env: AsyncEnvironment,
    flatCache: Map<Promise<AsyncEnvironmentEntry>, EnvironmentEntry>): Promise<Environment> {

    const flattened: Environment = {};
    const names = Object.keys(env);
    // Slots are awaited strictly in key order; each one is finished before the next is started.
    for (let i = 0; i < names.length; i++) {
        const name = names[i];
        flattened[name] = await flattenEnvironmentEntry(env[name], flatCache);
    }
    return flattened;
}
/**
 * flattenEnvironmentEntry awaits a single asynchronous environment entry and converts it, recursively, into its
 * promise-free form.
 *
 * @param entry The promise for the entry to flatten.
 * @param flatCache Entries that have already been (or are being) flattened, keyed by their promise.
 * @returns The flattened entry. For an entry seen before, this is the same object that was returned the first
 *     time, which may still be in the middle of being filled in.
 * @throws Error if the resolved entry is null/undefined or carries none of the known payload fields.
 */
async function flattenEnvironmentEntry(
    entry: Promise<AsyncEnvironmentEntry>,
    flatCache: Map<Promise<AsyncEnvironmentEntry>, EnvironmentEntry>): Promise<EnvironmentEntry> {

    // See if there's an entry for this object already; if there is, use it.
    const cached: EnvironmentEntry | undefined = flatCache.get(entry);
    if (cached) {
        return cached;
    }

    // Otherwise, we need to create a new one, add it to the cache before recursing, and then go.  Note that we
    // DO NOT add a promise for the entry!  We add the entry object itself, to avoid deadlocks in which mutually
    // recursive functions end up trying to resolve the same entry on the same callstack.
    const result: EnvironmentEntry = {};
    flatCache.set(entry, result);

    const e: AsyncEnvironmentEntry = await entry;
    if (e === undefined || e === null) {
        // Report a missing entry as malformed instead of crashing on a property access below.
        throw new Error(`Malformed flattened environment entry: ${e}`);
    }

    // 'json' is tested by presence rather than truthiness: null, undefined, 0, "" and false are all legitimate
    // captured values.  The check goes through Object.prototype so it also works for prototype-less objects.
    if (Object.prototype.hasOwnProperty.call(e, "json")) {
        result.json = e.json;
    }
    else if (e.module) {
        result.module = e.module;
    }
    else if (e.closure) {
        result.closure = await flattenClosure(e.closure, flatCache);
    }
    else if (e.obj) {
        result.obj = await flattenEnvironment(e.obj, flatCache);
    }
    else if (e.arr) {
        const arr: EnvironmentEntry[] = [];
        for (const elem of e.arr) {
            arr.push(await flattenEnvironmentEntry(elem, flatCache));
        }
        result.arr = arr;
    }
    else {
        // Interpolating the entry itself would only print "[object Object]"; list its keys so the message
        // actually says what was received.
        const keys = Object.keys(e).join(", ");
        throw new Error(
            `Malformed flattened environment entry: expected one of json/module/closure/obj/arr ` +
            `(own keys: [${keys}])`);
    }

    return result;
}
/**
 * AsyncClosure represents the eventual serialized form of a JavaScript serverless function. It has the same shape
 * as Closure, except that its environment holds promises for entries rather than the entries themselves.
 */
export interface AsyncClosure {
code: string; // the function's source code, as text.
runtime: string; // the language runtime required to execute the serialized code.
environment: AsyncEnvironment; // captured variable name -> promise for its serialized value, if any.
}
/**
 * AsyncEnvironment is the eventual captured lexical environment for a closure: a dictionary from captured variable
 * name to a promise for the entry describing that variable.
 */
export type AsyncEnvironment = {[key: string]: Promise<AsyncEnvironmentEntry>};
/**
 * AsyncEnvironmentEntry is the eventual environment slot for a named lexically captured variable. It mirrors
 * EnvironmentEntry, with nested closures, objects and arrays still in their asynchronous form.
 */
export interface AsyncEnvironmentEntry {
json?: any; // a value which can be safely json serialized.
closure?: AsyncClosure; // a captured function, as a (still asynchronous) closure we are dependent on.
obj?: AsyncEnvironment; // an object, property by property; values may contain nested closures.
arr?: Promise<AsyncEnvironmentEntry>[]; // an array, element by element; elements may contain nested closures.
module?: string; // a reference to a requirable module name.
}
/**
 * entryCache stores a map of captured value to the promise for its entry, to support mutually recursive captures.
 * serializeCapturedObject registers the promise here before the value is actually serialized, so that a value
 * reached again while it is still being processed resolves to the same promise instead of recursing forever.
 *
 * NOTE(review): the map lives at module level and nothing visible in this file removes entries, so results appear
 * to be shared across serializeClosure calls for the lifetime of the module -- confirm this is intended.
 */
const entryCache = new Map<Object, Promise<AsyncEnvironmentEntry>>();
/**
 * serializeClosureAsync builds the asynchronous dataflow graph that eventually resolves to a final closure.
 *
 * The actual work happens in the native runtime.  It is handed two callbacks: one that computes the free
 * variables of a function's source text, and one that serializes a captured value.  These must be callbacks,
 * rather than values computed up front, because the native side may need to invoke them recursively.
 *
 * N.B. Free variables are computed with the TypeScript parser.  The downside is that two parsers are now in
 * play (V8's and TypeScript's); the significant advantage is that V8's parser is not designed to be stable for
 * third-party consumption, so depending on it would be brittle and a maintenance challenge.  This approach also
 * avoids having to write a large amount of complex code in C++.
 *
 * @param func The function to serialize.
 */
function serializeClosureAsync(func: Function): AsyncClosure {
    const closure = nativeruntime.serializeClosure(
        func, computeFreeVariables, serializeCapturedObject) as AsyncClosure;
    return closure;
}
/**
 * serializeCapturedObject deeply serializes a captured value into a promise for an environment entry, reusing the
 * promise already recorded in entryCache when the same value has been seen before.
 *
 * @param obj The captured value.
 * @returns The (possibly shared) promise for the value's environment entry.
 */
function serializeCapturedObject(obj: any): Promise<AsyncEnvironmentEntry> {
    // A cache hit means this value is already serialized, or is in the middle of being serialized.
    const known = entryCache.get(obj);
    if (known) {
        return known;
    }

    // Cache miss.  Create the promise and publish it in the cache *before* doing any work, so that recursive
    // references back to this value pick up the pending promise.
    let settle: ((v: AsyncEnvironmentEntry) => void) | undefined = undefined;
    const pending: Promise<AsyncEnvironmentEntry> = debuggablePromise(
        new Promise<AsyncEnvironmentEntry>((resolve) => { settle = resolve; }));
    entryCache.set(obj, pending);

    // The promise executor has already run synchronously, so the resolver is available here.
    serializeCapturedObjectAsync(obj, settle!);
    return pending;
}
/**
 * serializeCapturedObjectAsync is the work-horse that actually performs object serialization.  It classifies the
 * captured value and hands the matching environment entry to `resolve`:
 *
 *   - undefined, null, booleans, numbers and strings become `{ json }`;
 *   - values recognized by findRequirableModuleName become `{ module }`;
 *   - arrays and `arguments` objects become `{ arr }`, with each element serialized recursively;
 *   - functions become `{ closure }`;
 *   - promises are awaited and their fulfilled value is serialized in their place;
 *   - anything else becomes `{ obj }`, built from its own enumerable properties.
 *
 * @param obj The captured value to serialize.
 * @param resolve Callback that receives the resulting entry.
 */
function serializeCapturedObjectAsync(obj: any, resolve: (v: AsyncEnvironmentEntry) => void): void {
    if (obj === undefined || obj === null ||
        typeof obj === "boolean" || typeof obj === "number" || typeof obj === "string") {
        // Serialize primitives as-is.
        return resolve({ json: obj });
    }

    // Only look for a requirable module once we know the value is not a primitive.  The lookup walks the whole
    // require cache, and its answer was never used for primitives, so doing it first was pure overhead.
    const moduleName = findRequirableModuleName(obj);
    if (moduleName) {
        // Serialize any value which was found as a requirable module name as a reference to the module
        return resolve({ module: moduleName });
    }

    // tslint:disable-next-line:max-line-length
    // From: https://stackoverflow.com/questions/7656280/how-do-i-check-whether-an-object-is-an-arguments-object-in-javascript
    if (obj instanceof Array ||
        Object.prototype.toString.call(obj) === "[object Arguments]") {
        // Recursively serialize elements of an array.
        const arr: Promise<AsyncEnvironmentEntry>[] = [];
        for (const elem of obj) {
            arr.push(serializeCapturedObject(elem));
        }
        return resolve({ arr: arr });
    }

    if (obj instanceof Function) {
        // Serialize functions recursively, and store them in a closure property.
        return resolve({ closure: serializeClosureAsync(obj) });
    }

    if (obj instanceof Promise) {
        // If this is a promise, we will await it and serialize the result instead.
        // NOTE(review): only fulfillment is observed here; if the captured promise rejects, `resolve` is never
        // called and the entry stays pending.
        obj.then((v) => serializeCapturedObjectAsync(v, resolve));
        return;
    }

    // For all other objects, serialize all of their enumerable properties (skipping non-enumerable members, etc).
    const env: AsyncEnvironment = {};
    for (const key of Object.keys(obj)) {
        env[key] = serializeCapturedObject(obj[key]);
    }
    resolve({ obj: env });
}
// Modules that ship with Node.js.  They can be loaded with `require(...)` but never appear in `require.cache`,
// and they are guaranteed to be available under the unqualified names below.  _Note_: the list was derived from
// the Node.js 6.x tree at: https://github.com/nodejs/node/tree/v6.x/lib
const builtInModuleNames = [
    "assert", "buffer", "child_process", "cluster", "console", "constants", "crypto",
    "dgram", "dns", "domain", "events", "fs", "http", "https", "module", "net", "os",
    "path", "process", "punycode", "querystring", "readline", "repl", "stream", "string_decoder",
    /* "sys" deprecated ,*/ "timers", "tls", "tty", "url", "util", "v8", "vm", "zlib",
];

// Reverse lookup from a built-in module's exports object to the name it is required by.
const builtInModules = new Map<any, string>();
builtInModuleNames.forEach((name) => {
    builtInModules.set(require(name), name);
});
/**
 * findRequirableModuleName attempts to find a requirable name bound to the object, which can be used as a stable
 * reference across serialization.
 *
 * @param obj The value to look up.
 * @returns The built-in module name, or a "./"-prefixed path relative to the current working directory for a
 *     module found in the require cache, or undefined if the value is not the exports of any known module.
 */
function findRequirableModuleName(obj: any): string | undefined {
    // Built-in modules take precedence.
    const builtInName = builtInModules.get(obj);
    if (builtInName) {
        return builtInName;
    }

    // Then consult the Node module require cache, which holds every non-built-in module loaded by the program
    // so far.  _Note_: this cannot be pre-computed, because the cache keeps growing as the program executes.
    const cachedPath = Object.keys(require.cache).find((path) => require.cache[path].exports === obj);
    if (cachedPath === undefined) {
        // No requirable name is available for this object.
        return undefined;
    }

    // Express the path as a local module reference relative to the current working directory, doubling any
    // backslashes in it.
    const relativePath = pathRelative(process.cwd(), cachedPath).replace(/\\/g, "\\\\");
    return "./" + relativePath;
}
/**
 * computeFreeVariables computes the set of free variables in a given function string.  The string is expected to
 * be the usual V8-serialized function expression text.
 *
 * @param funcstr The source text of the function.
 * @returns The names of the variables the function references but does not itself declare.
 * @throws Error if the text is that of a native function, or if it cannot be parsed.
 */
function computeFreeVariables(funcstr: string): string[] {
    log.debug(`Computing free variables for function: ${funcstr}`);

    // Native functions have no source to analyze.
    const isNative = funcstr.indexOf("[native code]") >= 0;
    if (isNative) {
        throw new Error(`Cannot serialize native code function: "${funcstr}"`);
    }

    const sourceFile = ts.createSourceFile(
        "", funcstr, ts.ScriptTarget.Latest, /*setParentNodes:*/ true, ts.ScriptKind.TS);

    // Surface the first syntax error, if the parser recorded any.
    const parseErrors: ts.Diagnostic[] = (<any>sourceFile).parseDiagnostics;
    if (parseErrors.length > 0) {
        const firstError = parseErrors[0];
        throw new Error(`Could not parse function: ${firstError.messageText}\n${funcstr}`);
    }

    // The text parsed cleanly; walk it to find the free variables.
    const computer = new FreeVariableComputer();
    const freeVariables = computer.compute(sourceFile);
    log.debug(`Found free variables: ${freeVariables}`);
    return freeVariables;
}
/** walkCallback is the signature of the tree-walking functions handed to the FreeVariableComputer visitors. */
type walkCallback = (node: ts.Node | undefined) => void;

/**
 * nodeModuleGlobals lists the identifiers that FreeVariableComputer treats as built-in in addition to the
 * properties of `global`.
 */
const nodeModuleGlobals: {[key: string]: boolean} = {
    __dirname: true,
    __filename: true,
    exports: true,
    module: true,
    require: true,
};
/**
 * FreeVariableComputer walks the TypeScript AST of a single function's source text and collects the names the
 * function references without declaring them itself.
 *
 * All name dictionaries used here are created without a prototype.  With ordinary `{}` objects, looking up a name
 * such as "toString", "constructor" or "valueOf" finds the member inherited from Object.prototype, which made
 * identifiers with those names look as if they were already in scope (or built-in) and silently dropped them from
 * the result.
 */
class FreeVariableComputer {
    private frees: {[key: string]: boolean}; // the in-progress list of free variables.
    private scope: {[key: string]: boolean}[]; // a chain of current scopes and variables.
    private functionVars: string[]; // list of function-scoped variables (vars).

    /** Creates an empty name dictionary that inherits nothing from Object.prototype. */
    private static newDictionary(): {[key: string]: boolean} {
        return Object.create(null);
    }

    private static isBuiltIn(ident: string): boolean {
        // Anything in the global dictionary is a built-in.  So is anything that's a global Node.js object;
        // note that these only exist in the scope of modules, and so are not truly global in the usual sense.
        // See https://nodejs.org/api/globals.html for more details.
        //
        // nodeModuleGlobals is a plain object, so only its *own* entries count; otherwise inherited members
        // such as "toString" would be reported as built-ins.
        return global.hasOwnProperty(ident) ||
            (Object.prototype.hasOwnProperty.call(nodeModuleGlobals, ident) && nodeModuleGlobals[ident]);
    }

    /**
     * Computes the free variables of the given parsed function text.
     *
     * @param program The parsed source file containing the function.
     * @returns The free variable names, excluding built-ins (see isBuiltIn).
     */
    public compute(program: ts.SourceFile): string[] {
        // Reset the state.
        this.frees = FreeVariableComputer.newDictionary();
        this.scope = [];
        this.functionVars = [];

        // Recurse through the tree.  We use typescript's AST here and generally walk the entire
        // tree.  One subtlety to be aware of is that we generally assume that when we hit an
        // identifier that it either introduces a new variable, or it lexically references a
        // variable.  This clearly doesn't make sense for *all* identifiers.  For example, if you
        // have "console.log" then "console" tries to lexically reference a variable, but "log" does
        // not.  So, to avoid that being an issue, we carefully decide when to recurse.  For
        // example, for member access expressions (i.e. A.B) we do not recurse down the right side.
        const walk = (node: ts.Node) => {
            if (!node) {
                return;
            }

            switch (node.kind) {
                case ts.SyntaxKind.Identifier:
                    return this.visitIdentifier(<ts.Identifier>node);
                case ts.SyntaxKind.ThisKeyword:
                    return this.visitThisExpression(<ts.PrimaryExpression>node);
                case ts.SyntaxKind.Block:
                    return this.visitBlockStatement(<ts.Block>node, walk);
                case ts.SyntaxKind.CatchClause:
                    return this.visitCatchClause(<ts.CatchClause>node, walk);
                case ts.SyntaxKind.CallExpression:
                    return this.visitCallExpression(<ts.CallExpression>node, walk);
                case ts.SyntaxKind.MethodDeclaration:
                    return this.visitMethodDeclaration(<ts.MethodDeclaration>node, walk);
                case ts.SyntaxKind.PropertyAssignment:
                    return this.visitPropertyAssignment(<ts.PropertyAssignment>node, walk);
                case ts.SyntaxKind.PropertyAccessExpression:
                    return this.visitPropertyAccessExpression(<ts.PropertyAccessExpression>node, walk);
                case ts.SyntaxKind.FunctionDeclaration:
                    return this.visitFunctionDeclaration(<ts.FunctionDeclaration>node, walk);
                case ts.SyntaxKind.FunctionExpression:
                case ts.SyntaxKind.ArrowFunction:
                    return this.visitBaseFunction(<ts.ArrowFunction | ts.FunctionExpression>node, walk);
                case ts.SyntaxKind.VariableDeclaration:
                    return this.visitVariableDeclaration(<ts.VariableDeclaration>node, walk);
                default:
                    break;
            }

            ts.forEachChild(node, walk);
        };

        ts.forEachChild(program, walk);

        // Now just return all variables whose value is true.  Filter out any that are part of the built-in
        // Node.js global object, however, since those are implicitly available on the other side of
        // serialization.
        const freeVars: string[] = [];
        for (const key of Object.keys(this.frees)) {
            if (this.frees[key] && !FreeVariableComputer.isBuiltIn(key)) {
                freeVars.push(key);
            }
        }

        return freeVars;
    }

    private visitIdentifier(node: ts.Identifier): void {
        // Remember undeclared identifiers during the walk, as they are possibly free.  Note that when the scope
        // chain is empty the loop body never runs, so identifiers seen outside of any scope are not recorded.
        const name = node.text;
        for (let i = this.scope.length - 1; i >= 0; i--) {
            if (this.scope[i][name]) {
                // This is currently known in the scope chain, so do not add it as free.
                break;
            } else if (i === 0) {
                // We reached the top of the scope chain and this wasn't found; it's free.
                this.frees[name] = true;
            }
        }
    }

    private visitThisExpression(node: ts.PrimaryExpression): void {
        // Mark references to the built-in 'this' variable as free.
        this.frees["this"] = true;
    }

    private visitBlockStatement(node: ts.Block, walk: walkCallback): void {
        // Push new scope, visit all block statements, and then restore the scope.
        this.scope.push(FreeVariableComputer.newDictionary());
        ts.forEachChild(node, walk);
        this.scope.pop();
    }

    private visitFunctionDeclaration(node: ts.FunctionDeclaration, walk: walkCallback): void {
        // A function declaration is special in one way: its identifier is added to the current function's
        // var-style variables, so that its name is in scope no matter the order of surrounding references to it.
        if (node.name) {
            this.functionVars.push(node.name.text);
        }
        this.visitBaseFunction(node, walk);
    }

    private visitBaseFunction(node: ts.FunctionLikeDeclarationBase, walk: walkCallback): void {
        // First, push new free vars list, scope, and function vars
        const oldFrees: {[key: string]: boolean} = this.frees;
        const oldFunctionVars: string[] = this.functionVars;
        this.frees = FreeVariableComputer.newDictionary();
        this.functionVars = [];
        this.scope.push(FreeVariableComputer.newDictionary());

        // Add all parameters to the scope.  By visiting the parameters, they end up being seen as
        // identifiers, and therefore added to the free variables list.  We then migrate them to the scope.
        for (const param of node.parameters) {
            walk(param);
        }
        for (const param of Object.keys(this.frees)) {
            if (this.frees[param]) {
                this.scope[this.scope.length - 1][param] = true;
            }
        }
        this.frees = FreeVariableComputer.newDictionary();

        // Next, visit the body underneath this new context.
        walk(node.body);

        // Remove any function-scoped variables that we encountered during the walk.
        for (const v of this.functionVars) {
            this.frees[v] = false;
        }

        // If the function is not an arrow function, then its `this` and `arguments` are also
        // function-scoped variables and should be removed.
        if (!ts.isArrowFunction(node)) {
            this.frees["this"] = false;
            this.frees["arguments"] = false;
        }

        // Restore the prior context and merge our free list with the previous one.
        this.scope.pop();
        this.functionVars = oldFunctionVars;
        for (const free of Object.keys(this.frees)) {
            if (this.frees[free]) {
                oldFrees[free] = true;
            }
        }
        this.frees = oldFrees;
    }

    private visitCatchClause(node: ts.CatchClause, walk: walkCallback): void {
        // Add the catch pattern to the scope as a variable.
        const oldFrees: {[key: string]: boolean} = this.frees;
        this.frees = FreeVariableComputer.newDictionary();
        this.scope.push(FreeVariableComputer.newDictionary());
        walk(node.variableDeclaration);
        for (const param of Object.keys(this.frees)) {
            if (this.frees[param]) {
                this.scope[this.scope.length - 1][param] = true;
            }
        }
        this.frees = oldFrees;

        // And then visit the block without adding them as free variables.
        walk(node.block);

        // Relinquish the scope so the error patterns aren't available beyond the catch.
        this.scope.pop();
    }

    private visitCallExpression(node: ts.CallExpression, walk: walkCallback): void {
        // Most call expressions are normal.  But we must special case one kind of function:
        // TypeScript's __awaiter functions.  They are of the form `__awaiter(this, void 0, void 0, function* (){})`,
        // which will cause us to attempt to capture and serialize the entire surrounding function in
        // which any lambda is created (thanks to `this`).  That spirals into craziness, and bottoms out on native
        // functions which we cannot serialize.  We only want to capture `this` if the user code mentioned it.
        walk(node.expression);

        const isAwaiterCall = ts.isIdentifier(node.expression) && node.expression.text === "__awaiter";
        for (let i = 0; i < node.arguments.length; i++) {
            // Skip only the first argument (the `this`) of an __awaiter call.
            if (i > 0 || !isAwaiterCall) {
                walk(node.arguments[i]);
            }
        }
    }

    private visitMethodDeclaration(node: ts.MethodDeclaration, walk: walkCallback): void {
        if (ts.isComputedPropertyName(node.name)) {
            // Don't walk down the 'name' part of the method declaration if it is an identifier.  It
            // does not capture any variables.  However, if it is a computed property name, walk it
            // as it may capture variables.
            walk(node.name);
        }

        // Always walk the method.
        this.visitBaseFunction(node, walk);
    }

    private visitPropertyAssignment(node: ts.PropertyAssignment, walk: walkCallback): void {
        if (ts.isComputedPropertyName(node.name)) {
            // Don't walk down the 'name' part of the property assignment if it is an identifier.  It
            // is not capturing any variables.  However, if it is a computed property name, walk it
            // as it may capture variables.
            walk(node.name);
        }

        // Always walk the property initializer.
        walk(node.initializer);
    }

    private visitPropertyAccessExpression(node: ts.PropertyAccessExpression, walk: walkCallback): void {
        // Don't walk down the 'name' part of the property access.  It could not capture a free variable.
        // i.e. if you have "A.B", we should analyze the "A" part and not the "B" part.
        walk(node.expression);
    }

    private visitVariableDeclaration(node: ts.VariableDeclaration, walk: walkCallback): void {
        // tslint:disable-next-line:max-line-length
        const isLet = node.parent !== undefined && ts.isVariableDeclarationList(node.parent) && (node.parent.flags & ts.NodeFlags.Let) !== 0;
        // tslint:disable-next-line:max-line-length
        const isConst = node.parent !== undefined && ts.isVariableDeclarationList(node.parent) && (node.parent.flags & ts.NodeFlags.Const) !== 0;
        const isVar = !isLet && !isConst;

        // Walk the declaration's `name` property (which may be an Identifier or Pattern) using a
        // fresh walker which will capture any variables declared by this variable declaration.
        const nameWalk = (n: ts.Node): void => {
            if (!n) {
                return;
            }

            switch (n.kind) {
                case ts.SyntaxKind.Identifier:
                    return this.visitVariableDeclarationIdentifier(<ts.Identifier>n, isVar);
                case ts.SyntaxKind.BindingElement:
                    return this.visitBindingElement(<ts.BindingElement>n, nameWalk, walk);
                case ts.SyntaxKind.ObjectBindingPattern:
                default:
                    break;
            }

            return ts.forEachChild(n, nameWalk);
        };

        nameWalk(node.name);

        // Also walk into the variable initializer with the original walker to make sure we see any
        // captures on the right hand side.
        walk(node.initializer);
    }

    private visitVariableDeclarationIdentifier(node: ts.Identifier, isVar: boolean): void {
        // If the declaration is an identifier, it isn't a free variable, for whatever scope it
        // pertains to (function-wide for var and scope-wide for let/const).  Track it so we can
        // remove any subsequent references to that variable, so we know it isn't free.
        if (isVar) {
            this.functionVars.push(node.text);
        } else {
            this.scope[this.scope.length - 1][node.text] = true;
        }
    }

    private visitBindingElement(
        node: ts.BindingElement, nameWalk: walkCallback, valueWalk: walkCallback): void {
        // array and object patterns can be quite complex.  You can have:
        //
        //  var {t} = val;          // lookup a property in 'val' called 't' and place into a variable 't'.
        //  var {t: m} = val;       // lookup a property in 'val' called 't' and place into a variable 'm'.
        //  var {t: <pat>} = val;   // lookup a property in 'val' called 't' and decompose further into the pattern.
        //
        // And, for all of the above, you can have:
        //
        //  var {t = def} = val;
        //  var {t: m = def} = val;
        //  var {t: <pat> = def} = val;
        //
        // These are the same as the above, except that if there is no property 't' in 'val',
        // then the default value will be used.
        //
        // You can also have at the end of the literal: { ...rest}

        // Walk the name portion, looking for names to add.  for
        //
        //       var {t}   // this will be 't'.
        //
        // for
        //
        //      var {t: m} // this will be 'm'
        //
        // and for
        //
        //      var {t: <pat>} // this will recurse into the pattern.
        //
        // and for
        //
        //      ...rest // this will be 'rest'
        nameWalk(node.name);

        // if there is a default value, walk it as well, looking for captures.
        valueWalk(node.initializer);

        // importantly, we do not walk into node.propertyName
        // This Name defines what property will be retrieved from the value being pattern
        // matched against.  Importantly, it does not define a new name put into scope,
        // nor does it reference a variable in scope.
    }
}
/**
 * serializeJavaScriptText renders a Closure as the text of a Node.js module body.  The module assigns the root
 * function to `exports.handler` and defines one top-level factory function for every function reachable from the
 * closure; each factory rebuilds its function inside a `with` block holding the captured environment.
 *
 * @param c The Closure to be serialized into a module string.
 * @returns The module source text.
 * @throws Error if the closure targets a runtime other than "nodejs".
 */
export function serializeJavaScriptText(c: Closure): string {
    // Only the Node.js runtime can be emitted.
    if (c.runtime !== "nodejs") {
        throw new Error(`Runtime '${c.runtime}' not yet supported (currently only 'nodejs')`);
    }

    // Gather every function that needs emitting, then render them one by one.
    const { funcs, root } = new FuncsForClosure(c);
    const pieces: string[] = ["exports.handler = " + root + ";\n\n"];

    for (const name of Object.keys(funcs)) {
        const { code, env } = funcs[name];

        // 'this' and 'arguments' cannot be supplied through the `with` object; pull them out of the environment
        // and pass them to the wrapper through `apply` instead.
        const thisCapture = env.this;
        const argumentsCapture = env.arguments;
        delete env.this;
        delete env.arguments;

        pieces.push(
            `function ${name}() {\n` +
            ` return (function() {\n` +
            ` with(${envObjToString(env)}) {\n\n` +
            `return ${code}\n\n` +
            ` }\n` +
            ` }).apply(${thisCapture}, ${argumentsCapture}).apply(this, arguments);\n` +
            `}\n` +
            `\n`);
    }

    return pieces.join("");
}
/**
 * getClosureHash_forTestingPurposes returns the hash-derived name that FuncsForClosure assigns to the root
 * function of the given closure.
 *
 * @param closure The closure to hash.
 */
export function getClosureHash_forTestingPurposes(closure: Closure): string {
    const { root } = new FuncsForClosure(closure);
    return root;
}
/**
 * FuncEnv describes one function to be emitted: its source text, plus its captured environment with every value
 * already rendered as a piece of JavaScript text.
 */
interface FuncEnv {
code: string; // the function's source code, copied from the closure.
env: { [key: string]: string; }; // captured variable name -> JavaScript text producing its value.
}
/**
 * FuncsForClosure collects all the function definitions needed to support serialization of a given Closure
 * object.  Note that a Closure object can reference other Closure objects and can also have cycles, so we
 * recursively walk the graph and cache serialized nodes along the way to avoid cycles.
 *
 * After construction, `funcs` maps each function's hash-derived name to its code and rendered environment, and
 * `root` is the name of the function for the closure that was passed in.
 */
class FuncsForClosure {
    public funcs: { [hash: string]: FuncEnv }; // a cache of functions.
    public root: string; // the root closure hash.

    constructor(closure: Closure) {
        this.funcs = {};
        this.root = this.createFuncForClosure(closure);
    }

    /** Registers the closure (and, recursively, every closure it captures) and returns its hash-derived name. */
    private createFuncForClosure(closure: Closure): string {
        // Produce a hash to identify the function.
        const hash = this.createFunctionHash(closure);

        // Now only store if this function hasn't already been hashed.
        if (this.funcs[hash] === undefined) {
            // The slot is registered *before* the environment is rendered, so that a closure which (directly
            // or indirectly) captures itself finds the slot and stops recursing.
            this.funcs[hash] = {
                code: closure.code,
                env: {}, // initialize as empty - update after recursive call
            };
            this.funcs[hash].env = this.envFromEnvObj(closure.environment);
        }

        return hash;
    }

    /** Computes the name for a closure: "__" followed by the SHA-1 of its normalized form, in hex. */
    private createFunctionHash(closure: Closure): string {
        const shasum = crypto.createHash("sha1");

        // We want to produce a deterministic hash from all the relevant data in this closure.  To do
        // so we 'normalize' the object to remove any meaningless differences, and also to ensure
        // the closure can be appropriately serialized to a JSON string, which can then be sha1
        // hashed.
        //
        // The changes normalization performs are:
        //  1. Cycles are removed.  If a closure is self referenced, we replace it with an object
        //     indicating the reference.
        //  2. The entire structure is ordered (through the use of arrays).  This avoids any
        //     potential concerns around property enumeration order in dictionaries.
        //  3. All data is packed into the final object (even when undefined).  That way, if you had
        //     { key: undefined, value: "foo" } and { key: "foo", value: undefined } you don't end
        //     up with the same hash (which would happen if undefined values were ignored, and both
        //     only wrote out the "foo" value).

        // To ensure that cycles are properly represented (and so that we do not infinitely
        // recurse), keep track of which closures we've seen.  We specifically use an array so that
        // we can map the closures to a unique value that we can then use as the reference when seen
        // later on.
        const seenClosures: Closure[] = [];
        const normalizedClosure = this.convertClosureToNormalizedObject(seenClosures, closure);

        shasum.update(JSON.stringify(normalizedClosure));
        const hash: string = "__" + shasum.digest("hex");
        return hash;
    }

    private convertClosureToNormalizedObject(seenClosures: Closure[], closure: Closure | undefined) {
        if (!closure) {
            return undefined;
        }

        const closureIndex = seenClosures.indexOf(closure);
        if (closureIndex >= 0) {
            // We've already seen this closure.  Represent it specially.  Importantly: do not
            // represent it in the same way that we represent 'no closure' (above).  There is a
            // difference between if we have a cyclic closure versus a non-cyclic one.
            return closureIndex;
        }

        // keep track of this closure so we don't recurse into it again.
        seenClosures.push(closure);

        return [
            closure.code,
            closure.runtime,
            this.convertEnvironmentToNormalizedObject(seenClosures, closure.environment),
        ];
    }

    private convertEnvironmentToNormalizedObject(seenClosures: Closure[], environment: Environment | undefined) {
        if (!environment) {
            // Encode no environment differently than an empty environment.  It may not be necessary
            // to do this.  However, in case there ever is a meaningful distinction between the two,
            // this can help avoid particularly subtle bugs.
            return undefined;
        }

        // Process keys in a deterministic order.
        return Object.keys(environment).sort().map(key => ({
            name: key,
            value: this.convertEnvironmentEntryToNormalizedObject(seenClosures, environment[key]),
        }));
    }

    private convertEnvironmentEntryToNormalizedObject(
        seenClosures: Closure[], entry: EnvironmentEntry | undefined): any {

        if (!entry) {
            return undefined;
        }

        return [
            entry.json,
            this.convertClosureToNormalizedObject(seenClosures, entry.closure),
            this.convertEnvironmentToNormalizedObject(seenClosures, entry.obj),
            entry.arr
                ? entry.arr.map(child => this.convertEnvironmentEntryToNormalizedObject(seenClosures, child))
                : undefined,
            entry.module,
        ];
    }

    /** Renders every entry of an environment as JavaScript text, skipping entries that carry no payload. */
    private envFromEnvObj(env: Environment): {[key: string]: string} {
        const envObj: {[key: string]: string} = {};
        for (const key of Object.keys(env)) {
            const val = this.envEntryToString(env[key]);
            if (val !== undefined) {
                envObj[key] = val;
            }
        }
        return envObj;
    }

    /** Renders every element of an array entry as JavaScript text, position by position. */
    private envFromEnvArr(arr: EnvironmentEntry[]): (string | undefined)[] {
        const envArr: (string | undefined)[] = [];
        for (let i = 0; i < arr.length; i++) {
            envArr[i] = this.envEntryToString(arr[i]);
        }
        return envArr;
    }

    /**
     * Renders a single entry as JavaScript text: JSON for plain values, the function's name for closures, an
     * object/array literal for objects/arrays, and a `require(...)` call for modules.
     *
     * @returns The text, or undefined when the entry carries no payload at all.
     */
    private envEntryToString(envEntry: EnvironmentEntry): string | undefined {
        if (envEntry.json !== undefined) {
            return JSON.stringify(envEntry.json);
        }
        else if (envEntry.closure !== undefined) {
            const innerHash = this.createFuncForClosure(envEntry.closure);
            return innerHash;
        }
        else if (envEntry.obj !== undefined) {
            return envObjToString(this.envFromEnvObj(envEntry.obj));
        }
        else if (envEntry.arr !== undefined) {
            return envArrToString(this.envFromEnvArr(envEntry.arr));
        }
        else if (envEntry.module !== undefined) {
            return `require("${envEntry.module}")`;
        }
        else if (Object.prototype.hasOwnProperty.call(envEntry, "json")) {
            // The captured value is `undefined` itself.  It must still be emitted: if the name were left out of
            // the environment, the serialized function would hit a ReferenceError (or pick up an unrelated
            // global of the same name) instead of seeing `undefined`.
            return "undefined";
        }
        else {
            return undefined;
        }
    }
}
/**
 * Converts an environment object into a string which can be embedded into a serialized function body.  Note that
 * this is not JSON serialization, as we may have property values which are variable references to other global
 * functions.  In other words, there can be free variables in the resulting object literal.
 *
 * Keys that are plain identifiers are written as-is; every other key (for example "content-type" or "a b") is
 * written as a quoted string, since emitting it bare would produce an object literal that does not parse.
 *
 * @param envObj The environment object to convert to a string.
 * @returns The text of a JavaScript object literal.
 */
function envObjToString(envObj: { [key: string]: string; }): string {
    const plainIdentifier = /^[A-Za-z_$][A-Za-z0-9_$]*$/;

    const members = Object.keys(envObj).map((key) => {
        const name = plainIdentifier.test(key) ? key : JSON.stringify(key);
        return name + ": " + envObj[key];
    });

    return "{ " + members.join(", ") + " }";
}
/**
 * Converts an array of already-rendered values into the text of a JavaScript array literal.  An element that is
 * undefined is written out as the word `undefined`.
 *
 * @param envArr The rendered elements, in order.
 */
function envArrToString(envArr: (string | undefined)[]): string {
    // Array.from visits every index, so each position is stringified exactly as string concatenation would.
    const rendered = Array.from(envArr, (elem) => "" + elem);
    return "[ " + rendered.join(", ") + " ]";
}