Support cycles and avoid name collisions in closure serialization

Generate names for serialized functions as hashes of the function text, and
bind these to the actual referenced names locally to each serialized closure.
This avoids potential conflicts between different functions which are referenced
with the same name.

For example:
```
function __8c176d768f95567f8c3acd08e486a1da7299fbc1() {
  with({ f: __e61452ceb9d53f3611ed543f3822a85ad29345dd}) {
    return (() => {

return function (cb) { cb(null, f(7)); };

    })().apply(this, arguments);
  }
}
```

Also fixes the stack overflows when there are cycles in the Closure graph by
adding caches of serialized functions.

Fixes #238.
This commit is contained in:
Luke Hoban 2017-07-12 12:49:04 -07:00
parent 1d7792cd83
commit 15be6850a2
2 changed files with 100 additions and 49 deletions

View file

@ -2,7 +2,7 @@
import { AssetArchive, File, String as StringAsset } from "@lumi/lumi/asset";
import {
Closure, jsonStringify, objectKeys, printf, serializeClosure,
Closure, jsonStringify, objectKeys, printf, serializeClosure, sha1hash,
} from "@lumi/lumirt";
import { Role } from "../iam/role";
import { DeadLetterConfig, Function as LambdaFunction } from "../lambda/function";
@ -42,35 +42,73 @@ let policy = {
interface FuncEnv {
code: string;
env: string;
env: { [key: string]: string; };
}
// addToFuncEnvs adds the closure with the given name into a collection of function environments. As it walks the
// closures environment, it finds additional closures that this closure is dependent on, and recursively adds those
// to the function environment as well. The resulting environments for each closure are json stringified
// representations ready to be marshalled to a target execution environment.
function addToFuncEnvs(funcEnvs: { [key: string]: FuncEnv}, name: string, closure: Closure): { [key: string]: FuncEnv} {
let obj: any = {};
let keys = objectKeys(closure.environment);
// FuncsForClosure collects all the function defintions needed to
// support serialization of a given Closure object. Note that
// a Closure object can reference other Closure objects and
// can also have cycles, so we recursively walk the graph and
// cache serialized nodes along the way to avoid cycles.
class FuncsForClosure {
public funcs: { [hash: string]: FuncEnv };
public root: string;
constructor(closure: Closure) {
this.funcs = {};
this.root = this.createFuncForClosure(closure);
}
private createFuncForClosure(closure: Closure): string {
let hash = "__" + sha1hash(closure.code);
if (this.funcs[hash] === undefined) {
this.funcs[hash] = {
code: closure.code,
env: {},
};
let envObj: any = {};
let keys = objectKeys(closure.environment);
for (let i = 0; i < (<any>keys).length; i++) {
let key = keys[i];
let envEntry = closure.environment[key];
if (envEntry.json !== undefined) {
envObj[key] = jsonStringify(envEntry.json);
} else if (envEntry.closure !== undefined) {
let innerHash = this.createFuncForClosure(envEntry.closure);
envObj[key] = innerHash;
} else {
// TODO[pulumi/lumi#239]: For now we will skip serialziing when the captured JSON object is
// null/undefined. This is not technically correct, as it will cause references to these to
// fail instead of return undefined.
}
}
this.funcs[hash].env = envObj;
}
return hash;
}
}
// Converts an environment object into a string which can be embedded into a serialized
// function body. Note that this is not JSON serialization, as we may have proeprty
// values which are variable references to other global functions. In other words,
// there can be free variables in the resulting object literal.
function envObjToString(envObj: { [key: string]: string; }): string {
let ret = "{";
let isStart = true;
let keys = objectKeys(envObj);
for (let i = 0; i < (<any>keys).length; i++) {
let key = keys[i];
let envEntry = closure.environment[key];
if (envEntry.json !== undefined) {
obj[key] = envEntry.json;
} else if (envEntry.closure !== undefined) {
// TODO[pulumi/lumi#238]: We need to detect cycles here.
addToFuncEnvs(funcEnvs, key, envEntry.closure);
let val = envObj[key];
if (isStart) {
ret += " ";
} else {
// TODO[pulumi/lumi#239]: For now we will skip serialziing when the captured JSON object is null/undefined.
// This is not technically correct, as it will cause references to these to fail instead
// of return undefined.
ret += ", ";
}
isStart = false;
ret += key + ": " + val;
}
funcEnvs[name] = {
code: closure.code,
env: jsonStringify(obj),
};
return funcEnvs;
ret += " }";
return ret;
}
function createJavaScriptLambda(
@ -79,30 +117,28 @@ function createJavaScriptLambda(
closure: Closure,
opts: FunctionOptions): LambdaFunction {
let funcs = addToFuncEnvs({}, "__handler", closure);
let str = "exports.handler = __handler;\n\n";
let funcsForClosure = new FuncsForClosure(closure);
let funcs = funcsForClosure.funcs;
let str = "exports.handler = " + funcsForClosure.root + ";\n\n";
let fkeys = objectKeys(funcs);
let envObj: any = {};
for (let i = 0; i < (<any>fkeys).length; i++) {
let name = fkeys[i];
str +=
"function " + name + "() {\n" +
" let __env = JSON.parse(process.env.LUMI_ENV_" + name + ");\n" +
" with(__env) {\n" +
" let __f = (() => {" + funcs[name].code + "})();\n" +
" return __f.apply(this, arguments);\n" +
" with(" + envObjToString(funcs[name].env) + ") {\n" +
" return (() => {\n\n" +
funcs[name].code + "\n" +
" })().apply(this, arguments);\n" +
" }\n" +
"}\n" +
"\n";
envObj["LUMI_ENV_" + name] = funcs[name].env;
}
// Inject some TypeScript runtime helpers that the transpiled code may have dependencies on.
// These are necessary for targeting Node.js runtime environments that do not yet support
// new ECMAScript features like `async`/`await`.
//
// The implemnetations are sourced from: https://github.com/Microsoft/tslib/blob/master/tslib.es6.js
// The implementations are sourced from: https://github.com/Microsoft/tslib/blob/master/tslib.es6.js
/*tslint:disable: max-line-length */
str += `
function __awaiter(thisArg, _arguments, P, generator) {
@ -159,7 +195,6 @@ function __generator(thisArg, body) {
timeout: timeout,
memorySize: opts.memorySize,
deadLetterConfig: opts.deadLetterConfig,
environment: envObj,
});
return lambda;

View file

@ -103,23 +103,38 @@ func sha1hash(intrin *rt.Intrinsic, e *evaluator, this *rt.Object, args []*rt.Ob
}
type closureSerializer struct {
node diag.Diagable
e *evaluator
node diag.Diagable
e *evaluator
envEntryCache map[*rt.Object]*rt.Object
}
func (s *closureSerializer) envEntryObjFor(obj *rt.Object) *rt.Object {
props := rt.NewPropertyMap()
if obj.IsFunction() {
// Serialize functions using serializeClosure
stub := obj.FunctionValue()
lambda, ok := stub.Func.(*ast.LambdaExpression)
contract.Assertf(ok, "Expected function to be lambda expression")
props.Set("closure", s.serializeClosure(stub, lambda))
} else {
// Else we will pass through the object to serialize
props.Set("json", obj)
envEntry, ok := s.envEntryCache[obj]
if !ok {
props := rt.NewPropertyMap()
envEntry = rt.NewObject(types.Dynamic, nil, props, nil)
s.envEntryCache[obj] = envEntry
if obj.IsFunction() {
// Serialize functions using serializeClosure
stub := obj.FunctionValue()
lambda, ok := stub.Func.(*ast.LambdaExpression)
contract.Assertf(ok, "Expected function to be lambda expression")
props.Set("closure", s.serializeClosure(stub, lambda))
} else {
// Else we will pass through the object to serialize
// IDEA: Support for function members on captured references
// could be added by recurring here in the case that
// obj is not a primitive type.
// let x = {
// f: () => 24,
// };
// let g = () => x.f();
// In the above case, the free vars of g are only [x], which will
// currently trigger JSON serialization, losing access to `x.f`.
props.Set("json", obj)
}
}
return rt.NewObject(types.Dynamic, nil, props, nil)
return envEntry
}
func (s *closureSerializer) serializeClosure(stub rt.FuncStub, lambda *ast.LambdaExpression) *rt.Object {
@ -165,8 +180,9 @@ func serializeClosure(intrin *rt.Intrinsic, e *evaluator, this *rt.Object, args
return e.NewException(intrin.Tree(), "Expected argument 'func' to be a lambda expression.")
}
closureSerializer := &closureSerializer{
node: intrin.Tree(),
e: e,
node: intrin.Tree(),
e: e,
envEntryCache: map[*rt.Object]*rt.Object{},
}
closure := closureSerializer.serializeClosure(stub, lambda)
return rt.NewReturnUnwind(closure)