Use new tree to get Bloom filters

This commit is contained in:
Mohamed Hegazy 2014-08-20 11:25:39 -07:00
parent 2d4cec43f5
commit 55512faa0d
8 changed files with 147 additions and 184 deletions

View file

@ -1,7 +1,5 @@
///<reference path='..\core\integerUtilities.ts' />
module TypeScript {
module ts {
export class BloomFilter {
private bitArray: boolean[];
private hashFunctionCount: number;
@ -40,7 +38,7 @@ module TypeScript {
}
// m = ceil((n * log(p)) / log(1.0 / (pow(2.0, log(2.0)))))
static computeM(expectedCount: number): number {
private static computeM(expectedCount: number): number {
var p: number = BloomFilter.falsePositiveProbability;
var n: number = expectedCount;
@ -50,7 +48,7 @@ module TypeScript {
}
// k = round(log(2.0) * m / n)
static computeK(expectedCount: number): number {
private static computeK(expectedCount: number): number {
var n: number = expectedCount;
var m: number = BloomFilter.computeM(expectedCount);
@ -78,14 +76,12 @@ module TypeScript {
* Murmur hash is public domain. Actual code is included below as reference.
*/
private computeHash(key: string, seed: number): number {
return Hash.computeMurmur2StringHashCode(key, seed);
return BloomFilter.computeMurmur2StringHashCode(key, seed);
}
public addKeys(keys: ts.Map<any>) {
for (var name in keys) {
if (ts.lookUp(keys, name)) {
this.add(name);
}
public addKeys(keys: string[]) {
for (var i = 0, n = keys.length; i < n; i++) {
this.add(keys[i]);
}
}
@ -114,7 +110,7 @@ module TypeScript {
&& this.hashFunctionCount === filter.hashFunctionCount;
}
static isEquivalent(array1: boolean[], array2: boolean[]): boolean {
private static isEquivalent(array1: boolean[], array2: boolean[]): boolean {
if (array1.length !== array2.length) {
return false;
}
@ -127,5 +123,64 @@ module TypeScript {
return true;
}
/**
 * Multiplies two integers and returns the low 32 bits of the product as an
 * unsigned value (the result of `>>> 0`).
 *
 * The multiplication is split over the upper and lower 16-bit halves of `n1`
 * so that each partial product has few enough significant bits to be held
 * exactly in a double before it is truncated to 32 bits; a direct `n1 * n2`
 * could lose low-order bits to rounding.
 * Assumes both operands are integers that fit in 32 bits - TODO confirm
 * against callers.
 *
 * @param n1 first factor; only its low 32 bits take part in the product.
 * @param n2 second factor.
 * @returns the low 32 bits of `n1 * n2`, in the range [0, 2^32).
 */
private static integerMultiplyLow32Bits(n1: number, n2: number): number {
    // Partial product contributed by the upper 16 bits of n1, truncated to 32 bits.
    var highPartial = ((n1 & 0xffff0000) * n2) >>> 0;
    // Partial product contributed by the lower 16 bits of n1, truncated to 32 bits.
    var lowPartial = ((n1 & 0x0000ffff) * n2) >>> 0;
    // The sum of two 32-bit values is exact in a double; truncate it once more.
    return (highPartial + lowPartial) >>> 0;
}
/**
 * Computes a Murmur2-style hash code for a string.
 *
 * The string is consumed two UTF-16 code units (4 bytes) at a time; a trailing
 * odd code unit is folded in separately, and a final series of shifts and
 * multiplications mixes the last bytes into the result.
 *
 * @param key the string to hash.
 * @param seed value combined with the string length to initialise the hash.
 * @returns the resulting hash code (a 32-bit integer).
 */
private static computeMurmur2StringHashCode(key: string, seed: number): number {
    // Mixing constants generated offline; they are not 'magic', they simply
    // happen to work well.
    var multiplier: number = 0x5bd1e995;
    var shift: number = 24;

    var length = key.length;

    // Start from a 'random' value derived from the seed and the key length.
    var hash = Math.abs(seed ^ length);

    // Main loop: fold one pair of chars (4 bytes) into the hash per iteration.
    var position = 0;
    for (; length - position >= 2; position += 2) {
        var lowChar = key.charCodeAt(position);
        var highChar = key.charCodeAt(position + 1);

        var block = Math.abs(lowChar | (highChar << 16));
        block = BloomFilter.integerMultiplyLow32Bits(block, multiplier);
        block ^= block >> shift;
        block = BloomFilter.integerMultiplyLow32Bits(block, multiplier);

        hash = BloomFilter.integerMultiplyLow32Bits(hash, multiplier);
        hash ^= block;
    }

    // An odd-length key leaves exactly one char (2 bytes) unprocessed.
    if (position < length) {
        hash ^= key.charCodeAt(position);
        hash = BloomFilter.integerMultiplyLow32Bits(hash, multiplier);
    }

    // Final avalanche so the last few bytes are well incorporated.
    hash ^= hash >> 13;
    hash = BloomFilter.integerMultiplyLow32Bits(hash, multiplier);
    hash ^= hash >> 15;

    return hash;
}
}
}
}

View file

@ -18,7 +18,6 @@
/////<reference path='precompile.ts' />
/////<reference path='referenceResolver.ts' />
/////<reference path='declarationEmitter.ts' />
/////<reference path='bloomFilter.ts' />
/////<reference path='identifierWalker.ts' />
/////<reference path='settings.ts' />
/////<reference path='typecheck\pullFlags.ts' />

View file

@ -1,112 +0,0 @@
///<reference path='references.ts' />
module TypeScript {
    /**
     * Static helpers that compute integer hash codes for strings and for
     * arrays of character codes.
     */
    export class Hash {
        // Constants of the FNV1a hash used by computeFnv1aCharArrayHashCode.
        private static FNV_BASE = 2166136261;
        private static FNV_PRIME = 16777619;

        /**
         * FNV1a hash over `len` character codes of `text`, beginning at `start`.
         */
        private static computeFnv1aCharArrayHashCode(text: number[], start: number, len: number): number {
            var limit = start + len;
            var result = Hash.FNV_BASE;

            var cursor = start;
            while (cursor < limit) {
                result = IntegerUtilities.integerMultiplyLow32Bits(result ^ text[cursor], Hash.FNV_PRIME);
                cursor++;
            }

            return result;
        }

        /**
         * Simple multiply-by-31 hash over `len` character codes of `key`,
         * beginning at `start`. The result is masked to 31 bits.
         */
        public static computeSimple31BitCharArrayHashCode(key: number[], start: number, len: number): number {
            // Begin with an int.
            var result = 0;

            for (var offset = 0; offset < len; offset++) {
                var code = key[start + offset];

                // (result << 5) - result is result * 31. The shift keeps the value a
                // 32-bit int and only two additions follow, which Chakra and V8
                // recognise as never needing more than the 53 mantissa bits of a
                // float. The `| 0` keeps each step at 32 bits.
                var timesThirtyOne = ((result << 5) - result) | 0;
                result = (timesThirtyOne + code) | 0;
            }

            // Keep to 31 bits so that storing the value never needs a heap
            // allocation.
            return result & 0x7FFFFFFF;
        }

        /**
         * Simple multiply-by-31 hash over every char code of `key`. The result
         * is masked to 31 bits.
         */
        public static computeSimple31BitStringHashCode(key: string): number {
            // Begin with an int.
            var result = 0;

            var base = 0;
            var count = key.length;

            for (var offset = 0; offset < count; offset++) {
                var code = key.charCodeAt(base + offset);

                // Same 32-bit-safe "times 31, plus char" step as the char-array
                // variant: shift and subtract, then `| 0` to stay at 32 bits.
                var timesThirtyOne = ((result << 5) - result) | 0;
                result = (timesThirtyOne + code) | 0;
            }

            // Keep to 31 bits so that storing the value never needs a heap
            // allocation.
            return result & 0x7FFFFFFF;
        }

        /**
         * Computes a Murmur2-style hash code for `key`, initialised from `seed`
         * and the key length.
         */
        public static computeMurmur2StringHashCode(key: string, seed: number): number {
            // Mixing constants generated offline; not really 'magic', they just
            // happen to work well.
            var multiplier: number = 0x5bd1e995;
            var shift: number = 24;

            var length = key.length;

            // Seed the hash with a 'random' value.
            var hash = Math.abs(seed ^ length);

            // 4 bytes are two chars, so the string is consumed two chars at a time.
            var position = 0;
            for (; length - position >= 2; position += 2) {
                var lowChar = key.charCodeAt(position);
                var highChar = key.charCodeAt(position + 1);

                var block = Math.abs(lowChar | (highChar << 16));
                block = IntegerUtilities.integerMultiplyLow32Bits(block, multiplier);
                block ^= block >> shift;
                block = IntegerUtilities.integerMultiplyLow32Bits(block, multiplier);

                hash = IntegerUtilities.integerMultiplyLow32Bits(hash, multiplier);
                hash ^= block;
            }

            // A string of odd length leaves one last char (2 bytes) to fold in.
            if (position < length) {
                hash ^= key.charCodeAt(position);
                hash = IntegerUtilities.integerMultiplyLow32Bits(hash, multiplier);
            }

            // A few final mixes so the last bytes are well incorporated.
            hash ^= hash >> 13;
            hash = IntegerUtilities.integerMultiplyLow32Bits(hash, multiplier);
            hash ^= hash >> 15;

            return hash;
        }

        /**
         * Folds `value` into `currentHash` (currentHash * 33 + value), masked
         * to 31 bits.
         */
        public static combine(value: number, currentHash: number): number {
            var shifted = currentHash << 5;

            // Mask so the result stays within 31 bits.
            return (shifted + currentHash + value) & 0x7FFFFFFF;
        }
    }
}

View file

@ -6,7 +6,6 @@
///<reference path='diagnosticCore.ts' />
///<reference path='diagnosticInfo.ts' />
///<reference path='errors.ts' />
///<reference path='hash.ts' />
///<reference path='integerUtilities.ts' />
///<reference path='lineMap.ts' />
///<reference path='linePosition.ts' />

View file

@ -11,7 +11,7 @@
/// <reference path='breakpoints.ts' />
/// <reference path='indentation.ts' />
/// <reference path='formatting\formatting.ts' />
/// <reference path='compiler\bloomFilter.ts' />
/// <reference path='bloomFilter.ts' />
/// <reference path='core\references.ts' />
/// <reference path='resources\references.ts' />
@ -71,7 +71,7 @@ module ts {
export interface SourceFile {
getSourceUnit(): TypeScript.SourceUnitSyntax;
getSyntaxTree(): TypeScript.SyntaxTree;
getBloomFilter(): TypeScript.BloomFilter;
getBloomFilter(): BloomFilter;
update(scriptSnapshot: TypeScript.IScriptSnapshot, version: number, isOpen: boolean, textChangeRange: TypeScript.TextChangeRange): SourceFile;
}
@ -324,7 +324,7 @@ module ts {
public isOpen: boolean;
public languageVersion: ScriptTarget;
private bloomFilter: TypeScript.BloomFilter;
private bloomFilter: BloomFilter;
private syntaxTree: TypeScript.SyntaxTree;
private scriptSnapshot: TypeScript.IScriptSnapshot;
@ -356,61 +356,27 @@ module ts {
return TypeScript.isDTSFile(this.filename);
}
/**
 * Tells whether `node` is an identifier name, string literal or numeric
 * literal that sits in the name position of its parent: the `propertyName`
 * of a variable declarator, property signature, simple property assignment
 * or enum element, or the `name` of a module declaration.
 *
 * @param node the syntax element to test.
 * @returns true only when the node kind qualifies and the parent's name slot
 *          is this very node.
 */
private static isNameOfPropertyDeclaration(node: TypeScript.ISyntaxElement): boolean {
    // Only identifier names and string/numeric literals can act as such a name.
    var isNameLikeToken =
        node.kind() === TypeScript.SyntaxKind.IdentifierName ||
        node.kind() === TypeScript.SyntaxKind.StringLiteral ||
        node.kind() === TypeScript.SyntaxKind.NumericLiteral;
    if (!isNameLikeToken) {
        return false;
    }

    var parent = node.parent;
    var parentKind = parent.kind();

    if (parentKind === TypeScript.SyntaxKind.VariableDeclarator) {
        return (<TypeScript.VariableDeclaratorSyntax>parent).propertyName === node;
    }
    if (parentKind === TypeScript.SyntaxKind.PropertySignature) {
        return (<TypeScript.PropertySignatureSyntax>parent).propertyName === node;
    }
    if (parentKind === TypeScript.SyntaxKind.SimplePropertyAssignment) {
        return (<TypeScript.SimplePropertyAssignmentSyntax>parent).propertyName === node;
    }
    if (parentKind === TypeScript.SyntaxKind.EnumElement) {
        return (<TypeScript.EnumElementSyntax>parent).propertyName === node;
    }
    if (parentKind === TypeScript.SyntaxKind.ModuleDeclaration) {
        return (<TypeScript.ModuleDeclarationSyntax>parent).name === node;
    }

    // Any other kind of parent does not declare a property through this node.
    return false;
}
/**
 * Tells whether `node` is a string or numeric literal used as the argument
 * expression of its parent element access expression.
 *
 * @param node the syntax element to test.
 * @returns true when all three conditions hold, false otherwise.
 */
private static isElementAccessLiteralIndexer(node: TypeScript.ISyntaxElement): boolean {
    // The node itself must be a string or numeric literal.
    if (node.kind() !== TypeScript.SyntaxKind.StringLiteral && node.kind() !== TypeScript.SyntaxKind.NumericLiteral) {
        return false;
    }

    // Its parent must be an element access expression.
    if (node.parent.kind() !== TypeScript.SyntaxKind.ElementAccessExpression) {
        return false;
    }

    // And the literal must be the index (argument), not the object being indexed.
    return (<TypeScript.ElementAccessExpressionSyntax> node.parent).argumentExpression === node;
}
public getBloomFilter(): TypeScript.BloomFilter {
public getBloomFilter(): BloomFilter {
if (!this.bloomFilter) {
var identifiers = TypeScript.createIntrinsicsObject<boolean>();
var pre = function (cur: TypeScript.ISyntaxElement) {
if (TypeScript.ASTHelpers.isValidAstNode(cur)) {
if (cur.kind() === TypeScript.SyntaxKind.IdentifierName ||
SourceFileObject.isNameOfPropertyDeclaration(cur) ||
SourceFileObject.isElementAccessLiteralIndexer(cur)) {
var nodeText = TypeScript.tokenValueText((<TypeScript.ISyntaxToken>cur));
var identifiers: string[] = [];
identifiers[nodeText] = true;
}
}
};
forEachChild(this, function visit (node: Node) {
switch (node.kind) {
case SyntaxKind.Identifier:
identifiers.push((<Identifier>node).text);
return undefined;
case SyntaxKind.StringLiteral:
case SyntaxKind.NumericLiteral:
if (isNameOfPropertyDeclaration(node) || isLiteralIndexOfIndexAccess(node)) {
identifiers.push((<LiteralExpression>node).text);
}
return undefined;
default:
return forEachChild(node, visit);
};
});
TypeScript.getAstWalkerFactory().simpleWalk(this.getSourceUnit(), pre, null, identifiers);
var identifierCount = 0;
for (var name in identifiers) {
if (identifiers[name]) {
identifierCount++;
}
}
this.bloomFilter = new TypeScript.BloomFilter(identifierCount);
this.bloomFilter = new BloomFilter(identifiers.length);
this.bloomFilter.addKeys(identifiers);
}
return this.bloomFilter;
@ -2375,7 +2341,7 @@ module ts {
// type to the search set
if (isNameOfPropertyAssignment(location)) {
var symbolFromContextualType = getPropertySymbolFromContextualType(location);
if (symbolFromContextualType) result.push(symbolFromContextualType);
if (symbolFromContextualType) result.push(typeInfoResolver.getRootSymbol(symbolFromContextualType));
}
// Add symbol of properties/methods of the same name in base classes and implemented interfaces definitions
@ -2429,7 +2395,7 @@ module ts {
// compare to our searchSymbol
if (isNameOfPropertyAssignment(referenceLocation)) {
var symbolFromContextualType = getPropertySymbolFromContextualType(referenceLocation);
if (searchSymbols.indexOf(symbolFromContextualType) >= 0) {
if (symbolFromContextualType && searchSymbols.indexOf(typeInfoResolver.getRootSymbol(symbolFromContextualType)) >= 0) {
return true;
}
}

View file

@ -0,0 +1,20 @@
/// <reference path='fourslash.ts'/>
// Ensure BloomFilter building logic is correct, by having one reference per file.
// The name `searchProp` appears exactly once in each of the four files below:
// as a property assignment name, in a property access, as a string literal in
// an element access, and as a string-literal property name.
// @Filename: declaration.ts
////var container = { /*1*/searchProp : 1 };
// @Filename: expression.ts
////function blah() { return (1 + 2 + container./*2*/searchProp()) === 2; };
// @Filename: stringIndexer.ts
////function blah2() { container[/*3*/"searchProp"] };
// @Filename: redeclaration.ts
////container = { /*4*/"searchProp" : 18 };
// Move to every marker in turn; a count of 4 is expected at each of them
// (presumably the number of references found from that position - confirm
// against the fourslash harness).
test.markers().forEach(m => {
goTo.position(m.position, m.fileName);
verify.referencesCountIs(4);
});

View file

@ -0,0 +1,20 @@
/// <reference path='fourslash.ts'/>
// Ensure BloomFilter building logic is correct, by having one reference per file.
// The property `42` appears exactly once in each of the four files below, spelled
// either as a numeric literal (42) or as a string literal ("42"), in both
// property-name and element-access positions.
// @Filename: declaration.ts
////var container = { /*1*/42 : 1 };
// @Filename: expression.ts
////function blah() { return (container[/*2*/42]) === 2; };
// @Filename: stringIndexer.ts
////function blah2() { container[/*3*/"42"] };
// @Filename: redeclaration.ts
////container = { /*4*/"42" : 18 };
// Move to every marker in turn; a count of 4 is expected at each of them
// (presumably the number of references found from that position - confirm
// against the fourslash harness).
test.markers().forEach(m => {
goTo.position(m.position, m.fileName);
verify.referencesCountIs(4);
});

View file

@ -0,0 +1,16 @@
/// <reference path='fourslash.ts'/>
// Ensure BloomFilter building logic is correct, by having one reference per file.
// The enum member is declared with the string-literal name "42" in one file and
// indexed with the numeric literal 42 in the other.
// @Filename: declaration.ts
////enum Test { /*1*/"42" = 1 };
// @Filename: expression.ts
////(Test[/*2*/42]);
// Move to both markers in turn; a count of 2 is expected at each of them
// (presumably the number of references found from that position - confirm
// against the fourslash harness).
test.markers().forEach(m => {
goTo.position(m.position, m.fileName);
verify.referencesCountIs(2);
});