use a skip list for keeping canonical uris, https://github.com/microsoft/vscode/issues/93368

This commit is contained in:
Johannes Rieken 2020-05-27 16:25:32 +02:00
parent 64c4407492
commit ae0e3d7376
3 changed files with 468 additions and 14 deletions

View file

@ -0,0 +1,203 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
class Node<K, V> {
readonly forward: Node<K, V>[];
constructor(readonly level: number, readonly key: K, public value: V) {
this.forward = [];
}
}
const NIL: undefined = undefined;
interface Comparator<K> {
(a: K, b: K): number;
}
export class SkipList<K, V> implements Map<K, V> {
readonly [Symbol.toStringTag] = 'SkipList';
private _maxLevel: number;
private _level: number = 1;
private _header: Node<K, V>;
private _size: number = 0;
/**
*
* @param capacity Capacity at which the list performs best
*/
constructor(
readonly comparator: (a: K, b: K) => number,
capacity: number = 2 ** 16
) {
this._maxLevel = Math.max(1, Math.log2(capacity) | 0);
this._header = <any>new Node(this._maxLevel, NIL, NIL);
}
get size(): number {
return this._size;
}
clear(): void {
this._header = <any>new Node(this._maxLevel, NIL, NIL);
}
has(key: K): boolean {
return Boolean(SkipList._search(this, key, this.comparator));
}
get(key: K): V | undefined {
return SkipList._search(this, key, this.comparator)?.value;
}
set(key: K, value: V): this {
if (SkipList._insert(this, key, value, this.comparator)) {
this._size += 1;
}
return this;
}
delete(key: K): boolean {
const didDelete = SkipList._delete(this, key, this.comparator);
if (didDelete) {
this._size -= 1;
}
return didDelete;
}
// --- iteration
forEach(callbackfn: (value: V, key: K, map: Map<K, V>) => void, thisArg?: any): void {
let node = this._header.forward[0];
while (node) {
callbackfn.call(thisArg, node.value, node.key, this);
node = node.forward[0];
}
}
[Symbol.iterator](): IterableIterator<[K, V]> {
return this.entries();
}
*entries(): IterableIterator<[K, V]> {
let node = this._header.forward[0];
while (node) {
yield [node.key, node.value];
node = node.forward[0];
}
}
*keys(): IterableIterator<K> {
let node = this._header.forward[0];
while (node) {
yield node.key;
node = node.forward[0];
}
}
*values(): IterableIterator<V> {
let node = this._header.forward[0];
while (node) {
yield node.value;
node = node.forward[0];
}
}
toString(): string {
// debug string...
let result = '[SkipList]:';
let node = this._header.forward[0];
while (node) {
result += `node(${node.key}, ${node.value}, lvl:${node.level})`;
node = node.forward[0];
}
return result;
}
// from https://www.epaperpress.com/sortsearch/download/skiplist.pdf
private static _search<K, V>(list: SkipList<K, V>, searchKey: K, comparator: Comparator<K>) {
let x = list._header;
for (let i = list._level; i >= 0; i--) {
while (x.forward[i] && comparator(x.forward[i].key, searchKey) < 0) {
x = x.forward[i];
}
}
x = x.forward[0];
if (x && comparator(x.key, searchKey) === 0) {
return x;
}
return undefined;
}
private static _insert<K, V>(list: SkipList<K, V>, searchKey: K, value: V, comparator: Comparator<K>) {
let update: Node<K, V>[] = [];
let x = list._header;
for (let i = list._level; i >= 0; i--) {
while (x.forward[i] && comparator(x.forward[i].key, searchKey) < 0) {
x = x.forward[i];
}
update[i] = x;
}
x = x.forward[0];
if (x && comparator(x.key, searchKey) === 0) {
// update
x.value = value;
return false;
} else {
// insert
let lvl = SkipList._randomLevel(list);
if (lvl > list._level) {
for (let i = list._level + 1; i <= lvl; i++) {
update[i] = list._header;
}
list._level = lvl;
}
x = new Node<K, V>(lvl, searchKey, value);
for (let i = 0; i <= lvl; i++) {
x.forward[i] = update[i].forward[i];
update[i].forward[i] = x;
}
return true;
}
}
private static _randomLevel(list: SkipList<any, any>, p: number = 0.5): number {
let lvl = 1;
while (Math.random() < p && lvl < list._maxLevel) {
lvl += 1;
}
return lvl;
}
private static _delete<K, V>(list: SkipList<K, V>, searchKey: K, comparator: Comparator<K>) {
let update: Node<K, V>[] = [];
let x = list._header;
for (let i = list._level; i >= 0; i--) {
while (x.forward[i] && comparator(x.forward[i].key, searchKey) < 0) {
x = x.forward[i];
}
update[i] = x;
}
x = x.forward[0];
if (!x || comparator(x.key, searchKey) !== 0) {
// not found
return false;
}
for (let i = 0; i < list._level; i++) {
if (update[i].forward[i] !== x) {
break;
}
update[i].forward[i] = x.forward[i];
}
while (list._level >= 1 && list._header.forward[list._level] === NIL) {
list._level -= 1;
}
return true;
}
}

View file

@ -0,0 +1,218 @@
/*---------------------------------------------------------------------------------------------
* Copyright (c) Microsoft Corporation. All rights reserved.
* Licensed under the MIT License. See License.txt in the project root for license information.
*--------------------------------------------------------------------------------------------*/
import * as assert from 'assert';
import { SkipList } from 'vs/base/common/skipList';
import { StopWatch } from 'vs/base/common/stopwatch';
import { binarySearch } from 'vs/base/common/arrays';
suite('SkipList', function () {
function assertValues<V>(list: SkipList<any, V>, expected: V[]) {
assert.equal(list.size, expected.length);
assert.deepEqual([...list.values()], expected);
let valuesFromEntries = [...list.entries()].map(entry => entry[1]);
assert.deepEqual(valuesFromEntries, expected);
let valuesFromIter = [...list].map(entry => entry[1]);
assert.deepEqual(valuesFromIter, expected);
let i = 0;
list.forEach((value, _key, map) => {
assert.ok(map === list);
assert.deepEqual(value, expected[i++]);
});
}
function assertKeys<K>(list: SkipList<K, any>, expected: K[]) {
assert.equal(list.size, expected.length);
assert.deepEqual([...list.keys()], expected);
let keysFromEntries = [...list.entries()].map(entry => entry[0]);
assert.deepEqual(keysFromEntries, expected);
let keysFromIter = [...list].map(entry => entry[0]);
assert.deepEqual(keysFromIter, expected);
let i = 0;
list.forEach((_value, key, map) => {
assert.ok(map === list);
assert.deepEqual(key, expected[i++]);
});
}
test('set/get/delete', function () {
let list = new SkipList<number, number>((a, b) => a - b);
assert.equal(list.get(3), undefined);
list.set(3, 1);
assert.equal(list.get(3), 1);
assertValues(list, [1]);
list.set(3, 3);
assertValues(list, [3]);
list.set(1, 1);
list.set(4, 4);
assert.equal(list.get(3), 3);
assert.equal(list.get(1), 1);
assert.equal(list.get(4), 4);
assertValues(list, [1, 3, 4]);
assert.equal(list.delete(17), false);
assert.equal(list.delete(1), true);
assert.equal(list.get(1), undefined);
assert.equal(list.get(3), 3);
assert.equal(list.get(4), 4);
assertValues(list, [3, 4]);
});
test('Figure 3', function () {
let list = new SkipList<number, boolean>((a, b) => a - b);
list.set(3, true);
list.set(6, true);
list.set(7, true);
list.set(9, true);
list.set(12, true);
list.set(19, true);
list.set(21, true);
list.set(25, true);
assertKeys(list, [3, 6, 7, 9, 12, 19, 21, 25]);
list.set(17, true);
assert.deepEqual(list.size, 9);
assertKeys(list, [3, 6, 7, 9, 12, 17, 19, 21, 25]);
});
test('capacity max', function () {
let list = new SkipList<number, boolean>((a, b) => a - b, 10);
list.set(1, true);
list.set(2, true);
list.set(3, true);
list.set(4, true);
list.set(5, true);
list.set(6, true);
list.set(7, true);
list.set(8, true);
list.set(9, true);
list.set(10, true);
list.set(11, true);
list.set(12, true);
assertKeys(list, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]);
});
const cmp = (a: number, b: number): number => {
if (a < b) {
return -1;
} else if (a > b) {
return 1;
} else {
return 0;
}
};
function insertArraySorted(array: number[], element: number) {
let idx = binarySearch(array, element, cmp);
if (idx >= 0) {
array[idx] = element;
} else {
idx = ~idx;
// array = array.slice(0, idx).concat(element, array.slice(idx));
array.splice(idx, 0, element);
}
return array;
}
function delArraySorted(array: number[], element: number) {
let idx = binarySearch(array, element, cmp);
if (idx >= 0) {
// array = array.slice(0, idx).concat(array.slice(idx));
array.splice(idx, 1);
}
return array;
}
test('perf', function () {
this.skip();
// data
const max = 2 ** 16;
const values = new Set<number>();
for (let i = 0; i < max; i++) {
let value = Math.floor(Math.random() * max);
values.add(value);
}
console.log(values.size);
// init
let list = new SkipList<number, boolean>(cmp, max);
let sw = new StopWatch(true);
values.forEach(value => list.set(value, true));
sw.stop();
console.log(`[LIST] ${list.size} elements after ${sw.elapsed()}ms`);
let array: number[] = [];
sw = new StopWatch(true);
values.forEach(value => array = insertArraySorted(array, value));
sw.stop();
console.log(`[ARRAY] ${array.length} elements after ${sw.elapsed()}ms`);
// get
sw = new StopWatch(true);
let someValues = [...values].slice(0, values.size / 4);
someValues.forEach(key => {
let value = list.get(key); // find
console.assert(value, '[LIST] must have ' + key);
list.get(-key); // miss
});
sw.stop();
console.log(`[LIST] retrieve ${sw.elapsed()}ms (${(sw.elapsed() / (someValues.length * 2)).toPrecision(4)}ms/op)`);
sw = new StopWatch(true);
someValues.forEach(key => {
let idx = binarySearch(array, key, cmp); // find
console.assert(idx >= 0, '[ARRAY] must have ' + key);
binarySearch(array, -key, cmp); // miss
});
sw.stop();
console.log(`[ARRAY] retrieve ${sw.elapsed()}ms (${(sw.elapsed() / (someValues.length * 2)).toPrecision(4)}ms/op)`);
// insert
sw = new StopWatch(true);
someValues.forEach(key => {
list.set(-key, false);
});
sw.stop();
console.log(`[LIST] insert ${sw.elapsed()}ms (${(sw.elapsed() / someValues.length).toPrecision(4)}ms/op)`);
sw = new StopWatch(true);
someValues.forEach(key => {
array = insertArraySorted(array, -key);
});
sw.stop();
console.log(`[ARRAY] insert ${sw.elapsed()}ms (${(sw.elapsed() / someValues.length).toPrecision(4)}ms/op)`);
// delete
sw = new StopWatch(true);
someValues.forEach(key => {
list.delete(key); // find
list.delete(-key); // miss
});
sw.stop();
console.log(`[LIST] delete ${sw.elapsed()}ms (${(sw.elapsed() / (someValues.length * 2)).toPrecision(4)}ms/op)`);
sw = new StopWatch(true);
someValues.forEach(key => {
array = delArraySorted(array, key); // find
array = delArraySorted(array, -key); // miss
});
sw.stop();
console.log(`[ARRAY] delete ${sw.elapsed()}ms (${(sw.elapsed() / (someValues.length * 2)).toPrecision(4)}ms/op)`);
});
});

View file

@ -7,8 +7,18 @@ import { IUriIdentityService } from 'vs/workbench/services/uriIdentity/common/ur
import { URI } from 'vs/base/common/uri';
import { registerSingleton } from 'vs/platform/instantiation/common/extensions';
import { IFileService, FileSystemProviderCapabilities } from 'vs/platform/files/common/files';
import { binarySearch } from 'vs/base/common/arrays';
import { ExtUri, IExtUri, normalizePath } from 'vs/base/common/resources';
import { SkipList } from 'vs/base/common/skipList';
class Entry {
static _clock = 0;
time: number = Entry._clock++;
constructor(readonly uri: URI) { }
touch() {
this.time = Entry._clock++;
return this;
}
}
export class UriIdentityService implements IUriIdentityService {
@ -16,19 +26,17 @@ export class UriIdentityService implements IUriIdentityService {
readonly extUri: IExtUri;
private _canonicalUris: URI[] = []; // use SkipList or BinaryTree instead of array...
private readonly _limit = 10_000;
private readonly _canonicalUris: SkipList<URI, Entry>;
private readonly _limit = 2 ** 16;
constructor(@IFileService private readonly _fileService: IFileService) {
// assume path casing matters unless the file system provider spec'ed the opposite
const ignorePathCasing = (uri: URI): boolean => {
// perf@jrieken cache this information
if (this._fileService.canHandleResource(uri)) {
return !this._fileService.hasCapability(uri, FileSystemProviderCapabilities.PathCaseSensitive);
}
// this defaults to false which is a good default for
// * virtual documents
// * in-memory uris
@ -36,6 +44,7 @@ export class UriIdentityService implements IUriIdentityService {
return false;
};
this.extUri = new ExtUri(ignorePathCasing);
this._canonicalUris = new SkipList((a, b) => this.extUri.compare(a, b, true), this._limit);
}
asCanonicalUri(uri: URI): URI {
@ -53,19 +62,43 @@ export class UriIdentityService implements IUriIdentityService {
}
// (2) find the uri in its canonical form or use this uri to define it
const idx = binarySearch(this._canonicalUris, uri, (a, b) => this.extUri.compare(a, b, true));
if (idx >= 0) {
return this._canonicalUris[idx].with({ fragment: uri.fragment });
let item = this._canonicalUris.get(uri);
if (item) {
return item.touch().uri.with({ fragment: uri.fragment });
}
// using slice/concat is faster than splice
// total len should be being _limit and 2*_limit
const insertIdx = ~idx;
const before = this._canonicalUris.slice(Math.max(0, insertIdx - this._limit), insertIdx);
const after = this._canonicalUris.slice(insertIdx, insertIdx + this._limit);
this._canonicalUris = before.concat(uri.with({ fragment: null }), after);
// this uri is first and defines the canonical form
this._canonicalUris.set(uri, new Entry(uri));
this._checkTrim();
return uri;
}
private _checkTrim(): void {
if (this._canonicalUris.size < this._limit) {
return;
}
// get all entries, sort by touch (MRU) and re-initalize
// the uri cache and the entry clock. this is an expensive
// operation and should happen rarely
const entries = [...this._canonicalUris.entries()].sort((a, b) => {
if (a[1].touch < b[1].touch) {
return 1;
} else if (a[1].touch > b[1].touch) {
return -1;
} else {
return 0;
}
});
Entry._clock = 0;
this._canonicalUris.clear();
const newSize = this._limit * 0.5;
for (let i = 0; i < newSize; i++) {
this._canonicalUris.set(entries[i][0], entries[i][1].touch());
}
}
}
registerSingleton(IUriIdentityService, UriIdentityService, true);