Skip costly tests

1. Add a script to generate a sorted list of most costly tests. A test's
cost is roughly `runtime% / number of edits`. A slow test that's only
been updated once is much less valuable than a slow test that has
been updated 20 times: the latter test is catching more changes in the
type system.

2. Check in the results of running this script. I want to make the
skipping behaviour deterministic and the same for everybody, even though
you may get slightly better performance by examining only *your* test
changes.

3. Add code to skip tests until it reaches a 5% chance of missing an
edit. Right now this provides a 38% speedup.

Still not done:
4. Make this value configurable.
5. Make the CI configuration specify a 0% chance of missing an edit.
This commit is contained in:
Nathan Shively-Sanders 2019-06-14 13:35:41 -07:00
parent 81f71530c4
commit a852f2feea
4 changed files with 139 additions and 4 deletions

1
.test-cost.json Normal file

File diff suppressed because one or more lines are too long

View file

@ -82,6 +82,7 @@
"prex": "^0.4.3",
"q": "latest",
"remove-internal": "^2.9.2",
"simple-git": "^1.113.0",
"source-map-support": "latest",
"through2": "latest",
"travis-fold": "latest",
@ -102,7 +103,8 @@
"gulp": "gulp",
"jake": "gulp",
"lint": "gulp lint",
"setup-hooks": "node scripts/link-hooks.js"
"setup-hooks": "node scripts/link-hooks.js",
"update-costly-tests": "node scripts/costly-tests.js"
},
"browser": {
"fs": false,

104
scripts/costly-tests.js Normal file
View file

@ -0,0 +1,104 @@
// @ts-check
const fs = require("fs");
const git = require('simple-git/promise')('.')
const readline = require('readline')
/** @typedef {{ [s: string]: number}} Histogram */
/**
 * Generates `.test-cost.json`: every test listed in `.parallelperf.json`, ordered from
 * most to least costly.
 *
 * A test's cost rises with its share of the total runtime and falls with its share of
 * the edits collected from the commits between `release-2.3` and `master`.
 */
async function main() {
    /** @type {Histogram} */
    const edits = Object.create(null)
    /** @type {Histogram} */
    const perf = JSON.parse(fs.readFileSync('.parallelperf.json', 'utf8'))
    await collectCommits(git, "release-2.3", "master", /*author*/ undefined, files => fillMap(files, edits))

    // Perf keys carry a `tsrunner-<runner>://` prefix; the edit histogram is keyed by bare path.
    // Every test is given one implicit edit so that a never-edited test still has a finite cost.
    const tests = Object.keys(perf).map(key => {
        const name = key.replace(/tsrunner-[a-z-]+?:\/\//, '')
        return { name, time: perf[key], edits: 1 + (edits[name] || 0) }
    })
    const totalTime = tests.reduce((sum, test) => sum + test.time, 0)
    // Total only the edits attributed to tests that are written out below. Edits to files
    // without a perf entry (e.g. tests deleted or renamed since) must not inflate the
    // denominator, otherwise the per-test `edits` values no longer add up to `totalEdits`.
    const totalEdits = tests.reduce((sum, test) => sum + test.edits, 0)

    // TODO: Write counts instead of numbers to make JSON file smaller
    const data = tests.map(test => {
        const percentTime = test.time / totalTime
        const percentHits = test.edits / totalEdits
        return { ...test, cost: 5 + Math.log(percentTime / percentHits) }
    })
    // Most costly first; the cost is rounded only after sorting so ordering uses full precision.
    data.sort((x, y) => y.cost - x.cost)
    const output = {
        totalTime,
        totalEdits,
        data: data.map(x => ({ ...x, cost: x.cost.toFixed(2) }))
    }
    fs.writeFileSync('.test-cost.json', JSON.stringify(output), 'utf8')
}
// Script entry point: run main(); if it rejects, print the error and exit with status 1.
main().catch(error => {
    console.log(error)
    process.exit(1)
})
/**
* @param {string[]} files
* @param {Histogram} histogram
*/
function fillMap(files, histogram) {
// keep edits to test cases (but not /users), and not file moves
const tests = files.filter(f => f.startsWith('tests/cases/') && !f.startsWith('tests/cases/user') && !/=>/.test(f))
for (const test of tests) {
histogram[test] = (histogram[test] || 0) + 1
}
}
/**
 * True when the message ends with a parenthesised pull-request number, e.g. "Fix foo (#1234)".
 * @param {string} s - commit message
 */
function isSquashMergeMessage(s) {
    const trailingPullRequestNumber = /\(#[0-9]+\)$/
    return trailingPullRequestNumber.test(s)
}
/**
 * True when the message contains "Merge pull request #<number>" anywhere in its text.
 * @param {string} s - commit message
 */
function isMergeCommit(s) {
    const mergePullRequest = /Merge pull request #[0-9]+/
    return mergePullRequest.test(s)
}
/**
 * Extracts the changed file paths from `git show --stat --pretty=oneline` output.
 *
 * The first line is the commit title and is always ignored. After that, only rows shaped
 * like `<path> | <count or Bin> ...` are kept, so the result does not depend on whether a
 * blank line follows the title (merge commits have one, other commits do not) and the
 * trailing "N files changed" summary is skipped without counting lines from the end.
 * @param {string} s - raw output for a single commit
 * @returns {string[]} paths as printed in the stat rows (text before the first "|", trimmed)
 */
function parseFiles(s) {
    const statRow = / \| +(?:[0-9]+|Bin)/
    return s.split('\n')
        .slice(1)
        .filter(line => statRow.test(line))
        .map(line => line.split("|")[0].trim())
}
/**
 * Walks the commits returned by `git.log({ from, to })` and passes the files changed by
 * each pull-request commit (merge commit or squash merge, judged by its message) to `update`.
 *
 * A one-line progress indicator (commit counter and date) is rewritten in place on stdout.
 * @param {import('simple-git/promise').SimpleGit} git
 * @param {string} from
 * @param {string} to
 * @param {string | undefined} author - only include commits from this author
 * @param {(files: string[]) => void} update
 */
async function collectCommits(git, from, to, author, update) {
    let i = 0
    for (const commit of (await git.log({ from, to })).all) {
        i++
        const byAuthor = !author || commit.author_name === author
        // `&&` binds tighter than `||`, so the pull-request test needs its own parentheses;
        // without them squash merges slip past the author filter.
        if (byAuthor && (isMergeCommit(commit.message) || isSquashMergeMessage(commit.message))) {
            readline.clearLine(process.stdout, /*left*/ -1)
            readline.cursorTo(process.stdout, 0)
            process.stdout.write(i + ": " + commit.date)
            // Wide --stat limits (width, name-width, count) — presumably so long paths are printed in full.
            const files = parseFiles(await git.show([commit.hash, "--stat=1000,960,40", "--pretty=oneline"]))
            update(files)
        }
    }
}

View file

@ -14,7 +14,9 @@ namespace Harness.Parallel.Host {
const isatty = tty.isatty(1) && tty.isatty(2);
const path = require("path") as typeof import("path");
const { fork } = require("child_process") as typeof import("child_process");
const { statSync } = require("fs") as typeof import("fs");
const { statSync, readFileSync } = require("fs") as typeof import("fs");
const editSkipRate = 0.05
// NOTE: paths for module and types for FailedTestReporter _do not_ line up due to our use of --outFile for run.js
// tslint:disable-next-line:variable-name
@ -192,7 +194,32 @@ namespace Harness.Parallel.Host {
return `tsrunner-${runner}://${test}`;
}
function startDelayed(perfData: { [testHash: string]: number } | undefined, totalCost: number) {
/**
 * Removes the most expensive tests listed in `.test-cost.json` (looked up relative to the
 * current working directory) from `tasks`.
 *
 * Entries are taken in file order - scripts/costly-tests.js writes them most costly first -
 * until the skipped entries account for at least `editSkipRate` of `totalEdits`.
 * If the file does not exist, `tasks` is returned unchanged; any other read or parse
 * failure is rethrown.
 *
 * @param tasks Tasks to filter; a task is dropped when its `file` equals a skipped entry's `name`.
 * @param editSkipRate Fraction of all recorded edits the skipped tests may cover; 0 skips nothing.
 */
function skipCostlyTests(tasks: Task[], editSkipRate: number) {
    let costs: {
        totalTime: number,
        totalEdits: number,
        // `cost` is written with toFixed(2) by scripts/costly-tests.js, so it arrives as a string.
        data: Array<{ name: string, time: number, edits: number, cost: string }>
    };
    try {
        costs = JSON.parse(readFileSync('.test-cost.json', 'utf8'));
    }
    catch (e) {
        // A missing file makes the fs call throw rather than return a falsy value, so the
        // "no analysis" case has to be detected here.
        if ((e as { code?: string }).code !== "ENOENT") {
            throw e;
        }
        console.log('No cost analysis discovered.');
        return tasks;
    }

    let skippedEdits = 0;
    let skippedTime = 0;
    const skippedTests = new Set<string>();
    let i = 0;
    for (; i < costs.data.length && (skippedEdits / costs.totalEdits) < editSkipRate; i++) {
        const entry = costs.data[i];
        skippedEdits += entry.edits;
        skippedTime += entry.time;
        skippedTests.add(entry.name);
    }
    console.log(`Skipped ${i} expensive tests; estimated time savings of ${(skippedTime / costs.totalTime * 100).toFixed(2)}% with ${(editSkipRate * 100).toFixed(2)}% chance of missing a test.`);
    return tasks.filter(t => !skippedTests.has(t.file));
}
function startDelayed(perfData: { [testHash: string]: number } | undefined, totalCost: number, editSkipRate: number) {
console.log(`Discovered ${tasks.length} unittest suites` + (newTasks.length ? ` and ${newTasks.length} new suites.` : "."));
console.log("Discovering runner-based tests...");
const discoverStart = +(new Date());
@ -231,6 +258,7 @@ namespace Harness.Parallel.Host {
}
tasks.sort((a, b) => a.size - b.size);
tasks = tasks.concat(newTasks);
tasks = skipCostlyTests(tasks, editSkipRate);
const batchCount = workerCount;
const packfraction = 0.9;
const chunkSize = 1000; // ~1KB or 1s for sending batches near the end of a test
@ -625,6 +653,6 @@ namespace Harness.Parallel.Host {
}
// tslint:disable-next-line:ban
setTimeout(() => startDelayed(perfData, totalCost), 0); // Do real startup on next tick, so all unit tests have been collected
setTimeout(() => startDelayed(perfData, totalCost, editSkipRate), 0); // Do real startup on next tick, so all unit tests have been collected
}
}