[kbn/es] print first 500 bytes of invalid content (#34133)

We added checksum verification to `@kbn/es` in https://github.com/elastic/kibana/pull/33893. It is working, and in turn it proves that using the Artifact API to download snapshots wasn't enough to fix the issues we've been seeing. In https://kibana-ci.elastic.co/job/elastic+kibana+7.0/412/JOB=kibana-ciGroup8,node=immutable/console we see:

```
00:14:20.762  │ info Installing from snapshot
00:14:20.763      │ info version: 7.0.0
00:14:20.763      │ info install path: /var/lib/jenkins/workspace/elastic+kibana+7.0/JOB/kibana-ciGroup8/node/immutable/kibana/.es/test-97zzlri1nyb
00:14:20.764      │ info license: oss
00:14:20.764      │ info downloading artifact info from https://artifacts-api.elastic.co/v1/versions/7.0.0-SNAPSHOT/builds/latest/projects/elasticsearch
00:14:20.852      │ info downloading artifact from https://snapshots.elastic.co/7.0.0-ea741e68/downloads/elasticsearch/elasticsearch-oss-7.0.0-SNAPSHOT-linux-x86_64.tar.gz
00:14:22.765      │ info downloading artifact checksum from https://snapshots.elastic.co/7.0.0-ea741e68/downloads/elasticsearch/elasticsearch-oss-7.0.0-SNAPSHOT-linux-x86_64.tar.gz
00:14:22.818 
00:14:22.818 artifact downloaded from https://snapshots.elastic.co/7.0.0-ea741e68/downloads/elasticsearch/elasticsearch-oss-7.0.0-SNAPSHOT-linux-x86_64.tar.gz does not match expected checksum
00:14:22.819   expected: 1e5188c1410e0299777dd1cb9638aa12de4067ea80a77ff6723986746bbc7b1a35d580023f605e6e1ae84887e675ba10184032da80240a42289ee6c5d5361c66
00:14:22.820   received: 7630553ab83828196931917aa15f17bf1f31d541fd11e0f1c82d89f294fbe47dca7be99d96da5baa0a9545712e3c9340f9b3d989e5056a3f9471f43bb582becc
```

The new logging shows that we used the explicit URL for the artifact, `https://snapshots.elastic.co/7.0.0-ea741e68/downloads/elasticsearch/elasticsearch-oss-7.0.0-SNAPSHOT-linux-x86_64.tar.gz`, and still got invalid data, which confirms our suspicion that something funky is happening with the CDN. To understand more about what's going on, this PR adds a buffer that captures the first 500 bytes of the artifact response and logs them as UTF-8 text if the checksum doesn't match. I'm hoping we're going to see some HTML or something that will help explain what's happening.
This commit is contained in:
Spencer 2019-03-28 19:41:07 -07:00 committed by GitHub
parent 179b9be6bb
commit eefa2a54af
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -167,7 +167,7 @@ exports.Artifact = class Artifact {
return;
}
await this._verifyChecksum(artifactResp.checksum);
await this._verifyChecksum(artifactResp);
// cache the etag for future downloads
cache.writeMeta(dest, { etag: artifactResp.etag });
@ -181,7 +181,7 @@ exports.Artifact = class Artifact {
* @param {string} tmpPath
* @param {string} etag
* @param {string} ts
* @return {{ cached: true }|{ checksum: string, etag: string }}
* @return {{ cached: true }|{ checksum: string, etag: string, contentLength: number, first500Bytes: Buffer }}
*/
async _download(tmpPath, etag, ts) {
const url = this.getUrl();
@ -219,12 +219,23 @@ exports.Artifact = class Artifact {
}
const hash = createHash(this.getChecksumType());
let first500Bytes = Buffer.alloc(0);
let contentLength = 0;
mkdirp.sync(path.dirname(tmpPath));
await asyncPipeline(
resp.body,
new Transform({
transform(chunk, encoding, cb) {
contentLength += Buffer.byteLength(chunk);
if (first500Bytes.length < 500) {
first500Bytes = Buffer.concat(
[first500Bytes, chunk],
first500Bytes.length + chunk.length
).slice(0, 500);
}
hash.update(chunk, encoding);
cb(null, chunk);
},
@ -235,16 +246,18 @@ exports.Artifact = class Artifact {
return {
checksum: hash.digest('hex'),
etag: resp.headers.get('etag'),
contentLength,
first500Bytes,
};
}
/**
* Verify the checksum of the downloaded artifact with the checksum at checksumUrl
* @param {string} actualChecksum
* @param {{ checksum: string, contentLength: number, first500Bytes: Buffer }} artifactResp
* @return {Promise<void>}
*/
async _verifyChecksum(actualChecksum) {
this._log.info('downloading artifact checksum from %s', chalk.bold(this.getUrl()));
async _verifyChecksum(artifactResp) {
this._log.info('downloading artifact checksum from %s', chalk.bold(this.getChecksumUrl()));
const abc = new AbortController();
const resp = await fetch(this.getChecksumUrl(), {
@ -258,11 +271,13 @@ exports.Artifact = class Artifact {
// in format of stdout from `shasum` cmd, which is `<checksum> <filename>`
const [expectedChecksum] = (await resp.text()).split(' ');
if (actualChecksum !== expectedChecksum) {
if (artifactResp.checksum !== expectedChecksum) {
// Label the preview as "<bytes captured> / <total bytes received>". Dividing
// the Buffer itself by a number would always produce NaN.
const len = `${artifactResp.first500Bytes.length} / ${artifactResp.contentLength}`;
throw createCliError(
`artifact downloaded from ${this.getUrl()} does not match expected checksum\n` +
` expected: ${expectedChecksum}\n` +
` received: ${actualChecksum}`
` received: ${artifactResp.checksum}\n` +
` content[${len}]: ${artifactResp.first500Bytes.toString('utf8')}`
);
}