From 59cc92897d0a01afd3090fea8cc1cffa7fe10d63 Mon Sep 17 00:00:00 2001 From: Erick Bajao Date: Tue, 9 Nov 2021 18:34:44 +0000 Subject: [PATCH] Include upload_duration in finalized fields --- lib/gitlab/instrumentation/uploads.rb | 32 ++++++++++ lib/gitlab/instrumentation_helper.rb | 5 ++ lib/uploaded_file.rb | 16 ++++- .../lib/gitlab/instrumentation_helper_spec.rb | 19 ++++++ spec/lib/uploaded_file_spec.rb | 64 ++++++++++++++++--- workhorse/internal/filestore/file_handler.go | 34 +++++----- .../internal/filestore/file_handler_test.go | 1 + workhorse/internal/upload/uploads_test.go | 2 +- workhorse/upload_test.go | 2 +- 9 files changed, 145 insertions(+), 30 deletions(-) create mode 100644 lib/gitlab/instrumentation/uploads.rb diff --git a/lib/gitlab/instrumentation/uploads.rb b/lib/gitlab/instrumentation/uploads.rb new file mode 100644 index 000000000000..02e457453cd7 --- /dev/null +++ b/lib/gitlab/instrumentation/uploads.rb @@ -0,0 +1,32 @@ +# frozen_string_literal: true + +module Gitlab + module Instrumentation + class Uploads + UPLOAD_DURATION = :uploaded_file_upload_duration_s + UPLOADED_FILE_SIZE = :uploaded_file_size_bytes + + def self.track(uploaded_file) + if ::Gitlab::SafeRequestStore.active? + ::Gitlab::SafeRequestStore[UPLOAD_DURATION] = uploaded_file.upload_duration + ::Gitlab::SafeRequestStore[UPLOADED_FILE_SIZE] = uploaded_file.size + end + end + + def self.get_upload_duration + ::Gitlab::SafeRequestStore[UPLOAD_DURATION] + end + + def self.get_uploaded_file_size + ::Gitlab::SafeRequestStore[UPLOADED_FILE_SIZE] + end + + def self.payload + { + UPLOAD_DURATION => get_upload_duration, + UPLOADED_FILE_SIZE => get_uploaded_file_size + }.compact + end + end + end +end diff --git a/lib/gitlab/instrumentation_helper.rb b/lib/gitlab/instrumentation_helper.rb index 26e44d7822e4..155e365d04c1 100644 --- a/lib/gitlab/instrumentation_helper.rb +++ b/lib/gitlab/instrumentation_helper.rb @@ -31,6 +31,7 @@ def add_instrumentation_data(payload) instrument_thread_memory_allocations(payload) instrument_load_balancing(payload) instrument_pid(payload) + instrument_uploads(payload) end def instrument_gitaly(payload) @@ -116,6 +117,10 @@ def instrument_load_balancing(payload) payload.merge!(load_balancing_payload) end + def instrument_uploads(payload) + payload.merge! ::Gitlab::Instrumentation::Uploads.payload + end + # Returns the queuing duration for a Sidekiq job in seconds, as a float, if the # `enqueued_at` field or `created_at` field is available. # diff --git a/lib/uploaded_file.rb b/lib/uploaded_file.rb index 79920968603e..36baf4a3cf80 100644 --- a/lib/uploaded_file.rb +++ b/lib/uploaded_file.rb @@ -20,8 +20,9 @@ class UploadedFile attr_reader :remote_id attr_reader :sha256 attr_reader :size + attr_reader :upload_duration - def initialize(path, filename: nil, content_type: "application/octet-stream", sha256: nil, remote_id: nil, size: nil) + def initialize(path, filename: nil, content_type: "application/octet-stream", sha256: nil, remote_id: nil, size: nil, upload_duration: nil) if path.present? raise InvalidPathError, "#{path} file does not exist" unless ::File.exist?(path) @@ -35,6 +36,12 @@ def initialize(path, filename: nil, content_type: "application/octet-stream", sh end end + begin + @upload_duration = Float(upload_duration) + rescue ArgumentError, TypeError + @upload_duration = 0 + end + @content_type = content_type @original_filename = sanitize_filename(filename || path || '') @content_type = content_type @@ -64,8 +71,11 @@ def self.from_params(params, upload_paths) content_type: params['type'] || 'application/octet-stream', sha256: params['sha256'], remote_id: remote_id, - size: params['size'] - ) + size: params['size'], + upload_duration: params['upload_duration'] + ).tap do |uploaded_file| + ::Gitlab::Instrumentation::Uploads.track(uploaded_file) + end end def self.allowed_path?(file_path, paths) diff --git a/spec/lib/gitlab/instrumentation_helper_spec.rb b/spec/lib/gitlab/instrumentation_helper_spec.rb index 52d3623c304e..a9663012e9a6 100644 --- a/spec/lib/gitlab/instrumentation_helper_spec.rb +++ b/spec/lib/gitlab/instrumentation_helper_spec.rb @@ -147,6 +147,25 @@ expect(payload).not_to include(:caught_up_replica_pick_fail) end end + + context 'when there is an uploaded file' do + it 'adds upload data' do + uploaded_file = UploadedFile.from_params({ + 'name' => 'dir/foo.txt', + 'sha256' => 'sha256', + 'remote_url' => 'http://localhost/file', + 'remote_id' => '1234567890', + 'etag' => 'etag1234567890', + 'upload_duration' => '5.05', + 'size' => '123456' + }, nil) + + subject + + expect(payload[:uploaded_file_upload_duration_s]).to eq(uploaded_file.upload_duration) + expect(payload[:uploaded_file_size_bytes]).to eq(uploaded_file.size) + end + end end describe 'duration calculations' do diff --git a/spec/lib/uploaded_file_spec.rb b/spec/lib/uploaded_file_spec.rb index ececc84bc934..0aba6cb00657 100644 --- a/spec/lib/uploaded_file_spec.rb +++ b/spec/lib/uploaded_file_spec.rb @@ -15,7 +15,7 @@ end context 'from_params functions' do - RSpec.shared_examples 'using the file path' do |filename:, content_type:, sha256:, path_suffix:| + RSpec.shared_examples 'using the file path' do |filename:, content_type:, sha256:, path_suffix:, upload_duration:| it { is_expected.not_to be_nil } it 'sets properly the attributes' do @@ -24,6 +24,7 @@ expect(subject.sha256).to eq(sha256) expect(subject.remote_id).to be_nil expect(subject.path).to end_with(path_suffix) + expect(subject.upload_duration).to eq(upload_duration) end it 'handles a blank path' do @@ -37,16 +38,17 @@ end end - RSpec.shared_examples 'using the remote id' do |filename:, content_type:, sha256:, size:, remote_id:| + RSpec.shared_examples 'using the remote id' do |filename:, content_type:, sha256:, size:, remote_id:, upload_duration:| it { is_expected.not_to be_nil } it 'sets properly the attributes' do expect(subject.original_filename).to eq(filename) - expect(subject.content_type).to eq('application/octet-stream') - expect(subject.sha256).to eq('sha256') + expect(subject.content_type).to eq(content_type) + expect(subject.sha256).to eq(sha256) expect(subject.path).to be_nil - expect(subject.size).to eq(123456) - expect(subject.remote_id).to eq('1234567890') + expect(subject.size).to eq(size) + expect(subject.remote_id).to eq(remote_id) + expect(subject.upload_duration).to eq(upload_duration) end end @@ -78,6 +80,7 @@ { 'path' => temp_file.path, 'name' => 'dir/my file&.txt', 'type' => 'my/type', + 'upload_duration' => '5.05', 'sha256' => 'sha256' } end @@ -85,7 +88,8 @@ filename: 'my_file_.txt', content_type: 'my/type', sha256: 'sha256', - path_suffix: 'test' + path_suffix: 'test', + upload_duration: 5.05 end context 'with a remote id' do @@ -96,6 +100,7 @@ 'remote_url' => 'http://localhost/file', 'remote_id' => '1234567890', 'etag' => 'etag1234567890', + 'upload_duration' => '5.05', 'size' => '123456' } end @@ -105,7 +110,8 @@ content_type: 'application/octet-stream', sha256: 'sha256', size: 123456, - remote_id: '1234567890' + remote_id: '1234567890', + upload_duration: 5.05 end context 'with a path and a remote id' do @@ -117,6 +123,7 @@ 'remote_url' => 'http://localhost/file', 'remote_id' => '1234567890', 'etag' => 'etag1234567890', + 'upload_duration' => '5.05', 'size' => '123456' } end @@ -126,7 +133,8 @@ content_type: 'application/octet-stream', sha256: 'sha256', size: 123456, - remote_id: '1234567890' + remote_id: '1234567890', + upload_duration: 5.05 end end end @@ -216,6 +224,44 @@ end.to raise_error(UploadedFile::UnknownSizeError, 'Unable to determine file size') end end + + context 'when upload_duration is not provided' do + it 'sets upload_duration to zero' do + file = described_class.new(temp_file.path) + + expect(file.upload_duration).to be_zero + end + end + + context 'when upload_duration is provided' do + let(:file) { described_class.new(temp_file.path, upload_duration: duration) } + + context 'and upload_duration is a number' do + let(:duration) { 5.505 } + + it 'sets the upload_duration' do + expect(file.upload_duration).to eq(duration) + end + end + + context 'and upload_duration is a string' do + context 'and represents a number' do + let(:duration) { '5.505' } + + it 'converts upload_duration to a number' do + expect(file.upload_duration).to eq(duration.to_f) + end + end + + context 'and does not represent a number' do + let(:duration) { 'not a number' } + + it 'sets upload_duration to zero' do + expect(file.upload_duration).to be_zero + end + end + end + end end describe '#sanitize_filename' do diff --git a/workhorse/internal/filestore/file_handler.go b/workhorse/internal/filestore/file_handler.go index b4d7250fe0c3..dac8d4d62479 100644 --- a/workhorse/internal/filestore/file_handler.go +++ b/workhorse/internal/filestore/file_handler.go @@ -43,6 +43,9 @@ type FileHandler struct { // a map containing different hashes hashes map[string]string + + // Duration of upload in seconds + uploadDuration float64 } type uploadClaims struct { @@ -74,11 +77,12 @@ func (fh *FileHandler) GitLabFinalizeFields(prefix string) (map[string]string, e } for k, v := range map[string]string{ - "name": fh.Name, - "path": fh.LocalPath, - "remote_url": fh.RemoteURL, - "remote_id": fh.RemoteID, - "size": strconv.FormatInt(fh.Size, 10), + "name": fh.Name, + "path": fh.LocalPath, + "remote_url": fh.RemoteURL, + "remote_id": fh.RemoteID, + "size": strconv.FormatInt(fh.Size, 10), + "upload_duration": strconv.FormatFloat(fh.uploadDuration, 'f', -1, 64), } { data[key(k)] = v signedData[k] = v @@ -105,18 +109,20 @@ type consumer interface { // SaveFileFromReader persists the provided reader content to all the location specified in opts. A cleanup will be performed once ctx is Done // Make sure the provided context will not expire before finalizing upload with GitLab Rails. -func SaveFileFromReader(ctx context.Context, reader io.Reader, size int64, opts *SaveFileOpts) (fh *FileHandler, err error) { - var uploadDestination consumer - fh = &FileHandler{ +func SaveFileFromReader(ctx context.Context, reader io.Reader, size int64, opts *SaveFileOpts) (*FileHandler, error) { + fh := &FileHandler{ Name: opts.TempFilePrefix, RemoteID: opts.RemoteID, RemoteURL: opts.RemoteURL, } + uploadStartTime := time.Now() + defer func() { fh.uploadDuration = time.Since(uploadStartTime).Seconds() }() hashes := newMultiHash() reader = io.TeeReader(reader, hashes.Writer) var clientMode string - + var uploadDestination consumer + var err error switch { case opts.IsLocal(): clientMode = "local" @@ -161,23 +167,19 @@ func SaveFileFromReader(ctx context.Context, reader io.Reader, size int64, opts return nil, err } + var hlr *hardLimitReader if opts.MaximumSize > 0 { if size > opts.MaximumSize { return nil, SizeError(fmt.Errorf("the upload size %d is over maximum of %d bytes", size, opts.MaximumSize)) } - hlr := &hardLimitReader{r: reader, n: opts.MaximumSize} + hlr = &hardLimitReader{r: reader, n: opts.MaximumSize} reader = hlr - defer func() { - if hlr.n < 0 { - err = ErrEntityTooLarge - } - }() } fh.Size, err = uploadDestination.Consume(ctx, reader, opts.Deadline) if err != nil { - if err == objectstore.ErrNotEnoughParts { + if (err == objectstore.ErrNotEnoughParts) || (hlr != nil && hlr.n < 0) { err = ErrEntityTooLarge } return nil, err diff --git a/workhorse/internal/filestore/file_handler_test.go b/workhorse/internal/filestore/file_handler_test.go index 16af56dcf48f..f57026a59dfe 100644 --- a/workhorse/internal/filestore/file_handler_test.go +++ b/workhorse/internal/filestore/file_handler_test.go @@ -548,4 +548,5 @@ func checkFileHandlerWithFields(t *testing.T, fh *filestore.FileHandler, fields require.Equal(t, test.ObjectSHA1, fields[key("sha1")]) require.Equal(t, test.ObjectSHA256, fields[key("sha256")]) require.Equal(t, test.ObjectSHA512, fields[key("sha512")]) + require.NotEmpty(t, fields[key("upload_duration")]) } diff --git a/workhorse/internal/upload/uploads_test.go b/workhorse/internal/upload/uploads_test.go index e82dcdcbc697..b5db8003d41b 100644 --- a/workhorse/internal/upload/uploads_test.go +++ b/workhorse/internal/upload/uploads_test.go @@ -114,7 +114,7 @@ func TestUploadHandlerRewritingMultiPartData(t *testing.T) { require.Equal(t, hash, r.FormValue("file."+algo), "file hash %s", algo) } - require.Len(t, r.MultipartForm.Value, 11, "multipart form values") + require.Len(t, r.MultipartForm.Value, 12, "multipart form values") w.WriteHeader(202) fmt.Fprint(w, "RESPONSE") diff --git a/workhorse/upload_test.go b/workhorse/upload_test.go index 24c14bb12aa3..b043cf17f32c 100644 --- a/workhorse/upload_test.go +++ b/workhorse/upload_test.go @@ -83,7 +83,7 @@ func uploadTestServer(t *testing.T, authorizeTests func(r *http.Request), extraT require.NoError(t, r.ParseMultipartForm(100000)) - const nValues = 10 // file name, path, remote_url, remote_id, size, md5, sha1, sha256, sha512, gitlab-workhorse-upload for just the upload (no metadata because we are not POSTing a valid zip file) + const nValues = 11 // file name, path, remote_url, remote_id, size, md5, sha1, sha256, sha512, upload_duration, gitlab-workhorse-upload for just the upload (no metadata because we are not POSTing a valid zip file) require.Len(t, r.MultipartForm.Value, nValues) require.Empty(t, r.MultipartForm.File, "multipart form files")