2017-05-26 09:57:09 +02:00
// Copyright 2017 Vector Creations Ltd
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
2017-10-11 19:16:53 +02:00
package routing
2017-05-26 09:57:09 +02:00
import (
2017-09-21 16:44:00 +02:00
"context"
2020-08-25 16:08:37 +02:00
"crypto/rand"
"encoding/hex"
2017-05-26 09:57:09 +02:00
"fmt"
2017-05-26 17:34:58 +02:00
"io"
2017-05-26 09:57:09 +02:00
"net/http"
"net/url"
2017-05-26 16:49:54 +02:00
"path"
2017-11-22 16:45:03 +01:00
"strings"
2017-05-26 09:57:09 +02:00
"github.com/matrix-org/dendrite/clientapi/jsonerror"
2017-05-26 16:49:54 +02:00
"github.com/matrix-org/dendrite/mediaapi/fileutils"
"github.com/matrix-org/dendrite/mediaapi/storage"
2017-06-07 01:12:49 +02:00
"github.com/matrix-org/dendrite/mediaapi/thumbnailer"
2017-05-26 09:57:09 +02:00
"github.com/matrix-org/dendrite/mediaapi/types"
2020-12-02 18:41:00 +01:00
"github.com/matrix-org/dendrite/setup/config"
2020-08-26 16:38:34 +02:00
userapi "github.com/matrix-org/dendrite/userapi/api"
2017-07-07 15:11:32 +02:00
"github.com/matrix-org/gomatrixserverlib"
2017-05-26 09:57:09 +02:00
"github.com/matrix-org/util"
2017-11-15 12:13:09 +01:00
log "github.com/sirupsen/logrus"
2017-05-26 09:57:09 +02:00
)
// uploadRequest bundles everything known about a single upload while it is
// being processed.
// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload
// NOTE: The fields are filled in from HTTP request metadata (headers, query
// parameters) or from values that can be derived from that metadata.
type uploadRequest struct {
	// MediaMetadata describes the media being uploaded.
	MediaMetadata *types.MediaMetadata
	// Logger is the log entry used for all messages about this upload.
	Logger *log.Entry
}
2017-05-26 10:03:16 +02:00
// uploadResponse defines the format of the JSON response
// https://matrix.org/docs/spec/client_server/r0.2.0.html#post-matrix-media-r0-upload
type uploadResponse struct {
	// ContentURI is the mxc:// URI under which the uploaded content can be
	// referenced.
	ContentURI string `json:"content_uri"`
}
2019-07-18 09:40:10 +02:00
// Upload implements POST /upload
2017-05-26 09:57:09 +02:00
// This endpoint involves uploading potentially significant amounts of data to the homeserver.
// This implementation supports a configurable maximum file size limit in bytes. If a user tries to upload more than this, they will receive an error that their upload is too large.
// Uploaded files are processed piece-wise to avoid DoS attacks which would starve the server of memory.
// TODO: We should time out requests if they have not received any data within a configured timeout period.
2020-08-26 16:38:34 +02:00
func Upload ( req * http . Request , cfg * config . MediaAPI , dev * userapi . Device , db storage . Database , activeThumbnailGeneration * types . ActiveThumbnailGeneration ) util . JSONResponse {
r , resErr := parseAndValidateRequest ( req , cfg , dev )
2017-05-26 09:57:09 +02:00
if resErr != nil {
return * resErr
}
2017-09-21 16:44:00 +02:00
if resErr = r . doUpload ( req . Context ( ) , req . Body , cfg , db , activeThumbnailGeneration ) ; resErr != nil {
2017-05-26 17:34:58 +02:00
return * resErr
}
return util . JSONResponse {
2018-03-13 16:55:45 +01:00
Code : http . StatusOK ,
2017-05-26 17:34:58 +02:00
JSON : uploadResponse {
2017-06-19 16:21:04 +02:00
ContentURI : fmt . Sprintf ( "mxc://%s/%s" , cfg . Matrix . ServerName , r . MediaMetadata . MediaID ) ,
2017-05-26 17:34:58 +02:00
} ,
}
}
// parseAndValidateRequest parses the incoming upload request to validate and extract
// all the metadata about the media being uploaded.
// Returns either an uploadRequest or an error formatted as a util.JSONResponse
2020-08-26 16:38:34 +02:00
func parseAndValidateRequest ( req * http . Request , cfg * config . MediaAPI , dev * userapi . Device ) ( * uploadRequest , * util . JSONResponse ) {
2017-05-26 17:34:58 +02:00
r := & uploadRequest {
MediaMetadata : & types . MediaMetadata {
2017-06-19 16:21:04 +02:00
Origin : cfg . Matrix . ServerName ,
2017-05-26 17:34:58 +02:00
FileSizeBytes : types . FileSizeBytes ( req . ContentLength ) ,
ContentType : types . ContentType ( req . Header . Get ( "Content-Type" ) ) ,
UploadName : types . Filename ( url . PathEscape ( req . FormValue ( "filename" ) ) ) ,
2020-08-26 16:38:34 +02:00
UserID : types . MatrixUserID ( dev . UserID ) ,
2017-05-26 17:34:58 +02:00
} ,
2017-06-19 16:21:04 +02:00
Logger : util . GetLogger ( req . Context ( ) ) . WithField ( "Origin" , cfg . Matrix . ServerName ) ,
2017-05-26 17:34:58 +02:00
}
2020-08-10 15:18:04 +02:00
if resErr := r . Validate ( * cfg . MaxFileSizeBytes ) ; resErr != nil {
2017-05-26 17:34:58 +02:00
return nil , resErr
}
return r , nil
}
2020-08-25 16:08:37 +02:00
// generateMediaID produces a media ID that is not yet present in the database
// for this request's origin. Candidates are 32 random bytes, hex-encoded.
// It returns an error if random bytes cannot be read or the database lookup fails.
func (r *uploadRequest) generateMediaID(ctx context.Context, db storage.Database) (types.MediaID, error) {
	for {
		// Draw a fresh random candidate.
		raw := make([]byte, 32)
		if _, err := rand.Read(raw); err != nil {
			return "", fmt.Errorf("rand.Read: %w", err)
		}
		candidate := types.MediaID(hex.EncodeToString(raw))

		// Ask the database whether the candidate is already taken.
		clash, err := db.GetMediaMetadata(ctx, candidate, r.MediaMetadata.Origin)
		if err != nil {
			return "", fmt.Errorf("db.GetMediaMetadata: %w", err)
		}
		if clash == nil {
			// Nobody is using this ID yet, so it is ours.
			return candidate, nil
		}
		// The ID is already in use - go round again with a new candidate.
	}
}
2017-09-21 16:44:00 +02:00
func ( r * uploadRequest ) doUpload (
ctx context . Context ,
reqReader io . Reader ,
2020-08-10 15:18:04 +02:00
cfg * config . MediaAPI ,
2020-01-03 15:07:05 +01:00
db storage . Database ,
2017-09-21 16:44:00 +02:00
activeThumbnailGeneration * types . ActiveThumbnailGeneration ,
) * util . JSONResponse {
2017-05-26 16:49:54 +02:00
r . Logger . WithFields ( log . Fields {
2017-05-26 17:24:13 +02:00
"UploadName" : r . MediaMetadata . UploadName ,
"FileSizeBytes" : r . MediaMetadata . FileSizeBytes ,
2017-05-31 14:54:10 +02:00
"ContentType" : r . MediaMetadata . ContentType ,
2017-05-26 16:49:54 +02:00
} ) . Info ( "Uploading file" )
// The file data is hashed and the hash is used as the MediaID. The hash is useful as a
// method of deduplicating files to save storage, as well as a way to conduct
// integrity checks on the file data in the repository.
2017-05-31 07:10:01 +02:00
// Data is truncated to maxFileSizeBytes. Content-Length was reported as 0 < Content-Length <= maxFileSizeBytes so this is OK.
2020-08-26 16:38:34 +02:00
//
// TODO: This has a bad API shape where you either need to call:
// fileutils.RemoveDir(tmpDir, r.Logger)
// or call:
// r.storeFileAndMetadata(ctx, tmpDir, ...)
// before you return from doUpload else we will leak a temp file. We could make this nicer with a `WithTransaction` style of
// nested function to guarantee either storage or cleanup.
2021-02-17 14:54:53 +01:00
hash , bytesWritten , tmpDir , err := fileutils . WriteTempFile ( ctx , reqReader , cfg . AbsBasePath )
2017-05-26 17:50:16 +02:00
if err != nil {
r . Logger . WithError ( err ) . WithFields ( log . Fields {
2020-08-10 15:18:04 +02:00
"MaxFileSizeBytes" : * cfg . MaxFileSizeBytes ,
2017-05-26 17:50:16 +02:00
} ) . Warn ( "Error while transferring file" )
2017-05-26 17:34:58 +02:00
return & util . JSONResponse {
2018-03-13 16:55:45 +01:00
Code : http . StatusBadRequest ,
2017-05-31 07:12:22 +02:00
JSON : jsonerror . Unknown ( "Failed to upload" ) ,
2017-05-26 16:49:54 +02:00
}
}
2021-04-14 11:53:24 +02:00
// Check if temp file size exceeds max file size configuration
if bytesWritten > types . FileSizeBytes ( * cfg . MaxFileSizeBytes ) {
fileutils . RemoveDir ( tmpDir , r . Logger ) // delete temp file
return requestEntityTooLargeJSONResponse ( * cfg . MaxFileSizeBytes )
}
2020-08-25 16:08:37 +02:00
// Look up the media by the file hash. If we already have the file but under a
// different media ID then we won't upload the file again - instead we'll just
// add a new metadata entry that refers to the same file.
existingMetadata , err := db . GetMediaMetadataByHash (
ctx , hash , r . MediaMetadata . Origin ,
2017-09-21 16:44:00 +02:00
)
2017-05-31 14:52:45 +02:00
if err != nil {
2020-08-26 16:38:34 +02:00
fileutils . RemoveDir ( tmpDir , r . Logger )
2020-08-25 16:08:37 +02:00
r . Logger . WithError ( err ) . Error ( "Error querying the database by hash." )
2017-05-31 17:41:42 +02:00
resErr := jsonerror . InternalServerError ( )
return & resErr
2017-05-31 14:52:45 +02:00
}
2020-08-25 16:08:37 +02:00
if existingMetadata != nil {
2020-08-26 16:38:34 +02:00
// The file already exists, delete the uploaded temporary file.
defer fileutils . RemoveDir ( tmpDir , r . Logger )
2020-08-25 16:08:37 +02:00
// The file already exists. Make a new media ID up for it.
mediaID , merr := r . generateMediaID ( ctx , db )
if merr != nil {
r . Logger . WithError ( merr ) . Error ( "Failed to generate media ID for existing file" )
resErr := jsonerror . InternalServerError ( )
return & resErr
}
2017-05-31 14:52:45 +02:00
2020-08-25 16:08:37 +02:00
// Then amend the upload metadata.
r . MediaMetadata = & types . MediaMetadata {
MediaID : mediaID ,
Origin : r . MediaMetadata . Origin ,
ContentType : r . MediaMetadata . ContentType ,
FileSizeBytes : r . MediaMetadata . FileSizeBytes ,
CreationTimestamp : r . MediaMetadata . CreationTimestamp ,
UploadName : r . MediaMetadata . UploadName ,
Base64Hash : hash ,
UserID : r . MediaMetadata . UserID ,
}
} else {
// The file doesn't exist. Update the request metadata.
r . MediaMetadata . FileSizeBytes = bytesWritten
r . MediaMetadata . Base64Hash = hash
r . MediaMetadata . MediaID , err = r . generateMediaID ( ctx , db )
if err != nil {
2020-08-26 16:38:34 +02:00
fileutils . RemoveDir ( tmpDir , r . Logger )
2020-08-25 16:08:37 +02:00
r . Logger . WithError ( err ) . Error ( "Failed to generate media ID for new upload" )
resErr := jsonerror . InternalServerError ( )
return & resErr
2017-05-26 16:49:54 +02:00
}
}
2020-08-25 16:08:37 +02:00
r . Logger = r . Logger . WithField ( "media_id" , r . MediaMetadata . MediaID )
r . Logger . WithFields ( log . Fields {
"Base64Hash" : r . MediaMetadata . Base64Hash ,
"UploadName" : r . MediaMetadata . UploadName ,
"FileSizeBytes" : r . MediaMetadata . FileSizeBytes ,
"ContentType" : r . MediaMetadata . ContentType ,
} ) . Info ( "File uploaded" )
2017-11-15 12:13:09 +01:00
return r . storeFileAndMetadata (
2020-08-10 15:18:04 +02:00
ctx , tmpDir , cfg . AbsBasePath , db , cfg . ThumbnailSizes ,
activeThumbnailGeneration , cfg . MaxThumbnailGenerators ,
2017-11-15 12:13:09 +01:00
)
2017-05-26 09:57:09 +02:00
}
2021-04-14 11:53:24 +02:00
// requestEntityTooLargeJSONResponse builds the 413 response returned when an
// upload is larger than maxFileSizeBytes; the limit is included in the message.
func requestEntityTooLargeJSONResponse(maxFileSizeBytes config.FileSizeBytes) *util.JSONResponse {
	message := fmt.Sprintf("HTTP Content-Length is greater than the maximum allowed upload size (%v).", maxFileSizeBytes)
	resp := util.JSONResponse{
		Code: http.StatusRequestEntityTooLarge,
		JSON: jsonerror.Unknown(message),
	}
	return &resp
}
2017-05-26 09:57:09 +02:00
// Validate validates the uploadRequest fields
2017-06-19 16:21:04 +02:00
func ( r * uploadRequest ) Validate ( maxFileSizeBytes config . FileSizeBytes ) * util . JSONResponse {
if maxFileSizeBytes > 0 && r . MediaMetadata . FileSizeBytes > types . FileSizeBytes ( maxFileSizeBytes ) {
2021-04-14 11:53:24 +02:00
return requestEntityTooLargeJSONResponse ( maxFileSizeBytes )
2017-05-26 09:57:09 +02:00
}
2017-11-22 16:45:03 +01:00
if strings . HasPrefix ( string ( r . MediaMetadata . UploadName ) , "~" ) {
2017-05-26 15:26:50 +02:00
return & util . JSONResponse {
2018-03-13 16:55:45 +01:00
Code : http . StatusBadRequest ,
2017-05-26 15:26:50 +02:00
JSON : jsonerror . Unknown ( "File name must not begin with '~'." ) ,
}
}
2017-05-26 09:57:09 +02:00
// TODO: Validate filename - what are the valid characters?
if r . MediaMetadata . UserID != "" {
// TODO: We should put user ID parsing code into gomatrixserverlib and use that instead
// (see https://github.com/matrix-org/gomatrixserverlib/blob/3394e7c7003312043208aa73727d2256eea3d1f6/eventcontent.go#L347 )
// It should be a struct (with pointers into a single string to avoid copying) and
// we should update all refs to use UserID types rather than strings.
// https://github.com/matrix-org/synapse/blob/v0.19.2/synapse/types.py#L92
2017-07-07 15:11:32 +02:00
if _ , _ , err := gomatrixserverlib . SplitID ( '@' , string ( r . MediaMetadata . UserID ) ) ; err != nil {
2017-05-26 09:57:09 +02:00
return & util . JSONResponse {
2018-03-13 16:55:45 +01:00
Code : http . StatusBadRequest ,
2017-05-26 09:57:09 +02:00
JSON : jsonerror . BadJSON ( "user id must be in the form @localpart:domain" ) ,
}
}
}
return nil
}
2017-05-26 16:49:54 +02:00
2017-05-26 17:42:08 +02:00
// storeFileAndMetadata moves the temporary file to its final path based on metadata and stores the metadata in the database
// See getPathFromMediaMetadata in fileutils for details of the final path.
// The order of operations is important as it avoids metadata entering the database before the file
// is ready, and if we fail to move the file, it never gets added to the database.
// Returns a util.JSONResponse error and cleans up directories in case of error.
2017-09-21 16:44:00 +02:00
func ( r * uploadRequest ) storeFileAndMetadata (
ctx context . Context ,
tmpDir types . Path ,
absBasePath config . Path ,
2020-01-03 15:07:05 +01:00
db storage . Database ,
2017-09-21 16:44:00 +02:00
thumbnailSizes [ ] config . ThumbnailSize ,
activeThumbnailGeneration * types . ActiveThumbnailGeneration ,
maxThumbnailGenerators int ,
) * util . JSONResponse {
2017-05-26 16:49:54 +02:00
finalPath , duplicate , err := fileutils . MoveFileWithHashCheck ( tmpDir , r . MediaMetadata , absBasePath , r . Logger )
if err != nil {
r . Logger . WithError ( err ) . Error ( "Failed to move file." )
return & util . JSONResponse {
2018-03-13 16:55:45 +01:00
Code : http . StatusBadRequest ,
2017-05-31 07:12:22 +02:00
JSON : jsonerror . Unknown ( "Failed to upload" ) ,
2017-05-26 16:49:54 +02:00
}
}
if duplicate {
r . Logger . WithField ( "dst" , finalPath ) . Info ( "File was stored previously - discarding duplicate" )
}
2017-09-21 16:44:00 +02:00
if err = db . StoreMediaMetadata ( ctx , r . MediaMetadata ) ; err != nil {
2017-05-26 16:49:54 +02:00
r . Logger . WithError ( err ) . Warn ( "Failed to store metadata" )
// If the file is a duplicate (has the same hash as an existing file) then
// there is valid metadata in the database for that file. As such we only
// remove the file if it is not a duplicate.
2017-09-20 15:15:38 +02:00
if ! duplicate {
2017-05-26 17:15:54 +02:00
fileutils . RemoveDir ( types . Path ( path . Dir ( string ( finalPath ) ) ) , r . Logger )
2017-05-26 16:49:54 +02:00
}
return & util . JSONResponse {
2018-03-13 16:55:45 +01:00
Code : http . StatusBadRequest ,
2017-05-31 07:12:22 +02:00
JSON : jsonerror . Unknown ( "Failed to upload" ) ,
2017-05-26 16:49:54 +02:00
}
}
2017-06-07 01:12:49 +02:00
go func ( ) {
2017-09-21 16:44:00 +02:00
busy , err := thumbnailer . GenerateThumbnails (
context . Background ( ) , finalPath , thumbnailSizes , r . MediaMetadata ,
activeThumbnailGeneration , maxThumbnailGenerators , db , r . Logger ,
)
2017-06-07 01:12:49 +02:00
if err != nil {
r . Logger . WithError ( err ) . Warn ( "Error generating thumbnails" )
}
if busy {
r . Logger . Warn ( "Maximum number of active thumbnail generators reached. Skipping pre-generation." )
}
} ( )
2017-05-26 16:49:54 +02:00
return nil
}