Adjust gitea doctor --run storages to check all storage types (#21785)

The doctor check `storages` currently only checks the attachment
storage. This PR adds some basic garbage collection functionality for
the other types of storage.

Signed-off-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
This commit is contained in:
zeripath 2022-11-15 08:08:59 +00:00 committed by GitHub
parent de6dfb7141
commit c772934ff6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 296 additions and 31 deletions

View file

@ -235,9 +235,9 @@ func LFSObjectAccessible(user *user_model.User, oid string) (bool, error) {
return count > 0, err
}
// LFSObjectIsAssociated checks if a provided Oid is associated
func LFSObjectIsAssociated(oid string) (bool, error) {
return db.GetEngine(db.DefaultContext).Exist(&LFSMetaObject{Pointer: lfs.Pointer{Oid: oid}})
// ExistsLFSObject checks if a provided Oid exists within the DB
func ExistsLFSObject(ctx context.Context, oid string) (bool, error) {
return db.GetEngine(ctx).Exist(&LFSMetaObject{Pointer: lfs.Pointer{Oid: oid}})
}
// LFSAutoAssociate auto associates accessible LFSMetaObjects

View file

@ -62,6 +62,13 @@ func GetBlobByID(ctx context.Context, blobID int64) (*PackageBlob, error) {
return pb, nil
}
// ExistPackageBlobWithSHA returns if a package blob exists with the provided sha
func ExistPackageBlobWithSHA(ctx context.Context, blobSha256 string) (bool, error) {
return db.GetEngine(ctx).Exist(&PackageBlob{
HashSHA256: blobSha256,
})
}
// FindExpiredUnreferencedBlobs gets all blobs without associated files older than the specific duration
func FindExpiredUnreferencedBlobs(ctx context.Context, olderThan time.Duration) ([]*PackageBlob, error) {
pbs := make([]*PackageBlob, 0, 10)

View file

@ -7,11 +7,14 @@ package repo
import (
"context"
"fmt"
"strconv"
"strings"
"time"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
"xorm.io/builder"
)
@ -44,6 +47,28 @@ func (archiver *RepoArchiver) RelativePath() string {
return fmt.Sprintf("%d/%s/%s.%s", archiver.RepoID, archiver.CommitID[:2], archiver.CommitID, archiver.Type.String())
}
// repoArchiverForRelativePath takes a relativePath created from (archiver *RepoArchiver) RelativePath()
// and creates a shell RepoArchiver struct (RepoID, CommitID and Type only) representing it.
//
// RelativePath() lays archives out as "<repoID>/<commitID[:2]>/<commitID>.<type>": the middle
// directory is only the first two characters of the commit ID, and the file name already
// carries the complete commit ID. The commit ID is therefore taken from the file name alone.
//
// A path that does not have three "/"-separated parts, whose first part is not an integer,
// or whose file name has no "." yields an error wrapping util.ErrInvalidArgument.
func repoArchiverForRelativePath(relativePath string) (*RepoArchiver, error) {
	// All malformed inputs are reported with the same silent wrapper
	invalidPath := func() error {
		return util.SilentWrap{Message: fmt.Sprintf("invalid storage path: %s", relativePath), Err: util.ErrInvalidArgument}
	}

	parts := strings.SplitN(relativePath, "/", 3)
	if len(parts) != 3 {
		return nil, invalidPath()
	}

	repoID, err := strconv.ParseInt(parts[0], 10, 64)
	if err != nil {
		return nil, invalidPath()
	}

	// Split on the first "." only so that multi-part extensions such as "tar.gz" stay intact
	nameExts := strings.SplitN(parts[2], ".", 2)
	if len(nameExts) != 2 {
		return nil, invalidPath()
	}

	// NOTE(review): an unrecognised extension maps to the zero ArchiveType here - confirm
	// whether such paths should be rejected as invalid instead.
	return &RepoArchiver{
		RepoID:   repoID,
		CommitID: nameExts[0], // parts[1] is just CommitID[:2] and must not be prepended again
		Type:     git.ToArchiveType(nameExts[1]),
	}, nil
}
var delRepoArchiver = new(RepoArchiver)
// DeleteRepoArchiver delete archiver
@ -65,6 +90,17 @@ func GetRepoArchiver(ctx context.Context, repoID int64, tp git.ArchiveType, comm
return nil, nil
}
// ExistsRepoArchiverWithStoragePath reports whether a RepoArchiver record corresponds to the given storage path.
// A storage path that cannot be parsed results in (false, err).
func ExistsRepoArchiverWithStoragePath(ctx context.Context, storagePath string) (bool, error) {
	// The storage path has the layout produced by (archiver *RepoArchiver) RelativePath(),
	// so turn it back into a RepoArchiver before querying
	probe, parseErr := repoArchiverForRelativePath(storagePath)
	if parseErr != nil {
		return false, parseErr
	}

	engine := db.GetEngine(ctx)
	return engine.Exist(probe)
}
// AddRepoArchiver adds an archiver
func AddRepoArchiver(ctx context.Context, archiver *RepoArchiver) error {
_, err := db.GetEngine(ctx).Insert(archiver)

View file

@ -122,9 +122,9 @@ func GetAttachmentsByUUIDs(ctx context.Context, uuids []string) ([]*Attachment,
return attachments, db.GetEngine(ctx).In("uuid", uuids).Find(&attachments)
}
// ExistAttachmentsByUUID returns true if attachment is exist by given UUID
func ExistAttachmentsByUUID(uuid string) (bool, error) {
return db.GetEngine(db.DefaultContext).Where("`uuid`=?", uuid).Exist(new(Attachment))
// ExistAttachmentsByUUID reports whether an attachment with the given UUID exists
func ExistAttachmentsByUUID(ctx context.Context, uuid string) (bool, error) {
	byUUID := db.GetEngine(ctx).Where("`uuid`=?", uuid)
	return byUUID.Exist(new(Attachment))
}
// GetAttachmentsByIssueID returns all attachments of an issue.

View file

@ -24,6 +24,13 @@ func (repo *Repository) CustomAvatarRelativePath() string {
return repo.Avatar
}
// ExistsWithAvatarAtStoragePath reports whether any repository has the given storage path as its avatar
func ExistsWithAvatarAtStoragePath(ctx context.Context, storagePath string) (bool, error) {
	// (repo *Repository) CustomAvatarRelativePath() returns repo.Avatar unchanged, so the
	// storage path can be matched against the avatar column directly
	byAvatar := db.GetEngine(ctx).Where("`avatar`=?", storagePath)
	return byAvatar.Exist(new(Repository))
}
// RelAvatarLink returns a relative link to the repository's avatar.
func (repo *Repository) RelAvatarLink() string {
return repo.relAvatarLink(db.DefaultContext)

View file

@ -111,3 +111,10 @@ func (u *User) IsUploadAvatarChanged(data []byte) bool {
avatarID := fmt.Sprintf("%x", md5.Sum([]byte(fmt.Sprintf("%d-%x", u.ID, md5.Sum(data)))))
return u.Avatar != avatarID
}
// ExistsWithAvatarAtStoragePath reports whether any user has the given storage path as their avatar
func ExistsWithAvatarAtStoragePath(ctx context.Context, storagePath string) (bool, error) {
	// See func (u *User) CustomAvatarRelativePath(): u.Avatar is used directly as the
	// storage path, so the path can be matched against the avatar column as-is
	byAvatar := db.GetEngine(ctx).Where("`avatar`=?", storagePath)
	return byAvatar.Exist(new(User))
}

View file

@ -6,71 +6,255 @@ package doctor
import (
"context"
"errors"
"io/fs"
"strings"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/models/git"
"code.gitea.io/gitea/models/packages"
"code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/log"
packages_module "code.gitea.io/gitea/modules/packages"
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/modules/util"
)
func checkAttachmentStorageFiles(logger log.Logger, autofix bool) error {
var total, garbageNum int
var deletePaths []string
if err := storage.Attachments.IterateObjects(func(p string, obj storage.Object) error {
// commonStorageCheckOptions describes one storage to be examined by commonCheckStorage
type commonStorageCheckOptions struct {
// storer is the object storage that is iterated over and, when autofixing, deleted from
storer storage.ObjectStorage
// isOrphaned reports whether the stored object at path should be counted as orphaned;
// a non-nil error aborts the whole check
isOrphaned func(path string, obj storage.Object, stat fs.FileInfo) (bool, error)
// name is the object kind inserted into the log messages (e.g. "attachment")
name string
}
func commonCheckStorage(ctx context.Context, logger log.Logger, autofix bool, opts *commonStorageCheckOptions) error {
totalCount, orphanedCount := 0, 0
totalSize, orphanedSize := int64(0), int64(0)
var pathsToDelete []string
if err := opts.storer.IterateObjects(func(p string, obj storage.Object) error {
defer obj.Close()
total++
totalCount++
stat, err := obj.Stat()
if err != nil {
return err
}
exist, err := repo_model.ExistAttachmentsByUUID(stat.Name())
totalSize += stat.Size()
orphaned, err := opts.isOrphaned(p, obj, stat)
if err != nil {
return err
}
if !exist {
garbageNum++
if orphaned {
orphanedCount++
orphanedSize += stat.Size()
if autofix {
deletePaths = append(deletePaths, p)
pathsToDelete = append(pathsToDelete, p)
}
}
return nil
}); err != nil {
logger.Error("storage.Attachments.IterateObjects failed: %v", err)
logger.Error("Error whilst iterating %s storage: %v", opts.name, err)
return err
}
if garbageNum > 0 {
if orphanedCount > 0 {
if autofix {
var deletedNum int
for _, p := range deletePaths {
if err := storage.Attachments.Delete(p); err != nil {
log.Error("Delete attachment %s failed: %v", p, err)
for _, p := range pathsToDelete {
if err := opts.storer.Delete(p); err != nil {
log.Error("Error whilst deleting %s from %s storage: %v", p, opts.name, err)
} else {
deletedNum++
}
}
logger.Info("%d missed information attachment detected, %d deleted.", garbageNum, deletedNum)
logger.Info("Deleted %d/%d orphaned %s(s)", deletedNum, orphanedCount, opts.name)
} else {
logger.Warn("Checked %d attachment, %d missed information.", total, garbageNum)
logger.Warn("Found %d/%d (%s/%s) orphaned %s(s)", orphanedCount, totalCount, base.FileSize(orphanedSize), base.FileSize(totalSize), opts.name)
}
} else {
logger.Info("Found %d (%s) %s(s)", totalCount, base.FileSize(totalSize), opts.name)
}
return nil
}
func checkStorageFiles(ctx context.Context, logger log.Logger, autofix bool) error {
if err := storage.Init(); err != nil {
logger.Error("storage.Init failed: %v", err)
return err
// checkStorageOptions selects which storage types are examined by a check created with checkStorage
type checkStorageOptions struct {
// All enables every storage check regardless of the individual flags below
All bool
// Attachments enables the check of the attachment storage
Attachments bool
// LFS enables the check of the LFS storage
LFS bool
// Avatars enables the check of the user avatar storage
Avatars bool
// RepoAvatars enables the check of the repository avatar storage
RepoAvatars bool
// RepoArchives enables the check of the repository archive storage
RepoArchives bool
// Packages enables the check of the package blob storage
Packages bool
}
// checkStorage will return a doctor check function to check the requested storage types for
// "orphaned" stored object/files and optionally delete them.
//
// The returned function initialises the storages and then runs commonCheckStorage for each
// storage type enabled in opts (or for all of them when opts.All is set), in the order
// attachments, LFS, avatars, repo avatars, repo archives, packages. The first check that
// returns an error stops the run and that error is returned.
func checkStorage(opts *checkStorageOptions) func(ctx context.Context, logger log.Logger, autofix bool) error {
	return func(ctx context.Context, logger log.Logger, autofix bool) error {
		if err := storage.Init(); err != nil {
			logger.Error("storage.Init failed: %v", err)
			return err
		}

		if opts.Attachments || opts.All {
			if err := commonCheckStorage(ctx, logger, autofix,
				&commonStorageCheckOptions{
					storer: storage.Attachments,
					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
						// Attachments are looked up by the name of the stored object
						exists, err := repo.ExistAttachmentsByUUID(ctx, stat.Name())
						return !exists, err
					},
					name: "attachment",
				}); err != nil {
				return err
			}
		}

		if opts.LFS || opts.All {
			if err := commonCheckStorage(ctx, logger, autofix,
				&commonStorageCheckOptions{
					storer: storage.LFS,
					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
						// The oid of an LFS stored object is the name but with all the path.Separators removed
						oid := strings.ReplaceAll(path, "/", "")
						exists, err := git.ExistsLFSObject(ctx, oid)
						return !exists, err
					},
					name: "LFS file",
				}); err != nil {
				return err
			}
		}

		if opts.Avatars || opts.All {
			if err := commonCheckStorage(ctx, logger, autofix,
				&commonStorageCheckOptions{
					storer: storage.Avatars,
					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
						exists, err := user.ExistsWithAvatarAtStoragePath(ctx, path)
						return !exists, err
					},
					name: "avatar",
				}); err != nil {
				return err
			}
		}

		if opts.RepoAvatars || opts.All {
			if err := commonCheckStorage(ctx, logger, autofix,
				&commonStorageCheckOptions{
					storer: storage.RepoAvatars,
					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
						exists, err := repo.ExistsWithAvatarAtStoragePath(ctx, path)
						return !exists, err
					},
					name: "repo avatar",
				}); err != nil {
				return err
			}
		}

		if opts.RepoArchives || opts.All {
			if err := commonCheckStorage(ctx, logger, autofix,
				&commonStorageCheckOptions{
					// Archives live in their own storage; iterating the repo avatar storage here
					// would test (and with autofix delete) avatars against the archiver table
					storer: storage.RepoArchives,
					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
						exists, err := repo.ExistsRepoArchiverWithStoragePath(ctx, path)
						if err == nil || errors.Is(err, util.ErrInvalidArgument) {
							// invalid arguments mean that the object is not a valid repo archiver and it should be removed
							return !exists, nil
						}
						return !exists, err
					},
					name: "repo archive",
				}); err != nil {
				return err
			}
		}

		if opts.Packages || opts.All {
			if err := commonCheckStorage(ctx, logger, autofix,
				&commonStorageCheckOptions{
					storer: storage.Packages,
					isOrphaned: func(path string, obj storage.Object, stat fs.FileInfo) (bool, error) {
						key, err := packages_module.RelativePathToKey(path)
						if err != nil {
							// If there is an error here then the relative path does not match a valid package
							// Therefore it is orphaned by default
							return true, nil
						}

						exists, err := packages.ExistPackageBlobWithSHA(ctx, string(key))
						return !exists, err
					},
					name: "package blob",
				}); err != nil {
				return err
			}
		}

		return nil
	}
}
func init() {
Register(&Check{
Title: "Check if there is garbage storage files",
Title: "Check if there are orphaned storage files",
Name: "storages",
IsDefault: false,
Run: checkStorageFiles,
Run: checkStorage(&checkStorageOptions{All: true}),
AbortIfFailed: false,
SkipDatabaseInitialization: false,
Priority: 1,
})
Register(&Check{
Title: "Check if there are orphaned attachments in storage",
Name: "storage-attachments",
IsDefault: false,
Run: checkStorage(&checkStorageOptions{Attachments: true}),
AbortIfFailed: false,
SkipDatabaseInitialization: false,
Priority: 1,
})
Register(&Check{
Title: "Check if there are orphaned lfs files in storage",
Name: "storage-lfs",
IsDefault: false,
Run: checkStorage(&checkStorageOptions{LFS: true}),
AbortIfFailed: false,
SkipDatabaseInitialization: false,
Priority: 1,
})
Register(&Check{
Title: "Check if there are orphaned avatars in storage",
Name: "storage-avatars",
IsDefault: false,
Run: checkStorage(&checkStorageOptions{Avatars: true, RepoAvatars: true}),
AbortIfFailed: false,
SkipDatabaseInitialization: false,
Priority: 1,
})
Register(&Check{
Title: "Check if there are orphaned archives in storage",
Name: "storage-archives",
IsDefault: false,
Run: checkStorage(&checkStorageOptions{RepoArchives: true}),
AbortIfFailed: false,
SkipDatabaseInitialization: false,
Priority: 1,
})
Register(&Check{
Title: "Check if there are orphaned package blobs in storage",
Name: "storage-packages",
IsDefault: false,
Run: checkStorage(&checkStorageOptions{Packages: true}),
AbortIfFailed: false,
SkipDatabaseInitialization: false,
Priority: 1,

View file

@ -38,6 +38,18 @@ func (a ArchiveType) String() string {
return "unknown"
}
// ToArchiveType converts an archive extension ("zip", "tar.gz" or "bundle") to the
// matching ArchiveType. Any other string yields the zero ArchiveType.
func ToArchiveType(s string) ArchiveType {
	if s == "zip" {
		return ZIP
	}
	if s == "tar.gz" {
		return TARGZ
	}
	if s == "bundle" {
		return BUNDLE
	}
	// Not a known archive extension
	return 0
}
// CreateArchive create archive content to the target path
func (repo *Repository) CreateArchive(ctx context.Context, format ArchiveType, target io.Writer, usePrefix bool, commitID string) error {
if format.String() == "unknown" {

View file

@ -7,8 +7,10 @@ package packages
import (
"io"
"path"
"strings"
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/modules/util"
)
// BlobHash256Key is the key to address a blob content
@ -45,3 +47,13 @@ func (s *ContentStore) Delete(key BlobHash256Key) error {
func KeyToRelativePath(key BlobHash256Key) string {
	// The first two and the next two characters of the key form the two directory levels
	hash := string(key)
	return path.Join(hash[0:2], hash[2:4], hash)
}
// RelativePathToKey converts a relative path aa/bb/aabb000000... to the sha256 key aabb000000...
// It returns util.ErrInvalidArgument unless the path consists of two 2-character directories
// followed by a name of at least 4 characters that starts with those two directories joined.
func RelativePathToKey(relativePath string) (BlobHash256Key, error) {
	segments := strings.SplitN(relativePath, "/", 3)
	if len(segments) != 3 {
		return "", util.ErrInvalidArgument
	}

	first, second, name := segments[0], segments[1], segments[2]
	switch {
	case len(first) != 2, len(second) != 2, len(name) < 4:
		return "", util.ErrInvalidArgument
	case first+second != name[0:4]:
		// Only reached once name is known to hold at least 4 characters
		return "", util.ErrInvalidArgument
	}
	return BlobHash256Key(name), nil
}

View file

@ -478,7 +478,7 @@ func LFSPointerFiles(ctx *context.Context) {
return err
}
if !result.Associatable {
associated, err := git_model.LFSObjectIsAssociated(pointerBlob.Oid)
associated, err := git_model.ExistsLFSObject(ctx, pointerBlob.Oid)
if err != nil {
return err
}