forgejo/services/cron/tasks_extended.go
wxiaoguang 6bc3079c00
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)

## Review without space diff

https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1

## Purpose of this PR

1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command

## The main idea of this PR

* Move `git.CmdArg` to the `internal` package, so that no package other
than `git` can use it. Developers can then never write
`AddArguments(git.CmdArg(userInput))` again.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
    * Before: `AddArguments("-m").AddDynamicArguments(message)`
    * After: `AddOptionValues("-m", message)`
    * Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'", sig.Name, sig.Email)))`
    * After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`

## FAQ

### Why were these changes not done in #21535?

#21535 is mainly a search&replace, it did its best to not change too
much logic.

Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.


### The naming of `AddOptionXxx`

According to git's manual, the `--xxx` part is called `option`.

### How can it guarantee that `internal.CmdArg` won't be misused?

Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.

And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.

### There is still a `ToTrustedCmdArgs`; will it still allow developers to make mistakes and pass untrusted arguments?

Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.

### Why there was a `CmdArgCheck` and why it's removed?

At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.


### Why is so much code for `signArg == ""` deleted?

Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.

---------

Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-04 10:30:43 +08:00

230 lines
7.2 KiB
Go

// Copyright 2020 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package cron
import (
"context"
"time"
activities_model "code.gitea.io/gitea/models/activities"
asymkey_model "code.gitea.io/gitea/models/asymkey"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/system"
user_model "code.gitea.io/gitea/models/user"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/updatechecker"
repo_service "code.gitea.io/gitea/services/repository"
archiver_service "code.gitea.io/gitea/services/repository/archiver"
user_service "code.gitea.io/gitea/services/user"
)
func registerDeleteInactiveUsers() {
RegisterTaskFatal("delete_inactive_accounts", &OlderThanConfig{
BaseConfig: BaseConfig{
Enabled: false,
RunAtStart: false,
Schedule: "@annually",
},
OlderThan: time.Minute * time.Duration(setting.Service.ActiveCodeLives),
}, func(ctx context.Context, _ *user_model.User, config Config) error {
olderThanConfig := config.(*OlderThanConfig)
return user_service.DeleteInactiveUsers(ctx, olderThanConfig.OlderThan)
})
}
func registerDeleteRepositoryArchives() {
RegisterTaskFatal("delete_repo_archives", &BaseConfig{
Enabled: false,
RunAtStart: false,
Schedule: "@annually",
}, func(ctx context.Context, _ *user_model.User, _ Config) error {
return archiver_service.DeleteRepositoryArchives(ctx)
})
}
// registerGarbageCollectRepositories registers the "git_gc_repos" task through
// RegisterTaskFatal. It is disabled by default, does not run at start and is
// scheduled "@every 72h". Running it calls repo_service.GitGcRepos with the
// configured timeout and git arguments.
func registerGarbageCollectRepositories() {
	// RepoHealthCheckConfig extends BaseConfig with the timeout and the extra
	// git arguments (space-delimited in configuration) used by the task.
	type RepoHealthCheckConfig struct {
		BaseConfig
		Timeout time.Duration
		Args    []string `delim:" "`
	}

	// Defaults come from the git settings; setting.Git.Timeout.GC is in seconds.
	defaults := &RepoHealthCheckConfig{
		BaseConfig: BaseConfig{
			Enabled:    false,
			RunAtStart: false,
			Schedule:   "@every 72h",
		},
		Timeout: time.Second * time.Duration(setting.Git.Timeout.GC),
		Args:    setting.Git.GCArgs,
	}

	run := func(ctx context.Context, _ *user_model.User, config Config) error {
		cfg := config.(*RepoHealthCheckConfig)
		// The arguments originate from configuration, so they are treated as trusted.
		trustedArgs := git.ToTrustedCmdArgs(cfg.Args)
		return repo_service.GitGcRepos(ctx, cfg.Timeout, trustedArgs)
	}

	RegisterTaskFatal("git_gc_repos", defaults, run)
}
func registerRewriteAllPublicKeys() {
RegisterTaskFatal("resync_all_sshkeys", &BaseConfig{
Enabled: false,
RunAtStart: false,
Schedule: "@every 72h",
}, func(_ context.Context, _ *user_model.User, _ Config) error {
return asymkey_model.RewriteAllPublicKeys()
})
}
func registerRewriteAllPrincipalKeys() {
RegisterTaskFatal("resync_all_sshprincipals", &BaseConfig{
Enabled: false,
RunAtStart: false,
Schedule: "@every 72h",
}, func(_ context.Context, _ *user_model.User, _ Config) error {
return asymkey_model.RewriteAllPrincipalKeys(db.DefaultContext)
})
}
func registerRepositoryUpdateHook() {
RegisterTaskFatal("resync_all_hooks", &BaseConfig{
Enabled: false,
RunAtStart: false,
Schedule: "@every 72h",
}, func(ctx context.Context, _ *user_model.User, _ Config) error {
return repo_service.SyncRepositoryHooks(ctx)
})
}
func registerReinitMissingRepositories() {
RegisterTaskFatal("reinit_missing_repos", &BaseConfig{
Enabled: false,
RunAtStart: false,
Schedule: "@every 72h",
}, func(ctx context.Context, _ *user_model.User, _ Config) error {
return repo_service.ReinitMissingRepositories(ctx)
})
}
func registerDeleteMissingRepositories() {
RegisterTaskFatal("delete_missing_repos", &BaseConfig{
Enabled: false,
RunAtStart: false,
Schedule: "@every 72h",
}, func(ctx context.Context, user *user_model.User, _ Config) error {
return repo_service.DeleteMissingRepositories(ctx, user)
})
}
func registerRemoveRandomAvatars() {
RegisterTaskFatal("delete_generated_repository_avatars", &BaseConfig{
Enabled: false,
RunAtStart: false,
Schedule: "@every 72h",
}, func(ctx context.Context, _ *user_model.User, _ Config) error {
return repo_service.RemoveRandomAvatars(ctx)
})
}
func registerDeleteOldActions() {
RegisterTaskFatal("delete_old_actions", &OlderThanConfig{
BaseConfig: BaseConfig{
Enabled: false,
RunAtStart: false,
Schedule: "@every 168h",
},
OlderThan: 365 * 24 * time.Hour,
}, func(ctx context.Context, _ *user_model.User, config Config) error {
olderThanConfig := config.(*OlderThanConfig)
return activities_model.DeleteOldActions(olderThanConfig.OlderThan)
})
}
// registerUpdateGiteaChecker registers the "update_checker" task through
// RegisterTaskFatal. This task is enabled by default, does not run at start
// and is scheduled "@every 168h". Running it calls
// updatechecker.GiteaUpdateChecker with the configured HTTP endpoint.
func registerUpdateGiteaChecker() {
	// UpdateCheckerConfig extends BaseConfig with the endpoint to query.
	type UpdateCheckerConfig struct {
		BaseConfig
		HTTPEndpoint string
	}

	defaults := &UpdateCheckerConfig{
		BaseConfig: BaseConfig{
			Enabled:    true,
			RunAtStart: false,
			Schedule:   "@every 168h",
		},
		HTTPEndpoint: "https://dl.gitea.io/gitea/version.json",
	}

	// The task context is received but not forwarded to GiteaUpdateChecker.
	run := func(ctx context.Context, _ *user_model.User, config Config) error {
		cfg := config.(*UpdateCheckerConfig)
		return updatechecker.GiteaUpdateChecker(cfg.HTTPEndpoint)
	}

	RegisterTaskFatal("update_checker", defaults, run)
}
func registerDeleteOldSystemNotices() {
RegisterTaskFatal("delete_old_system_notices", &OlderThanConfig{
BaseConfig: BaseConfig{
Enabled: false,
RunAtStart: false,
Schedule: "@every 168h",
},
OlderThan: 365 * 24 * time.Hour,
}, func(ctx context.Context, _ *user_model.User, config Config) error {
olderThanConfig := config.(*OlderThanConfig)
return system.DeleteOldSystemNotices(olderThanConfig.OlderThan)
})
}
// registerGCLFS registers the "gc_lfs" task through RegisterTaskFatal, but only
// when setting.LFS.StartServer is true. The task is disabled by default, does
// not run at start and is scheduled "@every 24h". Running it calls
// repo_service.GarbageCollectLFSMetaObjects with AutoFix enabled and two
// cut-off times derived from the configuration.
func registerGCLFS() {
	// Nothing to register when the LFS server is not started.
	if !setting.LFS.StartServer {
		return
	}

	// GCLFSConfig extends OlderThanConfig with LFS-specific settings.
	type GCLFSConfig struct {
		OlderThanConfig
		LastUpdatedMoreThanAgo   time.Duration
		NumberToCheckPerRepo     int64
		ProportionToCheckPerRepo float64
	}

	defaults := &GCLFSConfig{
		OlderThanConfig: OlderThanConfig{
			BaseConfig: BaseConfig{
				Enabled:    false,
				RunAtStart: false,
				Schedule:   "@every 24h",
			},
			// Restrict garbage collection to LFS meta objects that are more than
			// a week old. The order in which LFS objects and git objects are
			// uploaded is not necessarily guaranteed: an LFS object may arrive
			// before its branch does, or rapid changes in new branches may leave
			// LFS objects temporarily unassociated with git objects.
			//
			// A week is probably more than necessary, but it should be enough
			// that any unassociated LFS object is genuinely unassociated.
			OlderThan: 7 * 24 * time.Hour,
		},
		// Only GC things that have not been looked at in the past 3 days.
		LastUpdatedMoreThanAgo:   3 * 24 * time.Hour,
		NumberToCheckPerRepo:     100,
		ProportionToCheckPerRepo: 0.6,
	}

	run := func(ctx context.Context, _ *user_model.User, config Config) error {
		cfg := config.(*GCLFSConfig)

		// Turn the configured durations into absolute cut-off times.
		olderThan := time.Now().Add(-cfg.OlderThan)
		updatedBefore := time.Now().Add(-cfg.LastUpdatedMoreThanAgo)

		// NOTE(review): NumberToCheckPerRepo and ProportionToCheckPerRepo are
		// given defaults above but are not forwarded here — confirm whether
		// that is intended.
		opts := repo_service.GarbageCollectLFSMetaObjectsOptions{
			AutoFix:                 true,
			OlderThan:               olderThan,
			UpdatedLessRecentlyThan: updatedBefore,
		}
		return repo_service.GarbageCollectLFSMetaObjects(ctx, opts)
	}

	RegisterTaskFatal("gc_lfs", defaults, run)
}
// initExtendedTasks runs every register* function defined in this file, in a
// fixed order, so that each extended task gets registered.
func initExtendedTasks() {
	registrars := []func(){
		registerDeleteInactiveUsers,
		registerDeleteRepositoryArchives,
		registerGarbageCollectRepositories,
		registerRewriteAllPublicKeys,
		registerRewriteAllPrincipalKeys,
		registerRepositoryUpdateHook,
		registerReinitMissingRepositories,
		registerDeleteMissingRepositories,
		registerRemoveRandomAvatars,
		registerDeleteOldActions,
		registerUpdateGiteaChecker,
		registerDeleteOldSystemNotices,
		registerGCLFS,
	}
	for _, register := range registrars {
		register()
	}
}