Artifacts retention and auto clean up (#26131)

Currently, Artifact does not have an expiration and automatic cleanup
mechanism, and this feature needs to be added. It contains the following
key points:

- [x] add global artifact retention days option in config file. Default
value is 90 days.
- [x] add cron task to clean up expired artifacts. It should run once a
day.
- [x] support custom retention period from `retention-days: 5` in
`upload-artifact@v3`.
- [x] artifacts link in actions view should be non-clickable text when
expired.
This commit is contained in:
FuXiaoHei 2023-09-06 15:41:06 +08:00 committed by GitHub
parent 113eb5fc24
commit 460a2b0edf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 221 additions and 25 deletions

View File

@ -2564,6 +2564,8 @@ LEVEL = Info
;; ;;
;; Default platform to get action plugins, `github` for `https://github.com`, `self` for the current Gitea instance. ;; Default platform to get action plugins, `github` for `https://github.com`, `self` for the current Gitea instance.
;DEFAULT_ACTIONS_URL = github ;DEFAULT_ACTIONS_URL = github
;; Default artifact retention time in days; falls back to 90 days when unset or not a positive number
;ARTIFACT_RETENTION_DAYS = 90
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@ -955,6 +955,12 @@ Default templates for project boards:
- `SCHEDULE`: **@midnight** : Interval as a duration between each synchronization, it will always attempt synchronization when the instance starts. - `SCHEDULE`: **@midnight** : Interval as a duration between each synchronization, it will always attempt synchronization when the instance starts.
- `UPDATE_EXISTING`: **true**: Create new users, update existing user data and disable users that are not in external source anymore (default) or only create new users if UPDATE_EXISTING is set to false. - `UPDATE_EXISTING`: **true**: Create new users, update existing user data and disable users that are not in external source anymore (default) or only create new users if UPDATE_EXISTING is set to false.
## Cron - Cleanup Expired Actions Assets (`cron.cleanup_actions`)
- `ENABLED`: **true**: Enable the job that cleans up expired actions assets.
- `RUN_AT_START`: **true**: Run job at start time (if ENABLED).
- `SCHEDULE`: **@midnight** : Cron syntax for the job.
### Extended cron tasks (not enabled by default) ### Extended cron tasks (not enabled by default)
#### Cron - Garbage collect all repositories (`cron.git_gc_repos`) #### Cron - Garbage collect all repositories (`cron.git_gc_repos`)
@ -1381,6 +1387,7 @@ PROXY_HOSTS = *.github.com
- `DEFAULT_ACTIONS_URL`: **github**: Default platform to get action plugins, `github` for `https://github.com`, `self` for the current Gitea instance. - `DEFAULT_ACTIONS_URL`: **github**: Default platform to get action plugins, `github` for `https://github.com`, `self` for the current Gitea instance.
- `STORAGE_TYPE`: **local**: Storage type for actions logs, `local` for local disk or `minio` for s3 compatible object storage service, default is `local` or other name defined with `[storage.xxx]` - `STORAGE_TYPE`: **local**: Storage type for actions logs, `local` for local disk or `minio` for s3 compatible object storage service, default is `local` or other name defined with `[storage.xxx]`
- `MINIO_BASE_PATH`: **actions_log/**: Minio base path on the bucket only available when STORAGE_TYPE is `minio` - `MINIO_BASE_PATH`: **actions_log/**: Minio base path on the bucket only available when STORAGE_TYPE is `minio`
- `ARTIFACT_RETENTION_DAYS`: **90**: Number of days to keep artifacts. A value of 0 or less is treated as unset and falls back to the default of 90 days.
`DEFAULT_ACTIONS_URL` indicates where the Gitea Actions runners should find the actions with relative path. `DEFAULT_ACTIONS_URL` indicates where the Gitea Actions runners should find the actions with relative path.
For example, `uses: actions/checkout@v3` means `https://github.com/actions/checkout@v3` since the value of `DEFAULT_ACTIONS_URL` is `github`. For example, `uses: actions/checkout@v3` means `https://github.com/actions/checkout@v3` since the value of `DEFAULT_ACTIONS_URL` is `github`.

View File

@ -9,19 +9,21 @@ package actions
import ( import (
"context" "context"
"errors" "errors"
"time"
"code.gitea.io/gitea/models/db" "code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/modules/util"
) )
// ArtifactStatus is the status of an artifact: upload pending, upload confirmed, upload error or expired.
type ArtifactStatus int64
const ( const (
// ArtifactStatusUploadPending is the status of an artifact upload that is pending ArtifactStatusUploadPending ArtifactStatus = iota + 1 // 1 ArtifactStatusUploadPending is the status of an artifact upload that is pending
ArtifactStatusUploadPending = 1 ArtifactStatusUploadConfirmed // 2 ArtifactStatusUploadConfirmed is the status of an artifact upload that is confirmed
// ArtifactStatusUploadConfirmed is the status of an artifact upload that is confirmed ArtifactStatusUploadError // 3 ArtifactStatusUploadError is the status of an artifact upload that is errored
ArtifactStatusUploadConfirmed = 2 ArtifactStatusExpired // 4, ArtifactStatusExpired is the status of an artifact that is expired
// ArtifactStatusUploadError is the status of an artifact upload that is errored
ArtifactStatusUploadError = 3
) )
func init() { func init() {
@ -45,9 +47,10 @@ type ActionArtifact struct {
Status int64 `xorm:"index"` // The status of the artifact, uploading, expired or need-delete Status int64 `xorm:"index"` // The status of the artifact, uploading, expired or need-delete
CreatedUnix timeutil.TimeStamp `xorm:"created"` CreatedUnix timeutil.TimeStamp `xorm:"created"`
UpdatedUnix timeutil.TimeStamp `xorm:"updated index"` UpdatedUnix timeutil.TimeStamp `xorm:"updated index"`
ExpiredUnix timeutil.TimeStamp `xorm:"index"` // The time when the artifact will be expired
} }
func CreateArtifact(ctx context.Context, t *ActionTask, artifactName, artifactPath string) (*ActionArtifact, error) { func CreateArtifact(ctx context.Context, t *ActionTask, artifactName, artifactPath string, expiredDays int64) (*ActionArtifact, error) {
if err := t.LoadJob(ctx); err != nil { if err := t.LoadJob(ctx); err != nil {
return nil, err return nil, err
} }
@ -61,7 +64,8 @@ func CreateArtifact(ctx context.Context, t *ActionTask, artifactName, artifactPa
RepoID: t.RepoID, RepoID: t.RepoID,
OwnerID: t.OwnerID, OwnerID: t.OwnerID,
CommitSHA: t.CommitSHA, CommitSHA: t.CommitSHA,
Status: ArtifactStatusUploadPending, Status: int64(ArtifactStatusUploadPending),
ExpiredUnix: timeutil.TimeStamp(time.Now().Unix() + 3600*24*expiredDays),
} }
if _, err := db.GetEngine(ctx).Insert(artifact); err != nil { if _, err := db.GetEngine(ctx).Insert(artifact); err != nil {
return nil, err return nil, err
@ -126,15 +130,16 @@ func ListUploadedArtifactsByRunID(ctx context.Context, runID int64) ([]*ActionAr
type ActionArtifactMeta struct { type ActionArtifactMeta struct {
ArtifactName string ArtifactName string
FileSize int64 FileSize int64
Status int64
} }
// ListUploadedArtifactsMeta returns all uploaded artifacts meta of a run // ListUploadedArtifactsMeta returns all uploaded artifacts meta of a run
func ListUploadedArtifactsMeta(ctx context.Context, runID int64) ([]*ActionArtifactMeta, error) { func ListUploadedArtifactsMeta(ctx context.Context, runID int64) ([]*ActionArtifactMeta, error) {
arts := make([]*ActionArtifactMeta, 0, 10) arts := make([]*ActionArtifactMeta, 0, 10)
return arts, db.GetEngine(ctx).Table("action_artifact"). return arts, db.GetEngine(ctx).Table("action_artifact").
Where("run_id=? AND status=?", runID, ArtifactStatusUploadConfirmed). Where("run_id=? AND (status=? OR status=?)", runID, ArtifactStatusUploadConfirmed, ArtifactStatusExpired).
GroupBy("artifact_name"). GroupBy("artifact_name").
Select("artifact_name, sum(file_size) as file_size"). Select("artifact_name, sum(file_size) as file_size, max(status) as status").
Find(&arts) Find(&arts)
} }
@ -149,3 +154,16 @@ func ListArtifactsByRunIDAndName(ctx context.Context, runID int64, name string)
arts := make([]*ActionArtifact, 0, 10) arts := make([]*ActionArtifact, 0, 10)
return arts, db.GetEngine(ctx).Where("run_id=? AND artifact_name=?", runID, name).Find(&arts) return arts, db.GetEngine(ctx).Where("run_id=? AND artifact_name=?", runID, name).Find(&arts)
} }
// ListNeedExpiredArtifacts returns every artifact whose upload is confirmed and
// whose expiration time is already in the past, i.e. artifacts that are due to
// be expired but have not been marked as expired yet.
//
// The returned slice is non-nil even when nothing matches. On error the slice
// holds whatever rows were loaded before the failure and must not be relied on.
func ListNeedExpiredArtifacts(ctx context.Context) ([]*ActionArtifact, error) {
	now := timeutil.TimeStamp(time.Now().Unix())
	arts := make([]*ActionArtifact, 0, 10)
	// Run the query in its own statement: Find grows the slice through &arts,
	// and Go does not specify whether a plain variable operand is read before
	// or after a function call that appears in the same return expression list.
	err := db.GetEngine(ctx).
		Where("expired_unix < ? AND status = ?", now, ArtifactStatusUploadConfirmed).
		Find(&arts)
	return arts, err
}
// SetArtifactExpired sets an artifact to expired
func SetArtifactExpired(ctx context.Context, artifactID int64) error {
_, err := db.GetEngine(ctx).Where("id=? AND status = ?", artifactID, ArtifactStatusUploadConfirmed).Cols("status").Update(&ActionArtifact{Status: int64(ArtifactStatusExpired)})
return err
}

View File

@ -528,6 +528,8 @@ var migrations = []Migration{
NewMigration("Add Version to ActionRun table", v1_21.AddVersionToActionRunTable), NewMigration("Add Version to ActionRun table", v1_21.AddVersionToActionRunTable),
// v273 -> v274 // v273 -> v274
NewMigration("Add Action Schedule Table", v1_21.AddActionScheduleTable), NewMigration("Add Action Schedule Table", v1_21.AddActionScheduleTable),
// v274 -> v275
NewMigration("Add Actions artifacts expiration date", v1_21.AddExpiredUnixColumnInActionArtifactTable),
} }
// GetCurrentDBVersion returns the current db version // GetCurrentDBVersion returns the current db version

View File

@ -0,0 +1,36 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package v1_21 //nolint
import (
"time"
"code.gitea.io/gitea/modules/timeutil"
"xorm.io/xorm"
)
// AddExpiredUnixColumnInActionArtifactTable syncs the indexed expired_unix
// column onto the action_artifact table and then backfills it for existing
// rows via updateArtifactsExpiredUnixTo90Days.
func AddExpiredUnixColumnInActionArtifactTable(x *xorm.Engine) error {
	// Minimal table shape: only the new column is declared for the sync.
	type ActionArtifact struct {
		ExpiredUnix timeutil.TimeStamp `xorm:"index"` // time when the artifact will be expired
	}

	err := x.Sync(new(ActionArtifact))
	if err != nil {
		return err
	}
	return updateArtifactsExpiredUnixTo90Days(x)
}
// updateArtifactsExpiredUnixTo90Days sets expired_unix to 90 days from the
// moment the migration runs for every action_artifact row with status 2 that
// has no expiration time yet. The update runs inside a transaction.
func updateArtifactsExpiredUnixTo90Days(x *xorm.Engine) error {
	const backfillSQL = `UPDATE action_artifact SET expired_unix=? WHERE status='2' AND expired_unix is NULL`

	tx := x.NewSession()
	defer tx.Close()

	if err := tx.Begin(); err != nil {
		return err
	}

	// The deadline is counted from now, not from each artifact's creation time.
	deadline := time.Now().AddDate(0, 0, 90).Unix()
	_, err := tx.Exec(backfillSQL, deadline)
	if err != nil {
		return err
	}
	return tx.Commit()
}

View File

@ -15,6 +15,7 @@ var (
Actions = struct { Actions = struct {
LogStorage *Storage // how the created logs should be stored LogStorage *Storage // how the created logs should be stored
ArtifactStorage *Storage // how the created artifacts should be stored ArtifactStorage *Storage // how the created artifacts should be stored
ArtifactRetentionDays int64 `ini:"ARTIFACT_RETENTION_DAYS"`
Enabled bool Enabled bool
DefaultActionsURL defaultActionsURL `ini:"DEFAULT_ACTIONS_URL"` DefaultActionsURL defaultActionsURL `ini:"DEFAULT_ACTIONS_URL"`
}{ }{
@ -76,5 +77,10 @@ func loadActionsFrom(rootCfg ConfigProvider) error {
Actions.ArtifactStorage, err = getStorage(rootCfg, "actions_artifacts", "", actionsSec) Actions.ArtifactStorage, err = getStorage(rootCfg, "actions_artifacts", "", actionsSec)
// default to 90 days in Github Actions
if Actions.ArtifactRetentionDays <= 0 {
Actions.ArtifactRetentionDays = 90
}
return err return err
} }

View File

@ -2731,6 +2731,7 @@ dashboard.reinit_missing_repos = Reinitialize all missing Git repositories for w
dashboard.sync_external_users = Synchronize external user data dashboard.sync_external_users = Synchronize external user data
dashboard.cleanup_hook_task_table = Cleanup hook_task table dashboard.cleanup_hook_task_table = Cleanup hook_task table
dashboard.cleanup_packages = Cleanup expired packages dashboard.cleanup_packages = Cleanup expired packages
dashboard.cleanup_actions = Cleanup actions expired logs and artifacts
dashboard.server_uptime = Server Uptime dashboard.server_uptime = Server Uptime
dashboard.current_goroutine = Current Goroutines dashboard.current_goroutine = Current Goroutines
dashboard.current_memory_usage = Current Memory Usage dashboard.current_memory_usage = Current Memory Usage

View File

@ -172,6 +172,7 @@ func (ar artifactRoutes) buildArtifactURL(runID int64, artifactHash, suffix stri
type getUploadArtifactRequest struct { type getUploadArtifactRequest struct {
Type string Type string
Name string Name string
RetentionDays int64
} }
type getUploadArtifactResponse struct { type getUploadArtifactResponse struct {
@ -192,10 +193,16 @@ func (ar artifactRoutes) getUploadArtifactURL(ctx *ArtifactContext) {
return return
} }
// set retention days
retentionQuery := ""
if req.RetentionDays > 0 {
retentionQuery = fmt.Sprintf("?retentionDays=%d", req.RetentionDays)
}
// use md5(artifact_name) to create upload url // use md5(artifact_name) to create upload url
artifactHash := fmt.Sprintf("%x", md5.Sum([]byte(req.Name))) artifactHash := fmt.Sprintf("%x", md5.Sum([]byte(req.Name)))
resp := getUploadArtifactResponse{ resp := getUploadArtifactResponse{
FileContainerResourceURL: ar.buildArtifactURL(runID, artifactHash, "upload"), FileContainerResourceURL: ar.buildArtifactURL(runID, artifactHash, "upload"+retentionQuery),
} }
log.Debug("[artifact] get upload url: %s", resp.FileContainerResourceURL) log.Debug("[artifact] get upload url: %s", resp.FileContainerResourceURL)
ctx.JSON(http.StatusOK, resp) ctx.JSON(http.StatusOK, resp)
@ -219,8 +226,21 @@ func (ar artifactRoutes) uploadArtifact(ctx *ArtifactContext) {
return return
} }
// get artifact retention days
expiredDays := setting.Actions.ArtifactRetentionDays
if queryRetentionDays := ctx.Req.URL.Query().Get("retentionDays"); queryRetentionDays != "" {
expiredDays, err = strconv.ParseInt(queryRetentionDays, 10, 64)
if err != nil {
log.Error("Error parse retention days: %v", err)
ctx.Error(http.StatusBadRequest, "Error parse retention days")
return
}
}
log.Debug("[artifact] upload chunk, name: %s, path: %s, size: %d, retention days: %d",
artifactName, artifactPath, fileRealTotalSize, expiredDays)
// create or get artifact with name and path // create or get artifact with name and path
artifact, err := actions.CreateArtifact(ctx, task, artifactName, artifactPath) artifact, err := actions.CreateArtifact(ctx, task, artifactName, artifactPath, expiredDays)
if err != nil { if err != nil {
log.Error("Error create or get artifact: %v", err) log.Error("Error create or get artifact: %v", err)
ctx.Error(http.StatusInternalServerError, "Error create or get artifact") ctx.Error(http.StatusInternalServerError, "Error create or get artifact")

View File

@ -179,7 +179,7 @@ func mergeChunksForArtifact(ctx *ArtifactContext, chunks []*chunkFileItem, st st
// save storage path to artifact // save storage path to artifact
log.Debug("[artifact] merge chunks to artifact: %d, %s", artifact.ID, storagePath) log.Debug("[artifact] merge chunks to artifact: %d, %s", artifact.ID, storagePath)
artifact.StoragePath = storagePath artifact.StoragePath = storagePath
artifact.Status = actions.ArtifactStatusUploadConfirmed artifact.Status = int64(actions.ArtifactStatusUploadConfirmed)
if err := actions.UpdateArtifactByID(ctx, artifact.ID, artifact); err != nil { if err := actions.UpdateArtifactByID(ctx, artifact.ID, artifact); err != nil {
return fmt.Errorf("update artifact error: %v", err) return fmt.Errorf("update artifact error: %v", err)
} }

View File

@ -488,6 +488,7 @@ type ArtifactsViewResponse struct {
type ArtifactsViewItem struct { type ArtifactsViewItem struct {
Name string `json:"name"` Name string `json:"name"`
Size int64 `json:"size"` Size int64 `json:"size"`
Status string `json:"status"`
} }
func ArtifactsView(ctx *context_module.Context) { func ArtifactsView(ctx *context_module.Context) {
@ -510,9 +511,14 @@ func ArtifactsView(ctx *context_module.Context) {
Artifacts: make([]*ArtifactsViewItem, 0, len(artifacts)), Artifacts: make([]*ArtifactsViewItem, 0, len(artifacts)),
} }
for _, art := range artifacts { for _, art := range artifacts {
status := "completed"
if art.Status == int64(actions_model.ArtifactStatusExpired) {
status = "expired"
}
artifactsResponse.Artifacts = append(artifactsResponse.Artifacts, &ArtifactsViewItem{ artifactsResponse.Artifacts = append(artifactsResponse.Artifacts, &ArtifactsViewItem{
Name: art.ArtifactName, Name: art.ArtifactName,
Size: art.FileSize, Size: art.FileSize,
Status: status,
}) })
} }
ctx.JSON(http.StatusOK, artifactsResponse) ctx.JSON(http.StatusOK, artifactsResponse)

View File

@ -0,0 +1,42 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package actions
import (
"context"
"time"
"code.gitea.io/gitea/models/actions"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/storage"
)
// Cleanup is the entry point for removing expired actions data.
// At present it only cleans up expired artifacts; olderThan is accepted but
// not used yet.
func Cleanup(taskCtx context.Context, olderThan time.Duration) error {
	// TODO: clean up expired actions logs

	// clean up expired artifacts
	if err := CleanupArtifacts(taskCtx); err != nil {
		return err
	}
	return nil
}
// CleanupArtifacts deletes the stored files of all artifacts that are due to
// expire and then flags their records as expired.
//
// A failure on a single artifact is logged and that artifact is skipped; only
// a failure to list the artifacts is returned to the caller.
func CleanupArtifacts(taskCtx context.Context) error {
	expired, err := actions.ListNeedExpiredArtifacts(taskCtx)
	if err != nil {
		return err
	}
	log.Info("Found %d expired artifacts", len(expired))

	for i := range expired {
		art := expired[i]

		// Remove the file first; the record is only flagged once the file is gone.
		if delErr := storage.ActionsArtifacts.Delete(art.StoragePath); delErr != nil {
			log.Error("Cannot delete artifact %d: %v", art.ID, delErr)
			continue
		}

		if markErr := actions.SetArtifactExpired(taskCtx, art.ID); markErr != nil {
			log.Error("Cannot set artifact %d expired: %v", art.ID, markErr)
			continue
		}

		log.Info("Artifact %d set expired", art.ID)
	}
	return nil
}

View File

@ -13,6 +13,7 @@ import (
"code.gitea.io/gitea/models/webhook" "code.gitea.io/gitea/models/webhook"
"code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/services/actions"
"code.gitea.io/gitea/services/auth" "code.gitea.io/gitea/services/auth"
"code.gitea.io/gitea/services/migrations" "code.gitea.io/gitea/services/migrations"
mirror_service "code.gitea.io/gitea/services/mirror" mirror_service "code.gitea.io/gitea/services/mirror"
@ -156,6 +157,20 @@ func registerCleanupPackages() {
}) })
} }
func registerActionsCleanup() {
RegisterTaskFatal("cleanup_actions", &OlderThanConfig{
BaseConfig: BaseConfig{
Enabled: true,
RunAtStart: true,
Schedule: "@midnight",
},
OlderThan: 24 * time.Hour,
}, func(ctx context.Context, _ *user_model.User, config Config) error {
realConfig := config.(*OlderThanConfig)
return actions.Cleanup(ctx, realConfig.OlderThan)
})
}
func initBasicTasks() { func initBasicTasks() {
if setting.Mirror.Enabled { if setting.Mirror.Enabled {
registerUpdateMirrorTask() registerUpdateMirrorTask()
@ -172,4 +187,7 @@ func initBasicTasks() {
if setting.Packages.Enabled { if setting.Packages.Enabled {
registerCleanupPackages() registerCleanupPackages()
} }
if setting.Actions.Enabled {
registerActionsCleanup()
}
} }

View File

@ -20,6 +20,7 @@ type uploadArtifactResponse struct {
type getUploadArtifactRequest struct { type getUploadArtifactRequest struct {
Type string Type string
Name string Name string
RetentionDays int64
} }
func TestActionsArtifactUploadSingleFile(t *testing.T) { func TestActionsArtifactUploadSingleFile(t *testing.T) {
@ -252,3 +253,40 @@ func TestActionsArtifactDownloadMultiFiles(t *testing.T) {
assert.Equal(t, resp.Body.String(), body) assert.Equal(t, resp.Body.String(), body)
} }
} }
func TestActionsArtifactUploadWithRetentionDays(t *testing.T) {
defer tests.PrepareTestEnv(t)()
// acquire artifact upload url
req := NewRequestWithJSON(t, "POST", "/api/actions_pipeline/_apis/pipelines/workflows/791/artifacts", getUploadArtifactRequest{
Type: "actions_storage",
Name: "artifact-retention-days",
RetentionDays: 9,
})
req = addTokenAuthHeader(req, "Bearer 8061e833a55f6fc0157c98b883e91fcfeeb1a71a")
resp := MakeRequest(t, req, http.StatusOK)
var uploadResp uploadArtifactResponse
DecodeJSON(t, resp, &uploadResp)
assert.Contains(t, uploadResp.FileContainerResourceURL, "/api/actions_pipeline/_apis/pipelines/workflows/791/artifacts")
assert.Contains(t, uploadResp.FileContainerResourceURL, "?retentionDays=9")
// get upload url
idx := strings.Index(uploadResp.FileContainerResourceURL, "/api/actions_pipeline/_apis/pipelines/")
url := uploadResp.FileContainerResourceURL[idx:] + "&itemPath=artifact-retention-days/abc.txt"
// upload artifact chunk
body := strings.Repeat("A", 1024)
req = NewRequestWithBody(t, "PUT", url, strings.NewReader(body))
req = addTokenAuthHeader(req, "Bearer 8061e833a55f6fc0157c98b883e91fcfeeb1a71a")
req.Header.Add("Content-Range", "bytes 0-1023/1024")
req.Header.Add("x-tfs-filelength", "1024")
req.Header.Add("x-actions-results-md5", "1HsSe8LeLWh93ILaw1TEFQ==") // base64(md5(body))
MakeRequest(t, req, http.StatusOK)
t.Logf("Create artifact confirm")
// confirm artifact upload
req = NewRequest(t, "PATCH", "/api/actions_pipeline/_apis/pipelines/workflows/791/artifacts?artifactName=artifact-retention-days")
req = addTokenAuthHeader(req, "Bearer 8061e833a55f6fc0157c98b883e91fcfeeb1a71a")
MakeRequest(t, req, http.StatusOK)
}