2020-01-12 05:11:17 -07:00
|
|
|
// Copyright 2019 The Gitea Authors. All rights reserved.
|
2022-11-27 11:20:29 -07:00
|
|
|
// SPDX-License-Identifier: MIT
|
2020-01-12 05:11:17 -07:00
|
|
|
|
|
|
|
package repository
|
|
|
|
|
|
|
|
import (
|
2021-09-23 09:45:36 -06:00
|
|
|
"context"
|
2020-01-12 05:11:17 -07:00
|
|
|
"fmt"
|
2022-06-06 02:01:49 -06:00
|
|
|
"os"
|
|
|
|
"path"
|
2023-01-13 11:54:02 -07:00
|
|
|
"path/filepath"
|
2020-01-12 05:11:17 -07:00
|
|
|
"strings"
|
|
|
|
|
2022-08-24 20:31:57 -06:00
|
|
|
activities_model "code.gitea.io/gitea/models/activities"
|
2021-09-19 05:49:59 -06:00
|
|
|
"code.gitea.io/gitea/models/db"
|
2022-06-12 09:51:54 -06:00
|
|
|
git_model "code.gitea.io/gitea/models/git"
|
2022-06-06 02:01:49 -06:00
|
|
|
access_model "code.gitea.io/gitea/models/perm/access"
|
2021-12-09 18:27:50 -07:00
|
|
|
repo_model "code.gitea.io/gitea/models/repo"
|
Refactor and enhance issue indexer to support both searching, filtering and paging (#26012)
Fix #24662.
Replace #24822 and #25708 (although it has been merged)
## Background
In the past, Gitea supported issue searching with a keyword and
conditions in a less efficient way. It worked by searching for issues
with the keyword and obtaining limited IDs (as it is heavy to get all)
on the indexer (bleve/elasticsearch/meilisearch), and then querying with
conditions on the database to find a subset of the found IDs. This is
why the results could be incomplete.
To solve this issue, we need to store all fields that could be used as
conditions in the indexer and support both keyword and additional
conditions when searching with the indexer.
## Major changes
- Redefine `IndexerData` to include all fields that could be used as
filter conditions.
- Refactor `Search(ctx context.Context, kw string, repoIDs []int64,
limit, start int, state string)` to `Search(ctx context.Context, options
*SearchOptions)`, so it supports more conditions now.
- Change the data type stored in `issueIndexerQueue`. Use
`IndexerMetadata` instead of `IndexerData` in case the data has been
updated while it is in the queue. This also reduces the storage size of
the queue.
- Enhance searching with Bleve/Elasticsearch/Meilisearch, make them
fully support `SearchOptions`. Also, update the data versions.
- Keep most logic of database indexer, but remove
`issues.SearchIssueIDsByKeyword` in `models` to avoid confusion where is
the entry point to search issues.
- Start a Meilisearch instance to test it in unit tests.
- Add unit tests with almost full coverage to test
Bleve/Elasticsearch/Meilisearch indexer.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-07-31 00:28:53 -06:00
|
|
|
issue_indexer "code.gitea.io/gitea/modules/indexer/issues"
|
2020-01-12 05:11:17 -07:00
|
|
|
"code.gitea.io/gitea/modules/log"
|
2022-06-06 02:01:49 -06:00
|
|
|
api "code.gitea.io/gitea/modules/structs"
|
2020-08-11 14:05:34 -06:00
|
|
|
"code.gitea.io/gitea/modules/util"
|
2020-01-12 05:11:17 -07:00
|
|
|
)
|
|
|
|
|
2023-01-13 11:54:02 -07:00
|
|
|
// notRegularFileMode is the union of the file mode bits that mark an entry as
// something other than a regular file: symlinks, named pipes, sockets, block
// and character devices, and irregular files. getDirectorySize only counts
// entries that have none of these bits set.
const notRegularFileMode = os.ModeSymlink | os.ModeNamedPipe | os.ModeSocket | os.ModeDevice | os.ModeCharDevice | os.ModeIrregular
|
|
|
|
|
|
|
|
// getDirectorySize returns the disk consumption for a given path
|
|
|
|
func getDirectorySize(path string) (int64, error) {
|
|
|
|
var size int64
|
2023-11-28 22:08:58 -07:00
|
|
|
err := filepath.WalkDir(path, func(_ string, entry os.DirEntry, err error) error {
|
|
|
|
if os.IsNotExist(err) { // ignore the error because some files (like temp/lock file) may be deleted during traversing.
|
|
|
|
return nil
|
|
|
|
} else if err != nil {
|
2023-01-13 11:54:02 -07:00
|
|
|
return err
|
|
|
|
}
|
2023-11-28 22:08:58 -07:00
|
|
|
if entry.IsDir() {
|
2023-01-13 11:54:02 -07:00
|
|
|
return nil
|
|
|
|
}
|
2023-11-28 22:08:58 -07:00
|
|
|
info, err := entry.Info()
|
|
|
|
if os.IsNotExist(err) { // ignore the error as above
|
|
|
|
return nil
|
|
|
|
} else if err != nil {
|
2023-01-13 11:54:02 -07:00
|
|
|
return err
|
|
|
|
}
|
2023-11-28 22:08:58 -07:00
|
|
|
if (info.Mode() & notRegularFileMode) == 0 {
|
|
|
|
size += info.Size()
|
2023-01-13 11:54:02 -07:00
|
|
|
}
|
2023-11-28 22:08:58 -07:00
|
|
|
return nil
|
2023-01-13 11:54:02 -07:00
|
|
|
})
|
|
|
|
return size, err
|
|
|
|
}
|
|
|
|
|
|
|
|
// UpdateRepoSize updates the repository size, calculating it using getDirectorySize
|
2022-06-06 02:01:49 -06:00
|
|
|
func UpdateRepoSize(ctx context.Context, repo *repo_model.Repository) error {
|
2023-01-13 11:54:02 -07:00
|
|
|
size, err := getDirectorySize(repo.RepoPath())
|
2022-06-06 02:01:49 -06:00
|
|
|
if err != nil {
|
2022-10-24 13:29:17 -06:00
|
|
|
return fmt.Errorf("updateSize: %w", err)
|
2022-06-06 02:01:49 -06:00
|
|
|
}
|
|
|
|
|
2022-06-12 09:51:54 -06:00
|
|
|
lfsSize, err := git_model.GetRepoLFSSize(ctx, repo.ID)
|
2022-06-06 02:01:49 -06:00
|
|
|
if err != nil {
|
2022-10-24 13:29:17 -06:00
|
|
|
return fmt.Errorf("updateSize: GetLFSMetaObjects: %w", err)
|
2022-06-06 02:01:49 -06:00
|
|
|
}
|
|
|
|
|
2023-06-28 16:41:02 -06:00
|
|
|
return repo_model.UpdateRepoSize(ctx, repo.ID, size, lfsSize)
|
2022-06-06 02:01:49 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// CheckDaemonExportOK creates/removes git-daemon-export-ok for git-daemon...
|
|
|
|
func CheckDaemonExportOK(ctx context.Context, repo *repo_model.Repository) error {
|
2023-02-18 05:11:03 -07:00
|
|
|
if err := repo.LoadOwner(ctx); err != nil {
|
2022-06-06 02:01:49 -06:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
// Create/Remove git-daemon-export-ok for git-daemon...
|
|
|
|
daemonExportFile := path.Join(repo.RepoPath(), `git-daemon-export-ok`)
|
|
|
|
|
|
|
|
isExist, err := util.IsExist(daemonExportFile)
|
|
|
|
if err != nil {
|
|
|
|
log.Error("Unable to check if %s exists. Error: %v", daemonExportFile, err)
|
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
isPublic := !repo.IsPrivate && repo.Owner.Visibility == api.VisibleTypePublic
|
|
|
|
if !isPublic && isExist {
|
|
|
|
if err = util.Remove(daemonExportFile); err != nil {
|
|
|
|
log.Error("Failed to remove %s: %v", daemonExportFile, err)
|
|
|
|
}
|
|
|
|
} else if isPublic && !isExist {
|
|
|
|
if f, err := os.Create(daemonExportFile); err != nil {
|
|
|
|
log.Error("Failed to create %s: %v", daemonExportFile, err)
|
|
|
|
} else {
|
|
|
|
f.Close()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// UpdateRepository updates a repository with db context
|
|
|
|
func UpdateRepository(ctx context.Context, repo *repo_model.Repository, visibilityChanged bool) (err error) {
|
|
|
|
repo.LowerName = strings.ToLower(repo.Name)
|
|
|
|
|
|
|
|
e := db.GetEngine(ctx)
|
|
|
|
|
|
|
|
if _, err = e.ID(repo.ID).AllCols().Update(repo); err != nil {
|
2022-10-24 13:29:17 -06:00
|
|
|
return fmt.Errorf("update: %w", err)
|
2022-06-06 02:01:49 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
if err = UpdateRepoSize(ctx, repo); err != nil {
|
|
|
|
log.Error("Failed to update size for repository: %v", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if visibilityChanged {
|
2023-02-18 05:11:03 -07:00
|
|
|
if err = repo.LoadOwner(ctx); err != nil {
|
|
|
|
return fmt.Errorf("LoadOwner: %w", err)
|
2022-06-06 02:01:49 -06:00
|
|
|
}
|
|
|
|
if repo.Owner.IsOrganization() {
|
|
|
|
// Organization repository need to recalculate access table when visibility is changed.
|
|
|
|
if err = access_model.RecalculateTeamAccesses(ctx, repo, 0); err != nil {
|
2022-10-24 13:29:17 -06:00
|
|
|
return fmt.Errorf("recalculateTeamAccesses: %w", err)
|
2022-06-06 02:01:49 -06:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// If repo has become private, we need to set its actions to private.
|
|
|
|
if repo.IsPrivate {
|
2022-08-24 20:31:57 -06:00
|
|
|
_, err = e.Where("repo_id = ?", repo.ID).Cols("is_private").Update(&activities_model.Action{
|
2022-06-06 02:01:49 -06:00
|
|
|
IsPrivate: true,
|
|
|
|
})
|
|
|
|
if err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2023-06-05 07:25:43 -06:00
|
|
|
|
|
|
|
if err = repo_model.ClearRepoStars(ctx, repo.ID); err != nil {
|
|
|
|
return err
|
|
|
|
}
|
2022-06-06 02:01:49 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
// Create/Remove git-daemon-export-ok for git-daemon...
|
2022-06-20 06:38:58 -06:00
|
|
|
if err := CheckDaemonExportOK(ctx, repo); err != nil {
|
2022-06-06 02:01:49 -06:00
|
|
|
return err
|
|
|
|
}
|
|
|
|
|
|
|
|
forkRepos, err := repo_model.GetRepositoriesByForkID(ctx, repo.ID)
|
|
|
|
if err != nil {
|
2022-10-24 13:29:17 -06:00
|
|
|
return fmt.Errorf("getRepositoriesByForkID: %w", err)
|
2022-06-06 02:01:49 -06:00
|
|
|
}
|
|
|
|
for i := range forkRepos {
|
|
|
|
forkRepos[i].IsPrivate = repo.IsPrivate || repo.Owner.Visibility == api.VisibleTypePrivate
|
|
|
|
if err = UpdateRepository(ctx, forkRepos[i], true); err != nil {
|
2022-10-24 13:29:17 -06:00
|
|
|
return fmt.Errorf("updateRepository[%d]: %w", forkRepos[i].ID, err)
|
2022-06-06 02:01:49 -06:00
|
|
|
}
|
|
|
|
}
|
Refactor and enhance issue indexer to support both searching, filtering and paging (#26012)
Fix #24662.
Replace #24822 and #25708 (although it has been merged)
## Background
In the past, Gitea supported issue searching with a keyword and
conditions in a less efficient way. It worked by searching for issues
with the keyword and obtaining limited IDs (as it is heavy to get all)
on the indexer (bleve/elasticsearch/meilisearch), and then querying with
conditions on the database to find a subset of the found IDs. This is
why the results could be incomplete.
To solve this issue, we need to store all fields that could be used as
conditions in the indexer and support both keyword and additional
conditions when searching with the indexer.
## Major changes
- Redefine `IndexerData` to include all fields that could be used as
filter conditions.
- Refactor `Search(ctx context.Context, kw string, repoIDs []int64,
limit, start int, state string)` to `Search(ctx context.Context, options
*SearchOptions)`, so it supports more conditions now.
- Change the data type stored in `issueIndexerQueue`. Use
`IndexerMetadata` instead of `IndexerData` in case the data has been
updated while it is in the queue. This also reduces the storage size of
the queue.
- Enhance searching with Bleve/Elasticsearch/Meilisearch, make them
fully support `SearchOptions`. Also, update the data versions.
- Keep most logic of database indexer, but remove
`issues.SearchIssueIDsByKeyword` in `models` to avoid confusion where is
the entry point to search issues.
- Start a Meilisearch instance to test it in unit tests.
- Add unit tests with almost full coverage to test
Bleve/Elasticsearch/Meilisearch indexer.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-07-31 00:28:53 -06:00
|
|
|
|
|
|
|
// If visibility is changed, we need to update the issue indexer.
|
|
|
|
// Since the data in the issue indexer have field to indicate if the repo is public or not.
|
|
|
|
issue_indexer.UpdateRepoIndexer(ctx, repo.ID)
|
2022-06-06 02:01:49 -06:00
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|