2016-11-03 16:16:01 -06:00
// Copyright 2015 The Gogs Authors. All rights reserved.
2019-06-07 14:29:29 -06:00
// Copyright 2019 The Gitea Authors. All rights reserved.
2022-11-27 11:20:29 -07:00
// SPDX-License-Identifier: MIT
2016-11-03 16:16:01 -06:00
package git
import (
2021-12-23 01:32:29 -07:00
"bufio"
2020-05-25 23:58:07 -06:00
"bytes"
2022-01-19 16:26:57 -07:00
"context"
2021-12-23 01:32:29 -07:00
"errors"
2016-11-03 16:16:01 -06:00
"fmt"
2018-01-07 06:10:20 -07:00
"io"
2021-12-23 01:32:29 -07:00
"os"
"path/filepath"
2020-05-25 23:58:07 -06:00
"regexp"
2016-11-03 16:16:01 -06:00
"strconv"
"strings"
"time"
2019-06-12 13:41:28 -06:00
logger "code.gitea.io/gitea/modules/log"
2016-11-03 16:16:01 -06:00
)
2019-06-07 14:29:29 -06:00
// CompareInfo represents needed information for comparing references.
type CompareInfo struct {
2021-02-16 08:39:45 -07:00
MergeBase string
BaseCommitID string
HeadCommitID string
2021-08-09 12:08:51 -06:00
Commits [ ] * Commit
2021-02-16 08:39:45 -07:00
NumFiles int
2016-11-03 16:16:01 -06:00
}
2019-06-11 17:32:08 -06:00
// GetMergeBase checks and returns merge base of two branches and the reference used as base.
2021-12-19 21:41:31 -07:00
func ( repo * Repository ) GetMergeBase ( tmpRemote , base , head string ) ( string , string , error ) {
2019-06-07 14:29:29 -06:00
if tmpRemote == "" {
tmpRemote = "origin"
}
if tmpRemote != "origin" {
2021-12-02 00:28:08 -07:00
tmpBaseName := RemotePrefix + tmpRemote + "/tmp_" + base
2019-06-07 14:29:29 -06:00
// Fetch commit into a temporary branch in order to be able to handle commits and tags
2022-10-23 08:44:45 -06:00
_ , _ , err := NewCommand ( repo . Ctx , "fetch" , "--no-tags" ) . AddDynamicArguments ( tmpRemote ) . AddDashesAndList ( base + ":" + tmpBaseName ) . RunStdString ( & RunOpts { Dir : repo . Path } )
2019-06-07 14:29:29 -06:00
if err == nil {
base = tmpBaseName
}
}
2022-10-23 08:44:45 -06:00
stdout , _ , err := NewCommand ( repo . Ctx , "merge-base" ) . AddDashesAndList ( base , head ) . RunStdString ( & RunOpts { Dir : repo . Path } )
2019-06-11 17:32:08 -06:00
return strings . TrimSpace ( stdout ) , base , err
2016-11-03 16:16:01 -06:00
}
2019-06-07 14:29:29 -06:00
// GetCompareInfo generates and returns compare information between base and head branches of repositories.
2021-10-15 10:05:33 -06:00
func ( repo * Repository ) GetCompareInfo ( basePath , baseBranch , headBranch string , directComparison , fileOnly bool ) ( _ * CompareInfo , err error ) {
2019-06-07 14:29:29 -06:00
var (
remoteBranch string
tmpRemote string
)
2016-11-03 16:16:01 -06:00
// We don't need a temporary remote for same repository.
if repo . Path != basePath {
// Add a temporary remote
2019-06-07 14:29:29 -06:00
tmpRemote = strconv . FormatInt ( time . Now ( ) . UnixNano ( ) , 10 )
2019-08-13 02:30:44 -06:00
if err = repo . AddRemote ( tmpRemote , basePath , false ) ; err != nil {
2022-10-24 13:29:17 -06:00
return nil , fmt . Errorf ( "AddRemote: %w" , err )
2016-11-03 16:16:01 -06:00
}
2019-06-12 13:41:28 -06:00
defer func ( ) {
if err := repo . RemoveRemote ( tmpRemote ) ; err != nil {
logger . Error ( "GetPullRequestInfo: RemoveRemote: %v" , err )
}
} ( )
2016-11-03 16:16:01 -06:00
}
2019-06-07 14:29:29 -06:00
compareInfo := new ( CompareInfo )
2021-02-16 08:39:45 -07:00
2022-01-19 16:26:57 -07:00
compareInfo . HeadCommitID , err = GetFullCommitID ( repo . Ctx , repo . Path , headBranch )
2021-02-16 08:39:45 -07:00
if err != nil {
compareInfo . HeadCommitID = headBranch
}
2019-06-11 17:32:08 -06:00
compareInfo . MergeBase , remoteBranch , err = repo . GetMergeBase ( tmpRemote , baseBranch , headBranch )
2019-04-09 14:45:58 -06:00
if err == nil {
2022-01-19 16:26:57 -07:00
compareInfo . BaseCommitID , err = GetFullCommitID ( repo . Ctx , repo . Path , remoteBranch )
2021-02-16 08:39:45 -07:00
if err != nil {
compareInfo . BaseCommitID = remoteBranch
}
2021-09-27 06:19:34 -06:00
separator := "..."
baseCommitID := compareInfo . MergeBase
if directComparison {
separator = ".."
baseCommitID = compareInfo . BaseCommitID
}
2020-07-29 11:53:04 -06:00
// We have a common base - therefore we know that ... should work
2021-10-15 10:05:33 -06:00
if ! fileOnly {
2023-03-26 12:31:21 -06:00
// avoid: ambiguous argument 'refs/a...refs/b': unknown revision or path not in the working tree. Use '--': 'git <command> [<revision>...] -- [<file>...]'
2022-03-31 20:55:30 -06:00
var logs [ ] byte
2023-03-26 12:31:21 -06:00
logs , _ , err = NewCommand ( repo . Ctx , "log" ) . AddArguments ( prettyLogFormat ) .
AddDynamicArguments ( baseCommitID + separator + headBranch ) . AddArguments ( "--" ) .
RunStdBytes ( & RunOpts { Dir : repo . Path } )
2021-10-15 10:05:33 -06:00
if err != nil {
return nil , err
}
compareInfo . Commits , err = repo . parsePrettyFormatLogToList ( logs )
if err != nil {
2022-10-24 13:29:17 -06:00
return nil , fmt . Errorf ( "parsePrettyFormatLogToList: %w" , err )
2021-10-15 10:05:33 -06:00
}
} else {
compareInfo . Commits = [ ] * Commit { }
2019-04-09 14:45:58 -06:00
}
} else {
2021-08-09 12:08:51 -06:00
compareInfo . Commits = [ ] * Commit { }
2022-01-19 16:26:57 -07:00
compareInfo . MergeBase , err = GetFullCommitID ( repo . Ctx , repo . Path , remoteBranch )
2019-04-09 14:45:58 -06:00
if err != nil {
2019-06-07 14:29:29 -06:00
compareInfo . MergeBase = remoteBranch
2019-04-09 14:45:58 -06:00
}
2021-02-16 08:39:45 -07:00
compareInfo . BaseCommitID = compareInfo . MergeBase
2016-11-03 16:16:01 -06:00
}
// Count number of changed files.
2020-05-25 23:58:07 -06:00
// This probably should be removed as we need to use shortstat elsewhere
// Now there is git diff --shortstat but this appears to be slower than simply iterating with --nameonly
2021-09-27 06:19:34 -06:00
compareInfo . NumFiles , err = repo . GetDiffNumChangedFiles ( remoteBranch , headBranch , directComparison )
2016-11-03 16:16:01 -06:00
if err != nil {
return nil , err
}
2019-06-07 14:29:29 -06:00
return compareInfo , nil
2016-11-03 16:16:01 -06:00
}
2020-05-25 23:58:07 -06:00
// lineCountWriter is an io.Writer that discards its input and only keeps a
// running count of the NUL bytes it has seen. With NUL-separated output
// (git's -z flag) that count equals the number of emitted entries.
type lineCountWriter struct {
	numLines int
}

// Write counts the number of NUL bytes in the provided bytestream and adds
// them to numLines. It always reports the whole slice as written and never fails.
func (l *lineCountWriter) Write(p []byte) (int, error) {
	l.numLines += bytes.Count(p, []byte{'\000'})
	return len(p), nil
}
// GetDiffNumChangedFiles counts the number of changed files
// This is substantially quicker than shortstat but...
2021-09-27 06:19:34 -06:00
func ( repo * Repository ) GetDiffNumChangedFiles ( base , head string , directComparison bool ) ( int , error ) {
2020-05-25 23:58:07 -06:00
// Now there is git diff --shortstat but this appears to be slower than simply iterating with --nameonly
w := & lineCountWriter { }
stderr := new ( bytes . Buffer )
2021-09-27 06:19:34 -06:00
separator := "..."
if directComparison {
separator = ".."
}
2023-03-26 12:31:21 -06:00
// avoid: ambiguous argument 'refs/a...refs/b': unknown revision or path not in the working tree. Use '--': 'git <command> [<revision>...] -- [<file>...]'
if err := NewCommand ( repo . Ctx , "diff" , "-z" , "--name-only" ) . AddDynamicArguments ( base + separator + head ) . AddArguments ( "--" ) .
2022-03-31 20:55:30 -06:00
Run ( & RunOpts {
Dir : repo . Path ,
Stdout : w ,
Stderr : stderr ,
2022-02-11 05:47:22 -07:00
} ) ; err != nil {
2020-07-29 11:53:04 -06:00
if strings . Contains ( stderr . String ( ) , "no merge base" ) {
// git >= 2.28 now returns an error if base and head have become unrelated.
// previously it would return the results of git diff -z --name-only base head so let's try that...
w = & lineCountWriter { }
stderr . Reset ( )
2023-03-26 12:31:21 -06:00
if err = NewCommand ( repo . Ctx , "diff" , "-z" , "--name-only" ) . AddDynamicArguments ( base , head ) . AddArguments ( "--" ) . Run ( & RunOpts {
2022-03-31 20:55:30 -06:00
Dir : repo . Path ,
Stdout : w ,
Stderr : stderr ,
2022-02-11 05:47:22 -07:00
} ) ; err == nil {
2020-07-29 11:53:04 -06:00
return w . numLines , nil
}
}
2022-10-24 13:29:17 -06:00
return 0 , fmt . Errorf ( "%w: Stderr: %s" , err , stderr )
2020-05-25 23:58:07 -06:00
}
return w . numLines , nil
}
// GetDiffShortStat counts number of changed files, number of additions and deletions
func ( repo * Repository ) GetDiffShortStat ( base , head string ) ( numFiles , totalAdditions , totalDeletions int , err error ) {
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 19:30:43 -07:00
numFiles , totalAdditions , totalDeletions , err = GetDiffShortStat ( repo . Ctx , repo . Path , nil , base + "..." + head )
2020-07-29 11:53:04 -06:00
if err != nil && strings . Contains ( err . Error ( ) , "no merge base" ) {
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 19:30:43 -07:00
return GetDiffShortStat ( repo . Ctx , repo . Path , nil , base , head )
2020-07-29 11:53:04 -06:00
}
2022-06-20 04:02:49 -06:00
return numFiles , totalAdditions , totalDeletions , err
2020-05-25 23:58:07 -06:00
}
// GetDiffShortStat counts number of changed files, number of additions and deletions
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 19:30:43 -07:00
func GetDiffShortStat ( ctx context . Context , repoPath string , trustedArgs TrustedCmdArgs , dynamicArgs ... string ) ( numFiles , totalAdditions , totalDeletions int , err error ) {
2020-05-25 23:58:07 -06:00
// Now if we call:
// $ git diff --shortstat 1ebb35b98889ff77299f24d82da426b434b0cca0...788b8b1440462d477f45b0088875
// we get:
// " 9902 files changed, 2034198 insertions(+), 298800 deletions(-)\n"
Refactor git command package to improve security and maintainability (#22678)
This PR follows #21535 (and replace #22592)
## Review without space diff
https://github.com/go-gitea/gitea/pull/22678/files?diff=split&w=1
## Purpose of this PR
1. Make git module command completely safe (risky user inputs won't be
passed as argument option anymore)
2. Avoid low-level mistakes like
https://github.com/go-gitea/gitea/pull/22098#discussion_r1045234918
3. Remove deprecated and dirty `CmdArgCheck` function, hide the `CmdArg`
type
4. Simplify code when using git command
## The main idea of this PR
* Move the `git.CmdArg` to the `internal` package, then no other package
except `git` could use it. Then developers could never do
`AddArguments(git.CmdArg(userInput))` any more.
* Introduce `git.ToTrustedCmdArgs`, it's for user-provided and already
trusted arguments. It's only used in a few cases, for example: use git
arguments from config file, help unit test with some arguments.
* Introduce `AddOptionValues` and `AddOptionFormat`, they make code more
clear and simple:
* Before: `AddArguments("-m").AddDynamicArguments(message)`
* After: `AddOptionValues("-m", message)`
* -
* Before: `AddArguments(git.CmdArg(fmt.Sprintf("--author='%s <%s>'",
sig.Name, sig.Email)))`
* After: `AddOptionFormat("--author='%s <%s>'", sig.Name, sig.Email)`
## FAQ
### Why these changes were not done in #21535 ?
#21535 is mainly a search&replace, it did its best to not change too
much logic.
Making the framework better needs a lot of changes, so this separate PR
is needed as the second step.
### The naming of `AddOptionXxx`
According to git's manual, the `--xxx` part is called `option`.
### How can it guarantee that `internal.CmdArg` won't be not misused?
Go's specification guarantees that. Trying to access other package's
internal package causes compilation error.
And, `golangci-lint` also denies the git/internal package. Only the
`git/command.go` can use it carefully.
### There is still a `ToTrustedCmdArgs`, will it still allow developers
to make mistakes and pass untrusted arguments?
Generally speaking, no. Because when using `ToTrustedCmdArgs`, the code
will be very complex (see the changes for examples). Then developers and
reviewers can know that something might be unreasonable.
### Why there was a `CmdArgCheck` and why it's removed?
At the moment of #21535, to reduce unnecessary changes, `CmdArgCheck`
was introduced as a hacky patch. Now, almost all code could be written
as `cmd := NewCommand(); cmd.AddXxx(...)`, then there is no need for
`CmdArgCheck` anymore.
### Why many codes for `signArg == ""` is deleted?
Because in the old code, `signArg` could never be empty string, it's
either `-S[key-id]` or `--no-gpg-sign`. So the `signArg == ""` is just
dead code.
---------
Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
2023-02-03 19:30:43 -07:00
cmd := NewCommand ( ctx , "diff" , "--shortstat" ) . AddArguments ( trustedArgs ... ) . AddDynamicArguments ( dynamicArgs ... )
stdout , _ , err := cmd . RunStdString ( & RunOpts { Dir : repoPath } )
2020-05-25 23:58:07 -06:00
if err != nil {
return 0 , 0 , 0 , err
}
return parseDiffStat ( stdout )
}
// shortStatFormat matches the summary line printed by `git diff --shortstat`, e.g.
// " 9902 files changed, 2034198 insertions(+), 298800 deletions(-)".
// The insertions and deletions parts are each optional.
var shortStatFormat = regexp.MustCompile(
	`\s*(\d+) files? changed(?:, (\d+) insertions?\(\+\))?(?:, (\d+) deletions?\(-\))?`)

// patchCommits captures the word that follows "From " at the very start of a patch line.
var patchCommits = regexp.MustCompile(`^From\s(\w+)\s`)

// parseDiffStat extracts the file, insertion and deletion counts from the
// output of `git diff --shortstat`.
//
// Empty output (or a lone newline) means nothing changed and yields all zeros.
// A count that git omitted from the summary is reported as zero. An error is
// returned when the output does not match the expected summary format or a
// number cannot be converted.
func parseDiffStat(stdout string) (numFiles, totalAdditions, totalDeletions int, err error) {
	if len(stdout) == 0 || stdout == "\n" {
		return 0, 0, 0, nil
	}

	groups := shortStatFormat.FindStringSubmatch(stdout)
	if len(groups) != 4 {
		return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s groups: %s", stdout, groups)
	}

	numFiles, err = strconv.Atoi(groups[1])
	if err != nil {
		return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumFiles %w", stdout, err)
	}

	// The optional groups are empty strings when git left that part out.
	if len(groups[2]) != 0 {
		totalAdditions, err = strconv.Atoi(groups[2])
		if err != nil {
			return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumAdditions %w", stdout, err)
		}
	}

	if len(groups[3]) != 0 {
		totalDeletions, err = strconv.Atoi(groups[3])
		if err != nil {
			return 0, 0, 0, fmt.Errorf("unable to parse shortstat: %s. Error parsing NumDeletions %w", stdout, err)
		}
	}
	return numFiles, totalAdditions, totalDeletions, nil
}
2019-12-13 15:21:06 -07:00
// GetDiffOrPatch generates either diff or formatted patch data between given revisions
2021-09-27 15:09:49 -06:00
func ( repo * Repository ) GetDiffOrPatch ( base , head string , w io . Writer , patch , binary bool ) error {
if patch {
2019-12-13 15:21:06 -07:00
return repo . GetPatch ( base , head , w )
}
2021-09-27 15:09:49 -06:00
if binary {
return repo . GetDiffBinary ( base , head , w )
}
2019-12-13 15:21:06 -07:00
return repo . GetDiff ( base , head , w )
2016-11-03 16:16:01 -06:00
}
2018-01-07 06:10:20 -07:00
2021-09-27 15:09:49 -06:00
// GetDiff generates and returns patch data between given revisions, optimized for human readability
2019-12-13 15:21:06 -07:00
func ( repo * Repository ) GetDiff ( base , head string , w io . Writer ) error {
2022-10-23 08:44:45 -06:00
return NewCommand ( repo . Ctx , "diff" , "-p" ) . AddDynamicArguments ( base , head ) . Run ( & RunOpts {
2022-03-31 20:55:30 -06:00
Dir : repo . Path ,
Stdout : w ,
2022-02-11 05:47:22 -07:00
} )
2021-09-27 15:09:49 -06:00
}
// GetDiffBinary generates and returns patch data between given revisions, including binary diffs.
func ( repo * Repository ) GetDiffBinary ( base , head string , w io . Writer ) error {
2022-10-23 08:44:45 -06:00
return NewCommand ( repo . Ctx , "diff" , "-p" , "--binary" , "--histogram" ) . AddDynamicArguments ( base , head ) . Run ( & RunOpts {
2022-03-31 20:55:30 -06:00
Dir : repo . Path ,
Stdout : w ,
2022-02-11 05:47:22 -07:00
} )
2019-12-13 15:21:06 -07:00
}
2018-01-07 06:10:20 -07:00
2021-09-27 15:09:49 -06:00
// GetPatch generates and returns format-patch data between given revisions, able to be used with `git apply`
2019-12-13 15:21:06 -07:00
func ( repo * Repository ) GetPatch ( base , head string , w io . Writer ) error {
2020-07-29 11:53:04 -06:00
stderr := new ( bytes . Buffer )
2022-10-23 08:44:45 -06:00
err := NewCommand ( repo . Ctx , "format-patch" , "--binary" , "--stdout" ) . AddDynamicArguments ( base + "..." + head ) .
2022-03-31 20:55:30 -06:00
Run ( & RunOpts {
Dir : repo . Path ,
Stdout : w ,
Stderr : stderr ,
2022-02-11 05:47:22 -07:00
} )
2020-07-29 11:53:04 -06:00
if err != nil && bytes . Contains ( stderr . Bytes ( ) , [ ] byte ( "no merge base" ) ) {
2022-10-23 08:44:45 -06:00
return NewCommand ( repo . Ctx , "format-patch" , "--binary" , "--stdout" ) . AddDynamicArguments ( base , head ) .
2022-03-31 20:55:30 -06:00
Run ( & RunOpts {
Dir : repo . Path ,
Stdout : w ,
2022-02-11 05:47:22 -07:00
} )
2020-07-29 11:53:04 -06:00
}
return err
2018-01-07 06:10:20 -07:00
}
2020-01-08 18:47:45 -07:00
2022-05-07 12:28:10 -06:00
// GetFilesChangedBetween returns a list of all files that have been changed between the given commits
2023-08-03 20:53:15 -06:00
// If base is undefined empty SHA (zeros), it only returns the files changed in the head commit
// If base is the SHA of an empty tree (EmptyTreeSHA), it returns the files changes from the initial commit to the head commit
2022-05-07 12:28:10 -06:00
func ( repo * Repository ) GetFilesChangedBetween ( base , head string ) ( [ ] string , error ) {
2024-03-11 22:21:27 -06:00
objectFormat , err := repo . GetObjectFormat ( )
if err != nil {
return nil , err
}
2023-08-03 20:53:15 -06:00
cmd := NewCommand ( repo . Ctx , "diff-tree" , "--name-only" , "--root" , "--no-commit-id" , "-r" , "-z" )
2024-03-11 22:21:27 -06:00
if base == objectFormat . EmptyObjectID ( ) . String ( ) {
2023-08-03 20:53:15 -06:00
cmd . AddDynamicArguments ( head )
} else {
cmd . AddDynamicArguments ( base , head )
}
stdout , _ , err := cmd . RunStdString ( & RunOpts { Dir : repo . Path } )
2022-05-07 12:28:10 -06:00
if err != nil {
return nil , err
}
2023-03-03 15:28:38 -07:00
split := strings . Split ( stdout , "\000" )
// Because Git will always emit filenames with a terminal NUL ignore the last entry in the split - which will always be empty.
if len ( split ) > 0 {
split = split [ : len ( split ) - 1 ]
}
return split , err
2022-05-07 12:28:10 -06:00
}
2020-01-08 18:47:45 -07:00
// GetDiffFromMergeBase generates and return patch data from merge base to head
func ( repo * Repository ) GetDiffFromMergeBase ( base , head string , w io . Writer ) error {
2020-07-29 11:53:04 -06:00
stderr := new ( bytes . Buffer )
2022-10-23 08:44:45 -06:00
err := NewCommand ( repo . Ctx , "diff" , "-p" , "--binary" ) . AddDynamicArguments ( base + "..." + head ) .
2022-03-31 20:55:30 -06:00
Run ( & RunOpts {
Dir : repo . Path ,
Stdout : w ,
Stderr : stderr ,
2022-02-11 05:47:22 -07:00
} )
2020-07-29 11:53:04 -06:00
if err != nil && bytes . Contains ( stderr . Bytes ( ) , [ ] byte ( "no merge base" ) ) {
2021-09-27 15:09:49 -06:00
return repo . GetDiffBinary ( base , head , w )
2020-07-29 11:53:04 -06:00
}
return err
2020-01-08 18:47:45 -07:00
}
2021-12-23 01:32:29 -07:00
// ReadPatchCommit returns the commit ID found on the first line of the patch
// file stored for the pull request prID.
//
// The file is looked up at "pulls/<prID>.patch" below the repository path. An
// error is returned when the file cannot be opened, when reading its first
// line fails, or when that line does not start with "From <commit> ".
func (repo *Repository) ReadPatchCommit(prID int64) (commitSHA string, err error) {
	// Migrated repositories download patches to "pulls" location
	patchFile := fmt.Sprintf("pulls/%d.patch", prID)
	loadPatch, err := os.Open(filepath.Join(repo.Path, patchFile))
	if err != nil {
		return "", err
	}
	defer loadPatch.Close()

	// Read only the first line of the patch - usually it contains the first commit made in patch
	scanner := bufio.NewScanner(loadPatch)
	scanner.Scan()
	// Surface real read failures (I/O error, over-long line) instead of
	// misreporting them as a malformed patch.
	if scanErr := scanner.Err(); scanErr != nil {
		return "", fmt.Errorf("reading %s: %w", patchFile, scanErr)
	}

	// Parse the Patch stats, sometimes Migration returns a 404 for the patch file
	commitSHAGroups := patchCommits.FindStringSubmatch(scanner.Text())
	if len(commitSHAGroups) == 0 {
		return "", errors.New("patch file doesn't contain valid commit ID")
	}
	return commitSHAGroups[1], nil
}