diff --git a/modules/markup/html.go b/modules/markup/html.go index b7291823b5..56e1a1c54e 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -53,38 +53,38 @@ var ( // shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`) - // anySHA1Pattern splits url containing SHA into parts + // anyHashPattern splits url containing SHA into parts anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`) // comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash" comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`) - validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`) + // fullURLPattern matches full URL like "mailto:...", "https://..." and "ssh+git://..." + fullURLPattern = regexp.MustCompile(`^[a-z][-+\w]+:`) - // While this email regex is definitely not perfect and I'm sure you can come up - // with edge cases, it is still accepted by the CommonMark specification, as - // well as the HTML5 spec: + // emailRegex is definitely not perfect with edge cases, + // it is still accepted by the CommonMark specification, as well as the HTML5 spec: // http://spec.commonmark.org/0.28/#email-address // https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail) emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))") - // blackfriday extensions create IDs like fn:user-content-footnote + // blackfridayExtRegex is for blackfriday extensions create IDs like fn:user-content-footnote blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`) - // EmojiShortCodeRegex find emoji by alias like :smile: - EmojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`) + // emojiShortCodeRegex find emoji by alias like :smile: + emojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`) ) // CSS class for action keywords (e.g. "closes: #1") const keywordClass = "issue-keyword" -// IsLink reports whether link fits valid format. -func IsLink(link []byte) bool { - return validLinksPattern.Match(link) +// IsFullURLBytes reports whether link fits valid format. +func IsFullURLBytes(link []byte) bool { + return fullURLPattern.Match(link) } -func IsLinkStr(link string) bool { - return validLinksPattern.MatchString(link) +func IsFullURLString(link string) bool { + return fullURLPattern.MatchString(link) } // regexp for full links to issues/pulls @@ -399,7 +399,7 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) { if attr.Key != "src" { continue } - if len(attr.Val) > 0 && !IsLinkStr(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") { + if len(attr.Val) > 0 && !IsFullURLString(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") { attr.Val = util.URLJoin(ctx.Links.ResolveMediaLink(ctx.IsWiki), attr.Val) } attr.Val = camoHandleLink(attr.Val) @@ -650,7 +650,7 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) { if equalPos := strings.IndexByte(v, '='); equalPos == -1 { // There is no equal in this argument; this is a mandatory arg if props["name"] == "" { - if IsLinkStr(v) { + if IsFullURLString(v) { // If we clearly see it is a link, we save it so // But first we need to ensure, that if both mandatory args provided @@ -725,7 +725,7 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) { DataAtom: atom.A, } childNode.Parent = linkNode - absoluteLink := IsLinkStr(link) + absoluteLink := IsFullURLString(link) if !absoluteLink { if image { link = strings.ReplaceAll(link, " ", "+") @@ -1059,7 +1059,7 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) { start := 0 next := node.NextSibling for node != nil && node != next && start < len(node.Data) { - m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:]) + m := emojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:]) if m == nil { return } diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go index 89ecfc036b..cb29431d4b 100644 --- a/modules/markup/html_test.go +++ b/modules/markup/html_test.go @@ -204,6 +204,15 @@ func TestRender_links(t *testing.T) { test( "magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download", `
magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download
`) + test( + `[link](https://example.com)`, + ``) + test( + `[link](mailto:test@example.com)`, + ``) + test( + `[link](javascript:xss)`, + `link
`) // Test that should *not* be turned into URL test( @@ -673,3 +682,9 @@ func TestIssue18471(t *testing.T) { assert.NoError(t, err) assert.Equal(t, "783b039...da951ce
", res.String())
}
+
+func TestIsFullURL(t *testing.T) {
+ assert.True(t, markup.IsFullURLString("https://example.com"))
+ assert.True(t, markup.IsFullURLString("mailto:test@example.com"))
+ assert.False(t, markup.IsFullURLString("/foo:bar"))
+}
diff --git a/modules/markup/markdown/goldmark.go b/modules/markup/markdown/goldmark.go
index 36ce6397f4..c4b23e66fc 100644
--- a/modules/markup/markdown/goldmark.go
+++ b/modules/markup/markdown/goldmark.go
@@ -26,8 +26,6 @@ import (
"github.com/yuin/goldmark/util"
)
-var byteMailto = []byte("mailto:")
-
// ASTTransformer is a default transformer of the goldmark tree.
type ASTTransformer struct{}
@@ -84,7 +82,7 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
// 2. If they're not wrapped with a link they need a link wrapper
// Check if the destination is a real link
- if len(v.Destination) > 0 && !markup.IsLink(v.Destination) {
+ if len(v.Destination) > 0 && !markup.IsFullURLBytes(v.Destination) {
v.Destination = []byte(giteautil.URLJoin(
ctx.Links.ResolveMediaLink(ctx.IsWiki),
strings.TrimLeft(string(v.Destination), "/"),
@@ -130,23 +128,17 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
case *ast.Link:
// Links need their href to munged to be a real value
link := v.Destination
- if len(link) > 0 && !markup.IsLink(link) &&
- link[0] != '#' && !bytes.HasPrefix(link, byteMailto) {
- // special case: this is not a link, a hash link or a mailto:, so it's a
- // relative URL
-
- var base string
+ isAnchorFragment := len(link) > 0 && link[0] == '#'
+ if !isAnchorFragment && !markup.IsFullURLBytes(link) {
+ base := ctx.Links.Base
if ctx.IsWiki {
base = ctx.Links.WikiLink()
} else if ctx.Links.HasBranchInfo() {
base = ctx.Links.SrcLink()
- } else {
- base = ctx.Links.Base
}
-
link = []byte(giteautil.URLJoin(base, string(link)))
}
- if len(link) > 0 && link[0] == '#' {
+ if isAnchorFragment {
link = []byte("#user-content-" + string(link)[1:])
}
v.Destination = link
diff --git a/modules/markup/orgmode/orgmode.go b/modules/markup/orgmode/orgmode.go
index ac1cedff6d..7f253ae5f1 100644
--- a/modules/markup/orgmode/orgmode.go
+++ b/modules/markup/orgmode/orgmode.go
@@ -136,8 +136,7 @@ type Writer struct {
func (r *Writer) resolveLink(kind, link string) string {
link = strings.TrimPrefix(link, "file:")
if !strings.HasPrefix(link, "#") && // not a URL fragment
- !markup.IsLinkStr(link) && // not an absolute URL
- !strings.HasPrefix(link, "mailto:") {
+ !markup.IsFullURLString(link) {
if kind == "regular" {
// orgmode reports the link kind as "regular" for "[[ImageLink.svg][The Image Desc]]"
// so we need to try to guess the link kind again here