Fix a panic in NotifyCreateIssueComment (caused by string truncation) (#17928)

* Fix a panic in NotifyCreateIssueComment (caused by string truncation)

* more unit tests

* refactor

* fix some edge cases

* use SplitStringAtByteN for comment content
This commit is contained in:
wxiaoguang 2021-12-09 13:41:17 +08:00 committed by GitHub
parent 183175263d
commit c7e23401a3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 104 additions and 17 deletions

View File

@ -6,7 +6,6 @@ package models
import ( import (
"code.gitea.io/gitea/models/db" "code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo" repo_model "code.gitea.io/gitea/models/repo"
) )

View File

@ -15,6 +15,7 @@ import (
"code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/notification/base" "code.gitea.io/gitea/modules/notification/base"
"code.gitea.io/gitea/modules/repository" "code.gitea.io/gitea/modules/repository"
"code.gitea.io/gitea/modules/util"
) )
type actionNotifier struct { type actionNotifier struct {
@ -100,14 +101,15 @@ func (a *actionNotifier) NotifyCreateIssueComment(doer *user_model.User, repo *m
IsPrivate: issue.Repo.IsPrivate, IsPrivate: issue.Repo.IsPrivate,
} }
content := "" truncatedContent, truncatedRight := util.SplitStringAtByteN(comment.Content, 200)
if truncatedRight != "" {
if len(comment.Content) > 200 { // in case the content is in a Latin family language, we remove the last broken word.
content = comment.Content[:strings.LastIndex(comment.Content[0:200], " ")] + "…" lastSpaceIdx := strings.LastIndex(truncatedContent, " ")
} else { if lastSpaceIdx != -1 && (len(truncatedContent)-lastSpaceIdx < 15) {
content = comment.Content truncatedContent = truncatedContent[:lastSpaceIdx] + "…"
}
} }
act.Content = fmt.Sprintf("%d|%s", issue.Index, content) act.Content = fmt.Sprintf("%d|%s", issue.Index, truncatedContent)
if issue.IsPull { if issue.IsPull {
act.OpType = models.ActionCommentPull act.OpType = models.ActionCommentPull

View File

@ -6,20 +6,23 @@ package util
import "unicode/utf8" import "unicode/utf8"
// in UTF8 "…" is 3 bytes so doesn't really gain us anything...
const utf8Ellipsis = "…"
const asciiEllipsis = "..."
// SplitStringAtByteN splits a string at byte n accounting for rune boundaries. (Combining characters are not accounted for.) // SplitStringAtByteN splits a string at byte n accounting for rune boundaries. (Combining characters are not accounted for.)
func SplitStringAtByteN(input string, n int) (left, right string) { func SplitStringAtByteN(input string, n int) (left, right string) {
if len(input) <= n { if len(input) <= n {
left = input return input, ""
return
} }
if !utf8.ValidString(input) { if !utf8.ValidString(input) {
left = input[:n-3] + "..." if n-3 < 0 {
right = "..." + input[n-3:] return input, ""
return }
return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:]
} }
// in UTF8 "…" is 3 bytes so doesn't really gain us anything...
end := 0 end := 0
for end <= n-3 { for end <= n-3 {
_, size := utf8.DecodeRuneInString(input[end:]) _, size := utf8.DecodeRuneInString(input[end:])
@ -29,7 +32,29 @@ func SplitStringAtByteN(input string, n int) (left, right string) {
end += size end += size
} }
left = input[:end] + "…" return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:]
right = "…" + input[end:] }
return
// SplitStringAtRuneN splits a string at rune n accounting for rune boundaries. (Combining characters are not accounted for.)
func SplitStringAtRuneN(input string, n int) (left, right string) {
if !utf8.ValidString(input) {
if len(input) <= n || n-3 < 0 {
return input, ""
}
return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:]
}
if utf8.RuneCountInString(input) <= n {
return input, ""
}
count := 0
end := 0
for count < n-1 {
_, size := utf8.DecodeRuneInString(input[end:])
end += size
count++
}
return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:]
} }

View File

@ -0,0 +1,61 @@
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package util
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestSplitString(t *testing.T) {
type testCase struct {
input string
n int
leftSub string
ellipsis string
}
test := func(tc []*testCase, f func(input string, n int) (left, right string)) {
for _, c := range tc {
l, r := f(c.input, c.n)
if c.ellipsis != "" {
assert.Equal(t, c.leftSub+c.ellipsis, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub)
assert.Equal(t, c.ellipsis+c.input[len(c.leftSub):], r, "test split %s at %d, expected rightSub: %q", c.input, c.n, c.input[len(c.leftSub):])
} else {
assert.Equal(t, c.leftSub, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub)
assert.Equal(t, "", r, "test split %q at %d, expected rightSub: %q", c.input, c.n, "")
}
}
}
tc := []*testCase{
{"abc123xyz", 0, "", utf8Ellipsis},
{"abc123xyz", 1, "", utf8Ellipsis},
{"abc123xyz", 4, "a", utf8Ellipsis},
{"啊bc123xyz", 4, "", utf8Ellipsis},
{"啊bc123xyz", 6, "啊", utf8Ellipsis},
{"啊bc", 5, "啊bc", ""},
{"啊bc", 6, "啊bc", ""},
{"abc\xef\x03\xfe", 3, "", asciiEllipsis},
{"abc\xef\x03\xfe", 4, "a", asciiEllipsis},
{"\xef\x03", 1, "\xef\x03", ""},
}
test(tc, SplitStringAtByteN)
tc = []*testCase{
{"abc123xyz", 0, "", utf8Ellipsis},
{"abc123xyz", 1, "", utf8Ellipsis},
{"abc123xyz", 4, "abc", utf8Ellipsis},
{"啊bc123xyz", 4, "啊bc", utf8Ellipsis},
{"啊bc123xyz", 6, "啊bc12", utf8Ellipsis},
{"啊bc", 3, "啊bc", ""},
{"啊bc", 4, "啊bc", ""},
{"abc\xef\x03\xfe", 3, "", asciiEllipsis},
{"abc\xef\x03\xfe", 4, "a", asciiEllipsis},
{"\xef\x03", 1, "\xef\x03", ""},
}
test(tc, SplitStringAtRuneN)
}