From 09663493543a2ce15fc90fbb3808dda5b87335e5 Mon Sep 17 00:00:00 2001 From: Lunny Xiao Date: Wed, 30 Jun 2021 15:23:49 +0800 Subject: [PATCH] Make the github migration less rate limit waiting to get comment per page from repository but not per issue (#16070) * Make the github migration less rate limit waiting to get comment per page from repository but not per issue * Fix lint * adjust Downloader interface * Fix missed reviews * Fix test * Remove unused struct --- modules/migrations/base/downloader.go | 10 ++- modules/migrations/base/null_downloader.go | 9 ++- modules/migrations/base/retry_downloader.go | 7 +- modules/migrations/gitea_downloader.go | 16 ++-- modules/migrations/gitea_downloader_test.go | 4 +- modules/migrations/github.go | 87 ++++++++++++++++++++- modules/migrations/github_test.go | 4 +- modules/migrations/gitlab.go | 7 +- modules/migrations/gitlab_test.go | 4 +- modules/migrations/gogs.go | 7 +- modules/migrations/gogs_test.go | 4 +- modules/migrations/migrate.go | 77 ++++++++++++------ modules/migrations/restore.go | 14 ++-- 13 files changed, 194 insertions(+), 56 deletions(-) diff --git a/modules/migrations/base/downloader.go b/modules/migrations/base/downloader.go index 919f4b52a0..2388b2dd6e 100644 --- a/modules/migrations/base/downloader.go +++ b/modules/migrations/base/downloader.go @@ -11,6 +11,13 @@ import ( "code.gitea.io/gitea/modules/structs" ) +// GetCommentOptions represents an options for get comment +type GetCommentOptions struct { + IssueNumber int64 + Page int + PageSize int +} + // Downloader downloads the site repo informations type Downloader interface { SetContext(context.Context) @@ -20,7 +27,8 @@ type Downloader interface { GetReleases() ([]*Release, error) GetLabels() ([]*Label, error) GetIssues(page, perPage int) ([]*Issue, bool, error) - GetComments(issueNumber int64) ([]*Comment, error) + GetComments(opts GetCommentOptions) ([]*Comment, bool, error) + SupportGetRepoComments() bool GetPullRequests(page, perPage int) ([]*PullRequest, bool, error) GetReviews(pullRequestNumber int64) ([]*Review, error) FormatCloneURL(opts MigrateOptions, remoteAddr string) (string, error) diff --git a/modules/migrations/base/null_downloader.go b/modules/migrations/base/null_downloader.go index a93c20339b..53a536709d 100644 --- a/modules/migrations/base/null_downloader.go +++ b/modules/migrations/base/null_downloader.go @@ -51,8 +51,8 @@ func (n NullDownloader) GetIssues(page, perPage int) ([]*Issue, bool, error) { } // GetComments returns comments according issueNumber -func (n NullDownloader) GetComments(issueNumber int64) ([]*Comment, error) { - return nil, &ErrNotSupported{Entity: "Comments"} +func (n NullDownloader) GetComments(GetCommentOptions) ([]*Comment, bool, error) { + return nil, false, &ErrNotSupported{Entity: "Comments"} } // GetPullRequests returns pull requests according page and perPage @@ -80,3 +80,8 @@ func (n NullDownloader) FormatCloneURL(opts MigrateOptions, remoteAddr string) ( } return remoteAddr, nil } + +// SupportGetRepoComments return true if it supports get repo comments +func (n NullDownloader) SupportGetRepoComments() bool { + return false +} diff --git a/modules/migrations/base/retry_downloader.go b/modules/migrations/base/retry_downloader.go index 82a038b98b..e6c80038f1 100644 --- a/modules/migrations/base/retry_downloader.go +++ b/modules/migrations/base/retry_downloader.go @@ -150,18 +150,19 @@ func (d *RetryDownloader) GetIssues(page, perPage int) ([]*Issue, bool, error) { } // GetComments returns a repository's comments with retry -func (d *RetryDownloader) GetComments(issueNumber int64) ([]*Comment, error) { +func (d *RetryDownloader) GetComments(opts GetCommentOptions) ([]*Comment, bool, error) { var ( comments []*Comment + isEnd bool err error ) err = d.retry(func() error { - comments, err = d.Downloader.GetComments(issueNumber) + comments, isEnd, err = d.Downloader.GetComments(opts) return err }) - return comments, err + return comments, isEnd, err } // GetPullRequests returns a repository's pull requests with retry diff --git a/modules/migrations/gitea_downloader.go b/modules/migrations/gitea_downloader.go index 40820ae375..665466ffef 100644 --- a/modules/migrations/gitea_downloader.go +++ b/modules/migrations/gitea_downloader.go @@ -435,37 +435,37 @@ func (g *GiteaDownloader) GetIssues(page, perPage int) ([]*base.Issue, bool, err } // GetComments returns comments according issueNumber -func (g *GiteaDownloader) GetComments(index int64) ([]*base.Comment, error) { +func (g *GiteaDownloader) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) { var allComments = make([]*base.Comment, 0, g.maxPerPage) // for i := 1; ; i++ { // make sure gitea can shutdown gracefully select { case <-g.ctx.Done(): - return nil, nil + return nil, false, nil default: } - comments, _, err := g.client.ListIssueComments(g.repoOwner, g.repoName, index, gitea_sdk.ListIssueCommentOptions{ListOptions: gitea_sdk.ListOptions{ + comments, _, err := g.client.ListIssueComments(g.repoOwner, g.repoName, opts.IssueNumber, gitea_sdk.ListIssueCommentOptions{ListOptions: gitea_sdk.ListOptions{ // PageSize: g.maxPerPage, // Page: i, }}) if err != nil { - return nil, fmt.Errorf("error while listing comments for issue #%d. Error: %v", index, err) + return nil, false, fmt.Errorf("error while listing comments for issue #%d. Error: %v", opts.IssueNumber, err) } for _, comment := range comments { reactions, err := g.getCommentReactions(comment.ID) if err != nil { - log.Warn("Unable to load comment reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", index, comment.ID, g.repoOwner, g.repoName, err) + log.Warn("Unable to load comment reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", opts.IssueNumber, comment.ID, g.repoOwner, g.repoName, err) if err2 := models.CreateRepositoryNotice( - fmt.Sprintf("Unable to load reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", index, comment.ID, g.repoOwner, g.repoName, err)); err2 != nil { + fmt.Sprintf("Unable to load reactions during migrating issue #%d for comment %d to %s/%s. Error: %v", opts.IssueNumber, comment.ID, g.repoOwner, g.repoName, err)); err2 != nil { log.Error("create repository notice failed: ", err2) } } allComments = append(allComments, &base.Comment{ - IssueIndex: index, + IssueIndex: opts.IssueNumber, PosterID: comment.Poster.ID, PosterName: comment.Poster.UserName, PosterEmail: comment.Poster.Email, @@ -481,7 +481,7 @@ func (g *GiteaDownloader) GetComments(index int64) ([]*base.Comment, error) { // break // } //} - return allComments, nil + return allComments, true, nil } // GetPullRequests returns pull requests according page and perPage diff --git a/modules/migrations/gitea_downloader_test.go b/modules/migrations/gitea_downloader_test.go index babf038280..f62b19897c 100644 --- a/modules/migrations/gitea_downloader_test.go +++ b/modules/migrations/gitea_downloader_test.go @@ -224,7 +224,9 @@ func TestGiteaDownloadRepo(t *testing.T) { Closed: &closed2, }, issues[1]) - comments, err := downloader.GetComments(4) + comments, _, err := downloader.GetComments(base.GetCommentOptions{ + IssueNumber: 4, + }) assert.NoError(t, err) assert.Len(t, comments, 2) assert.EqualValues(t, 1598975370, comments[0].Created.Unix()) diff --git a/modules/migrations/github.go b/modules/migrations/github.go index 8a3f5d34c7..9b897662d0 100644 --- a/modules/migrations/github.go +++ b/modules/migrations/github.go @@ -11,6 +11,7 @@ import ( "io" "net/http" "net/url" + "strconv" "strings" "time" @@ -450,8 +451,22 @@ func (g *GithubDownloaderV3) GetIssues(page, perPage int) ([]*base.Issue, bool, return allIssues, len(issues) < perPage, nil } +// SupportGetRepoComments return true if it supports get repo comments +func (g *GithubDownloaderV3) SupportGetRepoComments() bool { + return true +} + // GetComments returns comments according issueNumber -func (g *GithubDownloaderV3) GetComments(issueNumber int64) ([]*base.Comment, error) { +func (g *GithubDownloaderV3) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) { + if opts.IssueNumber > 0 { + comments, err := g.getComments(opts.IssueNumber) + return comments, false, err + } + + return g.GetAllComments(opts.Page, opts.PageSize) +} + +func (g *GithubDownloaderV3) getComments(issueNumber int64) ([]*base.Comment, error) { var ( allComments = make([]*base.Comment, 0, g.maxPerPage) created = "created" @@ -519,6 +534,75 @@ func (g *GithubDownloaderV3) GetComments(issueNumber int64) ([]*base.Comment, er return allComments, nil } +// GetAllComments returns repository comments according page and perPageSize +func (g *GithubDownloaderV3) GetAllComments(page, perPage int) ([]*base.Comment, bool, error) { + var ( + allComments = make([]*base.Comment, 0, perPage) + created = "created" + asc = "asc" + ) + opt := &github.IssueListCommentsOptions{ + Sort: &created, + Direction: &asc, + ListOptions: github.ListOptions{ + Page: page, + PerPage: perPage, + }, + } + + g.sleep() + comments, resp, err := g.client.Issues.ListComments(g.ctx, g.repoOwner, g.repoName, 0, opt) + if err != nil { + return nil, false, fmt.Errorf("error while listing repos: %v", err) + } + log.Trace("Request get comments %d/%d, but in fact get %d", perPage, page, len(comments)) + g.rate = &resp.Rate + for _, comment := range comments { + var email string + if comment.User.Email != nil { + email = *comment.User.Email + } + + // get reactions + var reactions []*base.Reaction + for i := 1; ; i++ { + g.sleep() + res, resp, err := g.client.Reactions.ListIssueCommentReactions(g.ctx, g.repoOwner, g.repoName, comment.GetID(), &github.ListOptions{ + Page: i, + PerPage: g.maxPerPage, + }) + if err != nil { + return nil, false, err + } + g.rate = &resp.Rate + if len(res) == 0 { + break + } + for _, reaction := range res { + reactions = append(reactions, &base.Reaction{ + UserID: reaction.User.GetID(), + UserName: reaction.User.GetLogin(), + Content: reaction.GetContent(), + }) + } + } + idx := strings.LastIndex(*comment.IssueURL, "/") + issueIndex, _ := strconv.ParseInt((*comment.IssueURL)[idx+1:], 10, 64) + allComments = append(allComments, &base.Comment{ + IssueIndex: issueIndex, + PosterID: *comment.User.ID, + PosterName: *comment.User.Login, + PosterEmail: email, + Content: *comment.Body, + Created: *comment.CreatedAt, + Updated: *comment.UpdatedAt, + Reactions: reactions, + }) + } + + return allComments, len(allComments) < perPage, nil +} + // GetPullRequests returns pull requests according page and perPage func (g *GithubDownloaderV3) GetPullRequests(page, perPage int) ([]*base.PullRequest, bool, error) { if perPage > g.maxPerPage { @@ -539,6 +623,7 @@ func (g *GithubDownloaderV3) GetPullRequests(page, perPage int) ([]*base.PullReq if err != nil { return nil, false, fmt.Errorf("error while listing repos: %v", err) } + log.Trace("Request get pull requests %d/%d, but in fact get %d", perPage, page, len(prs)) g.rate = &resp.Rate for _, pr := range prs { var body string diff --git a/modules/migrations/github_test.go b/modules/migrations/github_test.go index 5bd980a9d8..e0ee2fea84 100644 --- a/modules/migrations/github_test.go +++ b/modules/migrations/github_test.go @@ -240,7 +240,9 @@ func TestGitHubDownloadRepo(t *testing.T) { }, issues) // downloader.GetComments() - comments, err := downloader.GetComments(2) + comments, _, err := downloader.GetComments(base.GetCommentOptions{ + IssueNumber: 2, + }) assert.NoError(t, err) assert.Len(t, comments, 2) assert.EqualValues(t, []*base.Comment{ diff --git a/modules/migrations/gitlab.go b/modules/migrations/gitlab.go index a697075ff8..c83989f771 100644 --- a/modules/migrations/gitlab.go +++ b/modules/migrations/gitlab.go @@ -430,7 +430,8 @@ func (g *GitlabDownloader) GetIssues(page, perPage int) ([]*base.Issue, bool, er // GetComments returns comments according issueNumber // TODO: figure out how to transfer comment reactions -func (g *GitlabDownloader) GetComments(issueNumber int64) ([]*base.Comment, error) { +func (g *GitlabDownloader) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) { + var issueNumber = opts.IssueNumber var allComments = make([]*base.Comment, 0, g.maxPerPage) var page = 1 @@ -457,7 +458,7 @@ func (g *GitlabDownloader) GetComments(issueNumber int64) ([]*base.Comment, erro } if err != nil { - return nil, fmt.Errorf("error while listing comments: %v %v", g.repoID, err) + return nil, false, fmt.Errorf("error while listing comments: %v %v", g.repoID, err) } for _, comment := range comments { // Flatten comment threads @@ -490,7 +491,7 @@ func (g *GitlabDownloader) GetComments(issueNumber int64) ([]*base.Comment, erro } page = resp.NextPage } - return allComments, nil + return allComments, true, nil } // GetPullRequests returns pull requests according page and perPage diff --git a/modules/migrations/gitlab_test.go b/modules/migrations/gitlab_test.go index 32ed6d807a..6a77ff3c23 100644 --- a/modules/migrations/gitlab_test.go +++ b/modules/migrations/gitlab_test.go @@ -204,7 +204,9 @@ func TestGitlabDownloadRepo(t *testing.T) { }, }, issues) - comments, err := downloader.GetComments(2) + comments, _, err := downloader.GetComments(base.GetCommentOptions{ + IssueNumber: 2, + }) assert.NoError(t, err) assert.Len(t, comments, 4) assert.EqualValues(t, []*base.Comment{ diff --git a/modules/migrations/gogs.go b/modules/migrations/gogs.go index b616907938..d689b0da11 100644 --- a/modules/migrations/gogs.go +++ b/modules/migrations/gogs.go @@ -227,12 +227,13 @@ func (g *GogsDownloader) getIssues(page int, state string) ([]*base.Issue, bool, } // GetComments returns comments according issueNumber -func (g *GogsDownloader) GetComments(issueNumber int64) ([]*base.Comment, error) { +func (g *GogsDownloader) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) { + var issueNumber = opts.IssueNumber var allComments = make([]*base.Comment, 0, 100) comments, err := g.client.ListIssueComments(g.repoOwner, g.repoName, issueNumber) if err != nil { - return nil, fmt.Errorf("error while listing repos: %v", err) + return nil, false, fmt.Errorf("error while listing repos: %v", err) } for _, comment := range comments { if len(comment.Body) == 0 || comment.Poster == nil { @@ -249,7 +250,7 @@ func (g *GogsDownloader) GetComments(issueNumber int64) ([]*base.Comment, error) }) } - return allComments, nil + return allComments, true, nil } // GetTopics return repository topics diff --git a/modules/migrations/gogs_test.go b/modules/migrations/gogs_test.go index d173952b06..4e384036d7 100644 --- a/modules/migrations/gogs_test.go +++ b/modules/migrations/gogs_test.go @@ -103,7 +103,9 @@ func TestGogsDownloadRepo(t *testing.T) { }, issues) // downloader.GetComments() - comments, err := downloader.GetComments(1) + comments, _, err := downloader.GetComments(base.GetCommentOptions{ + IssueNumber: 1, + }) assert.NoError(t, err) assert.Len(t, comments, 1) assert.EqualValues(t, []*base.Comment{ diff --git a/modules/migrations/migrate.go b/modules/migrations/migrate.go index 3cdf68ab62..0a507d9c33 100644 --- a/modules/migrations/migrate.go +++ b/modules/migrations/migrate.go @@ -292,6 +292,8 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts reviewBatchSize = uploader.MaxBatchInsertSize("review") ) + supportAllComments := downloader.SupportGetRepoComments() + if opts.Issues { log.Trace("migrating issues and comments") messenger("repo.migrate.migrating_issues") @@ -311,11 +313,13 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts return err } - if opts.Comments { + if opts.Comments && !supportAllComments { var allComments = make([]*base.Comment, 0, commentBatchSize) for _, issue := range issues { log.Trace("migrating issue %d's comments", issue.Number) - comments, err := downloader.GetComments(issue.Number) + comments, _, err := downloader.GetComments(base.GetCommentOptions{ + IssueNumber: issue.Number, + }) if err != nil { if !base.IsErrNotSupported(err) { return err @@ -366,30 +370,34 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts } if opts.Comments { - // plain comments - var allComments = make([]*base.Comment, 0, commentBatchSize) - for _, pr := range prs { - log.Trace("migrating pull request %d's comments", pr.Number) - comments, err := downloader.GetComments(pr.Number) - if err != nil { - if !base.IsErrNotSupported(err) { + if !supportAllComments { + // plain comments + var allComments = make([]*base.Comment, 0, commentBatchSize) + for _, pr := range prs { + log.Trace("migrating pull request %d's comments", pr.Number) + comments, _, err := downloader.GetComments(base.GetCommentOptions{ + IssueNumber: pr.Number, + }) + if err != nil { + if !base.IsErrNotSupported(err) { + return err + } + log.Warn("migrating comments is not supported, ignored") + } + + allComments = append(allComments, comments...) + + if len(allComments) >= commentBatchSize { + if err = uploader.CreateComments(allComments[:commentBatchSize]...); err != nil { + return err + } + allComments = allComments[commentBatchSize:] + } + } + if len(allComments) > 0 { + if err = uploader.CreateComments(allComments...); err != nil { return err } - log.Warn("migrating comments is not supported, ignored") - } - - allComments = append(allComments, comments...) - - if len(allComments) >= commentBatchSize { - if err = uploader.CreateComments(allComments[:commentBatchSize]...); err != nil { - return err - } - allComments = allComments[commentBatchSize:] - } - } - if len(allComments) > 0 { - if err = uploader.CreateComments(allComments...); err != nil { - return err } } @@ -439,6 +447,27 @@ func migrateRepository(downloader base.Downloader, uploader base.Uploader, opts } } + if opts.Comments && supportAllComments { + log.Trace("migrating comments") + for i := 1; ; i++ { + comments, isEnd, err := downloader.GetComments(base.GetCommentOptions{ + Page: i, + PageSize: commentBatchSize, + }) + if err != nil { + return err + } + + if err := uploader.CreateComments(comments...); err != nil { + return err + } + + if isEnd { + break + } + } + } + return uploader.Finish() } diff --git a/modules/migrations/restore.go b/modules/migrations/restore.go index 5b44811d52..6177f80cbb 100644 --- a/modules/migrations/restore.go +++ b/modules/migrations/restore.go @@ -212,27 +212,27 @@ func (r *RepositoryRestorer) GetIssues(page, perPage int) ([]*base.Issue, bool, } // GetComments returns comments according issueNumber -func (r *RepositoryRestorer) GetComments(issueNumber int64) ([]*base.Comment, error) { +func (r *RepositoryRestorer) GetComments(opts base.GetCommentOptions) ([]*base.Comment, bool, error) { var comments = make([]*base.Comment, 0, 10) - p := filepath.Join(r.commentDir(), fmt.Sprintf("%d.yml", issueNumber)) + p := filepath.Join(r.commentDir(), fmt.Sprintf("%d.yml", opts.IssueNumber)) _, err := os.Stat(p) if err != nil { if os.IsNotExist(err) { - return nil, nil + return nil, false, nil } - return nil, err + return nil, false, err } bs, err := ioutil.ReadFile(p) if err != nil { - return nil, err + return nil, false, err } err = yaml.Unmarshal(bs, &comments) if err != nil { - return nil, err + return nil, false, err } - return comments, nil + return comments, false, nil } // GetPullRequests returns pull requests according page and perPage