diff --git a/dtmsvr/api_http.go b/dtmsvr/api_http.go index 6027100..2d8dea0 100644 --- a/dtmsvr/api_http.go +++ b/dtmsvr/api_http.go @@ -8,6 +8,8 @@ package dtmsvr import ( "errors" + "strconv" + "time" "github.com/dtm-labs/dtm/dtmcli" "github.com/dtm-labs/dtm/dtmcli/dtmimp" @@ -26,6 +28,7 @@ func addRoute(engine *gin.Engine) { engine.POST("/api/dtmsvr/registerTccBranch", dtmutil.WrapHandler2(registerBranch)) // compatible for old sdk engine.GET("/api/dtmsvr/query", dtmutil.WrapHandler2(query)) engine.GET("/api/dtmsvr/all", dtmutil.WrapHandler2(all)) + engine.GET("/api/dtmsvr/resetCronTime", dtmutil.WrapHandler2(resetCronTime)) // add prometheus exporter h := promhttp.Handler() @@ -75,7 +78,21 @@ func query(c *gin.Context) interface{} { func all(c *gin.Context) interface{} { position := c.Query("position") - slimit := dtmimp.OrString(c.Query("limit"), "100") - globals := GetStore().ScanTransGlobalStores(&position, int64(dtmimp.MustAtoi(slimit))) + sLimit := dtmimp.OrString(c.Query("limit"), "100") + globals := GetStore().ScanTransGlobalStores(&position, int64(dtmimp.MustAtoi(sLimit))) return map[string]interface{}{"transactions": globals, "next_position": position} } + +// resetCronTime resets nextCronTime +// Prevent multiple backoffs from causing NextCronTime to be too long +func resetCronTime(c *gin.Context) interface{} { + sTimeoutSecond := dtmimp.OrString(c.Query("timeout"), strconv.FormatInt(3*conf.TimeoutToFail, 10)) + sLimit := dtmimp.OrString(c.Query("limit"), "100") + timeout := time.Duration(dtmimp.MustAtoi(sTimeoutSecond)) * time.Second + + succeedCount, hasRemaining, err := GetStore().ResetCronTime(timeout, int64(dtmimp.MustAtoi(sLimit))) + if err != nil { + return err + } + return map[string]interface{}{"has_remaining": hasRemaining, "succeed_count": succeedCount} +} diff --git a/dtmsvr/storage/boltdb/boltdb.go b/dtmsvr/storage/boltdb/boltdb.go index 278e898..21abfb1 100644 --- a/dtmsvr/storage/boltdb/boltdb.go +++ b/dtmsvr/storage/boltdb/boltdb.go @@ 
-11,13 +11,12 @@ import ( "strings" "time" - bolt "go.etcd.io/bbolt" - "github.com/dtm-labs/dtm/dtmcli" "github.com/dtm-labs/dtm/dtmcli/dtmimp" "github.com/dtm-labs/dtm/dtmcli/logger" "github.com/dtm-labs/dtm/dtmsvr/storage" "github.com/dtm-labs/dtm/dtmutil" + bolt "go.etcd.io/bbolt" ) // Store implements storage.Store, and storage with boltdb @@ -409,3 +408,36 @@ func (s *Store) LockOneGlobalTrans(expireIn time.Duration) *storage.TransGlobalS dtmimp.E2P(err) return trans } + +// ResetCronTime resets nextCronTime +// Prevent multiple backoffs from causing NextCronTime to be too long +func (s *Store) ResetCronTime(timeout time.Duration, limit int64) (succeedCount int64, hasRemaining bool, err error) { + next := time.Now() + var trans *storage.TransGlobalStore + min := fmt.Sprintf("%d", time.Now().Add(timeout).Unix()) + err = s.boltDb.Update(func(t *bolt.Tx) error { + cursor := t.Bucket(bucketIndex).Cursor() + succeedCount = 0 + for k, v := cursor.Seek([]byte(min)); k != nil && succeedCount <= limit; k, v = cursor.Next() { + if succeedCount == limit { + hasRemaining = true + break + } + + trans = tGetGlobal(t, string(v)) + err := t.Bucket(bucketIndex).Delete(k) + dtmimp.E2P(err) + + if trans.Status == dtmcli.StatusSucceed || trans.Status == dtmcli.StatusFailed { + continue + } + + trans.NextCronTime = &next + tPutGlobal(t, trans) + tPutIndex(t, next.Unix(), trans.Gid) + succeedCount++ + } + return nil + }) + return +} diff --git a/dtmsvr/storage/redis/redis.go b/dtmsvr/storage/redis/redis.go index 0f7e965..0dcf992 100644 --- a/dtmsvr/storage/redis/redis.go +++ b/dtmsvr/storage/redis/redis.go @@ -260,6 +260,38 @@ return gid } } +// ResetCronTime resets nextCronTime +// Prevent multiple backoffs from causing NextCronTime to be too long +func (s *Store) ResetCronTime(timeout time.Duration, limit int64) (succeedCount int64, hasRemaining bool, err error) { + next := time.Now().Unix() + timeoutTimestamp := time.Now().Add(timeout).Unix() + args := 
newArgList().AppendGid("").AppendRaw(timeoutTimestamp).AppendRaw(next).AppendRaw(limit) + lua := `-- ResetCronTime +local r = redis.call('ZRANGEBYSCORE', KEYS[3], ARGV[3], '+inf', 'LIMIT', 0, ARGV[5]+1) +local i = 0 +for score,gid in pairs(r) do + if i == tonumber(ARGV[5]) then + i = i + 1 + break + end + redis.call('ZADD', KEYS[3], ARGV[4], gid) + i = i + 1 +end +return tostring(i) +` + r := "" + r, err = callLua(args, lua) + if err != nil { + return + } + succeedCount = int64(dtmimp.MustAtoi(r)) + if succeedCount > limit { + hasRemaining = true + succeedCount = limit + } + return +} + // TouchCronTime updates cronTime func (s *Store) TouchCronTime(global *storage.TransGlobalStore, nextCronInterval int64, nextCronTime *time.Time) { global.UpdateTime = dtmutil.GetNextTime(0) diff --git a/dtmsvr/storage/sql/sql.go b/dtmsvr/storage/sql/sql.go index 6c90822..566d2f8 100644 --- a/dtmsvr/storage/sql/sql.go +++ b/dtmsvr/storage/sql/sql.go @@ -5,14 +5,13 @@ import ( "math" "time" - "github.com/lithammer/shortuuid/v3" - "gorm.io/gorm" - "gorm.io/gorm/clause" - "github.com/dtm-labs/dtm/dtmcli/dtmimp" "github.com/dtm-labs/dtm/dtmsvr/config" "github.com/dtm-labs/dtm/dtmsvr/storage" "github.com/dtm-labs/dtm/dtmutil" + "github.com/lithammer/shortuuid/v3" + "gorm.io/gorm" + "gorm.io/gorm/clause" ) var conf = &config.Config @@ -157,6 +156,38 @@ func (s *Store) LockOneGlobalTrans(expireIn time.Duration) *storage.TransGlobalS return global } +// ResetCronTime resets nextCronTime +// Prevent multiple backoffs from causing NextCronTime to be too long +func (s *Store) ResetCronTime(timeout time.Duration, limit int64) (succeedCount int64, hasRemaining bool, err error) { + db := dbGet() + getTime := func(second int) string { + return map[string]string{ + "mysql": fmt.Sprintf("date_add(now(), interval %d second)", second), + "postgres": fmt.Sprintf("current_timestamp + interval '%d second'", second), + }[conf.Store.Driver] + } + timeoutSecond := int(timeout / time.Second) + whereTime := 
fmt.Sprintf("next_cron_time > %s", getTime(timeoutSecond)) + global := &storage.TransGlobalStore{} + dbr := db.Must().Model(global). + Where(whereTime + "and status in ('prepared', 'aborting', 'submitted')"). + Limit(int(limit)). + Select([]string{"next_cron_time"}). + Updates(&storage.TransGlobalStore{ + NextCronTime: dtmutil.GetNextTime(0), + }) + succeedCount = dbr.RowsAffected + if succeedCount == limit { + var count int64 + db.Must().Model(global).Where(whereTime + "and status in ('prepared', 'aborting', 'submitted')").Limit(1).Count(&count) + if count > 0 { + hasRemaining = true + } + } + + return succeedCount, hasRemaining, dbr.Error +} + // SetDBConn sets db conn pool func SetDBConn(db *gorm.DB) { sqldb, _ := db.DB() diff --git a/dtmsvr/storage/store.go b/dtmsvr/storage/store.go index a03a912..391c4dd 100644 --- a/dtmsvr/storage/store.go +++ b/dtmsvr/storage/store.go @@ -30,4 +30,5 @@ type Store interface { ChangeGlobalStatus(global *TransGlobalStore, newStatus string, updates []string, finished bool) TouchCronTime(global *TransGlobalStore, nextCronInterval int64, nextCronTime *time.Time) LockOneGlobalTrans(expireIn time.Duration) *TransGlobalStore + ResetCronTime(timeout time.Duration, limit int64) (succeedCount int64, hasRemaining bool, err error) } diff --git a/test/api_test.go b/test/api_test.go index c864101..54e869d 100644 --- a/test/api_test.go +++ b/test/api_test.go @@ -8,6 +8,7 @@ package test import ( "fmt" + "strconv" "testing" "github.com/dtm-labs/dtm/dtmcli/dtmimp" @@ -79,3 +80,23 @@ func TestDtmMetrics(t *testing.T) { assert.Nil(t, err) assert.Equal(t, rest.StatusCode(), 200) } + +func TestAPIResetCronTime(t *testing.T) { + testStoreResetCronTime(t, dtmimp.GetFuncName(), func(timeout int64, limit int64) (int64, bool, error) { + sTimeout := strconv.FormatInt(timeout, 10) + sLimit := strconv.FormatInt(limit, 10) + + resp, err := dtmimp.RestyClient.R().SetQueryParams(map[string]string{ + "timeout": sTimeout, + "limit": sLimit, + 
}).Get(dtmutil.DefaultHTTPServer + "/resetCronTime") + + m := map[string]interface{}{} + dtmimp.MustUnmarshalString(resp.String(), &m) + hasRemaining, ok := m["has_remaining"].(bool) + assert.Equal(t, ok, true) + succeedCount, ok := m["succeed_count"].(float64) + assert.Equal(t, ok, true) + return int64(succeedCount), hasRemaining, err + }) +} diff --git a/test/store_test.go b/test/store_test.go index 46869e0..b9bb31e 100644 --- a/test/store_test.go +++ b/test/store_test.go @@ -1,6 +1,7 @@ package test import ( + "fmt" "testing" "time" @@ -13,6 +14,10 @@ import ( func initTransGlobal(gid string) (*storage.TransGlobalStore, storage.Store) { next := time.Now().Add(10 * time.Second) + return initTransGlobalByNextCronTime(gid, next) +} + +func initTransGlobalByNextCronTime(gid string, next time.Time) (*storage.TransGlobalStore, storage.Store) { g := &storage.TransGlobalStore{Gid: gid, Status: "prepared", NextCronTime: &next} bs := []storage.TransBranchStore{ {Gid: gid, BranchID: "01"}, @@ -88,6 +93,85 @@ func TestStoreLockTrans(t *testing.T) { assert.Nil(t, g2) } +func TestStoreResetCronTime(t *testing.T) { + s := registry.GetStore() + testStoreResetCronTime(t, dtmimp.GetFuncName(), func(timeout int64, limit int64) (int64, bool, error) { + return s.ResetCronTime(time.Duration(timeout)*time.Second, limit) + }) +} + +func testStoreResetCronTime(t *testing.T, funcName string, restCronHandler func(expire int64, limit int64) (int64, bool, error)) { + s := registry.GetStore() + var restTimeTimeout, lockExpireIn, limit, i int64 + restTimeTimeout = 100 // timeout in seconds passed to ResetCronTime + lockExpireIn = 2 // expiry in seconds passed to LockOneGlobalTrans + limit = 10 // reset limit + + // Will be reset + for i = 0; i < limit; i++ { + gid := funcName + fmt.Sprintf("%d", i) + _, _ = initTransGlobalByNextCronTime(gid, time.Now().Add(time.Duration(restTimeTimeout+10)*time.Second)) + } + + // Will not be reset + gid := funcName + fmt.Sprintf("%d", 10) + _, _ = 
initTransGlobalByNextCronTime(gid, time.Now().Add(time.Duration(restTimeTimeout-10)*time.Second)) + + // Not Found + g := s.LockOneGlobalTrans(time.Duration(lockExpireIn) * time.Second) + assert.Nil(t, g) + + // Reset limit-1 count + succeedCount, hasRemaining, err := restCronHandler(restTimeTimeout, limit-1) + assert.Equal(t, hasRemaining, true) + assert.Equal(t, succeedCount, limit-1) + assert.Nil(t, err) + // Found limit-1 count + for i = 0; i < limit-1; i++ { + g = s.LockOneGlobalTrans(time.Duration(lockExpireIn) * time.Second) + assert.NotNil(t, g) + s.ChangeGlobalStatus(g, "succeed", []string{}, true) + } + + // Not Found + g = s.LockOneGlobalTrans(time.Duration(lockExpireIn) * time.Second) + assert.Nil(t, g) + + // Reset 1 count + succeedCount, hasRemaining, err = restCronHandler(restTimeTimeout, limit) + assert.Equal(t, hasRemaining, false) + assert.Equal(t, succeedCount, int64(1)) + assert.Nil(t, err) + // Found 1 count + g = s.LockOneGlobalTrans(time.Duration(lockExpireIn) * time.Second) + assert.NotNil(t, g) + s.ChangeGlobalStatus(g, "succeed", []string{}, true) + + // Not Found + g = s.LockOneGlobalTrans(time.Duration(lockExpireIn) * time.Second) + assert.Nil(t, g) + + // reduce the restTimeTimeout, Reset 1 count + succeedCount, hasRemaining, err = restCronHandler(restTimeTimeout-12, limit) + assert.Equal(t, hasRemaining, false) + assert.Equal(t, succeedCount, int64(1)) + assert.Nil(t, err) + // Found 1 count + g = s.LockOneGlobalTrans(time.Duration(lockExpireIn) * time.Second) + assert.NotNil(t, g) + s.ChangeGlobalStatus(g, "succeed", []string{}, true) + + // Not Found + g = s.LockOneGlobalTrans(time.Duration(lockExpireIn) * time.Second) + assert.Nil(t, g) + + // Not Found + succeedCount, hasRemaining, err = restCronHandler(restTimeTimeout-12, limit) + assert.Equal(t, hasRemaining, false) + assert.Equal(t, succeedCount, int64(0)) + assert.Nil(t, err) +} + func TestUpdateBranches(t *testing.T) { if !conf.Store.IsDB() { _, err := 
registry.GetStore().UpdateBranches(nil, nil)