Skip to content

Instantly share code, notes, and snippets.

@morgo
Created September 1, 2022 15:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save morgo/8915fee322fd43ec719860340d247465 to your computer and use it in GitHub Desktop.
Save morgo/8915fee322fd43ec719860340d247465 to your computer and use it in GitHub Desktop.
pd fast region transfer (doesn't work yet)
diff --git a/server/cluster/cluster.go b/server/cluster/cluster.go
index 3897f926..efbe7bf0 100644
--- a/server/cluster/cluster.go
+++ b/server/cluster/cluster.go
@@ -49,6 +49,7 @@ import (
"github.com/tikv/pd/server/schedule/checker"
"github.com/tikv/pd/server/schedule/hbstream"
"github.com/tikv/pd/server/schedule/labeler"
+ "github.com/tikv/pd/server/schedule/operator"
"github.com/tikv/pd/server/schedule/placement"
"github.com/tikv/pd/server/schedulers"
"github.com/tikv/pd/server/statistics"
@@ -817,6 +818,9 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error {
c.Unlock()
return err
}
+ if c.core.GetStore(region.GetLeader().GetStoreId()).GetLeaderWeight() == 0 {
+ c.moveRegionFromZeroLeaderStore(region)
+ }
overlaps = c.core.PutRegion(region)
for _, item := range overlaps {
if c.regionStats != nil {
@@ -886,6 +890,38 @@ func (c *RaftCluster) processRegionHeartbeat(region *core.RegionInfo) error {
return nil
}
+// moveRegionFromZeroLeaderStore transfers leadership of region away from a store whose leader
+// weight is zero, to the first peer store that has a non-zero leader weight and is connected.
+// It runs at heartbeat time instead of in the leader balance scheduler because it should take
+// effect immediately. A common scenario is when placement rules are used and a leader fails:
+// the region elects a new leader through raft, but that election does not consider leader
+// weight, so a store that was only intended for quorum could end up holding leaders.
+// If no eligible peer store exists, or the operator cannot be created, nothing is scheduled.
+func (c *RaftCluster) moveRegionFromZeroLeaderStore(region *core.RegionInfo) {
+	leaderStoreID := region.GetLeader().GetStoreId()
+	var newStoreID uint64
+	for _, p := range region.GetPeers() {
+		store := c.core.GetStore(p.GetStoreId())
+		// NOTE(review): assumes GetStore returns nil for an unknown store ID -- confirm.
+		if store == nil || store.GetLeaderWeight() == 0 || store.IsDisconnected() {
+			continue
+		}
+		newStoreID = p.GetStoreId()
+		break
+	}
+	if newStoreID == 0 {
+		// No peer store is eligible to take over leadership.
+		return
+	}
+	log.Warn("moving region from zero leader store",
+		zap.Uint64("region-id", region.GetID()),
+		zap.Uint64("store-id", leaderStoreID),
+		zap.Uint64("new-store-id", newStoreID))
+	op, err := operator.CreateTransferLeaderOperator("transfer-leader-zero-weight-store", c, region, leaderStoreID, newStoreID, []uint64{}, operator.OpAdmin)
+	if err != nil {
+		// The operator is not usable when creation failed; do not hand it to the controller.
+		log.Debug("failed to create transfer leader operator", errs.ZapError(err))
+		return
+	}
+	if ok := c.GetOperatorController().AddOperator(op); !ok {
+		log.Warn("fail to add transfer leader operator")
+	}
+}
+
func (c *RaftCluster) updateStoreStatusLocked(id uint64) {
leaderCount := c.core.GetStoreLeaderCount(id)
regionCount := c.core.GetStoreRegionCount(id)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment