replication: Simplify metrics calculation (#13274)

Also doing some code cleanup
This commit is contained in:
Poorna Krishnamoorthy
2021-09-22 13:48:45 -04:00
committed by GitHub
parent 46724508f8
commit 19ecdc75a8
5 changed files with 24 additions and 54 deletions

View File

@@ -435,13 +435,12 @@ func networkMetricsPrometheus(ch chan<- prometheus.Metric) {
// get the most current of in-memory replication stats and data usage info from crawler.
func getLatestReplicationStats(bucket string, u BucketUsageInfo) (s BucketReplicationStats) {
s = BucketReplicationStats{}
bucketStats := globalNotificationSys.GetClusterBucketStats(GlobalContext, bucket)
// accumulate cluster bucket stats
stats := make(map[string]*BucketReplicationStat)
var totReplicaSize int64
for _, bucketStat := range bucketStats {
s.ReplicaSize += bucketStat.ReplicationStats.ReplicaSize
totReplicaSize += bucketStat.ReplicationStats.ReplicaSize
for arn, stat := range bucketStat.ReplicationStats.Stats {
oldst, ok := stats[arn]
if !ok {
@@ -452,16 +451,13 @@ func getLatestReplicationStats(bucket string, u BucketUsageInfo) (s BucketReplic
FailedSize: stat.FailedSize + oldst.FailedSize,
ReplicatedSize: stat.ReplicatedSize + oldst.ReplicatedSize,
}
s.FailedCount += stats[arn].FailedCount
s.FailedSize += stats[arn].FailedSize
s.ReplicatedSize += stats[arn].ReplicatedSize
}
}
s.Stats = make(map[string]*BucketReplicationStat, len(stats))
// add initial usage stat to cluster stats
usageStat := globalReplicationStats.GetInitialUsage(bucket)
if usageStat.Stats != nil {
totReplicaSize += usageStat.ReplicaSize
for arn, stat := range usageStat.Stats {
st, ok := stats[arn]
if !ok {
@@ -478,34 +474,37 @@ func getLatestReplicationStats(bucket string, u BucketUsageInfo) (s BucketReplic
stats[arn] = st
}
}
s.ReplicaSize += usageStat.ReplicaSize
s = BucketReplicationStats{
Stats: make(map[string]*BucketReplicationStat, len(stats)),
}
var latestTotReplicatedSize int64
for _, st := range u.ReplicationInfo {
latestTotReplicatedSize += int64(st.ReplicatedSize)
}
// normalize computed real time stats with latest usage stat
var usgReplicatedSize int64
for arn, tgtstat := range stats {
st := BucketReplicationStat{}
bu, ok := usageStat.Stats[arn]
if !ok {
bu = &BucketReplicationStat{}
}
usgReplicatedSize += bu.ReplicatedSize
// use in memory replication stats if it is ahead of usage info.
st.ReplicatedSize = bu.ReplicatedSize
if tgtstat.ReplicatedSize >= bu.ReplicatedSize {
st.ReplicatedSize = tgtstat.ReplicatedSize
}
s.ReplicatedSize += st.ReplicatedSize
// Reset FailedSize and FailedCount to 0 for negative overflows which can
// happen since data usage picture can lag behind actual usage state at the time of cluster start
st.FailedSize = int64(math.Max(float64(tgtstat.FailedSize), 0))
st.FailedCount = int64(math.Max(float64(tgtstat.FailedCount), 0))
st.ReplicaSize = int64(math.Max(float64(tgtstat.ReplicaSize), float64(u.ReplicaSize)))
s.Stats[arn] = &st
s.FailedSize += st.FailedSize
s.FailedCount += st.FailedCount
}
// normalize overall stats
s.FailedSize = int64(math.Max(float64(s.FailedSize), 0))
s.FailedCount = int64(math.Max(float64(s.FailedCount), 0))
s.ReplicaSize = int64(math.Max(float64(s.ReplicaSize), float64(u.ReplicaSize)))
s.ReplicatedSize = int64(math.Max(float64(s.ReplicatedSize), float64(usgReplicatedSize)))
s.ReplicaSize = int64(math.Max(float64(totReplicaSize), float64(u.ReplicaSize)))
s.ReplicatedSize = int64(math.Max(float64(s.ReplicatedSize), float64(latestTotReplicatedSize)))
return s
}