Ceph的数据分布是由CRUSH Map决定的,而CRUSH算法是伪随机哈希的,所以在一定时间内,数据分布会存在倾斜,这就需要我们用脚本来定期进行reweight:
#author jesse.js.lyu@gmail.com
#reweight for ceph osds
import hashlib
import commands
import threading
import sys
from time import ctime,sleep
import urllib2
import json
def doReweight(osdMaxUtilId, osdTargetReweight):
    """Issue ``ceph osd reweight <id> <weight>`` and echo the result.

    The command line is printed first, then executed with
    ``commands.getstatusoutput``; the exit status and the captured output
    are printed afterwards. Nothing is returned, and a failing command is
    only reported on stdout, never raised.

    Args:
        osdMaxUtilId: id of the OSD whose reweight value is changed.
        osdTargetReweight: the new reweight value passed to the command.
    """
    # Build the command once so the echoed text and the executed text match.
    reweightCommand = 'ceph osd reweight ' + str(osdMaxUtilId) + ' ' + str(osdTargetReweight)
    print(reweightCommand)
    exitStatus, commandOutput = commands.getstatusoutput(reweightCommand)
    print('%s %s' % (exitStatus, commandOutput))
def _loadCephJson(cephCommand):
    """Run *cephCommand* and return its parsed JSON output, or None on failure.

    A non-zero exit status or output that is not valid JSON is reported on
    stdout and turned into ``None`` so the caller can skip the current round
    instead of crashing.
    """
    (status, output) = commands.getstatusoutput(cephCommand)
    if status != 0:
        print("'" + cephCommand + "' failed with status " + str(status) + ": " + str(output))
        return None
    try:
        return json.loads(output)
    except ValueError:
        # json.loads signals malformed input with ValueError.
        print("'" + cephCommand + "' did not return valid JSON, give up...")
        return None


def canOSDReweight():
    """Lower the reweight of the fullest OSD by one step when it is safe to.

    One round does the following:

    1. Reads ``ceph -s`` and requires that a single ``active+clean`` entry
       in ``pgs_by_state`` accounts for all ``num_pgs`` placement groups.
    2. Reads ``ceph osd df`` and finds the OSDs with the highest and lowest
       ``utilization``.
    3. If the highest utilization exceeds the lowest by more than 10 percent
       (relative to the lowest) and the reduced reweight stays positive,
       calls ``doReweight`` for the fullest OSD with its current reweight
       minus 0.01.

    Every reason for not reweighting is printed and the function returns
    early. Returns None in all cases.
    """
    osdReweightStep = 0.01

    # determine pg status
    clusterStatus = _loadCephJson('ceph -s --format=json-pretty')
    if clusterStatus is None:
        return
    pgMap = clusterStatus['pgmap']
    numPGS = pgMap['num_pgs']
    pgStateIsOk = False
    for pgState in pgMap['pgs_by_state']:
        if pgState['state_name'] == 'active+clean' and pgState['count'] == numPGS:
            pgStateIsOk = True
    if not pgStateIsOk:
        print("pg is reweighting or can't do reweight right now....")
        # Leave this round only; a bare `exit` here would be a no-op
        # expression and the function would keep going.
        return

    # determine OSD util diff
    osdDf = _loadCephJson('ceph osd df --format=json-pretty')
    if osdDf is None:
        return

    osdMaxUtil = 0
    osdMaxUtilId = -1
    osdMaxReweight = 1
    osdMinUtil = 100
    osdMinUtilId = -1
    for node in osdDf['nodes']:
        if node['utilization'] > osdMaxUtil:
            osdMaxUtilId = node['id']
            osdMaxUtil = node['utilization']
            osdMaxReweight = node['reweight']
        if node['utilization'] < osdMinUtil:
            osdMinUtilId = node['id']
            osdMinUtil = node['utilization']

    if osdMinUtil == 0:
        # The relative diff divides by the minimum utilization; an OSD that
        # reports 0 would raise ZeroDivisionError, so skip this round.
        print("Min OSD's utilization is 0, can't compute utilization diff, give up...")
        return

    # float() keeps the division exact even if both utilizations are ints
    # (Python 2 integer division would truncate the ratio).
    osdUtilDiff = (osdMaxUtil - osdMinUtil) / float(osdMinUtil) * 100
    if osdUtilDiff > 10:
        print("Max and Min OSD's utilization diff is " + str(osdUtilDiff))
    else:
        print("Max and Min OSD's utilization diff is " + str(osdUtilDiff) + ",less then 10%, give up...")
        return

    osdTargetReweight = osdMaxReweight - osdReweightStep
    if osdTargetReweight > 0:
        print("================ doing reweight ===============")
        print('%s %s %s' % (osdMaxUtilId, osdMaxUtil, osdMaxReweight))
        print('%s %s' % (osdMinUtilId, osdMinUtil))
        doReweight(osdMaxUtilId, osdTargetReweight)
def invokeOSDReweight(intervalSeconds=30):
    """Run ``canOSDReweight()`` forever, pausing before every round.

    Args:
        intervalSeconds: seconds to sleep before each call to
            ``canOSDReweight``. Defaults to 30, the previously hard-coded
            value, so existing callers behave exactly as before.

    This function never returns; it ends only if ``canOSDReweight`` raises
    or the process is interrupted.
    """
    while True:
        # Sleep first so the first check happens one interval after start-up.
        sleep(intervalSeconds)
        canOSDReweight()
# Script entry point: start the endless reweight loop only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    invokeOSDReweight()