Skip to content

Instantly share code, notes, and snippets.

@pcd1193182
Last active May 21, 2021 20:01
Show Gist options
  • Save pcd1193182/5c35b1bc8bf084b582703ab7b17c1adf to your computer and use it in GitHub Desktop.
Save pcd1193182/5c35b1bc8bf084b582703ab7b17c1adf to your computer and use it in GitHub Desktop.
Object Storage MMP Pseudocode
// Pause between heartbeat attempts; each heartbeat write is allowed twice this long before it times out.
heartbeat_interval = 1 second
// Lease length published in every heartbeat record; also the threshold the heartbeat thread compares timestamps against.
lease_duration = 10 seconds
// Local file in which the agent's GUID is stored.
agent_id_path = /run/zfs_agent_id
// Heartbeat loop for one agent. Loads (or creates) the agent GUID kept in
// agent_id_path, decides whether that GUID can be reused, and then keeps
// writing a heartbeat record to "zfs/agents/<guid>", suspending the pools
// when no heartbeat has succeeded for longer than lease_duration.
//   s3: object-store handle providing read() and write().
// NOTE(review): the indentation of this pseudocode has been lost, so block
// nesting is not visible; comments that depend on nesting are marked as such.
heartbeat_thread(s3):
file = open(agent_id_path)
// No ID file yet: create it and store a freshly generated GUID.
if !file.exists:
file.create()
file.write(random_guid())
else:
// An ID file exists: load the saved GUID and fetch the heartbeat record stored under it.
guid = file.read()
existing = s3.read("zfs/agents/{}" % guid)
// Becomes true when the saved record turns out to be younger than lease_duration.
hiccup = false
// Only entered when a record was found for the saved GUID.
while existing == SUCCESS:
// Write a fresh heartbeat under the saved GUID, bounded by 2 * heartbeat_interval.
result = s3.write("zfs/agents/{}" % guid,
{"timestamp" : now(),
"hostname" : get_hostname(),
"addr" : get_ip_address(),
"lease_duration" : lease_duration
}).timeout(2 * heartbeat_interval)
if result == SUCCESS:
// The previously stored timestamp is still inside the lease window.
if now() - existing.data.timestamp < lease_duration:
hiccup = true
break
// NOTE(review): unclear which `if` this `else` pairs with. Paired with the
// timestamp check, the loop retries failed writes until one succeeds; paired
// with the write check, the loop runs at most once. Confirm against the original.
else:
break
// No hiccup: discard the saved GUID by overwriting the file with a new random one.
if not hiccup:
file.write(random_guid())
// Presumably executed on every path (the first-run branch never assigns guid) -- confirm nesting.
guid = file.read()
// Starts at INT64_MAX so that now() - last_heartbeat stays far below lease_duration
// until a write has succeeded (assuming the subtraction does not wrap).
last_heartbeat = INT64_MAX
while true:
// Publish the current timestamp, hostname, address and lease length under this agent's GUID.
result = s3.write("zfs/agents/{}" % guid,
{"timestamp" : now(),
"hostname" : get_hostname(),
"addr" : get_ip_address(),
"lease_duration" : lease_duration
}).timeout(2 * heartbeat_interval)
// Checked on every iteration, before last_heartbeat is refreshed by this iteration's result.
if now() - last_heartbeat > lease_duration:
suspend_pools() // will also either exit or soft reset the agent
if result == SUCCESS:
last_heartbeat = now()
sleep(heartbeat_interval)
/*
 * Try to claim ownership of pool `guid` for agent `self_id`.
 *
 *   s3      - object-store handle providing read() and write()
 *   guid    - pool GUID; the owner record is the object "zfs/<guid>/owner"
 *   self_id - this agent's ID; its heartbeat object is "zfs/agents/<self_id>"
 *
 * Returns SUCCESS when this agent is (or becomes) the recorded owner,
 * FAILURE when another agent is still heartbeating, the owner record changed
 * underneath us, or the final read-back does not show us as owner, and RETRY
 * when the read-then-write window exceeded 2 seconds or the write timed out.
 *
 * NOTE(review): the original text had lost its indentation. The nesting below
 * is reconstructed from data dependencies (each line is placed inside the
 * checks that guard the values it reads). The placement of the re-read of
 * pool_object (result4) inside the `result2 == SUCCESS` branch is the one
 * spot where another nesting would also be consistent -- confirm.
 */
import_pool_claim_ownership(s3, guid, self_id):
    pool_object = "zfs/{}/owner" % guid
    self_object = "zfs/agents/{}" % self_id

    // duration tracks how long the reads preceding our write have taken.
    result = s3.read(pool_object)
    duration = result.duration
    if result == SUCCESS:
        // The pool has a recorded owner; look up that owner's heartbeat object.
        result2 = s3.read(result.data.owner)
        duration += result2.duration
        if result2 == SUCCESS:
            if result.data.owner == self_object:
                return SUCCESS
            // Wait two of the owner's lease periods, then see whether its
            // heartbeat timestamp moved; if it did, the owner is still alive.
            sleep(result2.data.lease_duration * 2)
            result3 = s3.read(result.data.owner)
            if result3 == SUCCESS and result3.data.timestamp != result2.data.timestamp:
                return FAILURE
            // Someone else may have claimed the pool while we slept.
            result4 = s3.read(pool_object)
            if result4 == SUCCESS and result4.data.owner != result.data.owner:
                return FAILURE
            // Restart the clock from this latest read of the owner record.
            duration = result4.duration

    if duration > 2 seconds:
        return RETRY
    /*
     * If the write takes too long here, we could race with another system importing
     * the pool who is already sleeping below. To avoid that, we time out the write
     * after a reasonable window and retry the whole process.
     */
    // Fix: the dict literal was missing its closing brace.
    result5 = s3.write(pool_object, {"owner" : self_object}).timeout(2 seconds - duration)
    if result5 == TIMEOUT:
        return RETRY
    /*
     * The time is flexible here, just needs to be long enough that we can be pretty
     * confident any other contender for the pool would have had time to finish issuing
     * a GET and PUT request.
     */
    sleep(4 seconds)
    // Read back the owner record: we only win if it still names us.
    result6 = s3.read(pool_object)
    if result6 == FAILURE or result6.data.owner != self_object:
        return FAILURE
    return SUCCESS
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment