Last active
May 21, 2021 20:01
-
-
Save pcd1193182/5c35b1bc8bf084b582703ab7b17c1adf to your computer and use it in GitHub Desktop.
Object Storage MMP (Multi-Modifier Protection) Pseudocode
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Tunables used by the routines below.
// Pause between heartbeat writes; each heartbeat write is timed out after twice this value.
heartbeat_interval = 1 second | |
// Published in every heartbeat record. The heartbeat loop calls suspend_pools() once more
// than this much time has passed since its last successful write.
lease_duration = 10 seconds | |
// Local file in which this agent's GUID is persisted.
agent_id_path = /run/zfs_agent_id | |
// heartbeat_thread(s3): publish this agent's liveness record to the object store.
//
// s3: handle to the object store; only its read() and write() operations are used here.
//
// Startup: load (or create) the agent GUID persisted at agent_id_path and decide whether
// the previously persisted GUID may be kept. Steady state: repeatedly write the record
// "zfs/agents/<guid>" and call suspend_pools() once more than lease_duration has passed
// since the last successful write.
//
// NOTE(review): the indentation of this listing has been lost, so the nesting of the
// if/else/while bodies cannot be recovered from the text alone. Comments that depend on
// nesting are marked as assumptions -- confirm against the original before relying on them.
heartbeat_thread(s3): | |
file = open(agent_id_path) | |
// No persisted ID yet: create the file and store a freshly generated GUID in it.
if !file.exists: | |
file.create() | |
file.write(random_guid()) | |
// An ID was persisted earlier: read it and look up the record stored under it.
else: | |
guid = file.read() | |
existing = s3.read("zfs/agents/{}" % guid) | |
// hiccup is set to true only when a heartbeat under the old GUID succeeds while now() is
// still within lease_duration of the timestamp in the previously stored record.
hiccup = false | |
// Entered only if the old record could be read.
// NOTE(review): both visible paths through this loop end in `break`. Which `if` the `else`
// below pairs with -- and therefore whether a failed write is retried -- cannot be
// determined from this flattened listing; confirm.
while existing == SUCCESS: | |
// Refresh the old record; the write is abandoned after two heartbeat intervals.
result = s3.write("zfs/agents/{}" % guid, | |
{"timestamp" : now(), | |
"hostname" : get_hostname(), | |
"addr" : get_ip_address(), | |
"lease_duration" : lease_duration | |
}).timeout(2 * heartbeat_interval) | |
if result == SUCCESS: | |
// The age check runs after the write has completed, so time spent writing counts
// against the old lease.
if now() - existing.data.timestamp < lease_duration: | |
hiccup = true | |
break | |
else: | |
break | |
// The old GUID was not kept: replace the persisted ID with a new random GUID.
if not hiccup: | |
file.write(random_guid()) | |
// Re-read the ID from the file. Presumably this runs on every path, so that guid is also
// set when the file was only just created -- confirm nesting.
guid = file.read() | |
// Presumably a sentinel: with INT64_MAX here, now() - last_heartbeat stays below
// lease_duration, so the expiry check cannot fire before the first successful write
// (assuming signed arithmetic without wraparound).
last_heartbeat = INT64_MAX | |
while true: | |
// Publish the heartbeat record; the write is abandoned after two heartbeat intervals.
result = s3.write("zfs/agents/{}" % guid, | |
{"timestamp" : now(), | |
"hostname" : get_hostname(), | |
"addr" : get_ip_address(), | |
"lease_duration" : lease_duration | |
}).timeout(2 * heartbeat_interval) | |
// The expiry check comes before last_heartbeat is updated, so it measures the time since
// the previous successful write even when the write just issued succeeded.
if now() - last_heartbeat > lease_duration: | |
suspend_pools() // will also either exit or soft reset the agent | |
if result == SUCCESS: | |
last_heartbeat = now() | |
sleep(heartbeat_interval) | |
// import_pool_claim_ownership(s3, guid, self_id): try to record this agent as the owner
// of a pool in the object store.
//
// s3:      handle to the object store; only read() and write() are used.
// guid:    identifies the pool; its owner record is the object "zfs/<guid>/owner".
// self_id: identifies this agent; its heartbeat record is "zfs/agents/<self_id>".
//
// Returns:
//   SUCCESS - the owner record names this agent (already, or after a confirmed claim).
//   FAILURE - the current owner's heartbeat timestamp changed while we watched it, the
//             owner record changed underneath us, or our claim did not survive the
//             settle period.
//   RETRY   - too much time passed between the last read of the owner record and the
//             claim write; the caller should run the whole procedure again.
import_pool_claim_ownership(s3, guid, self_id):
    // Upper bound on (last owner-record read + claim write). The check and the write
    // timeout below must use the same value, so it is named once here.
    claim_window = 2 seconds

    pool_object = "zfs/{}/owner" % guid
    self_object = "zfs/agents/{}" % self_id

    result = s3.read(pool_object)
    duration = result.duration
    if result == SUCCESS:
        // The pool has a recorded owner; fetch that owner's heartbeat record.
        result2 = s3.read(result.data.owner)
        duration += result2.duration
        if result2 == SUCCESS:
            if result.data.owner == self_object:
                return SUCCESS

            // Watch the owner for twice its advertised lease. A changed timestamp
            // means the owner is still heartbeating, so the pool is not ours to take.
            sleep(result2.data.lease_duration * 2)
            result3 = s3.read(result.data.owner)
            if result3 == SUCCESS and result3.data.timestamp != result2.data.timestamp:
                return FAILURE

            // Someone else may have claimed the pool while we slept.
            result4 = s3.read(pool_object)
            if result4 == SUCCESS and result4.data.owner != result.data.owner:
                return FAILURE

            // Only the time since this most recent read of the owner record counts
            // toward the claim window, so the accumulated value is replaced.
            duration = result4.duration

    if duration > claim_window:
        return RETRY

    /*
     * If the write takes too long here, we could race with another system importing
     * the pool who is already sleeping below. To avoid that, we time out the write
     * after a reasonable window and retry the whole process.
     */
    result5 = s3.write(pool_object, {"owner" : self_object}).timeout(claim_window - duration)
    if result5 == TIMEOUT:
        return RETRY

    /*
     * The time is flexible here, just needs to be long enough that we can be pretty
     * confident any other contender for the pool would have had time to finish issuing
     * a GET and PUT request.
     */
    sleep(4 seconds)

    // The claim stands only if the owner record still names us after the settle period.
    result6 = s3.read(pool_object)
    if result6 == FAILURE or result6.data.owner != self_object:
        return FAILURE
    return SUCCESS
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment