copy redis
# there are a few different ways to copy data from one redis instance to another.
# from what i've read, you can set up your new instance as a slave to the old
# and let redis replication do its thing (there's a sketch of that approach right after the imports below).
# or, you can copy the underlying dump.rdb file from one instance's file system to the other's.
# however, sometimes you don't manage your own redis instances, or maybe you just don't want to bother
# with any of that. you could just copy the data, key by key.
# this script does that.
# there were some other scripts online already
# - https://github.com/jeremyfa/node-redis-dump
# - https://github.com/yaauie/redis-copy
# the first one ran out of memory. it's a reported issue. the owner says 🤷🏽‍♀️ sorry, find another tool.
# the second one was really slow.
# so, i wrote this one.
# it uses redis pipelines, dump, restore, and larger-than-default batch sizes to reduce network latency effects.
# it also sets a default ttl on each key it copies over. for my use case these are all good things, but you can
# adjust the ttl and batch size pretty easily below.
# i run python 3.6 (the f-strings below need 3.6+), with the redis-py package installed (pip install redis).
# 🙏🏽
import os
import redis
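# a minimal sketch of the replication approach mentioned up top, using redis-py's
# slaveof() wrapper. illustrative only: nothing in this script calls it, the name
# replicate_then_detach is mine, and it assumes the destination actually lets you
# run SLAVEOF, which managed/hosted instances often don't (which is the whole
# reason this script exists).
import time
def replicate_then_detach(dest_conn, source_host, source_port):
    # point the destination at the source; redis replication copies the full dataset
    dest_conn.slaveof(source_host, source_port)
    # poll until the initial sync finishes
    while dest_conn.info('replication').get('master_link_status') != 'up':
        time.sleep(1)
    # detach so the destination goes back to being a standalone master
    dest_conn.slaveof()  # no args sends SLAVEOF NO ONE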
# copy data FROM source
source_redis_url = 'redis://user:pass@host:port'
# copy data TO destination
destination_redis_url = 'redis://user2:pass2@host2:port2'
# redis scan count default is 10.
# this variable is used for a couple of related things:
# 1. how many keys are requested from redis on each scan (COUNT is a hint, not a guarantee)
# 2. how many commands are dispatched to redis in each pipeline execution
# generally speaking, i've found i can significantly reduce network latency effects
# by batching things this way: pay the cost of a round trip across the network once
# every N commands instead of once every command. e.g. at 1,000 keys per batch,
# copying 1,000,000 keys costs roughly 2,000 round trips (one dump pipeline plus
# one restore pipeline per batch) instead of roughly 2,000,000. for contrast, there's
# an unbatched per-key sketch just below the connection setup.
scan_count = 1000
# 72 hour expiration (restore's ttl param is in ms, so this is 259,200,000). set this to 0 for no expiration on the copied keys.
default_ttl = 60 * 60 * 72 * 1000
source_conn = redis.StrictRedis.from_url(source_redis_url, decode_responses=False)
destination_conn = redis.StrictRedis.from_url(destination_redis_url, decode_responses=False)
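# for contrast with the pipelined functions below: a minimal unbatched sketch of
# the same dump/restore idea, one key at a time. copy_one_key is illustrative only
# and is never called in this script; it costs two network round trips per key,
# which is exactly what the batching below avoids.
def copy_one_key(key):
    data = source_conn.dump(key)  # serialized value, or None if the key vanished
    if data is not None:
        destination_conn.restore(name=key, ttl=default_ttl, value=data, replace=True)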
# pipelines dump calls for a batch of keys.
# returns an iterable of (key, serialized dump result) pairs:
# [(key1, serialized_result1), (key2, serialized_result2), ...]
def dump_keys(conn, keys):
    pipe = conn.pipeline()
    for key in keys:
        pipe.dump(key)
    results = pipe.execute()
    zipped_results = zip(keys, results)
    return zipped_results
# pipelines restore calls.
# takes (key, serialized dump value) pairs and restores each serialized value at
# its key, with replacement, using the default ttl set above.
def restore_dump_values(conn, zipped_results):
    pipe = conn.pipeline()
    for key, value in zipped_results:
        if value is None:
            # key disappeared (expired or deleted) between scan and dump; skip it
            continue
        pipe.restore(name=key, ttl=default_ttl, value=value, replace=True)
    result = pipe.execute()
    return result
def dump_and_restore(source_conn, destination_conn, keys):
    # dump all the keys from the source
    zipped_results = dump_keys(conn=source_conn, keys=keys)
    # restore the serialized results on the destination
    restore_result = restore_dump_values(conn=destination_conn, zipped_results=zipped_results)
    return restore_result
total_run = 0
total_success = 0
keys = []
for key in source_conn.scan_iter(count=scan_count):
    keys.append(key)
    bucket_count = len(keys)
    if bucket_count >= scan_count:
        res = dump_and_restore(source_conn=source_conn, destination_conn=destination_conn, keys=keys)
        total_run = total_run + len(res)
        successes = [result for result in res if result == b'OK']
        total_success += len(successes)
        keys = []
        print(f"total_run {total_run}")
        print(f"total_success {total_success}")
# flush whatever is left in the final partial batch
if keys:
    res = dump_and_restore(source_conn=source_conn, destination_conn=destination_conn, keys=keys)
    total_run = total_run + len(res)
    successes = [result for result in res if result == b'OK']
    total_success += len(successes)
print(f"total_run {total_run}")
print(f"total_success {total_success}")