Last active
April 26, 2022 14:30
-
-
Save Phyks/4e4c65fcd12d600374a7 to your computer and use it in GitHub Desktop.
Mirror a distant FTP using Python and lftp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Simple Python wrapper around lftp to mirror a remote ftp onto a local folder. | |
New files are downloaded, deleted files are marked for removal in a TO_DELETE.sh | |
script. | |
""" | |
import os | |
import pprint | |
import re | |
import subprocess | |
import sys | |
import urllib.parse | |
def get_diff(local_dir, remote_dir, host, user, password):
    """
    Get a diff between the local copy and the remote server.

    lftp is run with ``mirror --dry-run``, so nothing is transferred: lftp
    only prints the commands it would execute, and those lines are the diff.

    Params:
        - local_dir is the path to the local copy.
        - remote_dir is the path to the folder to replicate on the distant ftp.
        - host is the ftp host.
        - user is the user to authenticate with.
        - password is the associated password.

    Returns a diff ie a list of lftp commands (an empty list when lftp
    printed nothing).
    """
    parts_per_file = 10  # Number of parts per file
    parallel = 2  # Number of parallel transfers

    # The password is embedded in a double-quoted lftp argument. Backslashes
    # and double quotes must be backslash-escaped, otherwise a password
    # containing one of them terminates the quoting early and corrupts the
    # whole command script.
    escaped_password = password.replace("\\", "\\\\").replace("\"", "\\\"")

    # NOTE(review): host, user and both paths are inserted unquoted, so
    # values containing whitespace are split by lftp. Left unchanged here so
    # that callers passing already-escaped values keep working.

    # The --dry-run generates lftp commands that we parse into a diff list.
    lftp_input = """
open {host}
user {user} \"{password}\"
mirror --dry-run --delete -c --use-pget-n={parts_per_file} --parallel={parallel} {remote_dir} {local_dir}
bye""".format(host=host, user=user, password=escaped_password,
              parts_per_file=parts_per_file, parallel=parallel,
              remote_dir=remote_dir, local_dir=local_dir)

    # "-f" and its value are two separate argv entries; the original relied
    # on accidental string literal concatenation ("-f/dev/stdin").
    process = subprocess.run(["lftp", "-f", "/dev/stdin"],
                             input=lftp_input.encode("utf-8"),
                             stdout=subprocess.PIPE)

    stdout = process.stdout.decode("utf-8").strip()
    # "".split("\n") would yield [""], hence the explicit empty case.
    return stdout.split("\n") if stdout else []
def apply_diff(diff):
    """
    Apply a diff (list of lftp commands).

    The commands are fed to a single lftp process through its standard
    input. ``xfer:clobber`` is switched on first so that the transfer
    commands may overwrite files that already exist locally.

    Params:
        - diff is the list of lftp commands to apply.

    Returns nothing; lftp's exit status is not inspected.
    """
    lftp_input = "set xfer:clobber on\n"
    lftp_input += "\n".join(diff)

    # "-f" and its value are two separate argv entries; the original relied
    # on accidental string literal concatenation ("-f/dev/stdin"). The
    # completed-process object is not needed, so it is not kept.
    subprocess.run(["lftp", "-f", "/dev/stdin"],
                   input=lftp_input.encode("utf-8"))
def extract_rm_commands(diff):
    """
    Extract removal commands (rm) from the diff.

    Every command starting with ``rm`` is pulled out of the diff and its
    ``file:`` URL is rewritten as a double-quoted, URL-decoded path so the
    line can be run by sh.

    Params:
        - diff is the list of lftp commands to apply.

    Returns a tuple (rm_commands, diff) where rm_commands is a list of rm
    commands to apply, in sh format, and diff is a list of lftp commands to
    apply without any removal command. Relative order is preserved in both.
    """
    def _as_sh_path(match):
        """Turn the path of a file: URL into a double-quoted sh word."""
        path = urllib.parse.unquote(match.group(1))
        # Inside sh double quotes, backslash, double quote, dollar and
        # backtick stay special. Escape them so a file name can neither
        # close the quoting nor trigger an expansion or a command
        # substitution when the script is executed.
        path = re.sub(r'([\\"$`])', r"\\\1", path)
        return "\"%s\"" % (path,)

    rm_commands = []
    remaining = []
    # Single pass instead of filtering twice with a list membership test.
    for command in diff:
        if command.startswith("rm"):
            # Replace file: URLs by quoted non-urlencoded paths
            rm_commands.append(re.sub(r"file:(.*)", _as_sh_path, command))
        else:
            remaining.append(command)
    return rm_commands, remaining
def uniq_append(file, content):
    """
    Append to a file, ensuring there are not any duplicate lines.

    Lines already in the file keep their position, new lines from content
    are added after them, and empty lines are dropped. The line removing
    the file itself (``rm <file>``) is always written last.

    Params:
        - file is the path to the script to update; missing parent
          directories are created.
        - content is a list of lines to add, without trailing newline.
    """
    # Ensure directory exists. A bare file name has an empty dirname, which
    # must not be passed to the directory creation call.
    directory = os.path.dirname(file)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # The rm command associated to the TO_DELETE file itself.
    self_rm = "rm %s\n" % (file,)

    # Previous content first, so that a rerun does not reshuffle the script.
    lines = []
    if os.path.isfile(file):
        with open(file, "r") as fh:
            # A last line lacking its newline would otherwise be glued to
            # the following line when the file is rewritten.
            lines.extend(line if line.endswith("\n") else line + "\n"
                         for line in fh)
    # Add \n to the commands in content, as diff is not \n-terminated.
    lines.extend(line + "\n" for line in content)

    # Order-preserving deduplication. Seeding `seen` with the self-removal
    # line filters it out here so it can be written once, at the bottom.
    seen = {self_rm}
    unique_lines = []
    for line in lines:
        if line.strip() == "" or line in seen:
            continue
        seen.add(line)
        unique_lines.append(line)

    # Write new content
    with open(file, "w") as fh:
        fh.writelines(unique_lines)
        # And we ensure to write the rm command associated with the
        # TO_DELETE file at the end.
        fh.write(self_rm)
if __name__ == "__main__":
    # Script entry point: five positional arguments are expected after the
    # program name.
    if len(sys.argv) < 6:
        sys.exit("Usage: %s LOCAL_DIR REMOTE_DIR HOST USER PASS" %
                 (sys.argv[0],))

    local_dir, remote_dir, host, user, password = sys.argv[1:6]

    # Ask lftp what would change, and show it to the user.
    print("Fetching diff…")
    diff = get_diff(local_dir, remote_dir, host, user, password)
    print("Diff is:")
    pprint.pprint(diff)
    print()

    # Removals are never applied directly: they are stored in a script kept
    # inside the local copy, and only the other commands are run.
    rm_commands, diff = extract_rm_commands(diff)
    rm_script = os.path.normpath(local_dir + "/TO_DELETE.sh")
    uniq_append(rm_script, rm_commands)

    if diff:
        print("Applying get commands in diff…")
        apply_diff(diff)
    else:
        print("No new files to get.")

    if rm_commands:
        print("Rm commands in diff were saved to %s." % (rm_script,))

    print("Done!")
# Alternative to subprocess.run suggested in the discussion for Python 2.7:
# start lftp with the credentials (-u auth) and the server on the command
# line, with pipes attached to its standard input and output.
# `auth`, `server` and `lftp_input` are defined elsewhere by the commenter.
lftp_process = subprocess.Popen(["lftp", "-u", auth, server], stdin=subprocess.PIPE, stdout=subprocess.PIPE)
# Send the lftp commands on stdin and wait for lftp to exit. stderr is not
# redirected to a pipe above, so `stderr` is None here.
# NOTE(review): under Python 3 `lftp_input` would have to be bytes - confirm.
stdout, stderr = lftp_process.communicate(input=lftp_input)
用这两句替换了 subprocess.run,可以用。
auth,server,lftp_input都是字符串,在其他地方定义的。
FYI: The code is written for Python 3, which shouldn't be a problem for long since Python 2 is being deprecated.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
subprocess.run 需要 Python 3.5 以上,我现在的版本是 2.7。这个方法可以用其他方法替换掉吗?比如 subprocess.call?