Skip to content

Instantly share code, notes, and snippets.

@Jwink3101
Created February 13, 2023 21:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Jwink3101/8d3487639e0d4be5cf4424e6170edc9f to your computer and use it in GitHub Desktop.
Save Jwink3101/8d3487639e0d4be5cf4424e6170edc9f to your computer and use it in GitHub Desktop.
This is a WIP and incomplete way to use rclone programmatically to connect to cloud storage
{
"cells": [
{
"cell_type": "markdown",
"id": "372c730b-581d-459a-9973-eecd922d0cee",
"metadata": {},
"source": [
"# Rclone as API\n",
"\n",
"**PROOF OF CONCEPT**. Needs work! \n",
"**Last Update**: 2023-02-13\n",
"\n",
"This is just an example of using rclone via subprocess. It is far from fully fleshed out nor is it even remotely close to feature complete. It also doesn't have guard rails on things like logging\n",
"\n",
"It has some built in calls but, of course, you can do a low-level `rclone.call()` (witout the flags or executable set) on whatever you want.\n",
"\n",
"**TODO**: Capture failures. Especially around streaming uploads but really everywhere \n",
"**TODO**: Better documentation of paramaters"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7fff8df6-8e63-47cb-b77a-2fb2e88a9b4e",
"metadata": {},
"outputs": [],
"source": [
"import os, subprocess\n",
"import io\n",
"import tempfile\n",
"import json\n",
"import re"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ac9adef3-fc8f-4196-8c23-59a761503a09",
"metadata": {},
"outputs": [],
"source": [
"import logging\n",
"logger = logging.getLogger(__name__)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e448badb-9dec-45fc-8673-310ba5e1fb25",
"metadata": {},
"outputs": [],
"source": [
"logging.basicConfig(level=logging.DEBUG)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "464211e0-4ecb-4be1-9ec4-85a2dde2df2b",
"metadata": {},
"outputs": [],
"source": [
"def RFC2date(datestr):\n",
" datestr = datestr.replace(':','')\n",
" # Python can only resolve microseconds.\n",
" if '.' in datestr:\n",
" dt,us_tz = datestr.split('.')\n",
" us,pm,tz = re.split(r'(\\+|-)',us_tz,maxsplit=1)\n",
" us = us.ljust(6,'0')[:6]\n",
" datestr = f'{dt}.{us}{pm}{tz}'\n",
" return datetime.datetime.strptime(datestr,'%Y-%m-%dT%H%M%S.%f%z')\n",
" return datetime.datetime.strptime(datestr,'%Y-%m-%dT%H%M%S%z')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3b80cb06-2689-44bf-87cc-516e59ec57b1",
"metadata": {},
"outputs": [],
"source": [
"# This will REMOVE an envrionment variable. Can also do it by monkey-patching os.environ\n",
"DEL_SENTINEL = b'\\xb5\\x8bw\\xe7\\x16H\\xa7V' # os.urandom(8) \n",
"\n",
"class RCLONE:\n",
" def __init__(self,\n",
" remote,\n",
" *,\n",
" rclone_exe='rclone',\n",
" universal_flags=None,\n",
" universal_env=None):\n",
" self.remote = remote\n",
" self.rclone_exe = rclone_exe\n",
" self.uflags = universal_flags if universal_flags else []\n",
" \n",
" self.uenv = os.environ.copy()\n",
" if universal_env:\n",
" self.uenv.update(universal_env)\n",
" for key,val in universal_env.items():\n",
" if val == DEL_SENTINEL:\n",
" self.uenv.pop(key,None)\n",
" \n",
" def ls(self,*,subdir='',filters=(),flags=None):\n",
" \n",
" cmd = ['lsjson',self.remote + subdir,'--recursive']\n",
" for ff in filters:\n",
" cmd.append('--filter')\n",
" cmd.append(ff)\n",
" \n",
" if flags: \n",
" cmd.extend(flags)\n",
" # Long directories can be problematic so use a tempfile\n",
" # so as to not exhaust the buffer. We also want to handle this\n",
" # lazily since it could be long. \n",
" # This isn't perfect since we wait for the entire listing to finish then\n",
" # read line by line but it avoids accidentally deadlocking.\n",
" stdout,_ = self.call(cmd,buffer=False)\n",
" stdout.seek(0)\n",
" for line in stdout:\n",
" # lsjson returns one entry per line. And always UTF8\n",
" line = line.decode('utf8')\n",
" line = line.strip().rstrip(',').strip()\n",
" \n",
" if line == '[' or line == ']': # start or end line\n",
" continue\n",
" line = json.loads(line)\n",
" line.pop('Name',None) # Never understood why rclone gives us this. It's dumb\n",
" line['ModTime'] = RFC2date(line['ModTime'])\n",
" yield line\n",
" \n",
" def upload(self,local,destdir=''):\n",
" \"\"\"Upload a local file to the destdir\"\"\"\n",
" cmd = ['copy',\n",
" local,\n",
" self.remote + destdir,\n",
" '--no-traverse', # Single file. This is better\n",
" '--no-check-dest', # Always upload\n",
" ]\n",
" \n",
" self.call(cmd)\n",
" \n",
" def uploadto(self,local,destfile):\n",
" \"\"\"Upload a local file to the destfile\"\"\"\n",
" cmd = ['copyto',\n",
" local,\n",
" self.remote + destfile,\n",
" '--no-traverse', # Single file. This is better\n",
" '--no-check-dest', # Always upload\n",
" ]\n",
" \n",
" self.call(cmd)\n",
"\n",
" def stream_upload(self,destfile,data,size=None):\n",
" \"\"\"\n",
" Upload data (string or bytes) to a file. Note that more advanced useage\n",
" \"\"\"\n",
" if isinstance(data,str):\n",
" data = data.encode('utf8')\n",
" \n",
" if isinstance(data,bytes):\n",
" size = len(data)\n",
" data = io.BytesIO(data)\n",
" \n",
" \n",
" cmd = ['rcat',\n",
" self.remote + destfile,\n",
" '--no-traverse', # Single file. This is better\n",
" '--no-check-dest', # Always upload\n",
" ]\n",
" if size:\n",
" cmd.extend(['--size',str(size)])\n",
" \n",
" self.call(cmd,stdin=data)\n",
" \n",
" def download(self,remotefile,destdir):\n",
" \"\"\"downloas a remotefile file to the destdir\"\"\"\n",
" cmd = ['copy',\n",
" self.remote + remotefile,\n",
" destdir,\n",
" '--no-traverse', # Single file. This is better\n",
" '--no-check-dest', # Always upload\n",
" ]\n",
" \n",
" self.call(cmd)\n",
" \n",
" def downloadto(self,remotefile,destfile):\n",
" \"\"\"Upload a local file to the destfile\"\"\"\n",
" cmd = ['copyto',\n",
" self.remote + remotefile,\n",
" destfile,\n",
" '--no-traverse', # Single file. This is better\n",
" '--no-check-dest', # Always upload\n",
" ]\n",
" \n",
" self.call(cmd)\n",
" \n",
" def stream_download(self,remotefile,offset=None,count=None):\n",
" \"\"\"Stream the download. Returns the file-object (which may be a real file if buffer=False)\"\"\"\n",
" cmd = ['cat',self.remote + remotefile]\n",
" if offset:\n",
" cmd.extend(['--offset',str(offset)])\n",
" if count:\n",
" cmd.extend(['--count',str(count)])\n",
" proc = self.call(cmd,return_proc=True,buffer=True)\n",
" return proc.stdout.read()\n",
" \n",
" def delete(self,remotefile):\n",
" \"\"\"\n",
" Delete a remote path, potentially including directories.\n",
" \n",
" TODO: Add filters...\n",
" \"\"\"\n",
" cmd = ['delete',self.remote + remotefile,'--rmdirs']\n",
" self.call(cmd)\n",
" \n",
" def call(self,cmd,stdin=None,buffer=True,return_proc=False):\n",
" \"\"\"\n",
" Call rclone. If stdin is bytes, it is sent. If it is a file object, it is\n",
" read in 1kb chunks.\n",
" \n",
" If buffer is False, temporary file objects are returned instead of data\n",
" \"\"\"\n",
" \n",
" if buffer:\n",
" stdout=subprocess.PIPE\n",
" stderr=subprocess.PIPE\n",
" else:\n",
" stdout = tempfile.NamedTemporaryFile(delete=False)\n",
" stderr = tempfile.NamedTemporaryFile(delete=False) \n",
" \n",
" finalcmd = [self.rclone_exe] + self.uflags + cmd\n",
" \n",
" logger.debug('rclone call %s',str(finalcmd))\n",
" \n",
" proc = subprocess.Popen(\n",
" finalcmd,\n",
" stdout=stdout,\n",
" stderr=stderr,\n",
" stdin=subprocess.PIPE,\n",
" env=self.uenv)\n",
" \n",
" if hasattr(stdin,'read'):\n",
" while block := stdin.read(1024):\n",
" proc.stdin.write(block)\n",
" stdin = None\n",
" \n",
" if return_proc:\n",
" if not buffer:\n",
" proc.stdout,proc.stderr = stdout,stderr\n",
" return proc\n",
" \n",
" out,err = proc.communicate(stdin)\n",
" \n",
" if not buffer:\n",
" return stdout,stderr\n",
" \n",
" return out,err\n",
" "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b576ed55-a4e7-43b8-b606-042aabe16736",
"metadata": {},
"outputs": [],
"source": [
"# Demo. \n",
"rclone = RCLONE(\n",
" 'remote:',\n",
" universal_env={\n",
" \"RCLONE_CONFIG\": \"test.cfg\",\n",
" \"RCLONE_PASSWORD_COMMAND\": DEL_SENTINEL, # Demo but I also have this set\n",
" },\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7c00e212-6fed-4661-a421-90dfac03a3fb",
"metadata": {},
"outputs": [],
"source": [
"list(rclone.ls())\n",
"\n",
"# Upload files\n",
"rclone.upload('localfile') # to 'remote:localfile'\n",
"rclone.upload('localfile','sub/dir') # to remote:sub/dir/localfile. Will create if needed\n",
"rclone.uploadto('another/local/file','a/sub/dir/newname.ext') # to 'a/sub/dir/newname.ext'\n",
"\n",
"# Stream upload either text, bytes, or a file object\n",
"rclone.stream_upload('streamed.txt','This is some text')\n",
"rclone.stream_upload('streamed.bin',b'This is some bytes')\n",
"\n",
"# Streaming data. Use a fake example. Note that this does block until done\n",
"proc = subprocess.Popen(['python','-c',\"\"\"\\\n",
"import time\n",
"for i in range(10):\n",
" print(i,flush=True)\n",
" time.sleep(0.5)\n",
"\"\"\"],stdout=subprocess.PIPE)\n",
"rclone.stream_upload('streamed.out',proc.stdout)\n",
"\n",
"# Download\n",
"rclone.download('file.ext','local/dir/') # from remote:file.ext to local/dir/file.ext\n",
"rclone.downloadto('file.ext','newfile.ext') # from remote:file.ext to newfile.ext\n",
"\n",
"# Get a fileobject. Note that this IS NOT smart with the ability to read on the fly...\n",
"out = rclone.stream_download('file.ext')\n",
"\n",
"# ...But, you can specify offsets and mounts\n",
"out = rclone.stream_download('file.ext',offset=3,count=5)\n",
"\n",
"# Delete\n",
"rclone.delete('oldfile')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e09af9f7-3343-4671-a014-fbde2770981c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "a9e4ffd5-130b-46ae-841a-18784e30a3db",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment