Skip to content

Instantly share code, notes, and snippets.

@tennox
Last active August 16, 2016 14:25
Show Gist options
  • Save tennox/55169f40ab75590eefd8a3f3ac82506f to your computer and use it in GitHub Desktop.
Save tennox/55169f40ab75590eefd8a3f3ac82506f to your computer and use it in GitHub Desktop.
Phpwcms Backend File Downloader
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This script downloads files from the phpwcms backend\n",
"\n",
"### Required data:"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# ENTER DATA HERE:\n",
"backend_links_file = 'wcms-files.json' # contains download links+paths of wcms backend, generated by a JS script\n",
"target_dir = 'wcms-files/' # target directory for downloads\n",
"\n",
"wcms_baseurl = 'http://www.your-site.org/'\n",
"cookies = {'PHPSESSID': 'XYZ'} # for Phpwcms backend"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Initialize"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import json\n",
"import codecs"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# small helper function to peek into variables\n",
"def glance(collection, n=3):\n",
" print(\"%d entries\" % len(collection))\n",
" if type(collection) == dict:\n",
" return list(collection.items())[:n]\n",
" elif type(collection) == list:\n",
" return collection[:n]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load Data"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"332 entries\n"
]
},
{
"data": {
"text/plain": [
"[('include/inc_act/act_download.php?pl=1&dl=455',\n",
" ['Example file 1.pdf', 'PDF/']),\n",
" ('include/inc_act/act_download.php?pl=1&dl=48',\n",
" ['Example file 2.jpg', 'Images/Other/']),\n",
" ('include/inc_act/act_download.php?pl=1&dl=142',\n",
" ['DSC_0047_2.jpg', 'Images'])]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# generated by JS\n",
"with codecs.open(backend_links_file, encoding='utf-8') as f:\n",
" wcms_links = json.load(f)\n",
" \n",
"glance(wcms_links) # quick glance"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Download"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import os\n",
"import requests\n",
"\n",
"def dl(link, path, filename):\n",
" r = requests.get(wcms_baseurl + link, cookies=cookies, stream=True)\n",
" r.raise_for_status()\n",
" \n",
" os.makedirs(target_dir + path, exist_ok=True) # create directory if not existent\n",
" with open(target_dir + path + filename, 'wb') as fd:\n",
" for chunk in r.iter_content(1024):\n",
" fd.write(chunk)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false,
"scrolled": true
},
"outputs": [
],
"source": [
"done_count = 0\n",
"total_count = len(wcms_links)\n",
"for link, loc in wcms_links.items():\n",
" print('Downloading [%d / %d]: %s -> %s' % (done_count+1, total_count, link, loc[1] + loc[0]))\n",
" dl(link, loc[1], loc[0])\n",
" done_count += 1"
]
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [Root]",
"language": "python",
"name": "Python [Root]"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
# coding: utf-8
# This script downloads files from the phpwcms backend
#
# ### Required data:
# In[19]:
# ENTER DATA HERE:
# JSON file that maps each backend download link to a [filename, folder] pair.
backend_links_file = 'wcms-files.json' # contains download links+paths of wcms backend, generated by a JS script
# Local directory the downloaded files are written under (created on demand).
target_dir = 'wcms-files/' # target directory for downloads
# Site root; the relative links from the JSON file are appended to it directly,
# so keep the trailing '/'.
wcms_baseurl = 'http://www.your-site.org/'
# Session cookie sent with every download request. 'XYZ' is a placeholder:
# replace it with the id of a logged-in backend session, and avoid committing
# a real session id together with the notebook.
cookies = {'PHPSESSID': 'XYZ'} # for Phpwcms backend
# # Initialize
# In[5]:
import json
import codecs
# In[3]:
# small helper function to peek into variables
def glance(collection, n=3):
    """Print the size of ``collection`` and return its first ``n`` entries.

    Args:
        collection: A dict (or dict subclass) or any other sized iterable
            such as a list or tuple.
        n: Maximum number of entries to return (default 3).

    Returns:
        A list with the first ``n`` entries. For a dict these are
        ``(key, value)`` pairs in iteration order; for anything else they
        are the elements themselves.

    Raises:
        TypeError: If ``collection`` has no length or is not iterable.
    """
    print("%d entries" % len(collection))
    # isinstance() instead of an exact type() comparison: dict subclasses
    # (OrderedDict, defaultdict, ...) used to fall through both branches and
    # silently produce None instead of a preview.
    if isinstance(collection, dict):
        entries = collection.items()
    else:
        # Lists, tuples and other sized iterables are previewed uniformly.
        entries = collection
    return list(entries)[:n]
# # Load Data
# In[10]:
# Load the link map generated by the JS script: every key is a backend
# download link, every value a [filename, folder] pair.
# The built-in open() decodes text natively in Python 3; codecs.open() is a
# legacy API that the standard library has superseded with open().
with open(backend_links_file, encoding='utf-8') as f:
    wcms_links = json.load(f)

glance(wcms_links) # quick glance
# # Download
# In[15]:
import os
import requests
def dl(link, path, filename, timeout=None):
    """Download one backend file to ``target_dir``/``path``/``filename``.

    Args:
        link: Download link relative to ``wcms_baseurl``.
        path: Sub-directory below ``target_dir``; a trailing slash is optional.
        filename: Name of the file to create inside that directory.
        timeout: Optional timeout in seconds handed to ``requests.get``.
            ``None`` (the default) waits indefinitely, as before.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        OSError: If the directory or the file cannot be created.
    """
    r = requests.get(wcms_baseurl + link, cookies=cookies, stream=True, timeout=timeout)
    try:
        r.raise_for_status()

        # Join the components instead of concatenating them: folders arrive
        # both with and without a trailing slash ('PDF/' vs 'Images'), and
        # plain concatenation turned 'Images' + 'x.jpg' into 'Imagesx.jpg'.
        # Leading slashes are stripped so os.path.join cannot drop target_dir.
        directory = os.path.join(target_dir, path.lstrip('/'))
        os.makedirs(directory, exist_ok=True)  # create directory if not existent
        with open(os.path.join(directory, filename.lstrip('/')), 'wb') as fd:
            for chunk in r.iter_content(1024):
                fd.write(chunk)
    finally:
        # With stream=True the connection stays checked out of the pool until
        # the response is closed, so release it even when an error occurs.
        r.close()
# In[18]:
# Fetch every file listed in the link map, announcing each one before it starts.
# done_count only advances after a download finished, so it always holds the
# number of files completed so far.
total_count = len(wcms_links)
done_count = 0
for link in wcms_links:
    loc = wcms_links[link]  # [filename, folder]
    progress = (done_count + 1, total_count, link, loc[1] + loc[0])
    print('Downloading [%d / %d]: %s -> %s' % progress)
    dl(link, loc[1], loc[0])
    done_count = done_count + 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment