Skip to content

Instantly share code, notes, and snippets.

@wjch
Created May 20, 2019 09:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wjch/82ddafac5b8ebaffa342dbe23f95e5b6 to your computer and use it in GitHub Desktop.
Save wjch/82ddafac5b8ebaffa342dbe23f95e5b6 to your computer and use it in GitHub Desktop.
将wordpress文章中的新浪图片下载到本地,并替换
#-*- coding=utf-8 -*-
"""
作者:Abbey
博客:www.abbeyok.com
脚本简介:脚本通过查找wordpress文章中的新浪图床图片,自动下载到本地目录,并替换文章中的链接。
脚本使用说明:
1. 安装好Python2.7。如果是linux系统,应该可以免去该步骤
2. 安装依赖包:
pip install requests
pip install pymysql
3. 将脚本放到wordpress目录下。比如我的是:/www/wwwroot/www.abbeyok.com
4. 运行:
python wordpress_sina_image_replace.py
"""
import os
import requests
import pymysql
import sys
import re
import datetime
import subprocess
import time
# ANSI escape sequences used to highlight status messages on the terminal.
_theme = {
    'warn': '\033[1;37;44m',  # bold white text on a blue background
    'end': '\033[0m',  # reset all attributes
}

# Every path used by the script is relative to the current directory, so it
# must be started from the folder that contains wp-config.php.  The message
# asks the user to put the script next to the WordPress sources.
if not os.path.exists('./wp-config.php'):
    print(_theme['warn'] + '请将脚本放到wordpress源码同目录下!' + _theme['end'])
    # Exit with a non-zero status: nothing was done, so callers (shell
    # scripts, cron) must not see this as a successful run.
    sys.exit(1)
class WPRP(object):
    """Copy Sina-hosted images used by published posts to the local uploads folder.

    Creating an instance reads the database credentials from
    ``wp-config.php``, connects to MySQL and dumps the database to
    ``backup.sql``.  ``replace_post`` then downloads every Sina image that a
    post references into ``wp-content/uploads/<year>/<month>`` and rewrites
    the post so that it points at the local copy.

    NOTE(review): the SQL statements use the table name ``wp_posts``, i.e.
    they assume the default ``wp_`` table prefix.
    """

    config_path = './wp-config.php'
    download_path_base = './wp-content/uploads/'

    # Sina image hosts, in the order they are tried when a host stops
    # serving a picture: wx1..wx4, ws1..ws4, ww1..ww4.
    _SINA_HEADS = tuple(
        prefix + str(i) for prefix in ('wx', 'ws', 'ww') for i in range(1, 5)
    )
    # Path segments of scaled-down renditions; they are swapped for the
    # full-size "/large/" rendition before downloading.
    _SIZE_SEGMENTS = ('/mw690/', '/mw1024/', '/mw2048/')
    # Seconds to wait for the image host before giving up on a request.
    _REQUEST_TIMEOUT = 30
    # Matches ``define('NAME', 'value');`` while tolerating optional
    # whitespace and either quote character.
    _DEFINE_TEMPLATE = r"define\(\s*['\"]{name}['\"]\s*,\s*['\"](.*?)['\"]\s*\)\s*;"

    def __init__(self):
        """Read the configuration, connect to the database and back it up."""
        (self.db_user, self.db_pass, self.db_host,
         self.db_database, self.db_charset) = self.read_config()
        # Downloads go to <uploads>/<year>/<zero-padded month>.
        now = datetime.datetime.now()
        self.download_path = '{}{}/{:02d}'.format(
            self.download_path_base, now.year, now.month)
        self.db = pymysql.connect(
            host=self.db_host,
            user=self.db_user,
            password=self.db_pass,
            db=self.db_database,
            charset=self.db_charset,
        )
        self.cursor = self.db.cursor()
        self.backup_db()

    def backup_db(self):
        """Dump the database to ``backup.sql`` and wait for the dump to finish.

        Raises:
            RuntimeError: if ``mysqldump`` exits with a non-zero status, so
                that no post is modified without a usable backup.
        """
        # An argument list (no shell) keeps credentials that contain shell
        # metacharacters from breaking or hijacking the command line.
        cmd = [
            'mysqldump',
            '--user={}'.format(self.db_user),
            '--password={}'.format(self.db_pass),
            self.db_database,
        ]
        with open('backup.sql', 'wb') as dump_file:
            sp = subprocess.Popen(cmd, stdout=dump_file)
            ct = 0
            # poll() returns None for as long as the process is running.
            while sp.poll() is None:
                print('backup databases...{}s'.format(ct))
                ct += 1
                time.sleep(1)
        if sp.returncode != 0:
            raise RuntimeError(
                'mysqldump exited with status {}'.format(sp.returncode))
        print(_theme['warn'] + '数据库备份好啦!' + _theme['end'])

    def read_config(self):
        """Extract the database settings from the WordPress configuration file.

        Returns:
            tuple: ``(db_user, db_pass, db_host, db_database, db_charset)``.

        Raises:
            ValueError: if one of the settings is not defined in the file.
        """
        with open(self.config_path, 'r') as f:
            config_content = f.read()
        return tuple(
            self._config_value(config_content, name)
            for name in ('DB_USER', 'DB_PASSWORD', 'DB_HOST',
                         'DB_NAME', 'DB_CHARSET')
        )

    def _config_value(self, config_content, name):
        """Return the value of the first ``define(name, value)`` in the text."""
        pattern = self._DEFINE_TEMPLATE.format(name=re.escape(name))
        match = re.search(pattern, config_content)
        if match is None:
            raise ValueError(
                '{} is not defined in {}'.format(name, self.config_path))
        return match.group(1)

    def get_posts(self):
        """Return the ``(ID, post_content)`` rows of all published posts."""
        sql = 'select ID,post_content from wp_posts where post_type="post" and post_status="publish";'
        self.cursor.execute(sql)
        return self.cursor.fetchall()

    def replace_post(self, id, content):
        """Download the Sina images of one post and point the post at the copies.

        Args:
            id: value of the post's ``ID`` column.
            content: current ``post_content`` of the post.

        An image that cannot be fetched from any Sina host, or whose download
        fails, is reported and its link is left untouched.  The post is only
        written back when at least one link was replaced.
        """
        found = re.findall(r'src="(https?://w[wxs]\d\.sinaimg\.cn.*?)"', content)
        # The same picture may be embedded several times; handle it once.
        sina_pics = []
        for pic in found:
            if pic not in sina_pics:
                sina_pics.append(pic)

        new_content = content
        for old_pic in sina_pics:
            # Always fetch the full-size rendition.
            pic = old_pic
            for segment in self._SIZE_SEGMENTS:
                if segment in pic:
                    pic = pic.replace(segment, '/large/')
            pic = self.get_valid_pic(pic)
            if pic is None:
                print('skip post_id:{},pic:{}, no sina host serves it'.format(id, old_pic))
                continue
            pic_name = os.path.basename(pic)
            pic_download_path = self.download_path + '/' + pic_name
            # Dropping the leading "." turns "./wp-content/..." into the
            # site-relative URL "/wp-content/...".
            new_image_url = pic_download_path.replace('.', '', 1)
            print('replace post_id:{},pic:{},local path:{}, new image url:{}'.format(id, pic, pic_download_path, new_image_url))
            try:
                self.download_pic(pic, pic_download_path)
            except (requests.RequestException, EnvironmentError) as exc:
                print('skip post_id:{},pic:{}, download failed: {}'.format(id, pic, exc))
                continue
            new_content = new_content.replace(old_pic, new_image_url)

        if new_content != content:
            sql = 'update wp_posts set post_content=%s where ID=%s'
            self.cursor.execute(sql, (new_content, id))
            self.db.commit()

    def download_pic(self, image_url, download_path):
        """Stream ``image_url`` into the file ``download_path``.

        The target directory is created when it does not exist yet.

        Raises:
            requests.RequestException: on network errors or a non-success
                HTTP status, so an error page is never stored as an image.
        """
        target_dir = os.path.dirname(download_path)
        if target_dir and not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        r = requests.get(image_url, stream=True, timeout=self._REQUEST_TIMEOUT)
        try:
            r.raise_for_status()
            with open(download_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        finally:
            r.close()

    def test_pic(self, image_url):
        """Return True when ``image_url`` answers with HTTP status 200.

        Network errors count as "not available" instead of aborting the run.
        """
        try:
            # stream=True: only the status is needed, not the image body.
            r = requests.get(image_url, stream=True, timeout=self._REQUEST_TIMEOUT)
        except requests.RequestException:
            return False
        try:
            return r.status_code == 200
        finally:
            r.close()

    def get_valid_pic(self, image_url):
        """Find a Sina host that still serves the picture.

        Args:
            image_url: picture URL on a ``wx``/``ws``/``ww`` ``sinaimg`` host.

        Returns:
            ``image_url`` itself when it is reachable, otherwise the same URL
            on the first alternative host that answers, or ``None`` when no
            host does (or the URL has no recognisable Sina host part).
        """
        if self.test_pic(image_url):
            return image_url
        print(image_url)
        match = re.search(r'//(w[wxs]\d)\.sinaimg', image_url)
        if match is None:
            return None
        old_head = match.group(1)
        old_host = '//' + old_head + '.'
        for head in self._SINA_HEADS:
            if head == old_head:
                continue
            candidate = image_url.replace(old_host, '//' + head + '.', 1)
            if self.test_pic(candidate):
                return candidate
            print(candidate)
        return None
if __name__ == '__main__':
    # Script entry point: build the replacer, then hand every fetched post
    # row (ID first, content second) to replace_post.
    replacer = WPRP()
    for row in replacer.get_posts():
        post_id, post_body = row[0], row[1]
        replacer.replace_post(post_id, post_body)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment