Created
May 20, 2019 09:54
-
-
Save wjch/82ddafac5b8ebaffa342dbe23f95e5b6 to your computer and use it in GitHub Desktop.
将wordpress文章中的新浪图片下载到本地,并替换
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#-*- coding=utf-8 -*-
"""
Author: Abbey
Blog: www.abbeyok.com
About: this script finds the Sina image-host (sinaimg.cn) pictures referenced by WordPress posts, downloads them into a local directory and rewrites the links in the posts.
Usage:
1. Install Python 2.7. On Linux this step can usually be skipped.
2. Install the dependencies:
pip install requests
pip install pymysql
3. Put the script in the WordPress directory. Mine, for example, is: /www/wwwroot/www.abbeyok.com
4. Run:
python wordpress_sina_image_replace.py
"""
import datetime
import os
import re
import subprocess
import sys
import time

import pymysql
import requests
_theme={ | |
'warn':'\033[1;37;44m', | |
'end':'\033[0m' | |
} | |
# Every path the script uses (wp-config.php, wp-content/uploads/) is relative
# to the current directory, so refuse to run outside a WordPress root.
if not os.path.exists('./wp-config.php'):
    print(_theme['warn']+'请将脚本放到wordpress源码同目录下!'+_theme['end'])
    # Non-zero status: this is a failure, not a normal termination.
    sys.exit(1)
class WPRP(object):
    """Move Sina-hosted images referenced by WordPress posts into local uploads.

    Constructing an instance reads the database credentials from
    ``wp-config.php``, opens a pymysql connection and writes a ``mysqldump``
    backup to ``backup.sql``.  ``get_posts`` lists the published posts and
    ``replace_post`` downloads every ``sinaimg.cn`` image of one post into
    ``wp-content/uploads/<year>/<month>`` and rewrites the post content.
    """

    # Both paths are relative to the directory the script is started from.
    config_path = './wp-config.php'
    download_path_base = './wp-content/uploads/'
    # Seconds to wait for the image host before giving up on a request.
    request_timeout = 30
    # Size segments of a Sina image URL that are swapped for the full size.
    sizes = ('mw690', 'mw1024', 'mw2048')
    # Mirror host prefixes tried, in this order, when an image is unreachable.
    heads = tuple(
        prefix + str(i) for prefix in ('wx', 'ws', 'ww') for i in range(1, 5)
    )

    def __init__(self):
        """Read the configuration, connect to the database and back it up."""
        (self.db_user, self.db_pass, self.db_host,
         self.db_database, self.db_charset) = self.read_config()
        now = datetime.datetime.now()
        # e.g. ./wp-content/uploads/2019/05
        self.download_path = '{}{}/{:02d}'.format(
            self.download_path_base, now.year, now.month)
        self.db = pymysql.connect(
            host=self.db_host,
            user=self.db_user,
            password=self.db_pass,
            db=self.db_database,
            charset=self.db_charset,
        )
        self.cursor = self.db.cursor()
        self.backup_db()

    def backup_db(self):
        """Dump the database to ``backup.sql`` with ``mysqldump``.

        Raises:
            RuntimeError: if ``mysqldump`` exits with a non-zero status, so
                posts are never rewritten without a usable backup.
        """
        # An argument list (no shell) keeps credentials with special
        # characters from being interpreted by the shell.
        cmd = [
            'mysqldump',
            '--user=' + self.db_user,
            '--password=' + self.db_pass,
            self.db_database,
        ]
        with open('backup.sql', 'wb') as dump_file:
            sp = subprocess.Popen(cmd, stdout=dump_file)
            ct = 0
            # poll() returns None for as long as the child is still running.
            while sp.poll() is None:
                print('backup databases...{}s'.format(ct))
                ct += 1
                time.sleep(1)
        if sp.returncode != 0:
            raise RuntimeError(
                'mysqldump failed with exit status {}'.format(sp.returncode))
        print(_theme['warn']+'数据库备份好啦!'+_theme['end'])

    def read_config(self):
        """Extract the database settings from ``self.config_path``.

        Accepts single or double quotes and optional whitespace inside the
        ``define(...)`` call, e.g. ``define( 'DB_NAME', 'wp' );``.

        Returns:
            The tuple ``(user, password, host, database, charset)``.

        Raises:
            ValueError: if one of the constants is not defined in the file.
        """
        with open(self.config_path, 'r') as f:
            config_content = f.read()
        values = []
        for key in ('DB_USER', 'DB_PASSWORD', 'DB_HOST', 'DB_NAME', 'DB_CHARSET'):
            pattern = r"""define\s*\(\s*['"]{0}['"]\s*,\s*(['"])(.*?)\1\s*\)\s*;""".format(key)
            match = re.search(pattern, config_content)
            if match is None:
                raise ValueError(
                    '{} is not defined in {}'.format(key, self.config_path))
            values.append(match.group(2))
        return tuple(values)

    def get_posts(self):
        """Return ``(ID, post_content)`` rows for every published post."""
        # NOTE: the table name assumes the default ``wp_`` table prefix.
        sql = 'select ID,post_content from wp_posts where post_type="post" and post_status="publish";'
        self.cursor.execute(sql)
        return self.cursor.fetchall()

    def replace_post(self, id, content):
        """Download the Sina images of one post and point the post at the copies.

        Args:
            id: ``ID`` of the row in ``wp_posts``.
            content: current ``post_content`` of that row.

        Images that cannot be fetched from any mirror keep their original
        link.  The row is only updated when at least one link was replaced.
        """
        sina_pics = re.findall(
            r'src="(https?://w[wxs]\d\.sinaimg\.cn.*?)"', content)
        changed = False
        seen = set()
        for old_pic in sina_pics:
            # The same image may be embedded several times in one post.
            if old_pic in seen:
                continue
            seen.add(old_pic)
            pic = old_pic
            for size in self.sizes:
                pic = pic.replace('/' + size + '/', '/large/')
            pic = self.get_valid_pic(pic)
            if pic is None:
                print('skip post_id:{},pic:{} (no reachable mirror)'.format(id, old_pic))
                continue
            # Drop any query string so it does not end up in the file name.
            pic_name = os.path.basename(pic.split('?', 1)[0])
            pic_download_path = self.download_path + '/' + pic_name
            # './wp-content/...' on disk is '/wp-content/...' as a site URL.
            if pic_download_path.startswith('.'):
                new_image_url = pic_download_path[1:]
            else:
                new_image_url = pic_download_path
            print('replace post_id:{},pic:{},local path:{}, new image url:{}'.format(id,pic,pic_download_path,new_image_url))
            try:
                self.download_pic(pic, pic_download_path)
            except (requests.RequestException, IOError, OSError) as err:
                # Keep the remote link rather than pointing at a missing file.
                print('skip post_id:{},pic:{} ({})'.format(id, pic, err))
                continue
            content = content.replace(old_pic, new_image_url)
            changed = True
        if changed:
            sql = 'update wp_posts set post_content=%s where ID=%s'
            self.cursor.execute(sql, (content, id))
            self.db.commit()

    def download_pic(self, image_url, download_path):
        """Stream ``image_url`` into the file ``download_path``.

        The target directory is created when missing.

        Raises:
            requests.RequestException: on a network error or a non-2xx reply.
        """
        target_dir = os.path.dirname(download_path)
        if target_dir and not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        r = requests.get(image_url, stream=True, timeout=self.request_timeout)
        try:
            # Never store an HTTP error page under an image file name.
            r.raise_for_status()
            with open(download_path, 'wb') as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        finally:
            r.close()

    def test_pic(self, image_url):
        """Return True when ``image_url`` answers with HTTP status 200."""
        try:
            # stream=True: only the status line and headers are needed.
            r = requests.get(
                image_url, stream=True, timeout=self.request_timeout)
        except requests.RequestException:
            return False
        try:
            return r.status_code == 200
        finally:
            r.close()

    def get_valid_pic(self, image_url):
        """Return a reachable URL for the image, trying the mirror hosts.

        ``image_url`` itself is tried first, then the same path on every
        other prefix listed in ``self.heads``.

        Returns:
            The first URL that answers with status 200, or ``None`` when no
            mirror serves the image.
        """
        if self.test_pic(image_url):
            return image_url
        print(image_url)
        match = re.search(r'//(w[wxs]\d+)\.sinaimg', image_url)
        if match is None:
            return None
        current = '//' + match.group(1) + '.'
        for head in self.heads:
            replacement = '//' + head + '.'
            if replacement == current:
                continue
            candidate = image_url.replace(current, replacement, 1)
            if self.test_pic(candidate):
                return candidate
            print(candidate)
        return None
# Script entry point: rewrite the Sina image links of every published post.
if __name__ == '__main__':
    wp = WPRP()
    # Each row is (ID, post_content), in the column order of get_posts().
    for post_id, post_content in wp.get_posts():
        wp.replace_post(post_id, post_content)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment