Skip to content

Instantly share code, notes, and snippets.

@YieldNull
Created November 17, 2015 15:27
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save YieldNull/f45b4872889ab4deb072 to your computer and use it in GitHub Desktop.
Save YieldNull/f45b4872889ab4deb072 to your computer and use it in GitHub Desktop.
备份QQ空间说说

#功能 备份QQ空间说说(先用浏览器登陆获取Cookie)

#用法 先将Cookie及用户写入config.json中,格式如下

{
	"shuoshuo":"<shuoshuo_cookie>",
	"friends":"<friends_cookie>",
	"user":"<your_qq_number>",
	"host_user":"<the_owner_qq>"
}

然后

python shuoshuo.py config.json

若出现错误,可能是friends的cookie的问题,把函数set_config中设置friends_skey语句中的p_skey改成skey试试

获取Cookie

获取"说说"Cookie

登录QZone,按F12打开浏览器调试窗口。点击进入说说(非个人中心)。在调试窗口Network面板中搜索taotao.qq.com,找到emotion_cgi_msglist_v6项。点击详情,在Request Headers中找到Cookie并记录之。

获取friends Cookie

程序首先获取好友列表。同上,在获取到“说说”的Cookie之后,继续在Network面板中搜索friends,找到包含friends的URL项,同理可获得Cookie

#说说解析

说说以json格式传到客户端。下面对其格式进行分类解析,也即解析msglist中数据格式

{
	"msglist":[
		{
			... //数据格式在下面分类解析
		},...
	]
}

##纯文本

{
	"content":"说说内容",
	"created_time":timestamp
}

content,created_time必然存在于每条说说。前者内容可以为空。下面所有类型将二者省略不写。

##图文

{
	"pic":[
		{
			"pic_id":"图片id", // 格式为"your_qq_number,*****,*****"
			"url2":"最大图链接"
		},...
	]
}

##转发

{
	"pic":[
		{
			"pic_id":"图片id", // 格式为",*****,*****"
			"url2":"最大图链接"
		},...
	],
	"rt_uin": 源作者QQ号,
	"rt_uinname":"源作者备注",
	"rt_createTime":"源说说创建时间" // 格式"YYYY年MM月DD日",
	"rt_con":{
		"content":"源作者所著内容"
	}
}

若转发纯文本,则pic项不存在

##评论 cmtnum=0则commentlist不存在

{
	"cmtnum":评论数,即原始评论数,不包含回复
	"commentlist":[
		{
			"content":"评论内容",
			"create_time":评论时间,
			"name":"评论者备注",
			"uin":评论者QQ号,
			"reply_num":在此评论下子评论(回复)数量,
			"list_3":[
				{
					"content":"评论(回复)内容",
					"create_time":评论时间,
					"name":"评论者备注",
					"uin":评论者QQ号,
				}...
			],
			"pictotal":图片总数
			"pic":[
				{
					"hd_url":"最大图片地址"
				}...
			]
		}...
	]
}

reply_num不为0,则在其同级目录下会有list_3。reply_num即list_3中项目个数。此列表与上面的评论列表一样,是按评论的时间先后顺序排列的。

要是评论中有图片,在评论的同级目录下会有pic以及pictotal。只有顶级评论中才能加图片,不能在回复中添加图片。

二级评论(回复)中,content字段首部包含@{uin:qq,nick:nickname,who:1,auto:1}字段,用来表示回复谁。

#数据库模型

CREATE TABLE friends(
	qqnum       INTEGER PRIMARY KEY,
	name        TEXT
);

CREATE TABLE shuoshuo(
	timestamp       INTEGER,
	content         TEXT,
	rt_qqnum        INTEGER,
	rt_timestamp    INTEGER,
	rt_content      TEXT,
	FOREIGN KEY(rt_qqnum) REFERENCES friends(qqnum)
);

CREATE TABLE comments(
	timestamp       INTEGER,
	content         TEXT,
	qqnum           TEXT NOT NULL,
	to_shuoshuo     INTEGER,
	to_user         INTEGER,
	to_comment      INTEGER,
	FOREIGN KEY(to_shuoshuo) REFERENCES shuoshuo(timestamp),
	FOREIGN KEY(to_comment) REFERENCES comments(timestamp),
	FOREIGN KEY(to_user) REFERENCES friends(qqnum)
);

CREATE TABLE pictures(
	id				INTEGER PRIMARY KEY AUTOINCREMENT,
	url				TEXT NOT NULL,
	to_shuoshuo		INTEGER,
	to_comment    	INTEGER,
	FOREIGN KEY(to_shuoshuo) REFERENCES shuoshuo(timestamp),
	FOREIGN KEY(to_comment) REFERENCES comments(timestamp)
);

##查询说说 首先,程序会获取用户的好友列表。并将其QQ号、昵称、备注存于数据库(包括用户本身)。表shuoshuo中,content可以为空,因为可以发图片说说。rt开头的字段记录转发信息,也可以为空。

查询图片

由于评论中也可以出现图片,因此表pictures中也要有to_comment字段。查询说说中的图片时,假若已知说说的id(即其timestamp)为your_id,使用以下SQL语句:

SELECT * FROM pictures WHERE to_shuoshuo=your_id AND to_comment IS NULL;

假若要查询评论的图片,其id(即其timestamp)为your_comment_id,则使用下面的SQL语句进行查询:

SELECT * FROM pictures WHERE to_shuoshuo IS NULL AND to_comment=your_comment_id;

查询评论

查询说说的顶级评论:

SELECT * FROM comments WHERE to_shuoshuo=your_shuoshuo_id AND to_comment IS NULL;

查询顶级评论下的回复:

SELECT * FROM comments WHERE to_shuoshuo IS NULL AND to_comment=your_comment_id;

顶级评论中,to_user字段为空,在次级评论中,可以查询到to_user字段。评论可以以timestamp进行排序,或者以rowid(SQLite隐含的行号)进行排序,因为存储时,是按照时间顺序存进去的。

##Attention 本来是准备只备份自己的,于是就有了数据库中表friends,而且还设置了指向它的外键。

当要备份别人的说说时,可能会出现问题。但是我试了一下,好像并没有完整性异常。。。。

有问题的话就把外键删了好了。

##示例

#!/usr/bin/env python
# coding:utf-8
"""
Backup qzone shuoshuo using user given local Cookie
Created on 16/10/2015
"""
import urllib
import urllib2
import gzip
import cStringIO
import math
import json
import codecs
import re
import time
import sys
import sqlite3
import os
import datetime
__author__ = 'hejunjie.net'

# Module-wide SQLite connection; opened by download_shuoshuo().
dbconn = None
# QQ number of the account whose shuoshuo are backed up; set by set_config().
HOST_USER = None
# Display name of that account; set by get_shuoshuo() from the first response.
HOST_USER_NAME = None

# HTTP headers shared by every request.
BASE_HEADERS = [
    ('Accept', '*/*'),
    ('Accept-Encoding', 'gzip, deflate, sdch'),
    ('Accept-Language', 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4'),
    ('Connection', 'keep-alive'),
    # Adjacent string literals are concatenated as-is, so the space that
    # separates "(KHTML, like Gecko)" from "Ubuntu" has to live inside one
    # of the literals.
    ('User-Agent', 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) '
                   'Ubuntu Chromium/45.0.2454.101 Chrome/45.0.2454.101 Safari/537.36'),
]

# URL of the friends list. The `uin` and `g_tk` values are placeholders that
# set_config() rewrites.
FRIENDS_URL = ('http://user.qzone.qq.com/p/r/cgi-bin/tfriend/friend_hat_get.cgi?'
               'hat_seed=1&uin=865013616&fupdate=1&g_tk=22222222')

# HTTP headers of the friends request. set_config() appends the Cookie and
# Referer entries.
FRIENDS_HEADERS = BASE_HEADERS + [
    ('Host', 'user.qzone.qq.com'),
]

# URL of shuoshuo. The `uin`, `hostUin` and `g_tk` values are placeholders
# rewritten by set_config(); `pos` and `num` are rewritten per page by
# get_shuoshuo().
SHUOSHUO_URL = ('http://taotao.qq.com/cgi-bin/emotion_cgi_msglist_v6?'
                'uin=865013616&hostUin=865013616&inCharset=utf-8&outCharset=utf-8'
                '&notice=0&sort=0&pos=0&num=20'
                '&cgi_host=http%3A%2F%2Ftaotao.qq.com%2Fcgi-bin%2Femotion_cgi_msglist_v6'
                '&code_version=1&format=jsonp&need_private_comment=1&g_tk=222222')

# HTTP headers of the shuoshuo request. set_config() appends the Cookie entry.
SHUOSHUO_HEADERS = BASE_HEADERS + [
    ('Host', 'taotao.qq.com'),
    ('Referer', 'http://ctc.qzs.qq.com/qzone/app/mood_v6/html/index.html'),
]
def error(msg):
    """
    Report a fatal problem and terminate the program.

    The message is written to stderr and the process exits with status 1,
    so a calling shell or script can tell that the run failed.

    :param msg: human-readable description of the failure
    :raise SystemExit: always
    """
    sys.stderr.write('%s\n' % msg)
    sys.exit(1)
def set_config(conf):
    """
    Apply the user's configuration to the module-level request settings.

    Appends the Cookie headers (and the friends Referer) to the header
    lists, stores the host user, and rewrites the `g_tk` token and the QQ
    numbers embedded in FRIENDS_URL and SHUOSHUO_URL.

    :param conf: dict loaded from the JSON config file; the keys 'user',
                 'host_user', 'friends' and 'shuoshuo' are read
    """
    global FRIENDS_URL, FRIENDS_HEADERS
    global SHUOSHUO_URL, SHUOSHUO_HEADERS
    global HOST_USER

    # cookie config
    user = conf['user']  # QQ of the user who is using this program
    HOST_USER = conf['host_user']  # QQ of the owner of the web pages
    FRIENDS_HEADERS.append(('Cookie', conf['friends']))
    SHUOSHUO_HEADERS.append(('Cookie', conf['shuoshuo']))
    # The Referer must carry the configured QQ number, not the word "user".
    FRIENDS_HEADERS.append(('Referer', 'http://user.qzone.qq.com/%s' % user))

    # g_tk config
    skey_pattern = r'\bskey=(.*?)(;|$)'
    shuo_skey = re.search(skey_pattern, conf['shuoshuo'])
    # Prefer p_skey for the friends request and fall back to skey when the
    # cookie has no p_skey. `(;|$)` also accepts the key as the last cookie.
    friends_skey = (re.search(r'\bp_skey=(.*?)(;|$)', conf['friends'])
                    or re.search(skey_pattern, conf['friends']))
    if not shuo_skey or not friends_skey:
        error('Cookie str invalid. Please login in QZone and get valid Cookie')
    shuo_gtk = calcu_gtk(shuo_skey.group(1))
    friends_gtk = calcu_gtk(friends_skey.group(1))
    FRIENDS_URL = re.sub(r'g_tk=\d+', 'g_tk=%d' % friends_gtk, FRIENDS_URL)
    SHUOSHUO_URL = re.sub(r'g_tk=\d+', 'g_tk=%d' % shuo_gtk, SHUOSHUO_URL)

    # user and host user config
    SHUOSHUO_URL = re.sub(r'uin=\d+&hostUin=\d+',
                          'uin=%s&hostUin=%s' % (HOST_USER, HOST_USER),
                          SHUOSHUO_URL)
    FRIENDS_URL = re.sub(r'uin=\d+', 'uin=%s' % user, FRIENDS_URL)
def create_db():
    """
    Create the tables friends, shuoshuo, comments and pictures.

    Runs the whole schema as one script on the module-level connection
    `dbconn` and commits it.
    """
    # pictures.to_comment must reference the table `comments`; there is no
    # table named `comment` in this schema.
    sql = """
    CREATE TABLE friends(
        qqnum           INTEGER PRIMARY KEY,
        name            TEXT
    );
    CREATE TABLE shuoshuo(
        timestamp       INTEGER,
        content         TEXT,
        rt_qqnum        INTEGER,
        rt_timestamp    INTEGER,
        rt_content      TEXT,
        FOREIGN KEY(rt_qqnum) REFERENCES friends(qqnum)
    );
    CREATE TABLE comments(
        timestamp       INTEGER,
        content         TEXT,
        qqnum           TEXT NOT NULL,
        to_shuoshuo     INTEGER,
        to_user         INTEGER,
        to_comment      INTEGER,
        FOREIGN KEY(to_shuoshuo) REFERENCES shuoshuo(timestamp),
        FOREIGN KEY(to_comment) REFERENCES comments(timestamp),
        FOREIGN KEY(to_user) REFERENCES friends(qqnum)
    );
    CREATE TABLE pictures(
        id              INTEGER PRIMARY KEY AUTOINCREMENT,
        url             TEXT NOT NULL,
        to_shuoshuo     INTEGER,
        to_comment      INTEGER,
        FOREIGN KEY(to_shuoshuo) REFERENCES shuoshuo(timestamp),
        FOREIGN KEY(to_comment) REFERENCES comments(timestamp)
    );
    """
    cursor = dbconn.cursor()
    cursor.executescript(sql)
    dbconn.commit()
def calcu_gtk(skey):
    """
    Derive the 'g_tk' request token from the 'skey' value of a cookie.

    :param skey: the key string taken from the cookie
    :return: the running hash of `skey`, truncated to its low 31 bits
    """
    digest = 5381
    for char in skey:
        # digest * 33 is the same as digest + (digest << 5)
        digest = digest * 33 + ord(char)
    return digest & 0x7FFFFFFF
def do_http(url, headers, encoding='utf-8'):
    """
    send http request to server using GET

    The body is decompressed only when it is actually compressed: gzip is
    detected from the Content-Encoding header or the gzip magic bytes, and
    deflate from the header. Any other body is decoded as received.

    :param url: address to request
    :param headers: http headers, a list of (name, value) tuples
    :param encoding: decode data from server using that encoding
    :return: response from server as unicode
    """
    opener = urllib2.build_opener()
    opener.addheaders = headers
    try:
        res = opener.open(url)
        try:
            body = res.read()
            content_encoding = (res.info().get('Content-Encoding') or '').lower()
        finally:
            # release the connection even when reading fails
            res.close()
    finally:
        opener.close()

    if content_encoding == 'gzip' or body[:2] == b'\x1f\x8b':
        body = gzip.GzipFile(fileobj=cStringIO.StringIO(body)).read()
    elif content_encoding == 'deflate':
        import zlib
        try:
            body = zlib.decompress(body)
        except zlib.error:
            # some servers send a raw deflate stream without the zlib header
            body = zlib.decompress(body, -zlib.MAX_WBITS)
    return body.decode(encoding)
def decode_jsonp(jsonp):
    """
    transform jsonp data ( as the form of "_Callback(.*);" ) to json

    The payload is located by the first '(' and the last ')', so it does
    not depend on the callback name's length or on a trailing ';'. Text
    that is already bare JSON (or has no call wrapper at all) is returned
    stripped but otherwise unchanged.

    :param jsonp: raw response text
    :return: transformed data
    """
    jsonp = jsonp.strip()
    if jsonp[:1] in ('{', '['):
        # already plain JSON; parentheses inside it belong to the data
        return jsonp
    start = jsonp.find('(')
    end = jsonp.rfind(')')
    if start == -1 or end < start:
        return jsonp
    return jsonp[start + 1:end]
def get_friends():
    """
    Download the friends list and save it into the `friends` table.

    Each entry of the response's 'data' mapping whose key parses as an
    integer is stored as (qqnum, name), the name being the entry's
    'realname'. Entries with any other key are skipped.
    """
    print('Getting friends list......')
    payload = json.loads(decode_jsonp(do_http(FRIENDS_URL, FRIENDS_HEADERS)))
    cur = dbconn.cursor()
    print('Storing firiends info......')
    insert_sql = 'INSERT INTO friends(qqnum,name) VALUES(?,?)'
    for uin, info in payload['data'].items():
        try:
            qqnum = int(uin)
        except ValueError:
            # not a QQ number, nothing to store for this key
            continue
        cur.execute(insert_sql, (qqnum, info['realname']))
    dbconn.commit()
def _parse_rt_date(text):
    """Convert a 'YYYY年MM月DD日' date string into a local-time Unix timestamp."""
    parsed = datetime.datetime.strptime(text.encode('utf-8'), '%Y年%m月%d日')
    return int(time.mktime(parsed.timetuple()))


def _split_reply_target(content):
    """Split a reply into (addressed QQ number, text without the '@{...}' prefix)."""
    match = re.search(r'^@{uin:(\d+),.*?}', content)
    if match is None:
        # no explicit addressee: the reply is attributed to the host user
        return HOST_USER, content
    return match.group(1), re.sub(r'^@{.*?}', '', content)


def _store_comment(cursor, comment, shuoshuo_timestamp):
    """Insert one top-level comment together with its pictures and replies."""
    c_timestamp = comment['create_time']
    cursor.execute('INSERT INTO comments('
                   'timestamp,content,qqnum,to_shuoshuo) VALUES(?,?,?,?)',
                   (c_timestamp, comment['content'], comment['uin'],
                    shuoshuo_timestamp))

    # comment pictures
    for p in comment.get('pic') or []:
        cursor.execute('INSERT INTO pictures('
                       'url,to_comment) VALUES(?,?)',
                       (p['hd_url'], c_timestamp))

    # comment to comment
    for rep in comment.get('list_3') or []:
        r_user, r_content = _split_reply_target(rep['content'])
        cursor.execute('INSERT INTO comments('
                       'timestamp,content,qqnum,to_comment,to_user)'
                       'VALUES(?,?,?,?,?)',
                       (rep['create_time'], r_content,
                        rep['uin'], c_timestamp, r_user))


def store_shuoshuo(data):
    """
    store shuoshuo data to db

    For every entry of data['msglist'] the shuoshuo itself, its pictures,
    its comments, the comments' pictures and the replies are inserted, then
    everything is committed once. A null 'msglist' is treated as an empty
    page.

    :param data: json data
    """
    cursor = dbconn.cursor()
    for root in data['msglist'] or []:
        # base
        content = root['content']
        timestamp = root['created_time']

        # share
        rt_qqnum = root.get('rt_uin')
        rt_timestamp = root.get('rt_createTime')
        rt_content = root.get('rt_con')
        if rt_content:
            rt_content = rt_content['content']
            # the source date may be absent; only parse it when present
            if rt_timestamp is not None:
                rt_timestamp = _parse_rt_date(rt_timestamp)

        # I once got an IntegrityError which says that timestamp is duplicated
        try:
            cursor.execute('INSERT INTO shuoshuo('
                           'content,timestamp,rt_qqnum,rt_content,rt_timestamp) VALUES(?,?,?,?,?)',
                           (content, timestamp, rt_qqnum, rt_content, rt_timestamp))
        except sqlite3.IntegrityError:
            # the exception class lives in sqlite3; a bare `IntegrityError`
            # is an undefined name in this module
            print('%d duplicated' % timestamp)
            continue

        # pictures
        for p in root.get('pic') or []:
            cursor.execute('INSERT INTO pictures('
                           'url,to_shuoshuo) VALUES(?,?)',
                           (p['url2'], timestamp))

        # comments
        for comment in root.get('commentlist') or []:
            _store_comment(cursor, comment, timestamp)
    dbconn.commit()
def get_shuoshuo():
    """
    Download every shuoshuo page by page and store each page in the db.

    The first response also supplies the total number of entries and the
    owner's name, which is kept in HOST_USER_NAME. Paging stops once the
    requested range covers that total.
    """
    global HOST_USER_NAME
    print('Dowloading shuoshuo......')
    page_size = 40  # entries requested per query
    pages_done = 0
    total = None
    while True:
        print('Sending HTTP request......')
        paging = 'pos=%d&num=%d' % (pages_done * page_size, page_size)
        page_url = re.sub(r'pos=\d+&num=\d+', paging, SHUOSHUO_URL)
        page = json.loads(decode_jsonp(do_http(page_url, SHUOSHUO_HEADERS)))
        if pages_done == 0:
            # the amount and the owner's name come with the first query
            usrinfo = page['usrinfo']
            total = usrinfo['msgnum']
            HOST_USER_NAME = usrinfo['name']
        pages_done += 1
        store_shuoshuo(page)
        covered = pages_done * page_size
        print('Got %d/%d. Storing to database......' % (
            min(covered, total), total))
        if covered >= total:
            break
def download_shuoshuo():
    """
    Run the whole backup into a fresh SQLite file.

    An existing '<HOST_USER>.db' is deleted first. The database is then
    created and filled with the friends list and the shuoshuo, and finally
    renamed to '<HOST_USER>_<HOST_USER_NAME>.db'.
    """
    global dbconn
    db = '%s.db' % HOST_USER
    if os.path.exists(db):
        os.remove(db)
    dbconn = sqlite3.connect(db)
    try:
        create_db()
        get_friends()
        get_shuoshuo()
    finally:
        # close the connection before the file is renamed, and also when
        # the download fails part-way
        dbconn.close()
    re_db = '%s_%s.db' % (HOST_USER, HOST_USER_NAME)
    os.rename(db, re_db)
    print(u'Work done! Data was stored in "%s"' % re_db)
if __name__ == '__main__':
    # Exactly one argument is expected: the path of the JSON config file.
    if len(sys.argv) != 2:
        error('Usage: shuoshuo.py conf.json')
    # load config
    config_path = sys.argv[1]
    with open(config_path, 'r') as config_file:
        conf = json.load(config_file)
    set_config(conf)
    download_shuoshuo()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment