fffonion/dm123bot.py

## dm123bot.py
#coding=utf-8
__version__=1.4
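#1.2 Fixed a bug that occurred when completing relative image URLs
#1.3 Added htmlescape
#1.4 Improved the BBCode emitted for flash (resized); font switched to Microsoft YaHei, title enlarged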
import urllib2,re,win32clipboard as clipboard,time,win32con,os,sys
import random
import httplib2
import datetime
baseurl='http://www.dm123.cn'
def html2bbcode(str):
    """Convert an HTML fragment into forum BBCode.

    ``<img>`` tags become ``[img]url[/img]`` (a URL that does not start
    with ``http`` is prefixed with ``baseurl + '/'``), ``<embed>`` tags
    become ``[flash w=720 h=405]url[/flash]``, ``<p ...>``, ``</p>`` and
    ``</embed>`` are dropped, HTML entities are decoded with
    ``htmlescape`` and every remaining ``<`` / ``>`` is turned into
    ``[`` / ``]``.  Runs of blank lines are collapsed and the
    ``STAFF`` / ``CAST`` / ``PV`` section markers become ``[h3]`` headings.

    :param str: HTML source text.  The name shadows the builtin but is kept
        so existing callers are unaffected.
    :returns: the converted BBCode text.
    """
    text = str

    def _img(match):
        src = match.group(1)
        full = src if src.startswith('http') else baseurl + '/' + src
        return '[img]' + full + '[/img]'

    def _flash(match):
        return '[flash w=720 h=405]' + match.group(1) + '[/flash]'

    # Each tag is matched and rewritten in a single pass with a callback.
    # The previous code spliced the scraped URL back into a new regex
    # without escaping it ('?', '.', '+' in URLs are metacharacters) and
    # used a greedy '.+' that could swallow several tags at once.
    text = re.sub(r'<img\b[^>]*?\ssrc="([^"]+)"[^>]*>', _img, text)
    text = re.sub(r'<embed\b[^>]*?\ssrc="([^"]*)"[^>]*>', _flash, text)

    for pattern in ('<p.*?>', '</p>', '</embed>'):
        text = re.sub(pattern, '', text)

    text = htmlescape(text)
    text = text.replace('<', '[').replace('>', ']')

    # The result of replace() must be assigned back; discarding it made
    # this loop spin forever on any input containing a blank line.
    while '\n\n' in text:
        text = text.replace('\n\n', '\n')

    text = text.replace('[br/]', '').replace('[br /]', '')
    for heading in ('STAFF', 'CAST', 'PV'):
        text = text.replace('【%s】' % heading, '[h3]%s[/h3]' % heading)
    return text
def add_format(str):
    """Wrap a post in the forum font tag and expand its ``#`` markers.

    ``#title`` / ``#/title`` turn into the opening / closing tags of a
    size-4, bold title in one randomly chosen colour; ``#desc`` and
    ``#/desc`` are simply removed.

    :param str: post text containing the markers.
    :returns: the text wrapped in ``[font=...]...[/font]``.
    """
    palette = ['#66ccff', 'Red', 'Orange', 'Indigo', 'Green',
               'Yellow Green', 'Teal', 'Pink', 'Dark Olive', 'Dark Slate']
    # One colour is drawn per call and shared by every title in the text.
    title_open = '[size=4][color=' + random.choice(palette) + '][b]'
    substitutions = (
        ('#title', title_open),
        ('#/title', '[/b][/color][/size]'),
        ('#desc', ''),
        ('#/desc', ''),
    )
    formatted = '[font="Microsoft YaHei,微软雅黑;文泉驿微米黑"]' + str + '[/font]'
    for marker, replacement in substitutions:
        formatted = formatted.replace(marker, replacement)
    return formatted

def htmlescape(str):
    """Decode HTML character references found in ``str``.

    Named references are looked up in a small table; a name that is not in
    the table is replaced by ``'?'``.  Numeric references (decimal
    ``&#65;`` or hexadecimal ``&#x41;``) become the corresponding
    character, except code points 12539 and 65381, which are replaced by
    ``'.'``.  A numeric reference that cannot be converted is left in the
    text unchanged.

    :param str: text that may contain ``&...;`` references.  The name
        shadows the builtin but is kept so existing callers are unaffected.
    :returns: the text with the references replaced.
    """
    text = str
    # NOTE(review): 'copy', 'reg' and 'bull' do not map to their own
    # characters; this looks like a deliberate fallback for the GB2312
    # clipboard text, so they are left as they were - confirm.
    named = {'amp': '&', 'nbsp': ' ', 'quot': '"', 'lt': '<', 'gt': '>',
             'copy': '?', 'reg': '?', 'ldquo': '“', 'rdquo': '”',
             'mdash': '—', 'bull': '…',
             # Was the per-mille sign; &hellip; is the horizontal ellipsis.
             'hellip': '…'}

    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr     # Python 3

    def replc(match):
        body = match.group(2)
        if match.group(1) != '#':
            return named.get(body, '?')
        try:
            if body[:1] in ('x', 'X'):
                code = int(body[1:], 16)
            else:
                code = int(body)
            if code in (12539, 65381):
                return '.'
            return to_char(code)
        except (ValueError, OverflowError):
            # Not a usable code point: keep the reference as written
            # instead of aborting the whole conversion.
            return match.group(0)

    htmlre = re.compile(r"&(#?)(\d{1,5}|\w{1,8}|[a-z]+);")
    return htmlre.sub(replc, text)

def setClipboard(str):
    """Replace the Windows clipboard contents with ``str`` as ``CF_TEXT``.

    The clipboard is closed again even when emptying or setting it raises,
    so a failed copy does not leave the clipboard held open by this
    process.

    :param str: the text to place on the clipboard.
    """
    clipboard.OpenClipboard()
    try:
        clipboard.EmptyClipboard()
        clipboard.SetClipboardData(win32con.CF_TEXT, str)
    finally:
        clipboard.CloseClipboard()

def makeNum(num):
    """Return ``num`` written as a Chinese numeral.

    Only numbers of at most two digits are supported; ``0`` yields an
    empty string and a value of 100 or more raises ``IndexError``.

    :param num: integer to convert.
    :returns: the numeral, e.g. ``12`` -> ``'十二'``.
    """
    digits = ['', '一', '二', '三', '四', '五', '六', '七', '八', '九']
    # divmod keeps both parts integers regardless of how '/' behaves
    # (the old 'num/10' produced a float index under true division).
    tens, ones = divmod(num, 10)
    result = ''
    if tens > 0:
        if tens > 1:
            result = digits[tens]
        result += '十'
    result += digits[ones]
    return result


if __name__=='__main__':
    # Script flow:
    #   1. download the season index page from dm123.cn;
    #   2. load the per-show records from elem.txt, or scrape them and
    #      append them to elem.txt;
    #   3. copy each show's formatted introduction to the clipboard and ask
    #      for the forum post id it was posted under;
    #   4. write one index file per weekday into _indexes/.

    # Python 2 only: make implicit str <-> unicode conversions use GBK.
    reload(sys)
    sys.setdefaultencoding('gbk')
    ht=httplib2.Http()
    hd={'User-Agent': 'Mozilla/5.0 (Windows NT 6.0; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0','Connection': 'Keep-Alive','Accept-Encoding':'gzip'}
    yearmon=int(time.strftime('%Y%m',time.localtime(time.time())))
    # '//' keeps these integers however '/' is defined.
    season=(yearmon%100-1)//3
    seasonch=['春','夏','秋','冬'][season]
    mon=['4','7','10','1'][season]
    year=str(yearmon//100+(1 if mon=='1' else 0))
    print('使用前先看readme嗯~【度娘网盘上有\n'+'-'*60)
    print('现在是'+str(yearmon//100)+'年'+str(yearmon%100)+'月 -> '+'下载'+year+'年'+seasonch+'季新番数据')
    # NOTE(review): `year` was derived from the default month above and is
    # not recomputed when another month is typed here - confirm intended.
    mon=raw_input('要指定其他季度请在此输入(1,4,7,10)，按回车继续：') or mon
    mon="%.2d" % (int(mon))
    contenturl='http://www.dm123.cn/data/'+year+'/'+year+mon
    print('下载首页...')
    resp,content=ht.request(contenturl,headers=hd)
    if int(resp['status'])>=400:
        raw_input('您太超前了，还木有'+mon+'月新番介绍，按回车退出……')
        os._exit(0)
    res=re.findall('#C3C3C3(.+)</table',content,re.DOTALL)[0]
    rows=re.findall('tr >(.*?)</tr',res,re.DOTALL)
    entries=[]
    if os.path.exists('elem.txt'):
        # Reuse the records saved by an earlier run instead of scraping.
        with open('elem.txt','r') as cache:
            records=cache.read().decode('utf-8').split('SEPSEPSEPSEP\n\n')
        for record in records:
            # Only the first four commas separate fields.  Splitting on
            # every comma silently dropped each show whose description
            # contained one.
            fields=record.split(',',4)
            if len(fields)==5:
                elemdict={'name':'','time':'','url':'','thumb':'','desc':'','postid':''}
                elemdict['time'],elemdict['url'],elemdict['name'],elemdict['thumb'],elemdict['desc']=fields
                entries.append(elemdict)
    else:
        for row in rows:
            elemdict={'name':'','time':'','url':'','thumb':'','desc':'','postid':''}
            elemdict['time']='-'.join(re.findall(r'\d+', re.findall('2" >(.+)</td',row)[0]))
            elemdict['url'],elemdict['name'],thumb=re.findall('f="(.+)" target="_blank">(.+)</a>.+<img height="74" src="(.+)" width="20',row)[0]
            elemdict['thumb']=baseurl+thumb
            page=ht.request(elemdict['url'],headers=hd)[1]
            elemdict['desc']=html2bbcode(re.findall('div id="nrzw">(.*?)</div',page,re.DOTALL)[0])
            print('Get '+elemdict['name'])
            with open('elem.txt','a') as cache:
                cache.write(('%s,%s,%s,%s,%sSEPSEPSEPSEP\n\n'%(elemdict['time'],elemdict['url'],elemdict['name'],elemdict['thumb'],elemdict['desc'])).encode('utf-8'))
            entries.append(elemdict)
    raw_input('将要生成各番组简介，全部完成后将生成索引页，按回车继续……')

    pid_prompt='输入其楼层pid,回车自动+1：'
    for i,entry in enumerate(entries):
        s=add_format('#title%s#/title\n#desc%s#/desc'%(entry['name'],entry['desc']))
        try:
            s=s.decode('gb2312','ignore').encode('gb2312')
            setClipboard(htmlescape(s))
        except Exception:
            # Best effort: the text is still written to temp.txt below.
            print('encoding error, see temp.txt')
        with open('temp.txt','w') as tmp:
            tmp.write(s.encode('utf-8').replace('\r\n','\n'))
        print(entry['name']+' 简介已复制到剪贴板')
        pid=raw_input(pid_prompt)
        # Auto-increment needs a numeric previous pid.  For the first entry
        # (or after a non-numeric pid) ask again instead of crashing on
        # int('').
        while not pid:
            previous=entries[i-1]['postid'] if i>0 else ''
            if previous.isdigit():
                pid=str(int(previous)+1)
            else:
                pid=raw_input(pid_prompt)
        entry['postid']=pid
    print('\n'+'-'*60+'\n')
    baseid=raw_input('输入帖子id：')
    if not os.path.exists('_indexes'):
        os.mkdir('_indexes')
    # Truncate the index files left over from a previous run.
    for p in os.listdir('_indexes'):
        open(os.path.join('_indexes', p), 'w').close()
    for entry in entries:
        #http://www.kmgtp.org/forums.php?action=viewtopic&topicid=21427&page=p268050#pid268050
        indexstr='[url=http://www.kmgtp.org/forums.php?action=viewtopic&topicid='+baseid+\
        '&page=p'+entry['postid']+'#pid'+entry['postid']+'][img]'\
        +entry['thumb']+'[/img]\n[b]'+entry['name']+'[/b][/url]\n'
        # Group by weekday number ('0' = Sunday); dates that do not parse
        # go to the "undefined" file.
        try:
            weekday=datetime.datetime.strptime(entry['time'], '%Y-%m-%d').strftime('%w')
        except ValueError:
            weekday='undefined'
        with open('_indexes/_%s.txt' % weekday,'a') as index_file:
            index_file.write(indexstr)
    # The index is written to _indexes/, not to the clipboard (that call
    # was disabled), so the message now says so.
    print('索引已写入 _indexes 目录.')
    raw_input('木有了，按回车退出……')

## three_span.py
#coding:gbk
import win32clipboard as clipboard
import re
def setClipboard(str):
    """Replace the Windows clipboard contents with ``str`` as ``CF_TEXT``.

    The clipboard is closed again even when emptying or setting it raises.

    :param str: the text to place on the clipboard.
    """
    # three_span.py never imported win32con, so the CF_TEXT lookup below
    # raised NameError when this script was run on its own.
    import win32con

    clipboard.OpenClipboard()
    try:
        clipboard.EmptyClipboard()
        clipboard.SetClipboardData(win32con.CF_TEXT, str)
    finally:
        clipboard.CloseClipboard()

# Build span.txt: for each weekday, a heading followed by the entries of
# _indexes/_<weekday>.txt laid out three per row.
import errno

fname_list = ['0', '1', '2', '3', '4', '5', '6', 'undefined']
weekday_jp = ['日', '月', '火', '水', '木', '金', '土', '未定']
weekday_cn = ['日', '一', '二', '三', '四', '五' ,'六', '？']
weekday_color = ['Red', 'Orange', 'Yellow Green', 'Medium', 'Purple', 'Navy', 'Gray', 'Black']

# Image that fills the unused cells of the last row.
PLACEHOLDER_IMG = '[img]http://ww2.sinaimg.cn/mw600/436919cbjw1e8tbx9n07nj205k022aa6.jpg[/img]'


def split_entries(raw):
    """Split index text into its individual ``...[/url]`` entries.

    Whatever follows the last ``[/url]`` (normally just a newline) is not
    an entry and is dropped; it used to be turned into a bare ``[/url]``
    that produced a bogus extra row.
    """
    return [(piece + '[/url]').strip('\n')
            for piece in raw.split('[/url]') if piece.strip()]


def layout_rows(entries):
    """Return the BBCode rows that show ``entries`` three per row.

    The last row is padded with ``PLACEHOLDER_IMG`` when it has fewer than
    three entries.
    """
    rows = []
    for start in range(0, len(entries), 3):
        cells = entries[start:start + 3]
        cells = cells + [PLACEHOLDER_IMG] * (3 - len(cells))
        rows.append(
            '[span style="width:90%; margin:0 auto; overflow:auto; _display:inline-block;"]'
            '[span style="width: 250px; float: left;"]\n' + cells[0] +
            '[/span][span style="width: 260px; float: left;"]\n    ' + cells[1] +
            '[/span][span]\n' + cells[2] + '[/span][/span]\n')
    return rows


# span.txt is opened once and closed reliably instead of being reopened
# (and never closed) for every single write.
with open('span.txt', 'w') as span_file:
    for idx in range(8):
        span_file.write('[font="Microsoft YaHei,微软雅黑;文泉驿微米黑"]'
            '[size=4][color=%s][b]%s[/b][/color][/size] 星期%s[/font]\n' % (
                weekday_color[idx],
                weekday_jp[idx],
                weekday_cn[idx]))
        try:
            with open('_indexes/_%s.txt' % fname_list[idx]) as index_file:
                raw = index_file.read()
        except IOError as err:
            # A weekday without any show has no index file; treat it as
            # empty rather than aborting the whole run.
            if err.errno != errno.ENOENT:
                raise
            raw = ''
        for row in layout_rows(split_entries(raw)):
            span_file.write(row)
        span_file.write('\n')
	#coding=utf-8
	__version__=1.4
	#1.2 修正补全图片url时出现的bug
	#1.3 htmlescape
	#1.4 flash的BBCOD优化，调整大小；字体调整为雅黑，标题放大
	import urllib2,re,win32clipboard as clipboard,time,win32con,os,sys
	import random
	import httplib2
	import datetime
	baseurl='http://www.dm123.cn'
	def html2bbcode(str):
	    """Convert an HTML fragment into forum BBCode.

	    ``<img>`` tags become ``[img]url[/img]`` (a URL that does not start
	    with ``http`` is prefixed with ``baseurl + '/'``), ``<embed>`` tags
	    become ``[flash w=720 h=405]url[/flash]``, ``<p ...>``, ``</p>`` and
	    ``</embed>`` are dropped, HTML entities are decoded with
	    ``htmlescape`` and every remaining ``<`` / ``>`` is turned into
	    ``[`` / ``]``.  Runs of blank lines are collapsed and the
	    ``STAFF`` / ``CAST`` / ``PV`` section markers become ``[h3]`` headings.

	    :param str: HTML source text.  The name shadows the builtin but is kept
	        so existing callers are unaffected.
	    :returns: the converted BBCode text.
	    """
	    # This copy had lost its indentation and its regexes were corrupted
	    # ('.*?' had become '.?'); both are restored here.
	    text = str

	    def _img(match):
	        src = match.group(1)
	        full = src if src.startswith('http') else baseurl + '/' + src
	        return '[img]' + full + '[/img]'

	    def _flash(match):
	        return '[flash w=720 h=405]' + match.group(1) + '[/flash]'

	    # Each tag is matched and rewritten in a single pass with a callback,
	    # so the scraped URL is never spliced unescaped into a regex.
	    text = re.sub(r'<img\b[^>]*?\ssrc="([^"]+)"[^>]*>', _img, text)
	    text = re.sub(r'<embed\b[^>]*?\ssrc="([^"]*)"[^>]*>', _flash, text)

	    for pattern in ('<p.*?>', '</p>', '</embed>'):
	        text = re.sub(pattern, '', text)

	    text = htmlescape(text)
	    text = text.replace('<', '[').replace('>', ']')

	    # The result of replace() must be assigned back; discarding it made
	    # this loop spin forever on any input containing a blank line.
	    while '\n\n' in text:
	        text = text.replace('\n\n', '\n')

	    text = text.replace('[br/]', '').replace('[br /]', '')
	    for heading in ('STAFF', 'CAST', 'PV'):
	        text = text.replace('【%s】' % heading, '[h3]%s[/h3]' % heading)
	    return text
	def add_format(str):
	    """Wrap a post in the forum font tag and expand its ``#`` markers.

	    ``#title`` / ``#/title`` turn into the opening / closing tags of a
	    size-4, bold title in one randomly chosen colour; ``#desc`` and
	    ``#/desc`` are simply removed.

	    :param str: post text containing the markers.
	    :returns: the text wrapped in ``[font=...]...[/font]``.
	    """
	    # This copy had lost its indentation; the structure is restored here.
	    colors = ['#66ccff', 'Red', 'Orange', 'Indigo', 'Green',
	              'Yellow Green', 'Teal', 'Pink', 'Dark Olive', 'Dark Slate']
	    replc_dict = {
	        '#title': '[size=4][color=' + random.choice(colors) + '][b]',
	        '#/title': '[/b][/color][/size]',
	        '#desc': '',
	        '#/desc': '',
	    }
	    str_new = '[font="Microsoft YaHei,微软雅黑;文泉驿微米黑"]' + str + '[/font]'
	    for marker in replc_dict:
	        str_new = str_new.replace(marker, replc_dict[marker])
	    return str_new

	def htmlescape(str):
	    """Decode HTML character references found in ``str``.

	    Named references are looked up in a small table; a name that is not in
	    the table is replaced by ``'?'``.  Numeric references (decimal
	    ``&#65;`` or hexadecimal ``&#x41;``) become the corresponding
	    character, except code points 12539 and 65381, which are replaced by
	    ``'.'``.  A numeric reference that cannot be converted is left in the
	    text unchanged.

	    :param str: text that may contain ``&...;`` references.  The name
	        shadows the builtin but is kept so existing callers are unaffected.
	    :returns: the text with the references replaced.
	    """
	    # This copy had lost its indentation and its pattern had '\|' where
	    # the alternation '|' belongs; both are restored here.
	    text = str
	    # NOTE(review): 'copy', 'reg' and 'bull' do not map to their own
	    # characters; this looks like a deliberate fallback for the GB2312
	    # clipboard text, so they are left as they were - confirm.
	    named = {'amp': '&', 'nbsp': ' ', 'quot': '"', 'lt': '<', 'gt': '>',
	             'copy': '?', 'reg': '?', 'ldquo': '“', 'rdquo': '”',
	             'mdash': '—', 'bull': '…',
	             # Was the per-mille sign; &hellip; is the horizontal ellipsis.
	             'hellip': '…'}

	    try:
	        to_char = unichr  # Python 2
	    except NameError:
	        to_char = chr     # Python 3

	    def replc(match):
	        body = match.group(2)
	        if match.group(1) != '#':
	            return named.get(body, '?')
	        try:
	            if body[:1] in ('x', 'X'):
	                code = int(body[1:], 16)
	            else:
	                code = int(body)
	            if code in (12539, 65381):
	                return '.'
	            return to_char(code)
	        except (ValueError, OverflowError):
	            # Not a usable code point: keep the reference as written
	            # instead of aborting the whole conversion.
	            return match.group(0)

	    htmlre = re.compile(r"&(#?)(\d{1,5}|\w{1,8}|[a-z]+);")
	    return htmlre.sub(replc, text)

	def setClipboard(str):
	    """Replace the Windows clipboard contents with ``str`` as ``CF_TEXT``.

	    The clipboard is closed again even when emptying or setting it raises.
	    This copy had lost its indentation; the structure is restored here.

	    :param str: the text to place on the clipboard.
	    """
	    clipboard.OpenClipboard()
	    try:
	        clipboard.EmptyClipboard()
	        clipboard.SetClipboardData(win32con.CF_TEXT, str)
	    finally:
	        clipboard.CloseClipboard()

	def makeNum(num):
	    """Return ``num`` written as a Chinese numeral.

	    Only numbers of at most two digits are supported; ``0`` yields an
	    empty string and a value of 100 or more raises ``IndexError``.
	    This copy had lost its indentation; the structure is restored here.

	    :param num: integer to convert.
	    :returns: the numeral, e.g. ``12`` -> ``'十二'``.
	    """
	    digits = ['', '一', '二', '三', '四', '五', '六', '七', '八', '九']
	    # divmod keeps both parts integers regardless of how '/' behaves.
	    tens, ones = divmod(num, 10)
	    result = ''
	    if tens > 0:
	        if tens > 1:
	            result = digits[tens]
	        result += '十'
	    result += digits[ones]
	    return result


	if __name__=='__main__':
	    # This copy had lost its indentation; the nesting is restored here.
	    # Script flow:
	    #   1. download the season index page from dm123.cn;
	    #   2. load the per-show records from elem.txt, or scrape them and
	    #      append them to elem.txt;
	    #   3. copy each show's formatted introduction to the clipboard and ask
	    #      for the forum post id it was posted under;
	    #   4. write one index file per weekday into _indexes/.

	    # Python 2 only: make implicit str <-> unicode conversions use GBK.
	    reload(sys)
	    sys.setdefaultencoding('gbk')
	    ht=httplib2.Http()
	    hd={'User-Agent': 'Mozilla/5.0 (Windows NT 6.0; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0','Connection': 'Keep-Alive','Accept-Encoding':'gzip'}
	    yearmon=int(time.strftime('%Y%m',time.localtime(time.time())))
	    # '//' keeps these integers however '/' is defined.
	    season=(yearmon%100-1)//3
	    seasonch=['春','夏','秋','冬'][season]
	    mon=['4','7','10','1'][season]
	    year=str(yearmon//100+(1 if mon=='1' else 0))
	    print('使用前先看readme嗯~【度娘网盘上有\n'+'-'*60)
	    print('现在是'+str(yearmon//100)+'年'+str(yearmon%100)+'月 -> '+'下载'+year+'年'+seasonch+'季新番数据')
	    # NOTE(review): `year` was derived from the default month above and is
	    # not recomputed when another month is typed here - confirm intended.
	    mon=raw_input('要指定其他季度请在此输入(1,4,7,10)，按回车继续：') or mon
	    mon="%.2d" % (int(mon))
	    contenturl='http://www.dm123.cn/data/'+year+'/'+year+mon
	    print('下载首页...')
	    resp,content=ht.request(contenturl,headers=hd)
	    if int(resp['status'])>=400:
	        raw_input('您太超前了，还木有'+mon+'月新番介绍，按回车退出……')
	        os._exit(0)
	    res=re.findall('#C3C3C3(.+)</table',content,re.DOTALL)[0]
	    rows=re.findall('tr >(.*?)</tr',res,re.DOTALL)
	    entries=[]
	    if os.path.exists('elem.txt'):
	        # Reuse the records saved by an earlier run instead of scraping.
	        with open('elem.txt','r') as cache:
	            records=cache.read().decode('utf-8').split('SEPSEPSEPSEP\n\n')
	        for record in records:
	            # Only the first four commas separate fields.  Splitting on
	            # every comma silently dropped each show whose description
	            # contained one.
	            fields=record.split(',',4)
	            if len(fields)==5:
	                elemdict={'name':'','time':'','url':'','thumb':'','desc':'','postid':''}
	                elemdict['time'],elemdict['url'],elemdict['name'],elemdict['thumb'],elemdict['desc']=fields
	                entries.append(elemdict)
	    else:
	        for row in rows:
	            elemdict={'name':'','time':'','url':'','thumb':'','desc':'','postid':''}
	            elemdict['time']='-'.join(re.findall(r'\d+', re.findall('2" >(.+)</td',row)[0]))
	            elemdict['url'],elemdict['name'],thumb=re.findall('f="(.+)" target="_blank">(.+)</a>.+<img height="74" src="(.+)" width="20',row)[0]
	            elemdict['thumb']=baseurl+thumb
	            page=ht.request(elemdict['url'],headers=hd)[1]
	            elemdict['desc']=html2bbcode(re.findall('div id="nrzw">(.*?)</div',page,re.DOTALL)[0])
	            print('Get '+elemdict['name'])
	            with open('elem.txt','a') as cache:
	                cache.write(('%s,%s,%s,%s,%sSEPSEPSEPSEP\n\n'%(elemdict['time'],elemdict['url'],elemdict['name'],elemdict['thumb'],elemdict['desc'])).encode('utf-8'))
	            entries.append(elemdict)
	    raw_input('将要生成各番组简介，全部完成后将生成索引页，按回车继续……')

	    pid_prompt='输入其楼层pid,回车自动+1：'
	    for i,entry in enumerate(entries):
	        s=add_format('#title%s#/title\n#desc%s#/desc'%(entry['name'],entry['desc']))
	        try:
	            s=s.decode('gb2312','ignore').encode('gb2312')
	            setClipboard(htmlescape(s))
	        except Exception:
	            # Best effort: the text is still written to temp.txt below.
	            print('encoding error, see temp.txt')
	        with open('temp.txt','w') as tmp:
	            tmp.write(s.encode('utf-8').replace('\r\n','\n'))
	        print(entry['name']+' 简介已复制到剪贴板')
	        pid=raw_input(pid_prompt)
	        # Auto-increment needs a numeric previous pid.  For the first entry
	        # (or after a non-numeric pid) ask again instead of crashing on
	        # int('').
	        while not pid:
	            previous=entries[i-1]['postid'] if i>0 else ''
	            if previous.isdigit():
	                pid=str(int(previous)+1)
	            else:
	                pid=raw_input(pid_prompt)
	        entry['postid']=pid
	    print('\n'+'-'*60+'\n')
	    baseid=raw_input('输入帖子id：')
	    if not os.path.exists('_indexes'):
	        os.mkdir('_indexes')
	    # Truncate the index files left over from a previous run.
	    for p in os.listdir('_indexes'):
	        open(os.path.join('_indexes', p), 'w').close()
	    for entry in entries:
	        #http://www.kmgtp.org/forums.php?action=viewtopic&topicid=21427&page=p268050#pid268050
	        indexstr='[url=http://www.kmgtp.org/forums.php?action=viewtopic&topicid='+baseid+\
	        '&page=p'+entry['postid']+'#pid'+entry['postid']+'][img]'\
	        +entry['thumb']+'[/img]\n[b]'+entry['name']+'[/b][/url]\n'
	        # Group by weekday number ('0' = Sunday); dates that do not parse
	        # go to the "undefined" file.
	        try:
	            weekday=datetime.datetime.strptime(entry['time'], '%Y-%m-%d').strftime('%w')
	        except ValueError:
	            weekday='undefined'
	        with open('_indexes/_%s.txt' % weekday,'a') as index_file:
	            index_file.write(indexstr)
	    # The index is written to _indexes/, not to the clipboard (that call
	    # was disabled), so the message now says so.
	    print('索引已写入 _indexes 目录.')
	    raw_input('木有了，按回车退出……')
	#coding:gbk
	import win32clipboard as clipboard
	import re
	def setClipboard(str):
	    """Replace the Windows clipboard contents with ``str`` as ``CF_TEXT``.

	    The clipboard is closed again even when emptying or setting it raises.
	    This copy had lost its indentation; the structure is restored here.

	    :param str: the text to place on the clipboard.
	    """
	    # three_span.py never imported win32con, so the CF_TEXT lookup below
	    # raised NameError when this script was run on its own.
	    import win32con

	    clipboard.OpenClipboard()
	    try:
	        clipboard.EmptyClipboard()
	        clipboard.SetClipboardData(win32con.CF_TEXT, str)
	    finally:
	        clipboard.CloseClipboard()

	# Build span.txt: for each weekday, a heading followed by the entries of
	# _indexes/_<weekday>.txt laid out three per row.
	# This copy had lost its indentation; the nesting is restored here.
	import errno

	fname_list = ['0', '1', '2', '3', '4', '5', '6', 'undefined']
	weekday_jp = ['日', '月', '火', '水', '木', '金', '土', '未定']
	weekday_cn = ['日', '一', '二', '三', '四', '五' ,'六', '？']
	weekday_color = ['Red', 'Orange', 'Yellow Green', 'Medium', 'Purple', 'Navy', 'Gray', 'Black']

	# Image that fills the unused cells of the last row.
	PLACEHOLDER_IMG = '[img]http://ww2.sinaimg.cn/mw600/436919cbjw1e8tbx9n07nj205k022aa6.jpg[/img]'


	def split_entries(raw):
	    """Split index text into its individual ``...[/url]`` entries.

	    Whatever follows the last ``[/url]`` (normally just a newline) is not
	    an entry and is dropped; it used to be turned into a bare ``[/url]``
	    that produced a bogus extra row.
	    """
	    return [(piece + '[/url]').strip('\n')
	            for piece in raw.split('[/url]') if piece.strip()]


	def layout_rows(entries):
	    """Return the BBCode rows that show ``entries`` three per row.

	    The last row is padded with ``PLACEHOLDER_IMG`` when it has fewer than
	    three entries.
	    """
	    rows = []
	    for start in range(0, len(entries), 3):
	        cells = entries[start:start + 3]
	        cells = cells + [PLACEHOLDER_IMG] * (3 - len(cells))
	        rows.append(
	            '[span style="width:90%; margin:0 auto; overflow:auto; _display:inline-block;"]'
	            '[span style="width: 250px; float: left;"]\n' + cells[0] +
	            '[/span][span style="width: 260px; float: left;"]\n    ' + cells[1] +
	            '[/span][span]\n' + cells[2] + '[/span][/span]\n')
	    return rows


	# span.txt is opened once and closed reliably instead of being reopened
	# (and never closed) for every single write.
	with open('span.txt', 'w') as span_file:
	    for idx in range(8):
	        span_file.write('[font="Microsoft YaHei,微软雅黑;文泉驿微米黑"]'
	            '[size=4][color=%s][b]%s[/b][/color][/size] 星期%s[/font]\n' % (
	                weekday_color[idx],
	                weekday_jp[idx],
	                weekday_cn[idx]))
	        try:
	            with open('_indexes/_%s.txt' % fname_list[idx]) as index_file:
	                raw = index_file.read()
	        except IOError as err:
	            # A weekday without any show has no index file; treat it as
	            # empty rather than aborting the whole run.
	            if err.errno != errno.ENOENT:
	                raise
	            raw = ''
	        for row in layout_rows(split_entries(raw)):
	            span_file.write(row)
	        span_file.write('\n')