Skip to content

Instantly share code, notes, and snippets.

@fffonion
Last active December 23, 2015 14:39
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save fffonion/6650539 to your computer and use it in GitHub Desktop.
Save fffonion/6650539 to your computer and use it in GitHub Desktop.
A very, very ugly script for converting dm123 xinfan jieshao (new-anime introductions) to BBCode
#coding=utf-8
__version__=1.4
#1.2 修正补全图片url时出现的bug
#1.3 htmlescape
#1.4 flash的BBCode优化,调整大小;字体调整为雅黑,标题放大
import urllib2,re,win32clipboard as clipboard,time,win32con,os,sys
import random
import httplib2
import datetime
baseurl='http://www.dm123.cn'
def html2bbcode(str):
    """Convert an HTML fragment into BBCode.

    The conversion runs in this order:

    1. ``<img ... src="URL" ...>`` becomes ``[img]URL[/img]``; a URL that does
       not start with ``http`` is prefixed with ``baseurl``.
    2. ``<embed ... src="URL" ...>`` becomes ``[flash w=720 h=405]URL[/flash]``.
    3. ``<p ...>``, ``</p>`` and ``</embed>`` tags are removed.
    4. HTML character references are decoded with ``htmlescape``.
    5. Every remaining ``<`` / ``>`` is turned into ``[`` / ``]``.
    6. Runs of blank lines are collapsed and ``[br/]`` / ``[br /]`` removed.
    7. The headings ``STAFF``, ``CAST`` and ``PV`` wrapped in lenticular
       brackets are rewritten as ``[h3]`` headings.

    :param str: HTML source text.
    :return: the BBCode text.
    """
    text = str
    ignorelist = ['<p.*?>', '</p>', '</embed>']

    def img_to_bbcode(match):
        src = match.group(1)
        if not src.startswith('http'):
            # Strip leading slashes so root-relative paths do not end up as
            # "baseurl//path".
            src = baseurl + '/' + src.lstrip('/')
        return '[img]' + src + '[/img]'

    def embed_to_bbcode(match):
        return '[flash w=720 h=405]' + match.group(1) + '[/flash]'

    # One pass per tag type with a callback: each tag is matched on its own
    # (no greedy match swallowing neighbouring tags) and the URL is never
    # interpreted as a regex pattern or as a replacement template.
    text = re.sub(r'<img[^>]*?src="([^"]*)"[^>]*>', img_to_bbcode, text)
    text = re.sub(r'<embed[^>]*?src="([^"]*)"[^>]*>', embed_to_bbcode, text)

    for pattern in ignorelist:
        text = re.sub(pattern, '', text)

    text = htmlescape(text)
    text = text.replace('<', '[')
    text = text.replace('>', ']')

    # Collapse consecutive newlines; the result must be reassigned or the
    # loop would never terminate.
    while '\n\n' in text:
        text = text.replace('\n\n', '\n')

    text = text.replace('[br/]', '')
    text = text.replace('[br /]', '')
    for heading in ['STAFF', 'CAST', 'PV']:
        text = text.replace('【%s】' % heading, '[h3]%s[/h3]' % heading)
    return text
def add_format(str):
    """Wrap *str* in a font tag and expand the title/description markers.

    ``#title`` / ``#/title`` are replaced by an opening and closing
    size-4, bold, randomly coloured tag group; ``#desc`` / ``#/desc`` are
    simply removed.

    :param str: text containing the ``#title``/``#desc`` markers.
    :return: the formatted BBCode text.
    """
    palette = ['#66ccff', 'Red', 'Orange', 'Indigo', 'Green', 'Yellow Green',
               'Teal', 'Pink', 'Dark Olive', 'Dark Slate']
    title_open = '[size=4][color=' + random.choice(palette) + '][b]'
    substitutions = (
        ('#title', title_open),
        ('#/title', '[/b][/color][/size]'),
        ('#desc', ''),
        ('#/desc', ''),
    )
    formatted = '[font="Microsoft YaHei,微软雅黑;文泉驿微米黑"]' + str + '[/font]'
    for marker, replacement in substitutions:
        formatted = formatted.replace(marker, replacement)
    return formatted
def htmlescape(str):
    """Decode HTML character references in *str*.

    Numeric references (decimal ``&#NNN;`` and hexadecimal ``&#xHH;``) are
    replaced by the corresponding character, except the two katakana middle
    dots (code points 12539 and 65381), which become ``.``.  Named references
    are looked up in a small table.  Unknown names, malformed numbers and
    invalid code points are replaced by ``?``.

    :param str: text that may contain character references.
    :return: the text with references decoded.
    """
    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3
    named = {'amp': '&', 'nbsp': ' ', 'quot': '"', 'lt': '<', 'gt': '>',
             'copy': '?', 'reg': '?', 'ldquo': '“', 'rdquo': '”',
             'mdash': '—', 'bull': '•', 'hellip': '…'}
    middle_dots = (12539, 65381)

    def replc(match):
        marker, value = match.group(1), match.group(2)
        if marker != '#':
            return named.get(value, '?')
        try:
            if value[:1] in ('x', 'X'):
                code = int(value[1:], 16)
            else:
                code = int(value)
            if code in middle_dots:
                return '.'
            return to_char(code)
        except (ValueError, OverflowError):
            # Not a number, or outside the range the interpreter can encode.
            return '?'

    htmlre = re.compile(r"&(#?)(\d{1,5}|\w{1,8}|[a-z]+);")
    return htmlre.sub(replc, str)
def setClipboard(str):
    """Replace the Windows clipboard contents with *str* in ``CF_TEXT`` format.

    :param str: the text to place on the clipboard.
    """
    clipboard.OpenClipboard()
    try:
        clipboard.EmptyClipboard()
        clipboard.SetClipboardData(win32con.CF_TEXT, str)
    finally:
        # Always close the clipboard, even when setting the data fails, so it
        # is not left open by this process.
        clipboard.CloseClipboard()
def makeNum(num):
    """Return *num* written with Chinese numerals.

    Only values up to two digits are supported.  ``0`` yields an empty
    string, and ten is written without a leading "one".

    :param num: an integer.
    :return: the Chinese numeral string.
    """
    chn = ['', '一', '二', '三', '四', '五', '六', '七', '八', '九']
    # divmod performs floor division regardless of interpreter version or
    # "from __future__ import division", so both parts stay valid indexes.
    tens, ones = divmod(num, 10)
    result = ''
    if tens > 0:
        if tens > 1:
            result = chn[tens]
        result += '十'
    result += chn[ones]
    return result
if __name__=='__main__':
    # Interactive driver:
    #   1. pick the season to download and fetch its index page,
    #   2. load the entries from the elem.txt cache or scrape them,
    #   3. copy each formatted introduction to the clipboard and ask for the
    #      forum post id it was posted under,
    #   4. write one index file per weekday into the _indexes directory.

    # Python 2 only: make implicit str/unicode conversions use GBK.
    reload(sys)
    sys.setdefaultencoding('gbk')
    ht=httplib2.Http()
    hd={'User-Agent': 'Mozilla/5.0 (Windows NT 6.0; WOW64; rv:5.0) Gecko/20100101 Firefox/5.0','Connection': 'Keep-Alive','Accept-Encoding':'gzip'}

    # Month 1-3 selects the April season, 4-6 July, 7-9 October and 10-12
    # January of the following year.
    yearmon=int(time.strftime('%Y%m',time.localtime(time.time())))
    cur_year,cur_month=divmod(yearmon,100)
    season=(cur_month-1)//3
    seasonch=['春','夏','秋','冬'][season]
    mon=['4','7','10','1'][season]
    year=str(cur_year+(1 if mon=='1' else 0))
    print('使用前先看readme嗯~【度娘网盘上有\n'+'-'*60)
    print('现在是'+str(cur_year)+'年'+str(cur_month)+'月 -> '+'下载'+year+'年'+seasonch+'季新番数据')
    mon=raw_input('要指定其他季度请在此输入(1,4,7,10),按回车继续:') or mon
    mon="%.2d" % (int(mon))
    contenturl='http://www.dm123.cn/data/'+year+'/'+year+mon
    print('下载首页...')
    resp,content=ht.request(contenturl,headers=hd)
    if int(resp['status'])>=400:
        raw_input('您太超前了,还木有'+mon+'月新番介绍,按回车退出……')
        sys.exit(0)
    res=re.findall('#C3C3C3(.+)</table',content,re.DOTALL)[0]
    rows=re.findall('tr >(.*?)</tr',res,re.DOTALL)

    entries=[]
    if os.path.exists('elem.txt'):
        # Reuse the entries cached by a previous run.
        with open('elem.txt','r') as cache:
            chunks=cache.read().decode('utf-8').split('SEPSEPSEPSEP\n\n')
        for chunk in chunks:
            # The description is the last field and may itself contain
            # commas, so split on the first four commas only.
            fields=chunk.split(',',4)
            if len(fields)!=5:
                continue
            entry={'name':'','time':'','url':'','thumb':'','desc':'','postid':''}
            entry['time'],entry['url'],entry['name'],entry['thumb'],entry['desc']=fields
            entries.append(entry)
    else:
        for row in rows:
            entry={'name':'','time':'','url':'','thumb':'','desc':'','postid':''}
            entry['time']='-'.join(re.findall(r'\d+', re.findall('2" >(.+)</td',row)[0]))
            entry['url'],entry['name'],thumb=re.findall('f="(.+)" target="_blank">(.+)</a>.+<img height="74" src="(.+)" width="20',row)[0]
            entry['thumb']=baseurl+thumb
            page=ht.request(entry['url'],headers=hd)[1]
            entry['desc']=html2bbcode(re.findall('div id="nrzw">(.*?)</div',page,re.DOTALL)[0])
            print('Get '+entry['name'])
            # Append to the cache right away so an interrupted run keeps
            # what was already downloaded.
            with open('elem.txt','a') as cache:
                cache.write(('%s,%s,%s,%s,%sSEPSEPSEPSEP\n\n'%(entry['time'],entry['url'],entry['name'],entry['thumb'],entry['desc'])).encode('utf-8'))
            entries.append(entry)

    raw_input('将要生成各番组简介,全部完成后将生成索引页,按回车继续……')
    pid_prompt='输入其楼层pid,回车自动+1:'
    prev_pid=''
    for entry in entries:
        s=add_format('#title%s#/title\n#desc%s#/desc'%(entry['name'],entry['desc']))
        try:
            s=s.decode('gb2312','ignore').encode('gb2312')
            setClipboard(htmlescape(s))
        except Exception:
            # Best effort: dump the text to a file when it cannot be encoded
            # or placed on the clipboard.
            print('encoding error, see temp.txt')
            with open('temp.txt','w') as dump:
                dump.write(s.encode('utf-8').replace('\r\n','\n'))
        print(entry['name']+' 简介已复制到剪贴板')
        pid=raw_input(pid_prompt)
        while not pid:
            try:
                pid=str(int(prev_pid)+1)
            except ValueError:
                # Nothing numeric to increment (first entry, or the previous
                # id was not a number): ask again.
                pid=raw_input(pid_prompt)
        entry['postid']=prev_pid=pid
        print('\n'+'-'*60+'\n')

    baseid=raw_input('输入帖子id:')
    if not os.path.exists('_indexes'):
        os.mkdir('_indexes')
    # Empty any index files left by a previous run, since entries are appended.
    for p in os.listdir('_indexes'):
        open(os.path.join('_indexes', p), 'w').close()
    for entry in entries:
        #http://www.kmgtp.org/forums.php?action=viewtopic&topicid=21427&page=p268050#pid268050
        indexstr='[url=http://www.kmgtp.org/forums.php?action=viewtopic&topicid='+baseid+\
            '&page=p'+entry['postid']+'#pid'+entry['postid']+'][img]'\
            +entry['thumb']+'[/img]\n[b]'+entry['name']+'[/b][/url]\n'
        try:
            weekday=datetime.datetime.strptime(entry['time'], '%Y-%m-%d').strftime('%w')
        except ValueError:
            # Air date missing or not a full year-month-day date.
            weekday='undefined'
        with open('_indexes/_%s.txt' % weekday,'a') as index_file:
            index_file.write(indexstr)
    # The index is written to files, not to the clipboard.
    print('索引已写入 _indexes 目录.')
    raw_input('木有了,按回车退出……')
#coding:gbk
import win32clipboard as clipboard
import re
def setClipboard(str):
    """Replace the Windows clipboard contents with *str* in ``CF_TEXT`` format.

    :param str: the text to place on the clipboard.
    """
    # This script's own import lines do not bring in win32con, so import it
    # here.
    import win32con
    clipboard.OpenClipboard()
    try:
        clipboard.EmptyClipboard()
        clipboard.SetClipboardData(win32con.CF_TEXT, str)
    finally:
        # Always close the clipboard, even when setting the data fails, so it
        # is not left open by this process.
        clipboard.CloseClipboard()
# Build span.txt: for each weekday index file under _indexes, write a heading
# followed by the file's "[url]...[/url]" entries laid out three per row.
fname_list = ['0', '1', '2', '3', '4', '5', '6', 'undefined']
weekday_jp = ['日', '月', '火', '水', '木', '金', '土', '未定']
weekday_cn = ['日', '一', '二', '三', '四', '五' ,'六', '?']
weekday_color = ['Red', 'Orange', 'Yellow Green', 'Medium', 'Purple', 'Navy', 'Gray', 'Black']
# Image used to fill the second/third column of an incomplete last row.
placeholder = '[img]http://ww2.sinaimg.cn/mw600/436919cbjw1e8tbx9n07nj205k022aa6.jpg[/img]'

with open('span.txt','w') as out:
    for idx in range(8):
        out.write('[font="Microsoft YaHei,微软雅黑;文泉驿微米黑"]'
            '[size=4][color=%s][b]%s[/b][/color][/size] 星期%s[/font]\n' % (
            weekday_color[idx],
            weekday_jp[idx],
            weekday_cn[idx]))
        try:
            with open('_indexes/_%s.txt' % fname_list[idx]) as index_file:
                content = index_file.read()
        except IOError:
            # No index file for this weekday: keep the heading, no entries.
            content = ''
        # Splitting drops the "[/url]" terminator, so put it back.  The text
        # after the last terminator becomes a bare "[/url]" cell, which is
        # filtered out below.
        cells = [(t + '[/url]').strip('\n') for t in content.split('[/url]')]
        for start in range(0, len(cells), 3):
            row = cells[start:start + 3]
            s1 = row[0]
            if s1 == '[/url]':
                continue
            s2 = row[1] if len(row) > 1 and row[1] != '[/url]' else placeholder
            s3 = row[2] if len(row) > 2 and row[2] != '[/url]' else placeholder
            single = ('[span style="width:90%; margin:0 auto; overflow:auto; _display:inline-block;"]'
                      '[span style="width: 250px; float: left;"]\n' + s1 +
                      '[/span][span style="width: 260px; float: left;"]\n' + s2 +
                      '[/span][span]\n' + s3 + '[/span][/span]\n')
            out.write(single)
        out.write('\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment