# LeNarvalo/veganisme.py -- veganisme.py (secret gist)
# -*- coding: utf8 -*-
import urllib, webbrowser
import unicodedata
import threading
from Tkinter import *
import ttk
#import tkMessageBox
import os
import time
import re

#windowsAnswer = False
#def windowsAlert():
#	global windowsAnswer
#	windowsAnswer = tkMessageBox.askyesno("Continuer?", "Le script va ouvrir un nombre de page important, voulez vous continuer?")

# Root Tk window of the application.
master=Tk()
master.configure(background='#535353')


###VARIABLES###
# NOTE(review): a `global` statement at module level has no effect.
global derTxt, texto, auteurSaved
# Author name written in the header of the generated result page.
auteurSaved = ""
# Last message inserted in the log Text widget.
derTxt = ""
# NOTE(review): the three display* flags below are not read anywhere in the visible code.
displaySignature = False
displayVideo = False
displayImage = False
# Matches of the last search, as [page offset, forum post id, comment, author].
listResult = []
# Forum post ids collected by createFPList(), as strings.
listCom = []
# author -> flat list of strings (page offset, forum post id, comment text, ...).
dico = {}
# NOTE(review): not read anywhere in the visible code.
disableDisplay = True
# Directory holding the archive (Sujet.html) and the result page (pageWeb.html).
chemin = os.path.expanduser('~/Veganisme')
# Markup removed from a comment before it is written to the result page.
balises=["<blockquote>",'<div class="xoopsQuote">',"<br />"]
# Pairs (UTF-8 byte escapes, literal character) applied with str.replace() by
# research() and buildPageWeb().
accents=[["\xc3\xa7","ç"],["\xc3\x87","ç"],["\xe2\x80\x99","'"],["\xc3\xa9","é"],["\xc3\xa0","à"],["\xc3\xa8","è"],["\xc3\xb4","ô"],["\xc3\xb9","ù"],["\xc3\xaa","ê"],\
		["\xc3\xae","î"],["\xc3\xaf","ï"]]
# Pairs (escape sequence spelled out as text, real bytes) used by
# getDicoFromStr() to undo the repr() escaping found in the archive file.
accents2=[["\\xc3\\xa7","\xc3\xa7"],["\\xc3\\xae","\xc3\xae"],["\\xc3\\x87","\xc3\x87"],["\\xe2\\x80\\x99","\xe2\x80\x99"],["\\xc3\\xa9","\xc3\xa9"],["\\xc3\\xa0","\xc3\xa0"],\
		["\\xc3\\xa8","\xc3\xa8"],["\\xc3\\xb4","\xc3\xb4"],["\\xc3\\xb9","\xc3\xb9"],["\\xc3\\xaa","\xc3\xaa"],["\\xc3\\xaf","\xc3\xaf"]]
# One-element list holding an HTML/CSS page header.
# NOTE(review): `d` is not read anywhere in the visible code; buildPageWeb()
# carries its own copy of a similar header.
d = ['''<!DOCTYPE html>
<style>
table {
	width:100%;
}
table, th, td {
	border: 1px solid black;
	border-collapse: collapse;
}
th, td {
	padding: 15px;
	text-align: left;
}
table#t01 tr:nth-child(even) {
	background-color: #eee;
}
table#t01 tr:nth-child(odd) {
   background-color: #fff;
}

</style>
<body>
<table id="t01">
''']
###############

# Download the first page of the topic (at import time) to find out how many
# pages the topic currently has.
pageTest=urllib.urlopen('https://www.koreus.com/modules/newbb/topic160787.html')
strpageTest=pageTest.read()
# Exact pagination markup expected on page 1; whatever follows it is taken to
# be the link to the last page.
splitText = '''<b>(1)</b> <a href="/modules/newbb/topic160787-20.html">2</a> <a href="/modules/newbb/topic160787-40.html">3</a> <a href="/modules/newbb/topic160787-60.html">4</a> ... <a '''
listSST = strpageTest.split(splitText)
# An IndexError here means the markup above was not found in the page.
listEST = listSST[1].split("</a>")
# The text between the link's '>' and its '</a>' is read as the page count.
listNSST = listEST[0].split(">")
nbOfPages = int(listNSST[1])

def createFPList(strpage):
	"""Append to the global `listCom` the id of every forum post anchor of a page.

	An anchor looks like ``<a id="forumpostNNN">``; only ``NNN`` is kept, as a
	string, in page order.
	"""
	global listCom  # list of forum post ids, as strings
	marker = 'orumpost'
	for chunk in strpage.split('<a id="f'):
		if not chunk.startswith(marker):
			continue
		anchor = chunk.split('">')[0]
		listCom.append(anchor[len(marker):])

def getDicoFromStr(fileStr):
	"""Rebuild the author -> comments dictionary from the archive text.

	The archive (Sujet.html) holds a ``<meta>`` tag, then ``str(dico)``, then a
	newline and a number.  The dictionary literal is parsed with
	``ast.literal_eval`` rather than split by hand: the hand-written parser
	assumed the first author was "Olrik", broke on comments containing quotes
	or ``': ['``, left most escape sequences undecoded and could store a bare
	string where a list was expected.

	Args:
		fileStr: full content of the archive file.

	Returns:
		The global `dico`: each author mapped to a flat list of strings
		(page offset, forum post id, comment text, ...).  When no valid
		dictionary literal is found, "Bug général" is logged in `texto` and an
		empty dict is returned.

	Side effects:
		Rebinds the globals `dico`, `auteurList` (authors, in archive order)
		and `commentList` (their lists, in the same order).
	"""
	global auteurList, commentList, dico, der, derTxt
	import ast  # local import so the block does not depend on the file header

	dico = {}
	auteurList = []
	commentList = []
	try:
		# The literal starts at the first '{' (the <meta> tag has none) and
		# ends at the last '}' (only a newline and a number follow it).
		debut = fileStr.index("{")
		fin = fileStr.rindex("}") + 1
		archive = ast.literal_eval(fileStr[debut:fin])
		if not isinstance(archive, dict):
			raise ValueError("archive is not a dictionary")
	except (ValueError, SyntaxError):
		derTxt = "\n"+"Bug général"
		texto.insert(END, derTxt)
		return dico

	for auteur, comms in archive.items():
		# Archives written by the previous parser may hold a bare string.
		if not isinstance(comms, (list, tuple)):
			comms = [comms]
		dico[auteur] = list(comms)
		auteurList.append(auteur)
		commentList.append(dico[auteur])

	return dico

def _fetchTopicPage(offset):
	"""Download one page of the topic and return its raw HTML.

	`offset` is the numeric suffix of the page URL (0, 20, 40, ...); 0 selects
	the un-suffixed URL of the first page.
	"""
	if offset:
		url = 'https://www.koreus.com/modules/newbb/topic160787-'+str(offset)+'.html'
	else:
		url = 'https://www.koreus.com/modules/newbb/topic160787.html'
	page = urllib.urlopen(url)
	try:
		return page.read()
	finally:
		page.close()

def _stripTag(text, opening, skip=0):
	"""Remove from `text` every tag starting with `opening`, up to its '>'.

	The closing '>' is searched `skip` characters after the start of the tag.
	"""
	while opening in text:
		start = text.index(opening)
		end = text.find(">", start+skip)
		if end < 0:
			# Unterminated tag: drop it and what follows instead of crashing.
			return text[:start]
		text = text.replace(text[start:end+1], "")
	return text

def _scrapePage(strpage, offset, countComm, onlyNew=False):
	"""Store the comments of one downloaded page in the global `dico`.

	Args:
		strpage: raw HTML of the page.
		offset: URL suffix of the page, stored with each comment.
		countComm: running index in `listCom` of the last comment handled.
		onlyNew: when True, a comment whose forum post id is already stored
			for its author is skipped, and each addition is logged.

	Returns:
		The updated `countComm`.
	"""
	global derTxt
	createFPList(strpage)
	for c in strpage.split('href="/memb'):
		if not c.startswith('re/'):
			continue
		countComm += 1
		auteur = c.split('">')[1].split('</a>')[0]
		# Keep only the text of the comment (content of the comText div).
		h02 = c.split('<div class="comText')[1]
		h02 = h02.split('</div>\r\n\t    <br clear="all" />')[0][2:]
		h02 = _stripTag(h02, "<img")
		h02 = _stripTag(h02, "<iframe", 10)
		entry = [str(offset), listCom[countComm], h02]
		if auteur not in dico:
			dico[auteur] = entry
		elif not onlyNew:
			dico[auteur].extend(entry)
		elif listCom[countComm] not in dico[auteur]:
			dico[auteur].extend(entry)
			derTxt = "\n"+"ECRITURE EN COURS"
			texto.insert(END, derTxt)
	return countComm

def _saveArchive(path, lastLenPage):
	"""Write `dico` and the length of the last scraped page to the archive."""
	with open(path, "w") as archiveFile:
		archiveFile.write('<meta charset="UTF-8">')
		archiveFile.write(str(dico))
		archiveFile.write("\n"+str(lastLenPage))

def rechercher():
	"""Create or refresh the local archive of the topic, then start the search.

	First run (no Sujet.html in `chemin`): every page of the topic is
	downloaded and scraped into `dico`, which is then written to disk.
	Later runs: `dico` is loaded from the archive if it is empty; when the
	length of the last page differs from the stored one, the pages from offset
	1980 on are scraped again and the archive is rewritten.
	In both cases research() is finally started in a daemon thread.

	Reads the module globals `nbOfPages`, `chemin`, `pCurrent` and `texto`.
	"""
	global listCom, dico, texto, derTxt, research_thread
	lastPage = int(nbOfPages)*20  # first offset past the last existing page
	archive = os.path.join(chemin, 'Sujet.html')

	if not os.path.isfile(archive):
		# First use: nothing archived on disk yet.
		if not os.path.exists(chemin):
			os.mkdir(chemin)
		listCom = []
		countComm = -1
		lastLenPage = 0
		for offset in range(0, lastPage, 20):
			strpage = _fetchTopicPage(offset)
			lastLenPage = len(strpage)
			countComm = _scrapePage(strpage, offset, countComm)
			pCurrent['value'] = offset//20+1
		# The archive is created only once scraping is complete, so an aborted
		# first run no longer leaves an empty, unreadable Sujet.html behind.
		_saveArchive(archive, lastLenPage)
	else:
		# An archive already exists.
		with open(archive, "r") as archiveFile:
			fileStr = archiveFile.read()
		if len(dico) < 1:
			dico = getDicoFromStr(fileStr)
		# The last line of the archive is the length the last page had when
		# the archive was written.
		lastLenPage = int(fileStr.strip().rsplit("\n", 1)[-1])

		# Update check: only the last page decides, so only it is downloaded.
		listCom = []
		currentLen = len(_fetchTopicPage(max(lastPage-20, 0)))
		if currentLen == lastLenPage:
			derTxt = "\n"+"BASE A JOUR!"
			texto.insert(END, derTxt)
		else:
			derTxt = "\n"+"MISE A JOUR DE LA BASE..."
			texto.insert(END, derTxt)
			lastLenPage = currentLen
			countComm = -1
			# NOTE(review): 1980 is the hard-coded offset the update restarts
			# from; presumably the last page of the author's first archive.
			# Each page is now fetched with its own offset (the first pass used
			# to parse a stale page as if it were offset 1980).
			for offset in range(1980, lastPage, 20):
				strpage = _fetchTopicPage(offset)
				lastLenPage = len(strpage)
				countComm = _scrapePage(strpage, offset, countComm, True)
			_saveArchive(archive, lastLenPage)

	research_thread = threading.Thread(target=research)
	research_thread.daemon = True
	research_thread.start()

def research():
	"""Search the archived comments of the requested author for the text.

	Reads the module globals `auteur`, `auteur2`, `entree2` and the `casse`
	checkbox.  Only the first author whose lower-cased name contains
	`auteur2` is searched, or every author when `auteur` is "*".  Each match
	is appended to `listResult` as [page offset, forum post id, comment,
	author] and counted in `result`; the first 199 are also logged in `texto`.
	buildPageWeb() is then started in a daemon thread.
	"""
	global listResult, dico, texto, derTxt, auteurSaved, result, entree2, auteur2
	try:
		derTxt = "\n"+"RECHERCHE EN COURS..."
		texto.insert(END, derTxt)

		pCurrent['mode'] = "indeterminate"
		pCurrent.start(1)
		listResult = []
		result = 0
		tousLesAuteurs = auteur==u"*"
		auteurTrouve = False
		for key in dico.keys():
			nomCorrespond = auteur2 in key.lower()
			if not (nomCorrespond or tousLesAuteurs):
				continue
			auteurTrouve = True
			if tousLesAuteurs:
				auteurSaved = "Auteur inconnu"
			else:
				auteurSaved = key

			for comm in dico[key]:
				# Numeric items are metadata: values below 200000 are page
				# offsets, values above are forum post ids.  They are a single
				# token, so they could never match the text test below.
				try:
					valeur = int(comm)
				except (TypeError, ValueError):
					valeur = None
				if valeur is not None:
					if valeur < 200000:
						page = comm
					elif valeur > 200000:
						num = comm
					continue

				try:
					comm1 = comm
					for a in accents:
						comm1 = comm1.replace(a[0],a[1])
					if casse.get():
						comm2 = comm1
					else:
						try:
							comm2 = unicodedata.normalize('NFKD', comm1.decode('utf8').lower()).encode('ascii', 'ignore')
						except Exception:
							comm2 = comm1.lower()

					if entree2.encode('utf8') in comm2 and len(comm.split())>1:
						if result < 200:
							derTxt = "\n"+"PAGE:"+str(int(page)//20)+"    FORUMPOST :"+str(num)
							texto.insert(END, derTxt)
						result += 1
						listResult.append([page,num,comm,key])
				except Exception:
					# Best effort, as before: a comment that cannot be
					# compared is skipped rather than aborting the search.
					pass
			if nomCorrespond:
				break

		buildPageWeb_thread = threading.Thread(target=buildPageWeb)
		buildPageWeb_thread.daemon = True
		buildPageWeb_thread.start()
		if tousLesAuteurs:
			return

		# This message used to be logged even when the author had been found.
		if not auteurTrouve:
			derTxt = "\n"+"AUTEUR NON TROUVVE"
			texto.insert(END, derTxt)
		pCurrent.stop()
	except Exception:
		# Worker thread: report the failure instead of swallowing it silently.
		import traceback
		traceback.print_exc()

def _truncateComment(txt, limit=550):
	"""Return at most `limit` characters of `txt` without cutting inside a link.

	When the cut falls after an opening '<a' whose closing '/a>' lies beyond
	the limit, the excerpt stops just before that '<a'.
	"""
	extrait = txt[:limit]
	dernierLien = extrait.rfind('<a')
	if len(txt) > limit and dernierLien > extrait.rfind('/a>'):
		# The previous slice ended one character too late and kept the '<'.
		extrait = extrait[:dernierLien]
	return extrait

def buildPageWeb(afficherPlusDe200=False):
	"""Write the matches of the last search to pageWeb.html and open it.

	Args:
		afficherPlusDe200: when False and 200 results or more were found,
			no page is written; the log is replaced by the ">200" notice and
			the "Afficher" button is enabled so the user can ask for the page.

	Reads the module globals `result`, `listResult`, `auteurSaved`, `entree2`,
	`casse`, `chemin`, `pCurrent`, `texto` and `Afficher`.
	"""
	global texto, derTxt, entree2

	pCurrent.stop()
	if result >= 200 and not afficherPlusDe200:
		derTxt=""
		texto.delete(1.0, END)
		derTxt = "\n"+"Nombre de resultats : >200"
		texto.insert(END, derTxt)
		Afficher['state']='normal'
		return

	if result < 200:
		derTxt = "\n"+"Nombre de resultats : "+str(result)
		texto.insert(END, derTxt)
	e = '''<meta charset="UTF-8">
	<!DOCTYPE html>
	<html>
	<head>
	<style>
	table {
		width:100%;
	}
	table, th, td {
		border: 1px solid black;
		border-collapse: collapse;
	}
	th, td {
		padding: 15px;
		text-align: left;
	}
	table#t01 tr:nth-child(even) {
		background-color: #e0dfe7;
	}
	table#t01 tr:nth-child(odd) {
	background-color: #fff;
	}

	</style>
	</head><table id="t01" cellpadding="3px" cellspacing="0px" rules="all" style="border:solid 1px black; border-collapse:collapse; text-align:center;">	'''
	# Encode a local copy: the global used to be overwritten, so a second call
	# would try to encode an already encoded string.
	extraitCherche = entree2
	if casse.get():
		extraitCherche = entree2.encode('utf8')

	# `with` closes the file even if a row cannot be written.
	with open(os.path.join(chemin, "pageWeb.html"),"w") as pageWeb:
		pageWeb.write(e)
		pageWeb.write('''<tr>
	<th colspan="2" style="width:140px;background-color:#9a9ace"><FONT color="#fff">'''+auteurSaved+''' - Extrait du texte recherché : '''+extraitCherche+''' - <U>Nombre de résultat(s)</U> : '''+str(result)+'''</FONT></th>
	</tr>''')
		for page, num, comm, nomAuteur in listResult:
			txt = comm
			for b in balises:
				txt = txt.replace(b,"")
			for a in accents:
				txt = txt.replace(a[0],a[1])
			txt2 = _truncateComment(txt)

			pageWeb.write('<tr><td>'+txt2+'<div align="right"><font face="verdana" color="orange" size="2">-<i><b>'+str(nomAuteur)+'</b></i></font></div></td><td><a href="https://www.koreus.com/modules/newbb/topic160787-'+str(page)+'.html#forumpost'+str(num)+'"> Page '+str(int(page)//20)+' / ForumPost.'+str(num)+'</a></td></tr>')
		# The table and the document were never closed.
		pageWeb.write('</table>\n</html>')
	displayPageWeb = threading.Thread(target=displaySearch)
	displayPageWeb.daemon = True
	displayPageWeb.start()

def displaySearch():
	"""Open the generated result page and disable the "Afficher" button."""
	# The page is looked up in `chemin`, where buildPageWeb() writes it; the
	# path used to be hard-coded to one user's home directory.
	pagePath = os.path.join(chemin, "pageWeb.html")
	if hasattr(os, "startfile"):
		os.startfile(pagePath)
	else:
		# os.startfile only exists on Windows.
		webbrowser.open(pagePath)
	Afficher['state']='disabled'

def _sansAccents(texte):
	"""Return `texte` lower-cased, without accents when it is a unicode string."""
	try:
		return unicodedata.normalize('NFKD', texte).encode('ascii', 'ignore').lower()
	except TypeError:
		# normalize() only accepts unicode; a plain byte string is just lower-cased.
		return texte.lower()

def init2():
	"""Callback of the "Rechercher" button: validate the form and start the search.

	Clears the log, resets the progress bar and the "Afficher" button, then
	checks that both fields are filled and that the text has at least 5
	characters.  On success sets the globals `auteur` (raw entry), `auteur2`
	(normalised author) and `entree2` (text, normalised unless the "Casse"
	box is ticked) and runs rechercher() in a worker thread.
	"""
	global search_thread, derTxt, texto, entree2, auteur2, auteur
	derTxt=""
	texto.delete(1.0, END)
	pCurrent['mode'] = "determinate"
	Afficher['state']='disabled'

	saisieAuteur = entreeAuteur.get()  # unicode when accented, str otherwise
	saisieTexte = entreeText.get()  # unicode when accented, str otherwise
	if not saisieAuteur or not saisieTexte:
		derTxt = "VEUILLEZ SAISIR UN NOM D'AUTEUR ET UN EXTRAIT DU TEXTE QU'IL AURAIT SAISI"\
		+"\nTAPEZ * POUR REMPLACER LE NOM DE L'AUTEUR SI VOUS NE SAVEZ PAS"
		texto.insert(END, derTxt)
		return
	if len(saisieTexte)<5:
		derTxt = "\n"+"L'EXTRAIT DE TEXTE EST TROP COURT (MIN 5 CARACTERES)"
		texto.insert(END, derTxt)
		return

	auteur = saisieAuteur
	auteur2 = _sansAccents(auteur)
	if casse.get():
		entree2 = saisieTexte
	else:
		entree2 = _sansAccents(saisieTexte)

	search_thread = threading.Thread(target=rechercher)
	# Daemon like the other workers, so a running download does not keep the
	# process alive once the window is closed.
	search_thread.daemon = True
	search_thread.start()

def afficherPlusDe200():
	"""Callback of the "Afficher" button: build the page whatever the number of results."""
	buildPageWeb(afficherPlusDe200=True)

def fenetre():
	"""Create the widgets of the main window and publish them as globals.

	Every widget is placed in the grid of `master`.  The previous code passed
	the result of ``Frame(master).grid(...)`` as parent; grid() returns None,
	so the widgets were attached to the default root anyway.  The parent is
	now explicit and the four empty frames that served no purpose are gone;
	only the row 2 frame, which carries the vertical padding, is kept.
	"""
	global Afficher, texto, pCurrent, entreeAuteur, entreeText, Rechercher, scrollbar, casse

	# Row 0: author name.
	auteur_label = Label(master, text="Auteur :",bg='#535353',fg="white")
	auteur_label.grid(row=0,column=0, sticky=W)
	entreeAuteur = Entry(master, width=50)
	entreeAuteur.grid(row=0,column=1, columnspan=3, sticky=W+E)

	# Row 1: text to look for.
	text_label = Label(master, text="Texte :",bg='#535353',fg="white")
	text_label.grid(row=1,column=0, sticky=W)
	entreeText = Entry(master, width=50)
	entreeText.grid(row=1,column=1, columnspan=3, sticky=W+E)

	# Row 2: options and buttons; the empty frame only adds 20px of padding
	# above and below the row.
	Frame(master).grid(row=2,pady=20)
	casse = IntVar(master)
	bouton=Checkbutton(master, text="Casse", variable=casse,bg='#535353',activebackground='#535353',fg="#FFFFFF",selectcolor="black")
	bouton.grid(row=2)

	Rechercher = Button(master, text ='Rechercher', command=init2,width=20)
	Rechercher.grid(row=2,column=1, sticky=E)

	Afficher = Button(master,width=20, compound=LEFT, overrelief=GROOVE, text ='Afficher > 200 résultats', fg="red", command=afficherPlusDe200, state=DISABLED)
	Afficher.grid(row=2,column=2, sticky=W)

	# Row 3: progress bar, one step per page of the topic.
	pCurrent = ttk.Progressbar(master, orient='horizontal', mode='determinate', value=5, maximum=nbOfPages)
	pCurrent.grid(row=3, column=0,columnspan=4, sticky=W+E)

	# Row 4: log area and its scrollbar.
	scrollbar = Scrollbar(master)
	scrollbar.grid(row=4, column=3,sticky=W+S+N)
	texto = Text(master, wrap=WORD, yscrollcommand=scrollbar.set, width=50)
	texto.grid(row=4,column=0, columnspan=3, sticky=W+E)
	scrollbar.config(command=texto.yview)

# Build the interface, then hand control to the Tk event loop.
fenetre()

master.mainloop()
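	# NOTE: from this line to the end of the file is a whitespace-mangled duplicate
	# of the script above (indentation flattened, some '*' characters lost).
	# -*- coding: utf8 -*-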
	import urllib, webbrowser
	import unicodedata
	import threading
	from Tkinter import *
	import ttk
	#import tkMessageBox
	import os
	import time
	import re

	#windowsAnswer = False
	#def windowsAlert():
	# global windowsAnswer
	# windowsAnswer = tkMessageBox.askyesno("Continuer?", "Le script va ouvrir un nombre de page important, voulez vous continuer?")

	# Root Tk window of the application.
	master=Tk()
	master.configure(background='#535353')


	###VARIABLES###
	# NOTE(review): a `global` statement at module level has no effect.
	global derTxt, texto, auteurSaved
	# Author name written in the header of the generated result page.
	auteurSaved = ""
	# Last message inserted in the log Text widget.
	derTxt = ""
	# NOTE(review): the three display* flags below are not read anywhere in the visible code.
	displaySignature = False
	displayVideo = False
	displayImage = False
	# Matches of the last search, as [page offset, forum post id, comment, author].
	listResult = []
	# Forum post ids collected by createFPList(), as strings.
	listCom = []
	# author -> flat list of strings (page offset, forum post id, comment text, ...).
	dico = {}
	# NOTE(review): not read anywhere in the visible code.
	disableDisplay = True
	# Directory holding the archive (Sujet.html) and the result page (pageWeb.html).
	chemin = os.path.expanduser('~/Veganisme')
	# Markup removed from a comment before it is written to the result page.
	balises=["<blockquote>",'<div class="xoopsQuote">',"<br />"]
	# Pairs (UTF-8 byte escapes, literal character) applied with str.replace() by
	# research() and buildPageWeb().
	accents=[["\xc3\xa7","ç"],["\xc3\x87","ç"],["\xe2\x80\x99","'"],["\xc3\xa9","é"],["\xc3\xa0","à"],["\xc3\xa8","è"],["\xc3\xb4","ô"],["\xc3\xb9","ù"],["\xc3\xaa","ê"],\
	["\xc3\xae","î"],["\xc3\xaf","ï"]]
	# Pairs (escape sequence spelled out as text, real bytes) used by
	# getDicoFromStr() to undo the repr() escaping found in the archive file.
	accents2=[["\\xc3\\xa7","\xc3\xa7"],["\\xc3\\xae","\xc3\xae"],["\\xc3\\x87","\xc3\x87"],["\\xe2\\x80\\x99","\xe2\x80\x99"],["\\xc3\\xa9","\xc3\xa9"],["\\xc3\\xa0","\xc3\xa0"],\
	["\\xc3\\xa8","\xc3\xa8"],["\\xc3\\xb4","\xc3\xb4"],["\\xc3\\xb9","\xc3\xb9"],["\\xc3\\xaa","\xc3\xaa"],["\\xc3\\xaf","\xc3\xaf"]]
	# One-element list holding an HTML/CSS page header.
	# NOTE(review): `d` is not read anywhere in the visible code.
	d = ['''<!DOCTYPE html>
	<style>
	table {
	width:100%;
	}
	table, th, td {
	border: 1px solid black;
	border-collapse: collapse;
	}
	th, td {
	padding: 15px;
	text-align: left;
	}
	table#t01 tr:nth-child(even) {
	background-color: #eee;
	}
	table#t01 tr:nth-child(odd) {
	background-color: #fff;
	}

	</style>
	<body>
	<table id="t01">
	''']
	###############

	# Download the first page of the topic to find out how many pages the
	# topic currently has.
	pageTest=urllib.urlopen('https://www.koreus.com/modules/newbb/topic160787.html')
	strpageTest=pageTest.read()
	# Exact pagination markup expected on page 1; whatever follows it is taken
	# to be the link to the last page.
	splitText = '''<b>(1)</b> <a href="/modules/newbb/topic160787-20.html">2</a> <a href="/modules/newbb/topic160787-40.html">3</a> <a href="/modules/newbb/topic160787-60.html">4</a> ... <a '''
	listSST = strpageTest.split(splitText)
	# An IndexError here means the markup above was not found in the page.
	listEST = listSST[1].split("</a>")
	# The text between the link's '>' and its '</a>' is read as the page count.
	listNSST = listEST[0].split(">")
	nbOfPages = int(listNSST[1])

	def createFPList(strpage):
		"""Append to the global `listCom` the id of every forum post anchor of a page.

		An anchor looks like ``<a id="forumpostNNN">``; only ``NNN`` is kept, as
		a string, in page order.  (Indentation restored: the pasted copy had
		every line at the same level.)
		"""
		global listCom  # list of forum post ids, as strings
		marker = 'orumpost'
		for chunk in strpage.split('<a id="f'):
			if not chunk.startswith(marker):
				continue
			anchor = chunk.split('">')[0]
			listCom.append(anchor[len(marker):])

	def getDicoFromStr(fileStr):
		"""Rebuild the author -> comments dictionary from the archive text.

		The archive (Sujet.html) holds a ``<meta>`` tag, then ``str(dico)``, then
		a newline and a number.  The dictionary literal is parsed with
		``ast.literal_eval`` rather than split by hand: the hand-written parser
		assumed the first author was "Olrik", broke on comments containing
		quotes or ``': ['``, left most escape sequences undecoded and could
		store a bare string where a list was expected.

		Args:
			fileStr: full content of the archive file.

		Returns:
			The global `dico`: each author mapped to a flat list of strings
			(page offset, forum post id, comment text, ...).  When no valid
			dictionary literal is found, "Bug général" is logged in `texto` and
			an empty dict is returned.

		Side effects:
			Rebinds the globals `dico`, `auteurList` (authors, in archive
			order) and `commentList` (their lists, in the same order).
		"""
		global auteurList, commentList, dico, der, derTxt
		import ast  # local import so the block does not depend on the file header

		dico = {}
		auteurList = []
		commentList = []
		try:
			# The literal starts at the first '{' (the <meta> tag has none) and
			# ends at the last '}' (only a newline and a number follow it).
			debut = fileStr.index("{")
			fin = fileStr.rindex("}") + 1
			archive = ast.literal_eval(fileStr[debut:fin])
			if not isinstance(archive, dict):
				raise ValueError("archive is not a dictionary")
		except (ValueError, SyntaxError):
			derTxt = "\n"+"Bug général"
			texto.insert(END, derTxt)
			return dico

		for auteur, comms in archive.items():
			# Archives written by the previous parser may hold a bare string.
			if not isinstance(comms, (list, tuple)):
				comms = [comms]
			dico[auteur] = list(comms)
			auteurList.append(auteur)
			commentList.append(dico[auteur])

		return dico

	def _fetchTopicPage(offset):
		"""Download one page of the topic and return its raw HTML.

		`offset` is the numeric suffix of the page URL (0, 20, 40, ...); 0
		selects the un-suffixed URL of the first page.
		"""
		if offset:
			url = 'https://www.koreus.com/modules/newbb/topic160787-'+str(offset)+'.html'
		else:
			url = 'https://www.koreus.com/modules/newbb/topic160787.html'
		page = urllib.urlopen(url)
		try:
			return page.read()
		finally:
			page.close()

	def _stripTag(text, opening, skip=0):
		"""Remove from `text` every tag starting with `opening`, up to its '>'.

		The closing '>' is searched `skip` characters after the start of the tag.
		"""
		while opening in text:
			start = text.index(opening)
			end = text.find(">", start+skip)
			if end < 0:
				# Unterminated tag: drop it and what follows instead of crashing.
				return text[:start]
			text = text.replace(text[start:end+1], "")
		return text

	def _scrapePage(strpage, offset, countComm, onlyNew=False):
		"""Store the comments of one downloaded page in the global `dico`.

		Args:
			strpage: raw HTML of the page.
			offset: URL suffix of the page, stored with each comment.
			countComm: running index in `listCom` of the last comment handled.
			onlyNew: when True, a comment whose forum post id is already
				stored for its author is skipped, and each addition is logged.

		Returns:
			The updated `countComm`.
		"""
		global derTxt
		createFPList(strpage)
		for c in strpage.split('href="/memb'):
			if not c.startswith('re/'):
				continue
			countComm += 1
			auteur = c.split('">')[1].split('</a>')[0]
			# Keep only the text of the comment (content of the comText div).
			# The delimiter's inner whitespace was collapsed in the pasted copy.
			h02 = c.split('<div class="comText')[1]
			h02 = h02.split('</div>\r\n\t    <br clear="all" />')[0][2:]
			h02 = _stripTag(h02, "<img")
			h02 = _stripTag(h02, "<iframe", 10)
			entry = [str(offset), listCom[countComm], h02]
			if auteur not in dico:
				dico[auteur] = entry
			elif not onlyNew:
				dico[auteur].extend(entry)
			elif listCom[countComm] not in dico[auteur]:
				dico[auteur].extend(entry)
				derTxt = "\n"+"ECRITURE EN COURS"
				texto.insert(END, derTxt)
		return countComm

	def _saveArchive(path, lastLenPage):
		"""Write `dico` and the length of the last scraped page to the archive."""
		with open(path, "w") as archiveFile:
			archiveFile.write('<meta charset="UTF-8">')
			archiveFile.write(str(dico))
			archiveFile.write("\n"+str(lastLenPage))

	def rechercher():
		"""Create or refresh the local archive of the topic, then start the search.

		First run (no Sujet.html in `chemin`): every page of the topic is
		downloaded and scraped into `dico`, which is then written to disk.
		Later runs: `dico` is loaded from the archive if it is empty; when the
		length of the last page differs from the stored one, the pages from
		offset 1980 on are scraped again and the archive is rewritten.
		In both cases research() is finally started in a daemon thread.

		Reads the module globals `nbOfPages`, `chemin`, `pCurrent` and `texto`.
		"""
		global listCom, dico, texto, derTxt, research_thread
		# The '*' of this product was lost in the pasted copy.
		lastPage = int(nbOfPages)*20  # first offset past the last existing page
		archive = os.path.join(chemin, 'Sujet.html')

		if not os.path.isfile(archive):
			# First use: nothing archived on disk yet.
			if not os.path.exists(chemin):
				os.mkdir(chemin)
			listCom = []
			countComm = -1
			lastLenPage = 0
			for offset in range(0, lastPage, 20):
				strpage = _fetchTopicPage(offset)
				lastLenPage = len(strpage)
				countComm = _scrapePage(strpage, offset, countComm)
				pCurrent['value'] = offset//20+1
			# The archive is created only once scraping is complete, so an
			# aborted first run no longer leaves an empty Sujet.html behind.
			_saveArchive(archive, lastLenPage)
		else:
			# An archive already exists.
			with open(archive, "r") as archiveFile:
				fileStr = archiveFile.read()
			if len(dico) < 1:
				dico = getDicoFromStr(fileStr)
			# The last line of the archive is the length the last page had
			# when the archive was written.
			lastLenPage = int(fileStr.strip().rsplit("\n", 1)[-1])

			# Update check: only the last page decides, so only it is downloaded.
			listCom = []
			currentLen = len(_fetchTopicPage(max(lastPage-20, 0)))
			if currentLen == lastLenPage:
				derTxt = "\n"+"BASE A JOUR!"
				texto.insert(END, derTxt)
			else:
				derTxt = "\n"+"MISE A JOUR DE LA BASE..."
				texto.insert(END, derTxt)
				lastLenPage = currentLen
				countComm = -1
				# NOTE(review): 1980 is the hard-coded offset the update
				# restarts from; presumably the last page of the author's
				# first archive.  Each page is fetched with its own offset.
				for offset in range(1980, lastPage, 20):
					strpage = _fetchTopicPage(offset)
					lastLenPage = len(strpage)
					countComm = _scrapePage(strpage, offset, countComm, True)
				_saveArchive(archive, lastLenPage)

		research_thread = threading.Thread(target=research)
		research_thread.daemon = True
		research_thread.start()

	def research():
		"""Search the archived comments of the requested author for the text.

		Reads the module globals `auteur`, `auteur2`, `entree2` and the `casse`
		checkbox.  Only the first author whose lower-cased name contains
		`auteur2` is searched, or every author when `auteur` is "*".  Each
		match is appended to `listResult` as [page offset, forum post id,
		comment, author] and counted in `result`; the first 199 are also logged
		in `texto`.  buildPageWeb() is then started in a daemon thread.
		"""
		global listResult, dico, texto, derTxt, auteurSaved, result, entree2, auteur2
		try:
			derTxt = "\n"+"RECHERCHE EN COURS..."
			texto.insert(END, derTxt)

			pCurrent['mode'] = "indeterminate"
			pCurrent.start(1)
			listResult = []
			result = 0
			tousLesAuteurs = auteur==u"*"
			auteurTrouve = False
			for key in dico.keys():
				nomCorrespond = auteur2 in key.lower()
				if not (nomCorrespond or tousLesAuteurs):
					continue
				auteurTrouve = True
				if tousLesAuteurs:
					auteurSaved = "Auteur inconnu"
				else:
					auteurSaved = key

				for comm in dico[key]:
					# Numeric items are metadata: values below 200000 are page
					# offsets, values above are forum post ids.  They are a
					# single token, so they could never match the text test.
					try:
						valeur = int(comm)
					except (TypeError, ValueError):
						valeur = None
					if valeur is not None:
						if valeur < 200000:
							page = comm
						elif valeur > 200000:
							num = comm
						continue

					try:
						comm1 = comm
						for a in accents:
							comm1 = comm1.replace(a[0],a[1])
						if casse.get():
							comm2 = comm1
						else:
							try:
								comm2 = unicodedata.normalize('NFKD', comm1.decode('utf8').lower()).encode('ascii', 'ignore')
							except Exception:
								comm2 = comm1.lower()

						if entree2.encode('utf8') in comm2 and len(comm.split())>1:
							if result < 200:
								derTxt = "\n"+"PAGE:"+str(int(page)//20)+"    FORUMPOST :"+str(num)
								texto.insert(END, derTxt)
							result += 1
							listResult.append([page,num,comm,key])
					except Exception:
						# Best effort, as before: a comment that cannot be
						# compared is skipped rather than aborting the search.
						pass
				if nomCorrespond:
					break

			buildPageWeb_thread = threading.Thread(target=buildPageWeb)
			buildPageWeb_thread.daemon = True
			buildPageWeb_thread.start()
			if tousLesAuteurs:
				return

			# This message used to be logged even when the author had been found.
			if not auteurTrouve:
				derTxt = "\n"+"AUTEUR NON TROUVVE"
				texto.insert(END, derTxt)
			pCurrent.stop()
		except Exception:
			# Worker thread: report the failure instead of swallowing it silently.
			import traceback
			traceback.print_exc()

	def _truncateComment(txt, limit=550):
		"""Return at most `limit` characters of `txt` without cutting inside a link.

		When the cut falls after an opening '<a' whose closing '/a>' lies
		beyond the limit, the excerpt stops just before that '<a'.
		"""
		extrait = txt[:limit]
		dernierLien = extrait.rfind('<a')
		if len(txt) > limit and dernierLien > extrait.rfind('/a>'):
			# The previous slice ended one character too late and kept the '<'.
			extrait = extrait[:dernierLien]
		return extrait

	def buildPageWeb(afficherPlusDe200=False):
		"""Write the matches of the last search to pageWeb.html and open it.

		Args:
			afficherPlusDe200: when False and 200 results or more were found,
				no page is written; the log is replaced by the ">200" notice
				and the "Afficher" button is enabled so the user can ask for
				the page.

		Reads the module globals `result`, `listResult`, `auteurSaved`,
		`entree2`, `casse`, `chemin`, `pCurrent`, `texto` and `Afficher`.
		"""
		global texto, derTxt, entree2

		pCurrent.stop()
		if result >= 200 and not afficherPlusDe200:
			derTxt=""
			texto.delete(1.0, END)
			derTxt = "\n"+"Nombre de resultats : >200"
			texto.insert(END, derTxt)
			Afficher['state']='normal'
			return

		if result < 200:
			derTxt = "\n"+"Nombre de resultats : "+str(result)
			texto.insert(END, derTxt)
		e = '''<meta charset="UTF-8">
		<!DOCTYPE html>
		<html>
		<head>
		<style>
		table {
			width:100%;
		}
		table, th, td {
			border: 1px solid black;
			border-collapse: collapse;
		}
		th, td {
			padding: 15px;
			text-align: left;
		}
		table#t01 tr:nth-child(even) {
			background-color: #e0dfe7;
		}
		table#t01 tr:nth-child(odd) {
		background-color: #fff;
		}

		</style>
		</head><table id="t01" cellpadding="3px" cellspacing="0px" rules="all" style="border:solid 1px black; border-collapse:collapse; text-align:center;">	'''
		# Encode a local copy: the global used to be overwritten, so a second
		# call would try to encode an already encoded string.
		extraitCherche = entree2
		if casse.get():
			extraitCherche = entree2.encode('utf8')

		# `with` closes the file even if a row cannot be written.
		with open(os.path.join(chemin, "pageWeb.html"),"w") as pageWeb:
			pageWeb.write(e)
			pageWeb.write('''<tr>
		<th colspan="2" style="width:140px;background-color:#9a9ace"><FONT color="#fff">'''+auteurSaved+''' - Extrait du texte recherché : '''+extraitCherche+''' - <U>Nombre de résultat(s)</U> : '''+str(result)+'''</FONT></th>
		</tr>''')
			for page, num, comm, nomAuteur in listResult:
				txt = comm
				for b in balises:
					txt = txt.replace(b,"")
				for a in accents:
					txt = txt.replace(a[0],a[1])
				txt2 = _truncateComment(txt)

				pageWeb.write('<tr><td>'+txt2+'<div align="right"><font face="verdana" color="orange" size="2">-<i><b>'+str(nomAuteur)+'</b></i></font></div></td><td><a href="https://www.koreus.com/modules/newbb/topic160787-'+str(page)+'.html#forumpost'+str(num)+'"> Page '+str(int(page)//20)+' / ForumPost.'+str(num)+'</a></td></tr>')
			# The table and the document were never closed.
			pageWeb.write('</table>\n</html>')
		displayPageWeb = threading.Thread(target=displaySearch)
		displayPageWeb.daemon = True
		displayPageWeb.start()

	def displaySearch():
		"""Open the generated result page and disable the "Afficher" button."""
		# The page is looked up in `chemin`, where buildPageWeb() writes it; the
		# path used to be hard-coded to one user's home directory.
		pagePath = os.path.join(chemin, "pageWeb.html")
		if hasattr(os, "startfile"):
			os.startfile(pagePath)
		else:
			# os.startfile only exists on Windows.
			webbrowser.open(pagePath)
		Afficher['state']='disabled'

	def _sansAccents(texte):
		"""Return `texte` lower-cased, without accents when it is a unicode string."""
		try:
			return unicodedata.normalize('NFKD', texte).encode('ascii', 'ignore').lower()
		except TypeError:
			# normalize() only accepts unicode; a plain byte string is just lower-cased.
			return texte.lower()

	def init2():
		"""Callback of the "Rechercher" button: validate the form and start the search.

		Clears the log, resets the progress bar and the "Afficher" button, then
		checks that both fields are filled and that the text has at least 5
		characters.  On success sets the globals `auteur` (raw entry),
		`auteur2` (normalised author) and `entree2` (text, normalised unless
		the "Casse" box is ticked) and runs rechercher() in a worker thread.
		"""
		global search_thread, derTxt, texto, entree2, auteur2, auteur
		derTxt=""
		texto.delete(1.0, END)
		pCurrent['mode'] = "determinate"
		Afficher['state']='disabled'

		saisieAuteur = entreeAuteur.get()  # unicode when accented, str otherwise
		saisieTexte = entreeText.get()  # unicode when accented, str otherwise
		if not saisieAuteur or not saisieTexte:
			derTxt = "VEUILLEZ SAISIR UN NOM D'AUTEUR ET UN EXTRAIT DU TEXTE QU'IL AURAIT SAISI"\
			+"\nTAPEZ * POUR REMPLACER LE NOM DE L'AUTEUR SI VOUS NE SAVEZ PAS"
			texto.insert(END, derTxt)
			return
		if len(saisieTexte)<5:
			derTxt = "\n"+"L'EXTRAIT DE TEXTE EST TROP COURT (MIN 5 CARACTERES)"
			texto.insert(END, derTxt)
			return

		auteur = saisieAuteur
		auteur2 = _sansAccents(auteur)
		if casse.get():
			entree2 = saisieTexte
		else:
			entree2 = _sansAccents(saisieTexte)

		search_thread = threading.Thread(target=rechercher)
		# Daemon like the other workers, so a running download does not keep
		# the process alive once the window is closed.
		search_thread.daemon = True
		search_thread.start()

	def afficherPlusDe200():
		"""Callback of the "Afficher" button: build the page whatever the number of results."""
		# Body indentation restored; the pasted copy had it at the `def` level.
		buildPageWeb(afficherPlusDe200=True)

	def fenetre():
		"""Create the widgets of the main window and publish them as globals.

		Every widget is placed in the grid of `master`.  The previous code
		passed the result of ``Frame(master).grid(...)`` as parent; grid()
		returns None, so the widgets were attached to the default root anyway.
		The parent is now explicit and the four empty frames that served no
		purpose are gone; only the row 2 frame, which carries the vertical
		padding, is kept.
		"""
		global Afficher, texto, pCurrent, entreeAuteur, entreeText, Rechercher, scrollbar, casse

		# Row 0: author name.
		auteur_label = Label(master, text="Auteur :",bg='#535353',fg="white")
		auteur_label.grid(row=0,column=0, sticky=W)
		entreeAuteur = Entry(master, width=50)
		entreeAuteur.grid(row=0,column=1, columnspan=3, sticky=W+E)

		# Row 1: text to look for.
		text_label = Label(master, text="Texte :",bg='#535353',fg="white")
		text_label.grid(row=1,column=0, sticky=W)
		entreeText = Entry(master, width=50)
		entreeText.grid(row=1,column=1, columnspan=3, sticky=W+E)

		# Row 2: options and buttons; the empty frame only adds 20px of padding
		# above and below the row.
		Frame(master).grid(row=2,pady=20)
		casse = IntVar(master)
		bouton=Checkbutton(master, text="Casse", variable=casse,bg='#535353',activebackground='#535353',fg="#FFFFFF",selectcolor="black")
		bouton.grid(row=2)

		Rechercher = Button(master, text ='Rechercher', command=init2,width=20)
		Rechercher.grid(row=2,column=1, sticky=E)

		Afficher = Button(master,width=20, compound=LEFT, overrelief=GROOVE, text ='Afficher > 200 résultats', fg="red", command=afficherPlusDe200, state=DISABLED)
		Afficher.grid(row=2,column=2, sticky=W)

		# Row 3: progress bar, one step per page of the topic.
		pCurrent = ttk.Progressbar(master, orient='horizontal', mode='determinate', value=5, maximum=nbOfPages)
		pCurrent.grid(row=3, column=0,columnspan=4, sticky=W+E)

		# Row 4: log area and its scrollbar.
		scrollbar = Scrollbar(master)
		scrollbar.grid(row=4, column=3,sticky=W+S+N)
		texto = Text(master, wrap=WORD, yscrollcommand=scrollbar.set, width=50)
		texto.grid(row=4,column=0, columnspan=3, sticky=W+E)
		scrollbar.config(command=texto.yview)

	# Build the interface, then hand control to the Tk event loop.
	fenetre()

	master.mainloop()