LeNarvalo/recherche.py Secret

## recherche.py
# -*- coding: utf8 -*-
import urllib, webbrowser
import unicodedata
import threading
from Tkinter import *
import ttk
import tkMessageBox
import os
import time
import re

# Root window of the application. It is created at import time, so merely
# importing this module opens a Tk window.
master=Tk()
master.configure(background='#535353')


### MODULE-LEVEL STATE ###
# NOTE: a `global` statement at module scope has no effect in Python; the names
# assigned below are module globals regardless.
global derTxt, texto, auteurSaved
auteurSaved = ""  # author name written in the header of the results page
derTxt = ""  # most recent status line pushed to the `texto` widget
# The three display* flags and disableDisplay below are not read anywhere in the
# code visible in this file.
displaySignature = False
displayVideo = False
displayImage = False
listResult = []  # search hits, filled by rechercher() as [page, post id, comment, author]
listCom = []  # forum post ids (strings) collected by createFPList()
dico = {}  # author name -> flat list of page offset, post id, comment text, ...
disableDisplay = True
chemin = os.path.expanduser('~/Veganisme')  # folder holding the archive and the results page
# HTML fragments removed from a comment before it is shown in the results page.
balises=["<blockquote>",'<div class="xoopsQuote">',"<br />"]
# [search, replacement] pairs applied with str.replace(). The first half matches
# UTF-8 bytes spelled out as literal backslash escapes (the form they take inside
# a repr()), the second half matches the raw bytes themselves.
# NOTE(review): "\xc3\x87" is the upper-case C-cedilla but is mapped to the
# lower-case one - confirm whether that is intended.
accents=[["\\xc3\\xa7","ç"],["\\xc3\\xae","î"],["\\xc3\\x87","ç"],["\\xe2\\x80\\x99","'"],["\\xc3\\xa9","é"],["\\xc3\\xa0","à"],["\\xc3\\xa8","è"],["\\xc3\\xb4","ô"],["\\xc3\\xb9","ù"],["\\xc3\\xaa","ê"],["\xc3\xa7","ç"],["\xc3\x87","ç"],["\xe2\x80\x99","'"],["\xc3\xa9","é"],["\xc3\xa0","à"],["\xc3\xa8","è"],["\xc3\xb4","ô"],["\xc3\xb9","ù"],["\xc3\xaa","ê"],["\xc3\xae","î"]]
# One-element list holding an HTML/CSS page header. It is not referenced by the
# functions visible in this file (check_thread() builds its own header).
d = ['''<!DOCTYPE html>
<style>
table {
	width:100%;
}
table, th, td {
	border: 1px solid black;
	border-collapse: collapse;
}
th, td {
	padding: 15px;
	text-align: left;
}
table#t01 tr:nth-child(even) {
	background-color: #eee;
}
table#t01 tr:nth-child(odd) {
   background-color: #fff;
}

</style>
<body>
<table id="t01">
''']
###############

# Download the first page of the topic once at start-up and read the number of
# pages from its pager. The pager is located by the exact HTML of its first four
# entries; the text of the link that follows the "..." is the last page number.
pageTest=urllib.urlopen('https://www.koreus.com/modules/newbb/topic160787.html')
try:
	strpageTest=pageTest.read()
finally:
	# Release the connection even when the download is interrupted.
	pageTest.close()
splitText = '''<b>(1)</b> <a href="/modules/newbb/topic160787-20.html">2</a> <a href="/modules/newbb/topic160787-40.html">3</a> <a href="/modules/newbb/topic160787-60.html">4</a> ... <a '''
listSST = strpageTest.split(splitText)
if len(listSST) < 2:
	# Without this check a changed pager layout surfaces as a bare IndexError.
	raise RuntimeError("Pager not found in the topic page: cannot determine the number of pages")
listEST = listSST[1].split("</a>")
listNSST = listEST[0].split(">")
try:
	nbOfPages = int(listNSST[1])
except (IndexError, ValueError):
	raise RuntimeError("Unexpected pager markup in the topic page: cannot determine the number of pages")

def createFPList(strpage):
	"""Collect the forum post ids found in one page of HTML.

	Every '<a id="forumpost<ID>">' anchor in strpage contributes <ID> (a string)
	to the module-level list listCom, in document order. The list is extended
	in place and nothing is returned.
	"""
	global listCom #List of forum post ids, as strings
	prefix = 'orumpost'
	# Splitting on '<a id="f' leaves every anchor chunk starting with the rest of
	# the word "forumpost", immediately followed by the id and the closing '">'.
	listCom.extend(
		piece.split('">')[0][len(prefix):]
		for piece in strpage.split('<a id="f')
		if piece.startswith(prefix)
	)

def getDicoFromStr(fileStr):
	"""Rebuild the author -> posts dictionary from the text of the Sujet.html archive.

	The archive is written by rechercher() as a '<meta charset="UTF-8">' prefix,
	then str(dico), then a newline and a page length. The dictionary literal is
	parsed with ast.literal_eval rather than by splitting on quote patterns, so:
	  * no author name is hard-coded and no author is dropped,
	  * comments containing "', '" or quotes stay in one piece,
	  * escaped bytes come back as the original bytes.

	fileStr -- full content of the archive file.

	Returns the parsed dictionary, which is also stored in the global dico.
	When no valid dictionary can be read, "Bug général" is written to the texto
	widget and an empty dictionary is returned.

	The globals auteurList (author names) and commentList (the matching lists of
	posts, in the same order) are refreshed as well.
	"""
	global auteurList, commentList, dico, derTxt
	import ast  # local import: the module header does not import ast

	start = fileStr.find("{")
	end = fileStr.rfind("}")
	try:
		if start == -1 or end < start:
			raise ValueError("no dictionary literal in the archive")
		parsed = ast.literal_eval(fileStr[start:end + 1])
		if not isinstance(parsed, dict):
			raise ValueError("the archive does not contain a dictionary")
	except (ValueError, SyntaxError, TypeError):
		derTxt = "\n"+"Bug général"
		texto.insert(END, derTxt)
		parsed = {}

	dico = {}
	for auteur, posts in parsed.items():
		# Archives produced by the previous parser could hold a bare string
		# instead of a list; normalise so callers can always extend().
		if isinstance(posts, (list, tuple)):
			dico[auteur] = list(posts)
		else:
			dico[auteur] = [posts]

	auteurList = list(dico.keys())
	commentList = [dico[auteur] for auteur in auteurList]
	return dico

def _asciiLower(value):
	"""Return value lower-cased with accents stripped (ASCII only)."""
	try:
		return unicodedata.normalize('NFKD', value).encode('ascii', 'ignore').lower()
	except TypeError:
		# Tkinter returns a plain str for ASCII-only input and normalize()
		# only accepts unicode objects.
		return value.lower()


def _fetchPage(offset):
	"""Download the topic page starting at post offset (0 = first page)."""
	if offset:
		url = 'https://www.koreus.com/modules/newbb/topic160787-'+str(offset)+'.html'
	else:
		url = 'https://www.koreus.com/modules/newbb/topic160787.html'
	page = urllib.urlopen(url)
	try:
		return page.read()
	finally:
		page.close()


def _stripTag(text, opening, skip):
	"""Remove from text every opening tag that starts with `opening`.

	The tag ends at the first '>' found at or after `skip` characters past its
	start. An unterminated tag drops the rest of the text.
	"""
	start = text.find(opening)
	while start != -1:
		end = text.find(">", start + skip)
		if end == -1:
			return text[:start]
		text = text[:start] + text[end + 1:]
		start = text.find(opening)
	return text


def _storePage(strpage, offset, announce):
	"""Merge the comments of one downloaded page into dico.

	strpage  -- HTML of the page.
	offset   -- post offset of the page, stored with every comment.
	announce -- when true, write "ECRITURE EN COURS" to texto each time a
	            comment is added to an author who is already known.

	Post ids are paired with comments page by page. A post id already stored
	for its author is skipped, so re-reading a page is harmless.
	"""
	global derTxt
	firstId = len(listCom)
	createFPList(strpage)
	postIds = listCom[firstId:]

	comments = []
	for chunk in strpage.split('href="/memb'):
		if not chunk.startswith('re/'):
			continue
		nameParts = chunk.split('">')
		bodyParts = chunk.split('<div class="comText')
		if len(nameParts) < 2 or len(bodyParts) < 2:
			# A member link without a comment body; indexing [1] here used
			# to raise IndexError and kill the worker thread.
			continue
		auteur = nameParts[1].split('</a>')[0]
		body = bodyParts[1].split('</div>\r\n\t    <br clear="all" />')[0][2:]
		body = _stripTag(body, "<img", 0)
		body = _stripTag(body, "<iframe", 10)
		comments.append((auteur, body))

	for postId, (auteur, body) in zip(postIds, comments):
		entry = [str(offset), postId, body]
		posts = dico.get(auteur)
		if not isinstance(posts, list):
			dico[auteur] = entry
		elif postId not in posts:
			posts.extend(entry)
			if announce:
				derTxt = "\n"+"ECRITURE EN COURS"
				texto.insert(END, derTxt)


def _lastArchivedOffset(lastPage):
	"""Return the highest page offset already present in dico (0 if none).

	Page offsets are the stored values that are multiples of 20 below lastPage.
	"""
	best = 0
	for posts in dico.values():
		for item in posts:
			if item.isdigit():
				value = int(item)
				if value % 20 == 0 and best < value < lastPage:
					best = value
	return best


def _saveArchive(path, lastLenPage):
	"""Write dico and the length of the last page read to the archive file."""
	with open(path, "w") as archive:
		archive.write('<meta charset="UTF-8">')
		archive.write(str(dico))
		archive.write("\n"+str(lastLenPage))


def rechercher():
	"""Archive or refresh the topic, then search it for the requested extract.

	Runs in the worker thread started by init2().

	1. If no archive exists, every page is downloaded and parsed, then the
	   archive is written. It is only written once the scrape has finished, so
	   an interrupted first run no longer leaves an empty, unreadable file.
	2. Otherwise the archive is loaded if dico is empty, and the length of the
	   current last page is compared with the stored one. On a difference, the
	   pages from the last archived offset onwards are re-read and the archive
	   is rewritten.
	3. The comments of the first author whose name contains the requested text
	   are searched for the extract (all authors when the author field is "*").
	   Hits go to listResult and are listed in texto.

	The globals result, listResult and entree2 are assigned before any network
	access so that check_thread() always finds them.

	NOTE(review): this function touches Tk widgets (texto, pCurrent, the entry
	fields) from a non-GUI thread, as the original did - confirm that this is
	acceptable on the target platform.
	"""
	global listCom, entreeAuteur, listResult, dico, texto, derTxt, auteurSaved, accents, result, entree2
	listResult = []
	result = 0
	auteur = entreeAuteur.get() #unicode when it contains accents, str otherwise
	entree = entreeText.get() #unicode when it contains accents, str otherwise
	auteur2 = _asciiLower(auteur)
	entree2 = _asciiLower(entree)

	lastPage = int(nbOfPages)*20 #offset just past the last page
	archivePath = os.path.join(chemin, "Sujet.html")
	listCom = []

	if not os.path.isfile(archivePath):
		# FIRST USE: no archive on disk yet
		if not os.path.exists(chemin):
			os.mkdir(chemin)
		lastLenPage = 0
		countPage = 0
		for offset in range(0, lastPage, 20):
			strpage = _fetchPage(offset)
			lastLenPage = len(strpage)
			_storePage(strpage, offset, False)
			countPage += 1
			pCurrent['value'] = countPage
		_saveArchive(archivePath, lastLenPage)
	else:
		# EXISTING ARCHIVE
		with open(archivePath, "r") as archive:
			fileStr = archive.read()
		if len(dico) < 1:
			dico = getDicoFromStr(fileStr)
		try:
			lastLenPage = int(fileStr.strip().rsplit("\n", 1)[-1])
		except ValueError:
			# Empty or damaged archive: force a refresh.
			lastLenPage = -1

		# The archive is considered stale when the last page changed length.
		lastOffset = max(lastPage - 20, 0)
		lastHtml = _fetchPage(lastOffset)
		if len(lastHtml) == lastLenPage:
			derTxt = "\n"+"BASE A JOUR!"
			texto.insert(END, derTxt)
		else:
			derTxt = "\n"+"MISE A JOUR DE LA BASE..."
			texto.insert(END, derTxt)
			for offset in range(_lastArchivedOffset(lastPage), lastPage, 20):
				if offset == lastOffset:
					strpage = lastHtml
				else:
					strpage = _fetchPage(offset)
				lastLenPage = len(strpage)
				_storePage(strpage, offset, True)
			_saveArchive(archivePath, lastLenPage)

	# SEARCH THE EXTRACT, BY AUTHOR
	pCurrent['mode'] = "indeterminate"
	pCurrent.start(1)
	wildcard = auteur == u"*"
	found = False
	try:
		for key in dico.keys():
			keyMatches = auteur2 in key.lower()
			if not (keyMatches or wildcard):
				continue
			found = True
			if wildcard:
				auteurSaved = "Auteur inconnu"
			else:
				auteurSaved = key

			page = None
			num = None
			for comm in dico[key]:
				# Values below 200000 are page offsets, values above are
				# forum post ids; anything else is comment text.
				try:
					number = int(comm)
				except (TypeError, ValueError):
					number = None
				if number is not None:
					if number < 200000:
						page = comm
					elif number > 200000:
						num = comm
					continue

				comm1 = comm
				for a in accents:
					comm1 = comm1.replace(a[0],a[1])
				try:
					comm2 = unicodedata.normalize('NFKD', comm1.decode('utf8').lower()).encode('ascii', 'ignore')
				except UnicodeError:
					continue #undecodable comment: skipped, as before
				if page is None or num is None:
					continue
				if entree2 in comm2 and len(comm.split())>1:
					# Offset 0 is page 1 in the forum pager.
					derTxt = "\n"+"PAGE:"+str(int(page)//20 + 1)+"    FORUMPOST :"+str(num)
					texto.insert(END, derTxt)
					result += 1
					listResult.append([page,num,comm,key])

			if keyMatches:
				break #only the first matching author is searched
	except Exception as err:
		# Report the failure instead of swallowing it silently.
		derTxt = "\n"+"ERREUR : "+repr(err)
		texto.insert(END, derTxt)

	if not found and not wildcard:
		derTxt = "\n"+"AUTEUR NON TROUVVE"
		texto.insert(END, derTxt)
	pCurrent.stop()


def _truncateExtract(txt, limit=550):
	"""Return at most `limit` characters of txt without a half-open link.

	When the last '<a' inside the cut comes after the last '/a>', the cut fell
	inside an anchor: the text is shortened to end just before that '<a'.
	"""
	head = txt[:limit]
	lastOpen = head.rfind('<a')
	lastClose = head.rfind('/a>')
	if lastOpen > lastClose:
		return head[:lastOpen]
	return head


def check_thread():
	"""Poll the search thread; when it is done, build and open the results page.

	While thirdary_thread is alive the function re-schedules itself every
	500 ms. Afterwards it writes the number of results to texto, asks for
	confirmation above 100 results, writes pageWeb.html in the `chemin` folder
	and opens that same file.
	"""
	if thirdary_thread.is_alive():
		master.after(500, check_thread)
		return

	# rechercher() publishes these as globals; use neutral values when it
	# stopped before assigning them.
	nbResults = globals().get("result", 0)
	extrait = globals().get("entree2", "")

	derTxt = "\n"+"Nombre de resultats : "+str(nbResults)
	texto.insert(END, derTxt)
	pCurrent.stop()
	if nbResults > 100:
		if not tkMessageBox.askyesno("Continuer?", "Le script va ouvrir un nombre de page important, voulez vous continuer?"):
			return

	e = '''<meta charset="UTF-8">
	<!DOCTYPE html>
	<html>
	<head>
	<style>
	table {
		width:100%;
	}
	table, th, td {
		border: 1px solid black;
		border-collapse: collapse;
	}
	th, td {
		padding: 15px;
		text-align: left;
	}
	table#t01 tr:nth-child(even) {
		background-color: #e0dfe7;
	}
	table#t01 tr:nth-child(odd) {
	background-color: #fff;
	}

	</style>
	</head><table id="t01" cellpadding="3px" cellspacing="0px" rules="all" style="border:solid 1px black; border-collapse:collapse; text-align:center;">	'''

	pagePath = os.path.join(chemin, "pageWeb.html")
	with open(pagePath, "w") as pageWeb:
		pageWeb.write(e)
		pageWeb.write('''<tr>
		<th colspan="2" style="width:140px;background-color:#9a9ace"><FONT color="#fff">'''+auteurSaved+''' - Extrait du texte recherché : '''+extrait+''' - <U>Nombre de résultat(s)</U> : '''+str(nbResults)+'''</FONT></th>
	</tr>''')
		for hit in listResult:
			# hit is [page offset, post id, comment text, author]
			txt = hit[2]
			for b in balises:
				txt = txt.replace(b,"")
			for a in accents:
				txt = txt.replace(a[0],a[1])
			txt2 = _truncateExtract(txt)

			# Offset 0 is page 1 in the forum pager.
			pageWeb.write('<tr><td>'+txt2+'<div align="right"><font face="verdana" color="orange" size="2">-<i><b>'+str(hit[3])+'</b></i></font></div></td><td><a href="https://www.koreus.com/modules/newbb/topic160787-'+str(hit[0])+'.html#forumpost'+str(hit[1])+'"> Page '+str(int(hit[0])//20 + 1)+' / ForumPost.'+str(hit[1])+'</a></td></tr>')
		pageWeb.write('</table>\n</html>')

	# Open the file that was just written, not a path tied to one user account.
	if hasattr(os, "startfile"):
		os.startfile(pagePath)
	else:
		# os.startfile only exists on Windows.
		webbrowser.open("file://"+pagePath)

def init2():
	"""Handler of the "Rechercher" button: validate the fields, then start a search.

	The status area is cleared and the progress bar is put back in determinate
	mode. When a field is empty or the extract has fewer than 5 characters, an
	explanation is written to texto and nothing else happens. Otherwise
	rechercher() is started in a new thread (stored in the global
	thirdary_thread) and check_thread() is scheduled 50 ms later.
	"""
	global thirdary_thread, derTxt, texto
	derTxt=""
	texto.delete(1.0, END)
	pCurrent['mode'] = "determinate"

	refusal = None
	if not entreeAuteur.get() or not entreeText.get():
		refusal = ("VEUILLEZ SAISIR UN NOM D'AUTEUR ET UN EXTRAIT DU TEXTE QU'IL AURAIT SAISI"
			+"\nTAPEZ * POUR REMPLACER LE NOM DE L'AUTEUR SI VOUS NE SAVEZ PAS")
	elif len(entreeText.get())<5:
		refusal = "\n"+"L'EXTRAIT DE TEXTE EST TROP COURT (MIN 5 CARACTERES)"

	if refusal is not None:
		derTxt = refusal
		texto.insert(END, derTxt)
		return

	worker = threading.Thread(target=rechercher)
	thirdary_thread = worker
	worker.start()
	master.after(50, check_thread)


# WINDOW LAYOUT
# pack() returns None, so a widget must be created first and packed in a
# separate statement. Otherwise the variable holds None and the children meant
# for the frame are silently attached to the root window.
# The frames fill the window width and share its background colour so that the
# result looks the same as before.
t1 = Frame(master, bg='#535353')
t1.pack(fill=X)
auteur_label = Label(t1, text="Auteur :",bg='#535353',fg="white")
auteur_label.pack(anchor="w")
entreeAuteur = Entry(t1, width=50)
entreeAuteur.pack(anchor="w")

t2 = Frame(master, bg='#535353')
t2.pack(fill=X)
text_label = Label(t2, text="Texte :",bg='#535353',fg="white")
text_label.pack(anchor="w")
entreeText = Entry(t2, width=50)
entreeText.pack(anchor="w")

t3 = Frame(master, bg='#535353')
t3.pack(fill=X)
Rechercher = Button(t3, text ='Rechercher', command=init2)
Rechercher.pack(fill=BOTH,padx=20,pady=10)

# Progress of the first archiving run, one step per downloaded page.
pCurrent = ttk.Progressbar(t3, orient='horizontal', mode='determinate', value=5, maximum=nbOfPages)
pCurrent.pack(fill=BOTH, pady=10)

# Status / results area with its vertical scrollbar.
scrollbar = Scrollbar(master)
scrollbar.pack(side=RIGHT, fill=Y)
texto = Text(master, wrap=WORD, yscrollcommand=scrollbar.set)
texto.pack()
scrollbar.config(command=texto.yview)

master.mainloop()
	# -*- coding: utf8 -*-
	import urllib, webbrowser
	import unicodedata
	import threading
	from Tkinter import *
	import ttk
	import tkMessageBox
	import os
	import time
	import re

	master=Tk()
	master.configure(background='#535353')


	###VARIABLES###
	global derTxt, texto, auteurSaved
	auteurSaved = ""
	derTxt = ""
	displaySignature = False
	displayVideo = False
	displayImage = False
	listResult = []
	listCom = []
	dico = {}
	disableDisplay = True
	chemin = os.path.expanduser('~/Veganisme')
	balises=["<blockquote>",'<div class="xoopsQuote">',"<br />"]
	accents=[["\\xc3\\xa7","ç"],["\\xc3\\xae","î"],["\\xc3\\x87","ç"],["\\xe2\\x80\\x99","'"],["\\xc3\\xa9","é"],["\\xc3\\xa0","à"],["\\xc3\\xa8","è"],["\\xc3\\xb4","ô"],["\\xc3\\xb9","ù"],["\\xc3\\xaa","ê"],["\xc3\xa7","ç"],["\xc3\x87","ç"],["\xe2\x80\x99","'"],["\xc3\xa9","é"],["\xc3\xa0","à"],["\xc3\xa8","è"],["\xc3\xb4","ô"],["\xc3\xb9","ù"],["\xc3\xaa","ê"],["\xc3\xae","î"]]
	d = ['''<!DOCTYPE html>
	<style>
	table {
	width:100%;
	}
	table, th, td {
	border: 1px solid black;
	border-collapse: collapse;
	}
	th, td {
	padding: 15px;
	text-align: left;
	}
	table#t01 tr:nth-child(even) {
	background-color: #eee;
	}
	table#t01 tr:nth-child(odd) {
	background-color: #fff;
	}

	</style>
	<body>
	<table id="t01">
	''']
	###############

	pageTest=urllib.urlopen('https://www.koreus.com/modules/newbb/topic160787.html')
	strpageTest=pageTest.read()
	splitText = '''<b>(1)</b> <a href="/modules/newbb/topic160787-20.html">2</a> <a href="/modules/newbb/topic160787-40.html">3</a> <a href="/modules/newbb/topic160787-60.html">4</a> ... <a '''
	listSST = strpageTest.split(splitText)
	listEST = listSST[1].split("</a>")
	listNSST = listEST[0].split(">")
	nbOfPages = int(listNSST[1])

	def createFPList(strpage):
	#LISTE N° FORUMPOST
	global listCom #Liste de forumpost en string
	a = strpage.split('''<a id="f''')
	for l in a:
	if l.startswith('orumpost'):
	comNb= l.split('">')[0]
	listCom.append(comNb[8:])

	def getDicoFromStr(fileStr):
	#OBTIENT DICO DEPUIS FICHIER SUJET.HTML EN STR UTF8
	global auteurList, commentList, dico, der, derTxt
	dico = {}
	list = fileStr.split("': ['")
	auteurList = ["Olrik"]
	commentList = []
	for line in list[1:]:
	try:
	try:
	auteur = line[line.index("'], '")+5:]
	if auteur not in auteurList:
	auteurList.append(auteur)
	except:
	auteur = line[line.index("']}")+3:]
	if auteur not in auteurList:
	auteurList.append(auteur)
	try:
	commentList.append(line[:line.index("'], '")])
	except:
	commentList.append(line[:line.index("']}")])
	except:
	derTxt = "\n"+"Bug général"
	texto.insert(END, derTxt)

	id = 0
	for auteur in auteurList[:-1]:
	list = commentList[id].split("', '")
	pgGot = False
	for comm in list:
	if len(comm) == 0:
	continue
	try:
	if not pgGot:
	page = int(comm)
	pgGot = True
	elif pgGot:
	fpCom = int(comm)
	pgGot = False
	try:
	dico[auteur].extend([str(page),str(fpCom)])
	except:
	dico[auteur]=[str(page),str(fpCom)]
	except:
	try :
	dico[auteur].append(comm)
	except:
	dico[auteur]=comm
	id+=1

	return dico

	def rechercher():
	#ENREGISTRE LE SUJET/MET A JOUR L'ARCHIVAGE/RECHERCHE L'EXTRAIT DU TEXTE EN FONCTION DE L'AUTEUR
	global listCom, entreeAuteur, listResult, dico, texto, derTxt, auteurSaved, accents, result, entree2
	lastPage=int(nbOfPages)*20 #LAST PAGE + (1*20)

	#PREMIERE UTILISATION (PAS D'ARCHIVE SUR LE DISQUE DUR)
	if not os.path.isfile(chemin+'\\Sujet.html'):
	if not os.path.exists(chemin):
	os.mkdir(chemin)
	file = open(chemin+"\\Sujet.html","w")
	file.close()

	listCom = []
	page=urllib.urlopen('https://www.koreus.com/modules/newbb/topic160787.html')
	strpage=page.read()
	suf = range(0,9999, 20)
	countPage = 0
	countComm = -1
	while suf[countPage]!=lastPage:
	lastLenPage=len(strpage)
	createFPList(strpage)
	#DICO DES COMM/AUTEUR
	b = strpage.split('href="/memb')
	for c in b:
	if c.startswith('re/'):
	countComm += 1
	e = c.split('">')
	i = e[1].split('</a>')
	auteur = i[0]
	f = c.split('<div class="comText')
	f1 = f[1]
	h = f1.split('</div>\r\n\t <br clear="all" />') #h[0] = ComText uniquement
	h02 = h[0][2:]
	##REMOVE IMG
	while "<img" in h02:
	idS = h02.index("<img")
	idE = h02[idS:].index(">")
	h02=h02.replace(h02[idS:idS+idE+1],"")

	##REMOVE VID
	h3 = ""
	while "<iframe" in h02 :
	idS = h02.index("<iframe")
	idE = h02[idS+10:].index(">")
	h02=h02.replace(h02[idS:idS+idE+11],"")

	try:
	dico[auteur].extend([str(suf[countPage]),listCom[countComm],h02])
	except:
	dico[auteur]=[str(suf[countPage]),listCom[countComm],h02]

	countPage+=1
	page=urllib.urlopen('https://www.koreus.com/modules/newbb/topic160787-'+str(suf[countPage])+'.html')
	strpage=page.read()
	pCurrent['value'] = countPage

	#ARCHIVAGE
	file = open(chemin+"\\Sujet.html","w")
	file.write('<meta charset="UTF-8">')
	file.write(str(dico))
	file.write("\n"+str(lastLenPage))
	file.close()

	#ARCHIVAGE DEJA EXISTANT
	else:
	if len(dico) < 1:
	file = open(chemin+"\\Sujet.html","r")
	fileStr = file.read()
	file.close()
	dico = getDicoFromStr(fileStr)

	#VERIFICATION DE LA MISE A JOUR
	listCom = []
	suf = range(1960,9999, 20)
	countPage = 0
	page=urllib.urlopen('https://www.koreus.com/modules/newbb/topic160787-'+str(suf[countPage])+'.html')
	strpage=page.read()
	news = True
	file = open(chemin+"\\Sujet.html","r")
	fileList = file.readlines()
	file.close()
	lastLenPage = int(fileList[-1])
	while suf[countPage]!=lastPage:
	if len(strpage) == lastLenPage:
	news = False
	if len(strpage) != lastLenPage:
	news = True

	countPage+=1
	page=urllib.urlopen('https://www.koreus.com/modules/newbb/topic160787-'+str(suf[countPage])+'.html')
	strpage=page.read()

	if not news:
	derTxt = "\n"+"BASE A JOUR!"
	texto.insert(END, derTxt)

	if news:
	derTxt = "\n"+"MISE A JOUR DE LA BASE..."
	texto.insert(END, derTxt)
	page=urllib.urlopen('https://www.koreus.com/modules/newbb/topic160787-'+str(suf[countPage])+'.html')
	strpage=page.read()
	suf = range(1980,9999, 20)
	countPage = 0
	countComm = -1
	while suf[countPage]!=lastPage:
	createFPList(strpage)
	lastLenPage=len(strpage)
	#DICO DES COMM/AUTEUR
	b = strpage.split('href="/memb')
	for c in b:
	if c.startswith('re/'):
	countComm += 1
	e = c.split('">')
	i = e[1].split('</a>')
	auteur = i[0]
	f = c.split('<div class="comText')
	f1 = f[1]
	h = f1.split('</div>\r\n\t <br clear="all" />') #h[0] = ComText uniquement
	h02 = h[0][2:]
	##REMOVE IMG
	while "<img" in h02:
	idS = h02.index("<img")
	idE = h02[idS:].index(">")
	h02=h02.replace(h02[idS:idS+idE+1],"")

	##REMOVE VID
	h3 = ""
	while "<iframe" in h02 :
	idS = h02.index("<iframe")
	idE = h02[idS+10:].index(">")
	h02=h02.replace(h02[idS:idS+idE+11],"")

	if auteur in dico:
	if listCom[countComm] not in dico[auteur]:
	dico[auteur].extend([str(suf[countPage]),listCom[countComm],h02])
	derTxt = "\n"+"ECRITURE EN COURS"
	texto.insert(END, derTxt)
	else:
	dico[auteur]=[str(suf[countPage]),listCom[countComm],h02]

	countPage+=1
	page=urllib.urlopen('https://www.koreus.com/modules/newbb/topic160787-'+str(suf[countPage])+'.html')
	strpage=page.read()

	file = open(chemin+"\\Sujet.html","w")
	file.write('<meta charset="UTF-8">')
	file.write(str(dico))
	file.write("\n"+str(lastLenPage))
	file.close()

	#RECHERCHER PAR AUTEUR L'EXTRAIT
	try:
	auteur = entreeAuteur.get() #Unicode si accent, string sinon
	try:
	auteur2 = unicodedata.normalize('NFKD', auteur).encode('ascii', 'ignore').lower()
	except:
	auteur2 = auteur.lower()
	pCurrent['mode'] = "indeterminate"
	pCurrent.start(1)
	listResult = []
	result = 0
	for key in dico.keys():
	if auteur2 in key.lower() or auteur==u"*":
	if auteur != u"*":
	auteurSaved = key
	else:
	auteurSaved = "Auteur inconnu"

	for comm in dico[key]:
	try:
	if int(comm)<200000:
	page = comm
	except:
	None
	try:
	if int(comm)>200000:
	num = comm
	except:
	None
	try:
	entree = entreeText.get() #Unicode si accent, string sinon
	try:
	entree2 = unicodedata.normalize('NFKD', entree).encode('ascii', 'ignore').lower()
	except:
	entree2 = entree.lower()
	comm1 = comm
	for a in accents:
	comm1 = comm1.replace(a[0],a[1])
	comm2 = unicodedata.normalize('NFKD', comm1.decode('utf8').lower()).encode('ascii', 'ignore')
	if entree2 in comm2 and len(comm.split())>1:
	derTxt = "\n"+"PAGE:"+str(int(page)/20)+" FORUMPOST :"+str(num)
	texto.insert(END, derTxt)
	result += 1
	listResult.append([page,num,comm,key])
	except:
	None
	if auteur2 in key.lower():
	return
	if auteur==u"*":
	return

	derTxt = "\n"+"AUTEUR NON TROUVVE"
	texto.insert(END, derTxt)
	pCurrent.stop()
	except:
	None


	def check_thread():
	if thirdary_thread.is_alive():
	master.after(500, check_thread)
	else:
	derTxt = "\n"+"Nombre de resultats : "+str(result)
	texto.insert(END, derTxt)
	pCurrent.stop()
	if result > 100:
	if not tkMessageBox.askyesno("Continuer?", "Le script va ouvrir un nombre de page important, voulez vous continuer?"):
	return


	e = '''<meta charset="UTF-8">
	<!DOCTYPE html>
	<html>
	<head>
	<style>
	table {
	width:100%;
	}
	table, th, td {
	border: 1px solid black;
	border-collapse: collapse;
	}
	th, td {
	padding: 15px;
	text-align: left;
	}
	table#t01 tr:nth-child(even) {
	background-color: #e0dfe7;
	}
	table#t01 tr:nth-child(odd) {
	background-color: #fff;
	}

	</style>
	</head><table id="t01" cellpadding="3px" cellspacing="0px" rules="all" style="border:solid 1px black; border-collapse:collapse; text-align:center;"> '''
	pageWeb = open(chemin+"\\pageWeb.html","w")
	pageWeb.write(e)
	pageWeb.write('''<tr>
	<th colspan="2" style="width:140px;background-color:#9a9ace"><FONT color="#fff">'''+auteurSaved+''' - Extrait du texte recherché : '''+entree2+''' - <U>Nombre de résultat(s)</U> : '''+str(result)+'''</FONT></th>
	</tr>''')
	for list in listResult:
	txt = list[2]
	for b in balises:
	txt = txt.replace(b,"")
	for a in accents:
	txt = txt.replace(a[0],a[1])

	try:
	id1=txt[:550][::-1].index('a<')
	except:
	id1=9999
	try:
	id2=txt[:550][::-1].index('>a/')
	except:
	id2=9999

	if id2 > id1:
	txt2 = txt[:550-id1-1]
	else:
	txt2 = txt[:550]

	pageWeb.write('<tr><td>'+txt2+'<div align="right"><font face="verdana" color="orange" size="2">-<i><b>'+str(list[3])+'</b></i></font></div></td><td><a href="https://www.koreus.com/modules/newbb/topic160787-'+str(list[0])+'.html#forumpost'+str(list[1])+'"> Page '+str(int(list[0])/20)+' / ForumPost.'+str(list[1])+'</a></td></tr>')
	pageWeb.close()
	os.startfile("C:/Users/LeNa/Veganisme/pageWeb.html")

	def init2():
	global thirdary_thread, derTxt, texto
	derTxt=""
	texto.delete(1.0, END)
	pCurrent['mode'] = "determinate"
	if len(entreeAuteur.get())==0 or len(entreeText.get())==0:
	derTxt = "VEUILLEZ SAISIR UN NOM D'AUTEUR ET UN EXTRAIT DU TEXTE QU'IL AURAIT SAISI"\
	+"\nTAPEZ * POUR REMPLACER LE NOM DE L'AUTEUR SI VOUS NE SAVEZ PAS"
	texto.insert(END, derTxt)
	return
	if len(entreeText.get())<5:
	derTxt = "\n"+"L'EXTRAIT DE TEXTE EST TROP COURT (MIN 5 CARACTERES)"
	texto.insert(END, derTxt)
	return
	thirdary_thread = threading.Thread(target=rechercher)
	thirdary_thread.start()
	master.after(50, check_thread)


	t1 = Frame(master).pack()
	auteur_label = Label(t1, text="Auteur :",bg='#535353',fg="white").pack(anchor="w")
	entreeAuteur = Entry(t1, width=50)
	entreeAuteur.pack(anchor="w")

	t2 = Frame(master).pack()
	text_label = Label(t2, text="Texte :",bg='#535353',fg="white").pack(anchor="w")
	entreeText = Entry(t2, width=50)
	entreeText.pack(anchor="w")

	t3 = Frame(master).pack()
	Rechercher = Button(t3, text ='Rechercher', command=init2).pack(fill=BOTH,padx=20,pady=10)

	pCurrent = ttk.Progressbar(t3, orient='horizontal', mode='determinate', value=5, maximum=nbOfPages)
	pCurrent.pack(fill=BOTH, pady=10)

	scrollbar = Scrollbar(master)
	scrollbar.pack(side=RIGHT, fill=Y)
	texto = Text(master, wrap=WORD, yscrollcommand=scrollbar.set)
	texto.pack()
	scrollbar.config(command=texto.yview)

	master.mainloop()