Last active
March 24, 2021 21:02
-
-
Save Azlirn/a7107212401f1865106a5e1476303f3e to your computer and use it in GitHub Desktop.
A simple script that removes every line (record) containing any of a given set of strings from a file. It was originally built to filter records with common e-mail domains out of a text file, but the same approach can be applied to many other uses.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import time | |
# In its current configuration, this script should be able to process
# roughly 200,000 records per second (rps).
# GLOBAL VARIABLE | |
# Substrings that mark a record for removal.  Matching is a case-insensitive
# substring test against each input line, so every entry must stay lower-case.
# Shorter entries also cover longer ones (e.g. '@live.com' already matches
# '@live.com.mx'); the longer forms are kept for readability of the list.
emailDomains = [
    '@hotmail.com', '@yahoo.com', '@gmail.com', '@aol.com', '@hotmail.fr', '@live.com', '@yahoo.fr',
    '@yahoo.com.tw', '@hotmail.co.uk', '@ymail.com', '@msn.com', '@breakthru.com', '@rediffmail.com',
    '@live.fr', '@yahoo.co.in', '@yahoo.co.uk', '@yahoo.co.br', '@hotmail.es', '@hotmail.it', '@libero.it',
    '@web.de', '@yahoo.in', '@outlook.com', '@yahoo.es', '@rocketmail.com', '@comcast.net', '@bol.com.br',
    '@gmx.de', '@yahoo.com.mx', '@yahoo.it', '@mail.com', '@live.co.uk', '@live.com.mx', '@hotmail.de',
    '@yahoo.co.id', '@yahoo.ca', '@yahoo.de', '@scbglobal.net', '@orange.fr', '@live.it', '@ig.com.br',
    '@googlemail.com', '@aim.com', '@yahoo.com.ar', '@abv.bg', '@att.net', '@alice.it', '@yahoo.com.hk',
    '@yahoo.com.au', '@hotmail.com.br', '@verizon.net', '@live.ca', '@hotmail.com.ar', '@excite.com',
    '@laposte.net', '@btinternet.com', '@virgilio.it', '@wanadoo.fr', '@bellsouth.net', '@email.com',
    '@icloud.com', '@yahoo.com.cn', '@facebook.com', '@cox.net', '@windowslive.com', '@tiscali.it',
    '@live.nl', '@free.fr', '@freenet.de', '@seznam.cz', '@gmx.net', '@o2.pl', '@earthlink.net',
    '@t-online.de', '@yahoo.com.vn', '@latinmail.com', '@live.com.ar', '@hotmail.ca', '@live.com.au',
    '@yahoo.co.jp', '@me.com', '@yahoo.gr', '@gmx.at', '@yahoo.com.sg', '@live.cl', '@netscape.net',
    '@juno.com', '@freemail.hu', '@gmx.xom', '@charter.net', '@live.de', '@uol.com.br', '@ovi.com',
    '@live.com.pt', '@viola.fr', '@bigpond.com', '@sapo.pt', '@yahoo.com.ph', '@terra.com.br', '@inbox.lv',
    '@mail.ru', '@yandex.ru', '@myspace', '@126.com', '@163.com', '@qq.com', '@roadrunner.com',
    # NOTE(review): '@gmx.xom', '@scbglobal.net' and '@viola.fr' above look like
    # misspellings of real providers.  They are kept so nothing that used to be
    # filtered slips through; the presumably intended spellings are added here.
    '@gmx.com', '@sbcglobal.net', '@voila.fr',
]
def rmDomain(oldfile, newfile, domains=None, pause=5):
    """Copy ``oldfile`` to ``newfile`` without the lines that contain a listed string.

    Each line of ``oldfile`` is lower-cased and tested against every entry of
    ``domains`` with a plain substring check; lines with no match are written
    to ``newfile`` unchanged.  A summary is printed and the process then exits.

    Args:
        oldfile: Path of the text file to read.
        newfile: Path of the file to write (created or overwritten).
        domains: Iterable of strings to filter on.  Defaults to the
            module-level ``emailDomains`` list.
        pause: Seconds to sleep before exiting or restarting, so the messages
            stay readable when the script runs in a short-lived console.

    Raises:
        SystemExit: Always, once processing has finished (status 0 on success,
            1 if reading or writing failed part-way through).

    If the files cannot be opened, or both paths name the same file, an error
    is printed and ``main()`` is called again so the user can re-enter names.
    """
    import sys  # function-scope import: the file header only imports os and time

    # Lower-case the needles once; each line is lower-cased before comparison.
    needles = [d.lower() for d in (emailDomains if domains is None else domains)]
    progress_every = 10000  # writing progress for every record dominates run time

    # Opening the destination with 'w' truncates it immediately, so using one
    # path for both files would wipe the input before a single line is read.
    same_path = (os.path.normcase(os.path.abspath(oldfile)) ==
                 os.path.normcase(os.path.abspath(newfile)))
    if same_path:
        print('[!] Error: %s cannot be both the input file and the saved file.' % oldfile)
        time.sleep(pause)
        return main()

    start_time = time.time()
    hitcounter = 0   # records written to newfile
    pcounter = 0     # records read from oldfile
    failed = False
    try:
        with open(oldfile) as oFile, open(newfile, 'w') as nFile:
            try:
                for line in oFile:
                    pcounter += 1
                    lowerLine = line.lower()
                    if not any(needle in lowerLine for needle in needles):
                        nFile.write(line)
                        hitcounter += 1
                    if pcounter % progress_every == 0:
                        sys.stdout.write('\r[*] - {%s} records processed...' % pcounter)
                        sys.stdout.flush()
                # Always show the final count, whatever the throttle interval.
                sys.stdout.write('\r[*] - {%s} records processed...' % pcounter)
                sys.stdout.flush()
            except (IOError, OSError, UnicodeError) as e:
                failed = True
                print('\n[!] Error Occurred: %s' % e)
    except (IOError, OSError) as e:
        print("[!] Error opening %s: %s" % (oldfile, e))
        print("[!] Ensure the file %s exists in your current directory." % oldfile)
        print("[*] Also, check your spelling and be sure you add the extension to your file name!")
        time.sleep(pause)
        # Restart the prompts.  main() ends by calling rmDomain again, which
        # exits the process, so this call does not normally return.
        return main()

    ctime = time.time() - start_time
    if failed:
        # Do not report a clean finish when the loop stopped on an error.
        print('\n[!] === STOPPED EARLY - %s may be incomplete === [!]' % newfile)
    else:
        print('\n[*] === COMPLETE === [*]')
    print('[*] %s was saved' % newfile)
    print('[*] There are %s records in your saved file.' % hitcounter)
    print('[*] You processed %s total records.\n' % pcounter)
    print("[*] === Completed in %s seconds === [*]" % ctime)
    time.sleep(pause)
    sys.exit(1 if failed else 0)
def main():
    """Interactive entry point: ask for the input and output file names, then filter.

    Clears the terminal, prints the banner and notices, prompts until two
    usable file names have been entered, lists the strings that will be
    filtered on, waits for confirmation and hands over to ``rmDomain``.
    """
    # Works on both interpreter lines: Python 2 has raw_input, Python 3 input.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    def ask_filename(prompt):
        # Re-prompt in a loop rather than restarting main() recursively.
        while True:
            answer = read_line(prompt).strip()
            if answer:
                return answer
            print('[!] You must define a filename')
            time.sleep(2)

    os.system('cls' if os.name == 'nt' else 'clear')
    print('''
,--. o ,---.| o
| |,---.,-.-.,---..,---. `---.|--- ,---..,---.,---.,---.,---.
| || || | |,---||| | || | || || ||---'|
`--' `---'` ' '`---^`` ' `---'`---'` `|---'|---'`---'`
| |
''')
    print('\n[!] Currently this script only supports stripping text files.\n')
    # TODO: Add flag to allow csv files to be processed
    print('[*] Thanks to leakedsource.com for providing an awesome list of domains to ignore.\n')

    oldfile = ask_filename('{*} Enter the file (with extension) you would like to strip domains from: ')
    newfile = ask_filename('{*} Enter the name of the file (with extension) you would like me to save: ')
    # Writing to the input path would truncate it before it is read.
    while os.path.abspath(newfile) == os.path.abspath(oldfile):
        print('[!] The saved file must be different from the file being stripped')
        time.sleep(2)
        newfile = ask_filename('{*} Enter the name of the file (with extension) you would like me to save: ')

    print("\n[*] This script will remove records that contain the following strings: \n\n %s"
          % (emailDomains,))
    # The prompt reads a whole line, so Enter (not just any key) is required.
    read_line("\n[!] Press Enter to start...\n")
    rmDomain(oldfile, newfile)
# Only start the interactive program when run as a script, so the module can
# be imported (e.g. to reuse rmDomain) without launching the prompts.
if __name__ == '__main__':
    main()
Great work. Thank you both for creating these utilities.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hi I modified your file, so that you can hand over a third file with strings separated by lines that should be removed from the main file.
https://gist.github.com/priintpar/8a54443e57255d5814cbc1bdf177fcdf
Thanks for your work.