Skip to content

Instantly share code, notes, and snippets.

@Azlirn
Last active March 24, 2021 21:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Azlirn/a7107212401f1865106a5e1476303f3e to your computer and use it in GitHub Desktop.
Save Azlirn/a7107212401f1865106a5e1476303f3e to your computer and use it in GitHub Desktop.
A simple script that removes every line containing any of a given set of strings from a file. It was originally built to strip records with common email domains from a text file, but the concept can be applied to many other uses.
import os
import time
# In the current configuration, this script should be able to process:
# 200,000 rps (records per second)
# Strings that mark a record for removal. rmDomain() lower-cases each input
# line and drops it when any of these entries occurs anywhere in the line, so
# every entry here is lower-case. Order is kept because main() prints the list.
# NOTE(review): '@scbglobal.net', '@gmx.xom' and '@viola.fr' look like typos of
# '@sbcglobal.net', '@gmx.com' and '@voila.fr' -- left untouched, confirm
# against the upstream list before changing them.
emailDomains = [
    "@hotmail.com", "@yahoo.com", "@gmail.com", "@aol.com", "@hotmail.fr", "@live.com", "@yahoo.fr",
    "@yahoo.com.tw", "@hotmail.co.uk", "@ymail.com", "@msn.com", "@breakthru.com", "@rediffmail.com",
    "@live.fr", "@yahoo.co.in", "@yahoo.co.uk", "@yahoo.co.br", "@hotmail.es", "@hotmail.it", "@libero.it",
    "@web.de", "@yahoo.in", "@outlook.com", "@yahoo.es", "@rocketmail.com", "@comcast.net", "@bol.com.br",
    "@gmx.de", "@yahoo.com.mx", "@yahoo.it", "@mail.com", "@live.co.uk", "@live.com.mx", "@hotmail.de",
    "@yahoo.co.id", "@yahoo.ca", "@yahoo.de", "@scbglobal.net", "@orange.fr", "@live.it", "@ig.com.br",
    "@googlemail.com", "@aim.com", "@yahoo.com.ar", "@abv.bg", "@att.net", "@alice.it", "@yahoo.com.hk",
    "@yahoo.com.au", "@hotmail.com.br", "@verizon.net", "@live.ca", "@hotmail.com.ar", "@excite.com",
    "@laposte.net", "@btinternet.com", "@virgilio.it", "@wanadoo.fr", "@bellsouth.net", "@email.com",
    "@icloud.com", "@yahoo.com.cn", "@facebook.com", "@cox.net", "@windowslive.com", "@tiscali.it",
    "@live.nl", "@free.fr", "@freenet.de", "@seznam.cz", "@gmx.net", "@o2.pl", "@earthlink.net",
    "@t-online.de", "@yahoo.com.vn", "@latinmail.com", "@live.com.ar", "@hotmail.ca", "@live.com.au",
    "@yahoo.co.jp", "@me.com", "@yahoo.gr", "@gmx.at", "@yahoo.com.sg", "@live.cl", "@netscape.net",
    "@juno.com", "@freemail.hu", "@gmx.xom", "@charter.net", "@live.de", "@uol.com.br", "@ovi.com",
    "@live.com.pt", "@viola.fr", "@bigpond.com", "@sapo.pt", "@yahoo.com.ph", "@terra.com.br", "@inbox.lv",
    "@mail.ru", "@yandex.ru", "@myspace", "@126.com", "@163.com", "@qq.com", "@roadrunner.com",
]
def rmDomain(oldfile, newfile, domains=None, pause=5):
    """Copy ``oldfile`` to ``newfile``, dropping every line that contains a listed string.

    Each line is lower-cased and compared, as a plain substring test, against
    every entry of ``domains``; lines with no match are written unchanged to
    ``newfile``. A running record count is shown while the file is processed
    and a summary is printed at the end.

    Written to run unchanged on Python 2.7 and Python 3.

    Args:
        oldfile: Path of the text file to read.
        newfile: Path of the file to write (created or truncated).
        domains: Iterable of strings that mark a line for removal. Defaults
            to the module-level ``emailDomains`` list. Entries are
            lower-cased before matching.
        pause: Seconds to sleep before exiting (or before restarting after
            an error) so the messages stay readable in a console window.

    Raises:
        SystemExit: Always raised, with a success status, once the summary
            has been printed -- this function ends the program.

    If the files cannot be opened, or both names point at the same file, the
    problem is reported, the module-level ``main()`` is called to prompt
    again, and the function returns ``None`` without printing a summary.
    """
    import sys  # local import: ``sys`` is not among the file's top-level imports

    if domains is None:
        domains = emailDomains
    # Lines are lower-cased before matching, so lower-case the needles once
    # up front to keep the comparison case-insensitive on both sides.
    needles = tuple(domain.lower() for domain in domains)
    progress_every = 1000  # writing to the terminal per record dominates runtime

    # The output is opened with 'w', which truncates it immediately. If both
    # names resolve to the same file the input would be wiped before it is read.
    same_file = (os.path.normcase(os.path.realpath(oldfile)) ==
                 os.path.normcase(os.path.realpath(newfile)))
    if same_file:
        print('[!] Error: %s is both the input and the output file.' % oldfile)
        print('[!] Choose a different name for the file to save.')
        time.sleep(pause)
        main()
        return

    start_time = time.time()
    hitcounter = 0  # records written to newfile
    pcounter = 0    # records read from oldfile
    try:
        with open(oldfile) as oFile, open(newfile, 'w') as nFile:
            try:
                for line in oFile:
                    pcounter += 1
                    lowerLine = line.lower()
                    if not any(needle in lowerLine for needle in needles):
                        nFile.write(line)
                        hitcounter += 1
                    if pcounter % progress_every == 0:
                        sys.stdout.write('\r[*] - {%s} records processed...' % pcounter)
                        sys.stdout.flush()
                # Always show the final count, whatever the throttle interval.
                sys.stdout.write('\r[*] - {%s} records processed...' % pcounter)
                sys.stdout.flush()
            except Exception as e:
                # Best effort: report the problem and fall through to the
                # summary so the user sees how far processing got.
                print('[!] Error Occurred: %s' % e)
    except (IOError, OSError) as e:
        print("[!] Error opening %s: %s" % (oldfile, e))
        print("[!] Ensure the file %s exists in your current directory." % oldfile)
        print("[*] Also, check your spelling and be sure you add the extension to your file name!")
        time.sleep(pause)
        # Prompt for file names again. The original ``reload(main())`` passed
        # main()'s return value (None) to reload(), which is a TypeError.
        main()
        return

    ctime = time.time() - start_time
    print('\n[*] === COMPLETE === [*]')
    print('[*] %s was saved' % newfile)
    print('[*] There are %s records in your saved file.' % hitcounter)
    print('[*] You processed %s total records.\n' % pcounter)
    print("[*] === Completed in %s seconds === [*]" % ctime)
    time.sleep(pause)
    # sys.exit() rather than the site-provided exit(), which is meant for the
    # interactive shell and is not guaranteed to exist.
    sys.exit()
def main():
    """Interactive entry point: ask for the file names, then run ``rmDomain``.

    Clears the terminal, prints the banner, prompts for the input and output
    file names (repeating each prompt until something is entered), shows the
    list of strings that will be filtered out, waits for Enter, and hands the
    two names to ``rmDomain``.

    Written to run unchanged on Python 2.7 and Python 3.
    """
    # ``raw_input`` only exists on Python 2; Python 3 renamed it to ``input``.
    try:
        ask = raw_input
    except NameError:
        ask = input

    def ask_filename(prompt):
        # Repeat the prompt until a non-empty answer is given. The original
        # restarted the whole program through ``reload(main())``, which
        # recursed and handed main()'s None return value to reload().
        while True:
            answer = ask(prompt)
            if answer != '':
                return answer
            print('[!] You must define a filename')
            time.sleep(2)

    os.system('cls' if os.name == 'nt' else 'clear')
    print('''
,--. o ,---.| o
| |,---.,-.-.,---..,---. `---.|--- ,---..,---.,---.,---.,---.
| || || | |,---||| | || | || || ||---'|
`--' `---'` ' '`---^`` ' `---'`---'` `|---'|---'`---'`
| |
''')
    print('\n[!] Currently this script only supports stripping text files.\n')
    #TODO: Add flag to allow csv files to be processed
    print('[*] Thanks to leakedsource.com for providing an awesome list of domains to ignore.\n')

    oldfile = ask_filename('{*} Enter the file (with extension) you would like to strip domains from: ')
    newfile = ask_filename('{*} Enter the name of the file (with extension) you would like me to save: ')

    # One pre-formatted argument so the output is the same on Python 2 and 3.
    print("\n[*] This script will remove records that contain the following strings: \n\n%s" % (emailDomains,))
    # The prompt only returns on Enter, so say so instead of "any key".
    ask("\n[!] Press Enter to start...\n")
    rmDomain(oldfile, newfile)
# Start the interactive prompts only when the file is run as a script, so
# importing it (e.g. to reuse rmDomain) has no side effects.
if __name__ == '__main__':
    main()
@priintpar
Copy link

Hi, I modified your script so that you can pass in a third file containing the strings (one per line) that should be removed from the main file.

https://gist.github.com/priintpar/8a54443e57255d5814cbc1bdf177fcdf

Thanks for your work.

@richlysakowski
Copy link

Great work. Thank you both for creating these utilities.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment