Skip to content

Instantly share code, notes, and snippets.

@YasserGersy
Created May 14, 2022 03:59
Show Gist options
  • Save YasserGersy/3ee2d33599bb31a35cab6c23c52ee505 to your computer and use it in GitHub Desktop.
Save YasserGersy/3ee2d33599bb31a35cab6c23c52ee505 to your computer and use it in GitHub Desktop.
Python script to search for specific strings in big files
#!/usr/bin/env python3
# NOTE: the previous interpreter line passed "-e", which is not a CPython
# command-line option, so running the script directly failed at start-up.
import sys,os,glob,time

# Module-wide tally of recoverable errors.
errors_count = 0
# Largest ID treated as valid -- presumably the limit that check_fb_id() is
# meant to compare search terms against (TODO confirm with the author).
max_id = 100011265576762
# When true, dprint() echoes its argument; otherwise dprint() is silent.
debug = False
# Timestamp taken at import time; GetSecondsPassed() measures from here.
start = time.time()
def GetSecondsPassed():
    """Return the whole seconds elapsed since the module-level ``start`` time.

    The fractional part is truncated, so the result is 0 during the first
    second after ``start``.
    """
    return int(time.time() - start)
def _contains_(line,term):
if line.lower().find(term)>=0:
return True
return False
def _contains_any(string,substring_list):
return any(substring in string for substring in substring_list)
def _contains_all(string,substring_list):
return all(substring in string for substring in substring_list)
def min(s, m):
    """Return *s* raised to at least *m*, i.e. the larger of the two values.

    NOTE: despite its name this is a lower-bound clamp, and it shadows the
    builtin ``min`` for the rest of the module. When the values compare
    equal, *s* is returned.
    """
    return m if s < m else s
def dprint(s):
    """Print *s* only while the module-level ``debug`` flag is truthy."""
    if not debug:
        return
    print(s)
def listfiles(PATH, rext="*.*"):
    """Recursively collect the paths of the files found below *PATH*.

    Args:
        PATH: Directory to walk with ``os.walk``.
        rext: Extension filter. ``"*.*"`` (the default) or ``"*"`` selects
            every file. Otherwise only files whose extension equals the
            filter are kept; both ``".txt"`` and the glob-like ``"*.txt"``
            are accepted.

    Returns:
        A list of paths, each built by joining the walked directory with the
        file name.
    """
    match_all = rext in ("*.*", "*")
    # os.path.splitext() yields ".txt", never "*.txt", so drop a leading "*"
    # to let glob-style filters match.
    wanted_ext = rext[1:] if rext.startswith("*") else rext
    return [
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk(PATH)
        for filename in filenames
        if match_all or os.path.splitext(filename)[1] == wanted_ext
    ]
def num_lines(p):
    """Count the lines of the text file at path *p*.

    Returns:
        The number of lines, or -1 when the file cannot be opened or decoded.
        Each failure also increments the module-level ``errors_count``.
    """
    # Without this declaration the assignment below makes errors_count a
    # local name and the error path dies with UnboundLocalError.
    global errors_count
    try:
        # The context manager closes the handle even if decoding fails midway.
        with open(p) as handle:
            return sum(1 for _ in handle)
    except (OSError, ValueError):
        # ValueError covers UnicodeDecodeError and malformed path strings.
        errors_count = errors_count + 1
        return -1
def Readable_num(s):#1123456 > 1 123 456
    """Format *s* with a space between every group of three digits.

    Args:
        s: A number, or anything whose ``str()`` form is a number.

    Returns:
        The grouped text, e.g. ``1123456`` -> ``"1 123 456"``. A leading sign
        and a fractional part (after ``"."``) are kept as they are and are
        not grouped. No separator is placed in front of the first group.
    """
    text = str(s)
    sign = ""
    if text[:1] in ("-", "+"):
        sign, text = text[0], text[1:]
    whole, dot, fraction = text.partition(".")
    # Peel groups of three off the right; what remains (1-3 digits) becomes
    # the leading group, so no separator is ever emitted in front of it.
    groups = []
    while len(whole) > 3:
        groups.append(whole[-3:])
        whole = whole[:-3]
    groups.append(whole)
    return sign + " ".join(reversed(groups)) + dot + fraction
def printr(s, r):
    """Print *s* followed by the terminator *r* in place of a newline.

    Passing ``"\\r"`` as *r* lets the next print overwrite the same console
    line.
    """
    print(s, end=r)
def check_fb_id(i1, i2):
    """Print a notice when *i1* is numerically greater than *i2*.

    Both arguments are converted with ``int(str(...))``. If either is not an
    integer literal the check is skipped silently.

    Args:
        i1: Candidate ID.
        i2: Largest accepted ID.
    """
    try:
        val1 = int(str(i1))
        # The limit must be parsed from i2; parsing i1 twice made the
        # comparison below always false.
        val2 = int(str(i2))
    except ValueError:
        return  # not an int: nothing to compare
    if val1 > val2:
        print(str(val1) + " is bigger than max fbid " + str(val2))
def _collect_terms(userinput):
    """Build the list of unique, non-empty search terms from *userinput*.

    *userinput* is split on commas. When it also names an existing file, every
    line of that file, stripped of surrounding whitespace, is added as a term.
    """
    terms = userinput.strip().split(",")
    if os.path.isfile(userinput):
        with open(userinput, encoding="utf-8") as term_file:
            terms.extend(entry.strip() for entry in term_file)
    # dict.fromkeys() drops duplicates while keeping first-seen order.
    return [term for term in dict.fromkeys(terms) if term]


def _search_file(path, lowered_terms, lines_before):
    """Scan *path* once, reporting each line that contains any of the terms.

    Matching lines are printed and appended to ``output.txt``; for other lines
    a progress message is written over the current console line.

    Args:
        path: File to read as UTF-8 text.
        lowered_terms: Search terms, already lowercased.
        lines_before: Lines read from earlier files, used for the overall
            progress and speed figures.

    Returns:
        A ``(lines_read, errors)`` tuple for this file.
    """
    lines_read = 0
    errors = 0
    try:
        with open(path, "r", encoding="utf-8") as handle:
            for line in handle:
                lines_read += 1
                dprint(line)
                # Lowercase the line once and test every term against it, so
                # all terms are searched in a single pass over the file.
                if _contains_any(line.lower(), lowered_terms):
                    hit = "\t [+] " + line
                    print(hit)
                    try:
                        with open('output.txt', 'a', encoding="utf-8") as out:
                            out.write(hit.strip() + "\n")
                    except OSError as err:
                        print("Error135:\n" + str(err))
                        errors += 1
                else:
                    total = lines_before + lines_read
                    overall = "" if lines_read == total else " All:" + Readable_num(total)
                    # max(..., 1) avoids dividing by zero in the first second.
                    speed = " [speed:" + str(total // max(GetSecondsPassed(), 1)) + " Per second ]"
                    printr(" [+] Searching line :" + Readable_num(lines_read) + overall + speed, "\r")
            dprint("~~~end of file-------------------------")
    except (OSError, UnicodeDecodeError) as ex:
        # Unreadable or undecodable file: give up on it, keep the run going.
        errors += 1
        dprint(str(ex))
    dprint("Closing file ")
    return lines_read, errors


def main():
    """Search a file, or every file under a directory, for the given terms.

    Command line: ``script PATH TERMS``. ``PATH`` is a file or a directory
    that is walked recursively. ``TERMS`` is a comma-separated list of search
    strings; if it names an existing file, that file's lines are added as
    terms. Matching is case-insensitive. Matches are printed and appended to
    ``output.txt`` in the current directory.
    """
    #Banner and usage
    print("--------------Reader v1.1-----------------------")
    if len(sys.argv) < 3:
        print("Usage: python " + str(sys.argv[0]) + " file.txt myem@gmail.com")
        sys.exit()

    #args
    path = sys.argv[1]
    terms = _collect_terms(sys.argv[2])
    if not terms:
        print("[-] plz provide at least one search term ")
        sys.exit()

    errors_count = 0
    if os.path.isfile(path):
        files = [path]
        print("[+] Looking in single path [" + path + "]")
    elif os.path.isdir(path):
        files = listfiles(path, "*.*")
        print("[+] Looking for {" + str(terms) + "} in " + str(len(files)) + " Files in Directory: [" + path + "] " + " E:" + str(errors_count))
    else:
        print("[-] plz provide an existing file or directory ")
        sys.exit()

    for term in terms:
        check_fb_id(term, max_id)
    dprint(files)
    dprint("\n\n")

    #looping files
    lowered_terms = [str(term).lower() for term in terms]
    all_files_lines = 0
    count = 0  # lines read from the most recently scanned file
    for fl in files:
        print(" [+] Search for '" + str(terms) + "' at:'" + fl + "' ")
        count, file_errors = _search_file(fl, lowered_terms, all_files_lines)
        all_files_lines += count
        errors_count += file_errors

    dprint("Errors:" + str(errors_count))
    print("\n------------End Search--------")
    print("------------Count:" + str(count) + "--------")
# Script entry point: run the search; a Ctrl+C ends the run quietly.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Count the interruption, tell the user, and exit with status 0.
        errors_count += 1
        print('\n\nCancelled')
        sys.exit(0)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment