Skip to content

Instantly share code, notes, and snippets.

@heetbeet
Last active August 20, 2019 20:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save heetbeet/bdfb12856a6163f5d12bd4d8bcb18ab5 to your computer and use it in GitHub Desktop.
Save heetbeet/bdfb12856a6163f5d12bd4d8bcb18ab5 to your computer and use it in GitHub Desktop.
Some functions to help with analysing cpp files.
import os
class ddict(dict):
    """
    A dict whose items are also reachable as attributes (``d.key``).

    Accepts the same constructor arguments as ``dict``: an optional mapping
    or iterable of key/value pairs, plus keyword arguments.
    """

    def __init__(self, *args, **kwds):
        dict.__init__(self, *args, **kwds)
        # Use the dict itself as the attribute namespace, so that
        # ``d.x`` and ``d['x']`` always read and write the same slot.
        self.__dict__ = self
def to_markers(lefts, rights):
    """
    Pair up opening and closing delimiters.

    Returns one ddict per (left, right) pair with the fields ``lhs``,
    ``lenl``, ``rhs`` and ``lenr`` (the delimiters and their lengths).
    Pairing stops at the shorter of the two input sequences.
    """
    markers = []
    for opener, closer in zip(lefts, rights):
        marker = ddict(lhs=opener,
                       lenl=len(opener),
                       rhs=closer,
                       lenr=len(closer))
        markers.append(marker)
    return markers
def single_spacing(txt, also_ln=False):
    """
    Collapse every run of spaces and tabs into a single space.

    If ``also_ln`` is true, newlines are treated as blanks as well, so the
    result is a single line; otherwise newlines are left untouched.
    """
    # Normalise all blank characters to plain spaces first ...
    txt = txt.replace('\t', ' ')
    if also_ln:
        txt = txt.replace('\n', ' ')
    # ... then halve the runs of spaces until none longer than one remains.
    while '  ' in txt:
        txt = txt.replace('  ', ' ')
    return txt
def remove_whitespace(txt, also_ln=False):
    """
    Remove every space and tab from ``txt``.

    If ``also_ln`` is true, newlines are removed as well.
    """
    # str.replace already removes all occurrences, so one pass per
    # character is enough; no need to iterate to a fixed point.
    blanks = ' \t\n' if also_ln else ' \t'
    for blank in blanks:
        txt = txt.replace(blank, '')
    return txt
def scrub_nonvarchars(txt):
    """
    Replace every character that cannot be part of an identifier by a space.

    Kept as-is: ASCII letters, digits, the underscore and the newline.
    The result has the same length and line structure as ``txt``.
    """
    keep = ('\n_0123456789'
            'abcdefghijklmnopqrstuvwxyz'
            'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    scrubbed = []
    for char in txt:
        if char in keep:
            scrubbed.append(char)
        else:
            scrubbed.append(' ')
    return ''.join(scrubbed)
def scrub_all_except_newline(txt):
    """
    Blank out ``txt``: every character except the newline becomes a space.

    The result has the same length and the same line breaks as ``txt``.
    """
    blanked_lines = [' ' * len(line) for line in txt.split('\n')]
    return '\n'.join(blanked_lines)
def _blank_escaped_quotes(txt):
    # Replace every escaped double quote (an odd-length run of backslashes
    # directly followed by a double quote) by two spaces, so the remaining
    # double quotes are real string delimiters. The length is preserved.
    chars = list(txt)
    total = len(chars)
    i = 0
    while i < total:
        if chars[i] != '\\':
            i += 1
            continue
        # Measure the run of backslashes that starts here.
        j = i
        while j < total and chars[j] == '\\':
            j += 1
        # An even run only escapes backslashes; an odd run escapes the quote.
        if j < total and chars[j] == '"' and (j - i) % 2 == 1:
            chars[j - 1] = ' '
            chars[j] = ' '
        i = j + 1
    return ''.join(chars)


def scrub_comments_and_strings(txt):
    r"""
    A pre-process to make scraping easier. This function turns all the
    comments and the contents of all strings into empty text (spaces):

    FROM: /* // */ cout << "hello \"world\"!" << R"(bla)"; //chingching
    TO  :          cout << "                " << R"(   )";

    (the trailing // comment is replaced by spaces as well).

    Every scrubbed character is replaced by exactly one space and newlines
    are kept, so the result has the same length and line structure as
    ``txt`` and offsets into one are valid in the other. String delimiters
    (" and R"( ... )") are left in place; comment delimiters are blanked.

    Only the delimiters listed in ``markers`` below are recognised; e.g.
    character literals and raw strings with a custom delimiter are not
    treated specially.
    """
    markers = to_markers(['/*', '"', 'R"('],
                         ['*/', '"', ')"'])

    # Pass 1: get rid of escaped quotes so they cannot end a string early.
    txtout = _blank_escaped_quotes(txt)

    # Pass 2: match openers with closers and blank the text in between.
    i = 0
    while i < len(txtout):
        nxt = i + 1
        for m in markers:
            if not txtout.startswith(m.lhs, i):
                continue
            # Is the last '//' before this point on the current line? Then
            # we are inside a line comment and the opener is not real.
            if txtout.rfind('//', 0, i + 1) > txtout.rfind('\n', 0, i + 1):
                break
            start = i + m.lenl
            end = txtout.find(m.rhs, start)
            if end < 0:
                # Unterminated: leave the text alone, continue after opener.
                nxt = start
                break
            if m.lhs == '/*':
                # Blank the delimiters too, so that a '*/' directly followed
                # by '/' is not mistaken for a '//' comment in pass 3.
                lo, hi = i, end + m.lenr
            else:
                lo, hi = start, end
            txtout = (txtout[:lo] +
                      scrub_all_except_newline(txtout[lo:hi]) +
                      txtout[hi:])
            nxt = end + m.lenr
            break
        i = nxt

    # Pass 3: blank the // comments. Strings are already empty, so any '//'
    # still present really starts a comment.
    lines = txtout.split('\n')
    for k, line in enumerate(lines):
        idx = line.find('//')
        if idx >= 0:
            lines[k] = line[:idx] + ' ' * (len(line) - idx)
    txtout = '\n'.join(lines)

    # Remove leftover (unmatched) /* and */ signs, preserving the length.
    txtout = txtout.replace('/*', '  ')
    txtout = txtout.replace('*/', '  ')
    return txtout
def place_back_strings(txt_scrubbed,
                       txt_original):
    """
    This function places back the strings that were scrubbed away,
    so you end up with only the comments scrubbed.

    ``txt_scrubbed`` is scanned for the delimiter pairs in ``markers``; the
    text between each opener and its closer is replaced by the text found at
    the same offsets in ``txt_original``. Both texts must therefore have the
    same length and layout (offsets are not checked here).
    """
    markers = to_markers(['/*', '"', 'R"('],
                         ['*/', '"', ')"'])

    # Match openers with closers and restore the text in between.
    txtout = txt_scrubbed
    i = 0
    while i < len(txtout):
        nxt = i + 1
        for m in markers:
            if not txtout.startswith(m.lhs, i):
                continue
            start = i + m.lenl
            # Search from right after the opener, so that an empty string
            # ("") is closed by its own quote and not by the next string's.
            end = txtout.find(m.rhs, start)
            if end < 0:
                # Unterminated: nothing to restore, continue after opener.
                nxt = start
            else:
                txtout = (txtout[:start] +
                          txt_original[start:end] +
                          txtout[end:])
                # Skip the restored text; it must not be scanned again.
                nxt = end + m.lenr
            break
        i = nxt
    return txtout
def txt_views(txt):
    """
    Build the different views of a source text and return them in a ddict:

    orig       -- the text as given
    clean      -- comments and strings scrubbed
    nocomments -- only the comments scrubbed (strings placed back)
    vars       -- ``clean`` with every non-identifier character scrubbed
    """
    cleaned = scrub_comments_and_strings(txt)
    return ddict(orig=txt,
                 clean=cleaned,
                 nocomments=place_back_strings(cleaned, txt),
                 vars=scrub_nonvarchars(cleaned))
def view_lnsplit(v):
    """
    Return a ddict with the same keys as ``v``, where every text value is
    split into a list of lines (split on the newline character).
    """
    per_line = {}
    for name, text in v.items():
        per_line[name] = text.split('\n')
    return ddict(**per_line)
def isint(txt):
    """
    Return True if ``int(txt)`` succeeds, False otherwise.
    """
    try:
        int(txt)
    except (ValueError, TypeError, OverflowError):
        # Only the failures int() itself signals count as "not an int";
        # things like KeyboardInterrupt must not be swallowed here.
        return False
    return True
def split(txt):
    """
    Split ``txt`` on spaces, tabs and newlines, keeping the positions.

    Returns a list with one ddict per token, with the fields ``i`` (offset
    of the first character), ``j`` (offset just past the last character)
    and ``str`` (the token itself, i.e. ``txt[i:j]``).
    """
    splits = []
    start = None  # offset where the current token began, None between tokens
    for i, char in enumerate(txt):
        is_blank = char in (' ', '\t', '\n')
        if start is None and not is_blank:
            start = i
        elif start is not None and is_blank:
            splits.append(ddict(i=start, j=i, str=txt[start:i]))
            start = None
    # A token that runs up to the end of the text has no blank after it to
    # close it, so close it here.
    if start is not None:
        splits.append(ddict(i=start, j=len(txt), str=txt[start:]))
    return splits
Display the source blob
Display the rendered blob
Raw
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment