@heetbeet
Last active August 20, 2019 20:33
Some functions to help with analysing cpp files.
import os

class ddict(dict):
    # a dict whose items are also accessible as attributes
    def __init__(self, **kwds):
        self.update(kwds)
        self.__dict__ = self
def to_markers(lefts,
               rights):
    return [ddict(lhs  = i,
                  lenl = len(i),
                  rhs  = j,
                  lenr = len(j)) for i, j in zip(lefts,
                                                 rights)]
def single_spacing(txt, also_ln=False):
    txt_old = None
    while txt_old != txt:
        txt_old = txt
        txt = txt.replace('  ', ' ')   # collapse runs of spaces
        txt = txt.replace('\t', ' ')
        if also_ln:
            txt = txt.replace('\n', ' ')
    return txt
def remove_whitespace(txt, also_ln=False):
    N = None
    while N != len(txt):
        N = len(txt)
        txt = txt.replace(' ', '')
        txt = txt.replace('\t', '')
        if also_ln:
            txt = txt.replace('\n', '')
    return txt
def scrub_nonvarchars(txt):
    return ''.join([' ' if i not in '\n_0123456789'
                                    'abcdefghijklmnopqrstuvwxyz'
                                    'ABCDEFGHIJKLMNOPQRSTUVWXYZ' else i for i in txt])

def scrub_all_except_newline(txt):
    return ''.join([' ' if i != '\n' else i for i in txt])
def scrub_comments_and_strings(txt):
    """
    A pre-process to make scraping easier. This function turns all the comments and strings
    into empty text (spaces):
      FROM: /* // */  cout << "hello \"world\"!" << R"(bla)"; //chingching
      TO  :           cout << "              " << R"(   )";
    """
    markers = to_markers(['/*', '"', 'R"('],
                         ['*/', '"', ')"' ])

    # Remove any escaped \" or \\\", but not \\" or \\\\" (uneven vs. even slashes)
    txtout = txt
    i = -1
    while i < len(txtout)-1:
        i += 1
        if txtout[i] == '\\':
            nrslashes = 0
            for j in range(i, len(txtout)):
                if txtout[j] == '\\':
                    nrslashes += 1
                elif txtout[j] == '"':
                    if nrslashes % 2 == 1:
                        txtout = txtout[:j-1] + ' ' + txtout[j+1:]
                        i = j
                        break
                else:
                    i = j
                    break

    # Match lefts with rights and clear the text in between
    # (be aware of // comments!)
    i = -1
    while i < len(txtout)-1:
        i += 1
        nxtiter = False
        for m in markers:
            if m.lhs == txtout[i:i+m.lenl]:
                # Was the last seen " farther back than the last seen //? Then
                # we are in a comment, skip this event.
                if txtout.rfind('//', 0, i+1) > txtout.rfind('\n', 0, i+1):
                    break
                i += m.lenl
                for j in range(i, len(txtout)):
                    if m.rhs == txtout[j:j+m.lenr]:
                        txtout = (txtout[:i] +
                                  scrub_all_except_newline(txtout[i:j]) +
                                  txtout[j:])
                        i = j + m.lenr - 1  # will ++ just now
                        nxtiter = True
                        break
            if nxtiter:
                break

    # Clear the // commented text and remove the leftover /* and */ signs
    lines = txtout.split('\n')
    for i, line in enumerate(lines):
        idx = line.find('//')
        if idx >= 0:
            lines[i] = line[:idx] + ' '*(len(line)-idx)
    txtout = '\n'.join(lines)
    txtout = txtout.replace('/*', '  ')
    txtout = txtout.replace('*/', '  ')
    return txtout
def place_back_strings(txt_scrubbed,
                       txt_original):
    """
    This function places back the strings that were scrubbed away,
    so you end up with only the comments scrubbed.
    """
    markers = to_markers(['/*', '"', 'R"('],
                         ['*/', '"', ')"' ])

    # Match lefts with rights and restore the original text in between
    txtout = txt_scrubbed
    i = -1
    while i < len(txtout)-1:
        i += 1
        nxtiter = False
        for m in markers:
            if m.lhs == txtout[i:i+m.lenl]:
                i = i + m.lenl
                for j in range(i+1, len(txtout)):
                    if m.rhs == txtout[j:j+m.lenr]:
                        txtout = (txtout[:i] +
                                  txt_original[i:j] +
                                  txtout[j:])
                        i = j + m.lenr - 1  # will ++ just now
                        nxtiter = True
                        break
            if nxtiter:
                break
    return txtout
def txt_views(txt):
    v = ddict()
    v.orig       = txt
    v.clean      = scrub_comments_and_strings(txt)
    v.nocomments = place_back_strings(v.clean, txt)
    v.vars       = scrub_nonvarchars(v.clean)
    return v

def view_lnsplit(v):
    return ddict(**{k: val.split('\n') for k, val in v.items()})
def isint(txt):
    try:
        int(txt)
        return True
    except:
        return False

def split(txt):
    # split txt into whitespace-separated tokens, keeping start/end positions
    splits = []
    iskeep = False
    for i, char in enumerate(txt):
        if not iskeep and char not in (' ', '\t', '\n'):
            iskeep = True
            splits.append(ddict(i=i))
        elif iskeep and char in (' ', '\t', '\n'):
            iskeep = False
            splits[-1].j = i
    if iskeep:  # close a token that runs to the end of the text
        splits[-1].j = len(txt)
    for s in splits:
        s.str = txt[s.i:s.j]
    return splits
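
A quick, hypothetical usage sketch (the C++ snippet below is invented for illustration and is not part of the gist) shows how the views and the tokeniser fit together:

cpp = 'int x = 0;  // counter\nstd::cout << "hello" << std::endl;  /* print */\n'

v = txt_views(cpp)
print(v.clean)       # comments and string contents blanked out to spaces
print(v.nocomments)  # string contents restored, comments still blanked
print(v.vars)        # only identifier characters kept, the rest becomes spaces

for s in split(v.vars):
    print(s.i, s.j, s.str)   # start, end and text of each identifier-like token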
Below is the gist's accompanying Jupyter notebook (Python 3.7.3 kernel), cell by cell.

In [103]:
"mcroset = set()\n",
"lookset = set(('FAILED',\n",
" 'THROW'))\n",
"\n",
"files = ['../src/'+f for f in next(os.walk('../src/'))[2]]\n",
"for f in files:\n",
" if not f.endswith('.test.cc'):\n",
" continue\n",
" \n",
" txt = open(f, 'r').read()\n",
" if '//donegtest' in txt: \n",
" continue\n",
" \n",
" v = txt_views(txt)\n",
" \n",
" for s in split(v.vars):\n",
" var = s.str\n",
" if (var.upper() == var and \n",
" not isint(var[0]) and \n",
" len(var)>2 and\n",
" not var[0]+var[-1] == '__' and\n",
" var in lookset):\n",
" \n",
" if var in lookset:\n",
" lidx = v.orig.rfind('\\n', 0, s.i)\n",
" if lidx == -1:\n",
" lidx = 0\n",
" #print('\\n***********************************************')\n",
" #print(('\\n'.join(v.orig[:s.i][::-1].split('\\n')[:5]))[::-1]+\n",
" # '|*-*-*-*-*-> '\n",
" # +'\\n'.join(v.orig[s.i:].split('\\n')[:4]))\n",
" break\n",
" \n"
]
},
In [305]:
from IPython.display import clear_output

mcroset = set()
lookset = set(('FAILED',
               'THROW'))
import shutil
import tempfile
srcloc = '../src/'
tmploc = tempfile.gettempdir()+'/tmpreplacetests'
os.makedirs(tmploc, exist_ok=True)

files = next(os.walk(srcloc))[2]
for f in files:
    if not f.endswith('.test.cc'):
        continue

    for i in range(3):
        try:
            txt = open(tmploc+'/'+f, 'r').read()
            break
        except:
            shutil.copyfile(
                srcloc+'/'+f,
                tmploc+'/'+f,
            )

    print(f)
    if '//donegtest' in open(srcloc+'/'+f).read():
        continue

    v = txt_views(txt)
    vlines = view_lnsplit(v)

    outlines = list(vlines.orig)

    def get_testif():
        i = -1
        while i < len(vlines.orig)-1:
            i += 1

            if ('if(' in remove_whitespace(vlines.clean[i]) and
                not 'if(true)' in remove_whitespace(vlines.clean[i])):

                for j in range(i, len(vlines.orig)):
                    if '}' in vlines.clean[j]:

                        ctxt = '\n'.join(vlines.clean[i:j+1])
                        otxt = '\n'.join(vlines.orig[i:j+1])
                        if 'LOGERROR' in ctxt or 'THROW' in ctxt:
                            yield i, j+1, ctxt, otxt
                        i = j
                        break

    def get_testing_mode():
        i = -1
        while i < len(vlines.orig)-1:
            i += 1

            if ('#ifdefTESTING_MODE' in remove_whitespace(vlines.clean[i])):

                for j in range(i, len(vlines.orig)):
                    if '#endif' in remove_whitespace(vlines.clean[j]):

                        ctxt = '\n'.join(vlines.clean[i:j+1])
                        otxt = '\n'.join(vlines.orig[i:j+1])

                        yield i, j+1, ctxt, otxt

                        i = j
                        break

    def get_class_headers():
        i = -1
        while i < len(vlines.orig)-1:
            i += 1

            cline = vlines.clean[i]

            if ('class' in cline and
                'Test' in cline and
                'public' in cline):

                if cline.split('class')[1].strip().startswith('Test'):
                    testsuite = vlines.vars[i].split()[1].replace('Test', '')
                    testname  = vlines.vars[i].split()[3].replace('Test', '')

                    for j in range(i+1, len(vlines.orig)):
                        if 'virtualvoidrunTests()' in remove_whitespace(vlines.clean[j]):

                            yield i, j+1, testsuite, testname

                            i = j
                            break

    def get_class_setups():
        i = -1
        while i < len(vlines.orig)-1:
            i += 1

            cline = vlines.clean[i]

            if ('class' in cline and
                'Test' in cline and
                'public' in cline):

                assert cline.split('class')[0] == '', "Why isn't the class at the beginning of the file?"

                if not cline.split('class')[1].strip().startswith('Test'):
                    for j in range(i+1, len(vlines.orig)):
                        if vlines.clean[j].startswith('}'):

                            yield i, j+1

                            break

    for i, j in get_class_setups():
        print('\n*******************************')
        print('REMOVING ------->')

        print('\n'.join(vlines.orig[i:j]))
        for idx in range(i, j):
            outlines[idx] = None

    for i, j, ctxt, otxt in get_testif():
        print('*************************')
        print('REPLACING ------->')
        print(otxt)
        print()
        expassrt = 'ASSERT_' if 'THROW' in ctxt else 'EXPECT_'

        message = ''
        for line in otxt.split('\n'):
            if ('LOGERROR' in line) and ('expected' not in line.lower()) and 'failed' in line.lower():

                message = line.split('Testing')[1].split('",')[1].split(');')[0]

        ctxt = ctxt.replace('dif', 'DIF___')
        ctxt = ctxt.replace('abs<double>', 'abs')
        ctxt = ctxt.replace('<double>', '___DOUBLE___')

        chck = single_spacing(ctxt.split('if')[1].split('{')[0], also_ln=True)
        print(chck)

        if 'DIF___' in remove_whitespace(chck) and '>' in remove_whitespace(chck):
            testline = expassrt+'LT'+ (chck.strip().replace('>', ','))

        elif '!almostEqual' in remove_whitespace(chck):
            chck = chck.replace('()', '___emptybraces___')
            lhs = chck.split('almostEqual')[1].strip()[1:]
            lhs = ')'.join(lhs.split(')')[:-2])
            lhs = [i.strip() for i in lhs.split(',')]
            lhs[-1] = '1e-'+lhs[-1]

            testline = (expassrt+'NEAR( '+ ', '.join(lhs)+ ')').strip()
            testline = testline.replace('___emptybraces___', '()')

        elif ('abs' and '>' in remove_whitespace(chck)):
            chck = chck.replace('->', "___VALUEAT____")
            chck = chck.strip()[1:-1]

            if 'std' in chck:
                lhs, rhs = chck.replace('std::abs', '').split('>')
            else:
                lhs, rhs = chck.replace('abs', '').split('>')

            lhs = lhs.strip()
            if lhs[0] == '(' and lhs[-1] == ')':
                lhs = lhs[1:-1]

            if '-' in lhs:
                lhs = ', '.join(lhs.split('-'))
            else:
                lhs = lhs+', 0'

            testline = (expassrt+'NEAR('+ lhs+','+rhs+')').strip()
            testline = testline.replace('___VALUEAT____', "->")
            testline = testline.replace('___emptybraces___', '()')

        elif '!=' in remove_whitespace(chck):
            testline = expassrt+'EQ'+ (chck.strip().replace('!=', ','))

        elif '==' in remove_whitespace(chck):
            testline = expassrt+'NE'+ (chck.strip().replace('==', ','))

        elif '->isEqual' in remove_whitespace(chck) and '(!' in remove_whitespace(chck):
            chck = chck.split('!')
            chck = (''.join(chck[0:2])) + ('!'.join(chck[2:]))
            testline = expassrt+'TRUE'+ (chck.strip())

        elif '>' in remove_whitespace(chck):
            testline = expassrt+'LT'+ (chck.strip().replace('>', ','))

        elif '<' in remove_whitespace(chck):
            testline = expassrt+'GT'+ (chck.strip().replace('<', ','))

        testline = testline.replace('DIF___', 'dif')
        testline = testline.replace('___DOUBLE___', '<double>')
        testline = (' '*(vlines.clean[i].find('if'))+ testline + " << "+ message +';')
        print('WITH ------------>')
        print(testline)

        outlines[i] = testline
        for idx in range(i+1, j):
            outlines[idx] = None

    for i, cline in enumerate(vlines.clean):
        if 'REGISTER_TESTCASE' in cline or 'LOGDEBUG1' in cline:
            print('\n*******************************')
            print('REMOVING ------->')
            print(vlines.orig[i])
            outlines[i] = None

    for i, j, testsuite, testname in get_class_headers():
        term = ''
        if vlines.clean[i].endswith('{'):
            term = '{'

        outlines[i] = 'TEST(%s, %s)'%(testsuite, testname)+term+'{'
        for idx in range(i+1, j):
            outlines[idx] = None

    for i, ncline in enumerate(vlines.nocomments):
        if '#include"testing.hpp"' in remove_whitespace(ncline):
            print('\n*******************************')
            print('REPLACING ------->')
            print(vlines.orig[i])
            print('WITH ------->')
            outlines[i] = '#include "gtest/gtest.h" //donegtest'

            print(outlines[i])

    for i, j, _, _ in get_testing_mode():
        print('\n*******************************')
        print('REMOVING ------->')
        print('\n'.join(vlines.orig[i:j]))
        for idx in range(i, j):
            outlines[idx] = None

    #clear_output()

    output = '\n'.join([i for i in outlines if i is not None])
    #open(srcloc+'/'+f, 'w').write(output)
    #print(output)

    break

Output:
dirichlet.test.cc
discretetable.test.cc
sigmapoints.test.cc
lbu_cg.test.cc
conditionaldirichlet.test.cc
clustergraph.test.cc
conditionalpolya.test.cc
bronkerbosch.test.cc
lbu2_cg.test.cc
lbp_cg.test.cc
polya.test.cc
normedgausscanonical.test.cc
sqrtmvg.test.cc
gausscanonical.test.cc
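
As a hypothetical illustration of the string surgery the cell above performs (the C++ line, the logger name, and the message are invented; the real inputs come from the ../src/*.test.cc files), the message and condition extraction work roughly like this:

line = 'LOGERROR(logger, "Testing", "sum of weights failed");'
message = line.split('Testing')[1].split('",')[1].split(');')[0]
print(message)   # ->  "sum of weights failed"

ctxt = 'if (std::abs(total - 1.0) > 1e-9) {'
chck = single_spacing(ctxt.split('if')[1].split('{')[0], also_ln=True)
print(chck)      # ->  (std::abs(total - 1.0) > 1e-9)
# ...which the abs/'>' branch above turns into roughly:
#     EXPECT_NEAR(total , 1.0, 1e-9) <<  "sum of weights failed";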
In [49]:
'''
fpth = '../src/discretetable.test.cc'
lines = open(fpth, 'r').read().split('\n')
lines_out = list(lines)

for i, line in enumerate(lines):
    if "if(*" in remove_whitespace(line) and '!=' in remove_whitespace(line):
        print('*******')
        sblines = []
        j = -1
        while not '}//if' in remove_whitespace(lines[i+j]):
            j += 1
            if 'Expect' in lines[i+j]:
                continue
            sblines.append(lines[i+j])

        frm = '\n'.join(sblines)
        to = ('\n'.join(sblines).replace('if', 'EXPECT_EQ')
                                .replace(' !=', ',')
                                .replace('Testing",', 'bobbob<<')
                                .replace(');', ';blabla').split('blabla')[0]
             )
        to = to.split('{')[0]+to.split('bobbob')[1]

        lines_out[i] = to
        for k in range(i+1, i+j+1):
            lines_out[k] = None
        print(to)


str_out = '\n'.join([i for i in lines_out if i is not None])
lines = open(fpth, 'w').write(str_out)
''';
In [196]:
def readfile(fname):
    try:
        return open(fname).read()
    except:
        return open(fname, encoding='ISO-8859-1').read()


import os
for dir_, dirs, files in os.walk('../src/'):
    for file in files:
        if file.split('.')[-1] not in ('cc', 'hpp', 'tcc'):
            continue
        if file.endswith('ipynb'): continue
        if os.path.split(dir_)[-1].startswith('.'): continue

        txt = readfile(dir_+'/'+file)
        v = txt_views(txt)
        vlines = view_lnsplit(v)

        fail = False
        for s in split(v.vars):
            if s.str == "FAIL":
                txt = txt[0:s.i] + '%`#]' + txt[s.j:]
                fail = True

        if fail:
            txt = txt.replace('%`#]', 'FAIL_PATREC')
            print(file)
            open(dir_+'/'+file, 'w').write(txt)

Output:
sqrtmvg.cc
gausscanonical.cc
normedgausscanonical.cc
In [53]:
import inspect
inspect(str)

Output:
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-53-948d99da8f11> in <module>
      1 import inspect
----> 2 inspect(str)

TypeError: 'module' object is not callable