@heetbeet
Last active August 20, 2019 20:33
Some functions to help with analysing cpp files.
import os

class ddict(dict):
    # a dict whose items are also accessible as attributes
    def __init__(self, **kwds):
        self.update(kwds)
        self.__dict__ = self
def to_markers(lefts,
               rights):
    return [ddict(lhs  = i,
                  lenl = len(i),
                  rhs  = j,
                  lenr = len(j)) for i, j in zip(lefts,
                                                 rights)]
def single_spacing(txt, also_ln=False):
    txt_old = None
    while txt_old != txt:
        txt_old = txt
        txt = txt.replace('  ', ' ')   # collapse runs of spaces
        txt = txt.replace('\t', ' ')
        if also_ln:
            txt = txt.replace('\n', ' ')
    return txt
def remove_whitespace(txt, also_ln=False):
    N = None
    while N != len(txt):
        N = len(txt)
        txt = txt.replace(' ', '')
        txt = txt.replace('\t', '')
        if also_ln:
            txt = txt.replace('\n', '')
    return txt
def scrub_nonvarchars(txt):
    return ''.join([' ' if i not in '\n_0123456789'
                                    'abcdefghijklmnopqrstuvwxyz'
                                    'ABCDEFGHIJKLMNOPQRSTUVWXYZ' else i for i in txt])

def scrub_all_except_newline(txt):
    return ''.join([' ' if i != '\n' else i for i in txt])
def scrub_comments_and_strings(txt):
    """
    A pre-process to make scraping easier. This function turns all the comments and strings
    into empty text (spaces):
      FROM: /* // */  cout << "hello \"world\"!" << R"(bla)"; //chingching
      TO  :           cout << "              " << R"(   )";
    """
    markers = to_markers(['/*', '"', 'R"('],
                         ['*/', '"', ')"' ])

    # Remove any escaped \" or \\\", but not \\" or \\\\" (uneven vs. even slashes)
    txtout = txt
    i = -1
    while i < len(txtout)-1:
        i += 1
        if txtout[i] == '\\':
            nrslashes = 0
            for j in range(i, len(txtout)):
                if txtout[j] == '\\':
                    nrslashes += 1
                elif txtout[j] == '"':
                    if nrslashes % 2 == 1:
                        txtout = txtout[:j-1] + ' ' + txtout[j+1:]
                        i = j
                        break
                else:
                    i = j
                    break

    # Match lefts with rights and clear the text in between
    # (be aware of // comments!)
    i = -1
    while i < len(txtout)-1:
        i += 1
        nxtiter = False
        for m in markers:
            if m.lhs == txtout[i:i+m.lenl]:
                # Was the last seen " farther back than the last seen //? Then
                # we are in a comment, skip this event.
                if txtout.rfind('//', 0, i+1) > txtout.rfind('\n', 0, i+1):
                    break
                i += m.lenl
                for j in range(i, len(txtout)):
                    if m.rhs == txtout[j:j+m.lenr]:
                        txtout = (txtout[:i] +
                                  scrub_all_except_newline(txtout[i:j]) +
                                  txtout[j:])
                        i = j + m.lenr - 1  # will ++ just now
                        nxtiter = True
                        break
            if nxtiter:
                break

    # Clear the // commented text and remove the leftover /* and */ signs
    lines = txtout.split('\n')
    for i, line in enumerate(lines):
        idx = line.find('//')
        if idx >= 0:
            lines[i] = line[:idx] + ' '*(len(line)-idx)
    txtout = '\n'.join(lines)
    txtout = txtout.replace('/*', '  ')
    txtout = txtout.replace('*/', '  ')
    return txtout
def place_back_strings(txt_scrubbed,
                       txt_original):
    """
    This function places back the strings that were scrubbed away,
    so you end up with only the comments scrubbed.
    """
    markers = to_markers(['/*', '"', 'R"('],
                         ['*/', '"', ')"' ])

    # Match lefts with rights and restore the original text in between
    txtout = txt_scrubbed
    i = -1
    while i < len(txtout)-1:
        i += 1
        nxtiter = False
        for m in markers:
            if m.lhs == txtout[i:i+m.lenl]:
                i = i + m.lenl
                for j in range(i+1, len(txtout)):
                    if m.rhs == txtout[j:j+m.lenr]:
                        txtout = (txtout[:i] +
                                  txt_original[i:j] +
                                  txtout[j:])
                        i = j + m.lenr - 1  # will ++ just now
                        nxtiter = True
                        break
            if nxtiter:
                break
    return txtout
def txt_views(txt):
    v = ddict()
    v.orig       = txt
    v.clean      = scrub_comments_and_strings(txt)
    v.nocomments = place_back_strings(v.clean, txt)
    v.vars       = scrub_nonvarchars(v.clean)
    return v

def view_lnsplit(v):
    return ddict(**{k: val.split('\n') for k, val in v.items()})
def isint(txt):
    try:
        int(txt)
        return True
    except:
        return False

def split(txt):
    # split txt into whitespace-separated tokens, keeping start/end positions
    splits = []
    iskeep = False
    for i, char in enumerate(txt):
        if not iskeep and char not in (' ', '\t', '\n'):
            iskeep = True
            splits.append(ddict(i=i))
        elif iskeep and char in (' ', '\t', '\n'):
            iskeep = False
            splits[-1].j = i
    if iskeep:  # close a token that runs to the end of the text
        splits[-1].j = len(txt)
    for s in splits:
        s.str = txt[s.i:s.j]
    return splits
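
A quick, hypothetical usage sketch (the C++ snippet below is invented for illustration and is not part of the gist) shows how the views and the tokeniser fit together:

cpp = 'int x = 0;  // counter\nstd::cout << "hello" << std::endl;  /* print */\n'

v = txt_views(cpp)
print(v.clean)       # comments and string contents blanked out to spaces
print(v.nocomments)  # string contents restored, comments still blanked
print(v.vars)        # only identifier characters kept, the rest becomes spaces

for s in split(v.vars):
    print(s.i, s.j, s.str)   # start, end and text of each identifier-like token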
Below is the gist's accompanying Jupyter notebook (Python 3.7.3 kernel), cell by cell.

In [103]:
"mcroset = set()\n",
"lookset = set(('FAILED',\n",
" 'THROW'))\n",
"\n",
"files = ['../src/'+f for f in next(os.walk('../src/'))[2]]\n",
"for f in files:\n",
" if not f.endswith('.test.cc'):\n",
" continue\n",
" \n",
" txt = open(f, 'r').read()\n",
" if '//donegtest' in txt: \n",
" continue\n",
" \n",
" v = txt_views(txt)\n",
" \n",
" for s in split(v.vars):\n",
" var = s.str\n",
" if (var.upper() == var and \n",
" not isint(var[0]) and \n",
" len(var)>2 and\n",
" not var[0]+var[-1] == '__' and\n",
" var in lookset):\n",
" \n",
" if var in lookset:\n",
" lidx = v.orig.rfind('\\n', 0, s.i)\n",
" if lidx == -1:\n",
" lidx = 0\n",
" #print('\\n***********************************************')\n",
" #print(('\\n'.join(v.orig[:s.i][::-1].split('\\n')[:5]))[::-1]+\n",
" # '|*-*-*-*-*-> '\n",
" # +'\\n'.join(v.orig[s.i:].split('\\n')[:4]))\n",
" break\n",
" \n"
]
},
In [305]:
from IPython.display import clear_output

mcroset = set()
lookset = set(('FAILED',
               'THROW'))
import shutil
import tempfile
srcloc = '../src/'
tmploc = tempfile.gettempdir()+'/tmpreplacetests'
os.makedirs(tmploc, exist_ok=True)

files = next(os.walk(srcloc))[2]
for f in files:
    if not f.endswith('.test.cc'):
        continue

    for i in range(3):
        try:
            txt = open(tmploc+'/'+f, 'r').read()
            break
        except:
            shutil.copyfile(
                srcloc+'/'+f,
                tmploc+'/'+f,
            )

    print(f)
    if '//donegtest' in open(srcloc+'/'+f).read():
        continue

    v = txt_views(txt)
    vlines = view_lnsplit(v)

    outlines = list(vlines.orig)

    def get_testif():
        i = -1
        while i < len(vlines.orig)-1:
            i += 1

            if ('if(' in remove_whitespace(vlines.clean[i]) and
                not 'if(true)' in remove_whitespace(vlines.clean[i])):

                for j in range(i, len(vlines.orig)):
                    if '}' in vlines.clean[j]:

                        ctxt = '\n'.join(vlines.clean[i:j+1])
                        otxt = '\n'.join(vlines.orig[i:j+1])
                        if 'LOGERROR' in ctxt or 'THROW' in ctxt:
                            yield i, j+1, ctxt, otxt
                        i = j
                        break

    def get_testing_mode():
        i = -1
        while i < len(vlines.orig)-1:
            i += 1

            if ('#ifdefTESTING_MODE' in remove_whitespace(vlines.clean[i])):

                for j in range(i, len(vlines.orig)):
                    if '#endif' in remove_whitespace(vlines.clean[j]):

                        ctxt = '\n'.join(vlines.clean[i:j+1])
                        otxt = '\n'.join(vlines.orig[i:j+1])

                        yield i, j+1, ctxt, otxt

                        i = j
                        break

    def get_class_headers():
        i = -1
        while i < len(vlines.orig)-1:
            i += 1

            cline = vlines.clean[i]

            if ('class' in cline and
                'Test' in cline and
                'public' in cline):

                if cline.split('class')[1].strip().startswith('Test'):
                    testsuite = vlines.vars[i].split()[1].replace('Test', '')
                    testname  = vlines.vars[i].split()[3].replace('Test', '')

                    for j in range(i+1, len(vlines.orig)):
                        if 'virtualvoidrunTests()' in remove_whitespace(vlines.clean[j]):

                            yield i, j+1, testsuite, testname

                            i = j
                            break

    def get_class_setups():
        i = -1
        while i < len(vlines.orig)-1:
            i += 1

            cline = vlines.clean[i]

            if ('class' in cline and
                'Test' in cline and
                'public' in cline):

                assert cline.split('class')[0] == '', "Why isn't the class at the beginning of the file?"

                if not cline.split('class')[1].strip().startswith('Test'):
                    for j in range(i+1, len(vlines.orig)):
                        if vlines.clean[j].startswith('}'):

                            yield i, j+1

                            break

    for i, j in get_class_setups():
        print('\n*******************************')
        print('REMOVING ------->')

        print('\n'.join(vlines.orig[i:j]))
        for idx in range(i, j):
            outlines[idx] = None

    for i, j, ctxt, otxt in get_testif():
        print('*************************')
        print('REPLACING ------->')
        print(otxt)
        print()
        expassrt = 'ASSERT_' if 'THROW' in ctxt else 'EXPECT_'

        message = ''
        for line in otxt.split('\n'):
            if ('LOGERROR' in line) and ('expected' not in line.lower()) and 'failed' in line.lower():

                message = line.split('Testing')[1].split('",')[1].split(');')[0]

        ctxt = ctxt.replace('dif', 'DIF___')
        ctxt = ctxt.replace('abs<double>', 'abs')
        ctxt = ctxt.replace('<double>', '___DOUBLE___')

        chck = single_spacing(ctxt.split('if')[1].split('{')[0], also_ln=True)
        print(chck)

        if 'DIF___' in remove_whitespace(chck) and '>' in remove_whitespace(chck):
            testline = expassrt+'LT'+ (chck.strip().replace('>', ','))

        elif '!almostEqual' in remove_whitespace(chck):
            chck = chck.replace('()', '___emptybraces___')
            lhs = chck.split('almostEqual')[1].strip()[1:]
            lhs = ')'.join(lhs.split(')')[:-2])
            lhs = [i.strip() for i in lhs.split(',')]
            lhs[-1] = '1e-'+lhs[-1]

            testline = (expassrt+'NEAR( '+ ', '.join(lhs)+ ')').strip()
            testline = testline.replace('___emptybraces___', '()')

        elif ('abs' and '>' in remove_whitespace(chck)):
            chck = chck.replace('->', "___VALUEAT____")
            chck = chck.strip()[1:-1]

            if 'std' in chck:
                lhs, rhs = chck.replace('std::abs', '').split('>')
            else:
                lhs, rhs = chck.replace('abs', '').split('>')

            lhs = lhs.strip()
            if lhs[0] == '(' and lhs[-1] == ')':
                lhs = lhs[1:-1]

            if '-' in lhs:
                lhs = ', '.join(lhs.split('-'))
            else:
                lhs = lhs+', 0'

            testline = (expassrt+'NEAR('+ lhs+','+rhs+')').strip()
            testline = testline.replace('___VALUEAT____', "->")
            testline = testline.replace('___emptybraces___', '()')

        elif '!=' in remove_whitespace(chck):
            testline = expassrt+'EQ'+ (chck.strip().replace('!=', ','))

        elif '==' in remove_whitespace(chck):
            testline = expassrt+'NE'+ (chck.strip().replace('==', ','))

        elif '->isEqual' in remove_whitespace(chck) and '(!' in remove_whitespace(chck):
            chck = chck.split('!')
            chck = (''.join(chck[0:2])) + ('!'.join(chck[2:]))
            testline = expassrt+'TRUE'+ (chck.strip())

        elif '>' in remove_whitespace(chck):
            testline = expassrt+'LT'+ (chck.strip().replace('>', ','))

        elif '<' in remove_whitespace(chck):
            testline = expassrt+'GT'+ (chck.strip().replace('<', ','))

        testline = testline.replace('DIF___', 'dif')
        testline = testline.replace('___DOUBLE___', '<double>')
        testline = (' '*(vlines.clean[i].find('if'))+ testline + " << "+ message +';')
        print('WITH ------------>')
        print(testline)

        outlines[i] = testline
        for idx in range(i+1, j):
            outlines[idx] = None

    for i, cline in enumerate(vlines.clean):
        if 'REGISTER_TESTCASE' in cline or 'LOGDEBUG1' in cline:
            print('\n*******************************')
            print('REMOVING ------->')
            print(vlines.orig[i])
            outlines[i] = None

    for i, j, testsuite, testname in get_class_headers():
        term = ''
        if vlines.clean[i].endswith('{'):
            term = '{'

        outlines[i] = 'TEST(%s, %s)'%(testsuite, testname)+term+'{'
        for idx in range(i+1, j):
            outlines[idx] = None

    for i, ncline in enumerate(vlines.nocomments):
        if '#include"testing.hpp"' in remove_whitespace(ncline):
            print('\n*******************************')
            print('REPLACING ------->')
            print(vlines.orig[i])
            print('WITH ------->')
            outlines[i] = '#include "gtest/gtest.h" //donegtest'

            print(outlines[i])

    for i, j, _, _ in get_testing_mode():
        print('\n*******************************')
        print('REMOVING ------->')
        print('\n'.join(vlines.orig[i:j]))
        for idx in range(i, j):
            outlines[idx] = None

    #clear_output()

    output = '\n'.join([i for i in outlines if i is not None])
    #open(srcloc+'/'+f, 'w').write(output)
    #print(output)

    break

Output:
dirichlet.test.cc
discretetable.test.cc
sigmapoints.test.cc
lbu_cg.test.cc
conditionaldirichlet.test.cc
clustergraph.test.cc
conditionalpolya.test.cc
bronkerbosch.test.cc
lbu2_cg.test.cc
lbp_cg.test.cc
polya.test.cc
normedgausscanonical.test.cc
sqrtmvg.test.cc
gausscanonical.test.cc
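
As a hypothetical illustration of the string surgery the cell above performs (the C++ line, the logger name, and the message are invented; the real inputs come from the ../src/*.test.cc files), the message and condition extraction work roughly like this:

line = 'LOGERROR(logger, "Testing", "sum of weights failed");'
message = line.split('Testing')[1].split('",')[1].split(');')[0]
print(message)   # ->  "sum of weights failed"

ctxt = 'if (std::abs(total - 1.0) > 1e-9) {'
chck = single_spacing(ctxt.split('if')[1].split('{')[0], also_ln=True)
print(chck)      # ->  (std::abs(total - 1.0) > 1e-9)
# ...which the abs/'>' branch above turns into roughly:
#     EXPECT_NEAR(total , 1.0, 1e-9) <<  "sum of weights failed";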
In [49]:
'''
fpth = '../src/discretetable.test.cc'
lines = open(fpth, 'r').read().split('\n')
lines_out = list(lines)

for i, line in enumerate(lines):
    if "if(*" in remove_whitespace(line) and '!=' in remove_whitespace(line):
        print('*******')
        sblines = []
        j = -1
        while not '}//if' in remove_whitespace(lines[i+j]):
            j += 1
            if 'Expect' in lines[i+j]:
                continue
            sblines.append(lines[i+j])

        frm = '\n'.join(sblines)
        to = ('\n'.join(sblines).replace('if', 'EXPECT_EQ')
                                .replace(' !=', ',')
                                .replace('Testing",', 'bobbob<<')
                                .replace(');', ';blabla').split('blabla')[0]
             )
        to = to.split('{')[0]+to.split('bobbob')[1]

        lines_out[i] = to
        for k in range(i+1, i+j+1):
            lines_out[k] = None
        print(to)


str_out = '\n'.join([i for i in lines_out if i is not None])
lines = open(fpth, 'w').write(str_out)
''';
In [196]:
def readfile(fname):
    try:
        return open(fname).read()
    except:
        return open(fname, encoding='ISO-8859-1').read()


import os
for dir_, dirs, files in os.walk('../src/'):
    for file in files:
        if file.split('.')[-1] not in ('cc', 'hpp', 'tcc'):
            continue
        if file.endswith('ipynb'): continue
        if os.path.split(dir_)[-1].startswith('.'): continue

        txt = readfile(dir_+'/'+file)
        v = txt_views(txt)
        vlines = view_lnsplit(v)

        fail = False
        for s in split(v.vars):
            if s.str == "FAIL":
                txt = txt[0:s.i] + '%`#]' + txt[s.j:]
                fail = True

        if fail:
            txt = txt.replace('%`#]', 'FAIL_PATREC')
            print(file)
            open(dir_+'/'+file, 'w').write(txt)

Output:
sqrtmvg.cc
gausscanonical.cc
normedgausscanonical.cc
In [53]:
import inspect
inspect(str)

Output:
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-53-948d99da8f11> in <module>
      1 import inspect
----> 2 inspect(str)

TypeError: 'module' object is not callable