heetbeet/Untitled.ipynb

## cpp_code_helpers.py
import os

class ddict(dict):
    """
    A dict whose entries can also be read and written as attributes,
    i.e. d.key and d['key'] refer to the same slot.
    """
    def __init__(self, **kwds):
        super(ddict, self).__init__(**kwds)
        # Using the dict itself as the instance namespace is what makes
        # attribute access and item access share one storage.
        self.__dict__ = self

def to_markers(lefts, rights):
    """
    Pair opening and closing delimiters into marker records.

    Each record is a ddict holding the opener (lhs), the closer (rhs) and
    their lengths (lenl, lenr). Pairs are formed positionally with zip, so
    surplus entries of the longer list are ignored.
    """
    markers = []
    for opener, closer in zip(lefts, rights):
        markers.append(ddict(lhs=opener,
                             lenl=len(opener),
                             rhs=closer,
                             lenr=len(closer)))
    return markers

def single_spacing(txt, also_ln=False):
    """
    Turn tabs (and newlines when also_ln is set) into spaces and collapse
    every run of spaces down to a single space.
    """
    blanks = '\t\n' if also_ln else '\t'
    for ch in blanks:
        txt = txt.replace(ch, ' ')

    # Each replace roughly halves a run of spaces; repeat until none is left.
    while '  ' in txt:
        txt = txt.replace('  ', ' ')
    return txt

def remove_whitespace(txt, also_ln=False):
    """
    Drop every space and tab from txt; newlines are dropped as well when
    also_ln is set.
    """
    unwanted = (' ', '\t', '\n') if also_ln else (' ', '\t')
    return ''.join(ch for ch in txt if ch not in unwanted)


def scrub_nonvarchars(txt):
    """
    Replace every character that is not a newline, underscore, ASCII digit
    or ASCII letter with a space, keeping the text length unchanged.
    """
    keep = ('\n_0123456789'
            'abcdefghijklmnopqrstuvwxyz'
            'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
    out = []
    for ch in txt:
        out.append(ch if ch in keep else ' ')
    return ''.join(out)

def scrub_all_except_newline(txt):
    """
    Blank out txt: every character becomes a space, except newlines, which
    are kept so that the line layout is preserved.
    """
    blanked = []
    for ch in txt:
        blanked.append('\n' if ch == '\n' else ' ')
    return ''.join(blanked)


def scrub_comments_and_strings(txt):
    """
    A pre-process to make scraping easier. This function turns all the comments and strings
    into empty text (spaces):
    FROM: /* // */ cout << "hello \"world\"!" << R"(bla)"; //chingching
    TO  :          cout << "                " << R"(   )";

    Every scrubbed character becomes a space and newlines are kept, so the
    result has the same length and line layout as txt. The string delimiters
    (", R"( and )") stay in place, comment delimiters are blanked. Only the
    raw-string form without a custom delimiter, R"(...)", is recognised.
    The contents of a string or block comment that is never closed are left
    as they are.
    """

    markers = to_markers(['/*', '"', 'R"('],
                         ['*/', '"', ')"' ])

    # Pass 1: blank any escaped quote (\" or \\\") but not \\" or \\\\"
    # (odd vs. even number of backslashes), so that the remaining quotes
    # pair up as real string delimiters. Works on a list of characters to
    # avoid rebuilding the whole string for every escape.
    chars = list(txt)
    n = len(chars)
    i = 0
    while i < n:
        if chars[i] != '\\':
            i += 1
            continue

        # j ends on the first character after the run of backslashes
        j = i
        while j < n and chars[j] == '\\':
            j += 1

        if j < n and chars[j] == '"' and (j - i) % 2 == 1:
            chars[j-1] = chars[j] = ' '
        i = j + 1
    txtout = ''.join(chars)

    # Pass 2: match each opener with its closer and blank the text in
    # between. Openers that sit inside a // comment must be ignored.
    i = -1
    while i < len(txtout)-1:
        i += 1

        for m in markers:
            if m.lhs != txtout[i:i+m.lenl]:
                continue

            # Is the last '//' before this point more recent than the last
            # newline? Then we are inside a line comment: skip this opener.
            if txtout.rfind('//', 0, i+1) > txtout.rfind('\n', 0, i+1):
                break

            start = i + m.lenl
            end = txtout.find(m.rhs, start)
            if end < 0:
                # Never closed: leave the text alone and step past the opener
                i = start - 1
            else:
                txtout = (txtout[:start] +
                          scrub_all_except_newline(txtout[start:end]) +
                          txtout[end:])
                i = end + m.lenr - 1  # the while loop increments once more
            break

    # Pass 3: blank the // comments and the leftover /* and */ signs
    lines = txtout.split('\n')
    for k, line in enumerate(lines):
        idx = line.find('//')
        if idx >= 0:
            lines[k] = line[:idx] + ' '*(len(line)-idx)
    txtout = '\n'.join(lines)

    txtout = txtout.replace('/*', '  ')
    txtout = txtout.replace('*/', '  ')

    return txtout


def place_back_strings(txt_scrubbed,
                       txt_original):
    """
    Put back the string contents that were scrubbed away, so you end up
    with only the comments scrubbed.

    txt_scrubbed is scanned for string delimiters; the text between each
    opener and its closer is replaced by the characters at the same
    positions in txt_original. Both texts are therefore expected to have
    identical length and layout. An opener without a closer is left as is.
    """
    markers = to_markers(['/*', '"', 'R"('],
                         ['*/', '"', ')"' ])

    # Match openers with closers and restore the text in between
    txtout = txt_scrubbed
    i = -1
    while i < len(txtout)-1:
        i += 1

        for m in markers:
            if m.lhs != txtout[i:i+m.lenl]:
                continue

            start = i + m.lenl
            # Search from the first character after the opener, so that an
            # empty literal ("" or R"()") finds its own closer instead of
            # pairing with the next literal's opener.
            end = txtout.find(m.rhs, start)
            if end < 0:
                # Never closed: nothing to restore, step past the opener
                i = start - 1
            else:
                txtout = (txtout[:start] +
                          txt_original[start:end] +
                          txtout[end:])
                i = end + m.lenr - 1  # the while loop increments once more
            break

    return txtout

def txt_views(txt):
    """
    Build the different views of a source text and return them in a ddict:
      orig       - the text as given
      clean      - comments and string contents scrubbed
      nocomments - only the comments scrubbed (strings placed back)
      vars       - the clean view with all non-identifier characters scrubbed
    """
    cleaned = scrub_comments_and_strings(txt)

    views = ddict(orig=txt, clean=cleaned)
    views['nocomments'] = place_back_strings(cleaned, txt)
    views['vars'] = scrub_nonvarchars(cleaned)
    return views

def view_lnsplit(v):
    """
    Return a ddict with the same keys as v in which every text value has
    been split into a list of lines (split on '\\n').
    """
    per_line = {}
    for name, text in v.items():
        per_line[name] = text.split('\n')
    return ddict(**per_line)


def isint(txt):
    """
    Return True when int(txt) succeeds, False when the conversion is
    rejected.

    Only the errors int() raises for unsuitable input are treated as
    "not an int"; anything else (e.g. KeyboardInterrupt) propagates instead
    of being silently swallowed.
    """
    try:
        int(txt)
    except (TypeError, ValueError, OverflowError):
        return False
    return True

def split(txt):
    """
    Split txt on spaces, tabs and newlines and report where each piece sits.

    Returns a list of ddicts, one per token, with
      i   - index of the token's first character
      j   - index one past the token's last character
      str - the token text, txt[i:j]
    """
    whitespace = (' ', '\t', '\n')

    splits = []
    in_token = False
    for i, char in enumerate(txt):
        is_space = char in whitespace
        if not in_token and not is_space:
            in_token = True
            splits.append(ddict(i=i))
        elif in_token and is_space:
            in_token = False
            splits[-1].j = i

    # A token that runs up to the end of the text is never closed by a
    # whitespace character; close it here so every entry has its j.
    if in_token:
        splits[-1].j = len(txt)

    for s in splits:
        s.str = txt[s.i:s.j]

    return splits

## Untitled.ipynb

      
Display the source blob

    
Display the rendered blob

    
    Raw
  

              Untitled.ipynb
            
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
	import os

	class ddict(dict):
	    """
	    A dict whose entries can also be read and written as attributes,
	    i.e. d.key and d['key'] refer to the same slot.
	    """
	    def __init__(self, **kwds):
	        super(ddict, self).__init__(**kwds)
	        # Using the dict itself as the instance namespace is what makes
	        # attribute access and item access share one storage.
	        self.__dict__ = self

	def to_markers(lefts, rights):
	    """
	    Pair opening and closing delimiters into marker records.

	    Each record is a ddict holding the opener (lhs), the closer (rhs) and
	    their lengths (lenl, lenr). Pairs are formed positionally with zip, so
	    surplus entries of the longer list are ignored.
	    """
	    markers = []
	    for opener, closer in zip(lefts, rights):
	        markers.append(ddict(lhs=opener,
	                             lenl=len(opener),
	                             rhs=closer,
	                             lenr=len(closer)))
	    return markers

	def single_spacing(txt, also_ln=False):
	    """
	    Turn tabs (and newlines when also_ln is set) into spaces and collapse
	    every run of spaces down to a single space.
	    """
	    blanks = '\t\n' if also_ln else '\t'
	    for ch in blanks:
	        txt = txt.replace(ch, ' ')

	    # Each replace roughly halves a run of spaces; repeat until none is left.
	    while '  ' in txt:
	        txt = txt.replace('  ', ' ')
	    return txt

	def remove_whitespace(txt, also_ln=False):
	    """
	    Drop every space and tab from txt; newlines are dropped as well when
	    also_ln is set.
	    """
	    unwanted = (' ', '\t', '\n') if also_ln else (' ', '\t')
	    return ''.join(ch for ch in txt if ch not in unwanted)


	def scrub_nonvarchars(txt):
	    """
	    Replace every character that is not a newline, underscore, ASCII digit
	    or ASCII letter with a space, keeping the text length unchanged.
	    """
	    keep = ('\n_0123456789'
	            'abcdefghijklmnopqrstuvwxyz'
	            'ABCDEFGHIJKLMNOPQRSTUVWXYZ')
	    out = []
	    for ch in txt:
	        out.append(ch if ch in keep else ' ')
	    return ''.join(out)

	def scrub_all_except_newline(txt):
	    """
	    Blank out txt: every character becomes a space, except newlines, which
	    are kept so that the line layout is preserved.
	    """
	    blanked = []
	    for ch in txt:
	        blanked.append('\n' if ch == '\n' else ' ')
	    return ''.join(blanked)


	def scrub_comments_and_strings(txt):
	    """
	    A pre-process to make scraping easier. This function turns all the comments and strings
	    into empty text (spaces):
	    FROM: /* // */ cout << "hello \"world\"!" << R"(bla)"; //chingching
	    TO  :          cout << "                " << R"(   )";

	    Every scrubbed character becomes a space and newlines are kept, so the
	    result has the same length and line layout as txt. The string delimiters
	    (", R"( and )") stay in place, comment delimiters are blanked. Only the
	    raw-string form without a custom delimiter, R"(...)", is recognised.
	    The contents of a string or block comment that is never closed are left
	    as they are.
	    """

	    markers = to_markers(['/*', '"', 'R"('],
	                         ['*/', '"', ')"' ])

	    # Pass 1: blank any escaped quote (\" or \\\") but not \\" or \\\\"
	    # (odd vs. even number of backslashes), so that the remaining quotes
	    # pair up as real string delimiters. Works on a list of characters to
	    # avoid rebuilding the whole string for every escape.
	    chars = list(txt)
	    n = len(chars)
	    i = 0
	    while i < n:
	        if chars[i] != '\\':
	            i += 1
	            continue

	        # j ends on the first character after the run of backslashes
	        j = i
	        while j < n and chars[j] == '\\':
	            j += 1

	        if j < n and chars[j] == '"' and (j - i) % 2 == 1:
	            chars[j-1] = chars[j] = ' '
	        i = j + 1
	    txtout = ''.join(chars)

	    # Pass 2: match each opener with its closer and blank the text in
	    # between. Openers that sit inside a // comment must be ignored.
	    i = -1
	    while i < len(txtout)-1:
	        i += 1

	        for m in markers:
	            if m.lhs != txtout[i:i+m.lenl]:
	                continue

	            # Is the last '//' before this point more recent than the last
	            # newline? Then we are inside a line comment: skip this opener.
	            if txtout.rfind('//', 0, i+1) > txtout.rfind('\n', 0, i+1):
	                break

	            start = i + m.lenl
	            end = txtout.find(m.rhs, start)
	            if end < 0:
	                # Never closed: leave the text alone and step past the opener
	                i = start - 1
	            else:
	                txtout = (txtout[:start] +
	                          scrub_all_except_newline(txtout[start:end]) +
	                          txtout[end:])
	                i = end + m.lenr - 1  # the while loop increments once more
	            break

	    # Pass 3: blank the // comments and the leftover /* and */ signs
	    lines = txtout.split('\n')
	    for k, line in enumerate(lines):
	        idx = line.find('//')
	        if idx >= 0:
	            lines[k] = line[:idx] + ' '*(len(line)-idx)
	    txtout = '\n'.join(lines)

	    txtout = txtout.replace('/*', '  ')
	    txtout = txtout.replace('*/', '  ')

	    return txtout


	def place_back_strings(txt_scrubbed,
	                       txt_original):
	    """
	    Put back the string contents that were scrubbed away, so you end up
	    with only the comments scrubbed.

	    txt_scrubbed is scanned for string delimiters; the text between each
	    opener and its closer is replaced by the characters at the same
	    positions in txt_original. Both texts are therefore expected to have
	    identical length and layout. An opener without a closer is left as is.
	    """
	    markers = to_markers(['/*', '"', 'R"('],
	                         ['*/', '"', ')"' ])

	    # Match openers with closers and restore the text in between
	    txtout = txt_scrubbed
	    i = -1
	    while i < len(txtout)-1:
	        i += 1

	        for m in markers:
	            if m.lhs != txtout[i:i+m.lenl]:
	                continue

	            start = i + m.lenl
	            # Search from the first character after the opener, so that an
	            # empty literal ("" or R"()") finds its own closer instead of
	            # pairing with the next literal's opener.
	            end = txtout.find(m.rhs, start)
	            if end < 0:
	                # Never closed: nothing to restore, step past the opener
	                i = start - 1
	            else:
	                txtout = (txtout[:start] +
	                          txt_original[start:end] +
	                          txtout[end:])
	                i = end + m.lenr - 1  # the while loop increments once more
	            break

	    return txtout

	def txt_views(txt):
	    """
	    Build the different views of a source text and return them in a ddict:
	      orig       - the text as given
	      clean      - comments and string contents scrubbed
	      nocomments - only the comments scrubbed (strings placed back)
	      vars       - the clean view with all non-identifier characters scrubbed
	    """
	    cleaned = scrub_comments_and_strings(txt)

	    views = ddict(orig=txt, clean=cleaned)
	    views['nocomments'] = place_back_strings(cleaned, txt)
	    views['vars'] = scrub_nonvarchars(cleaned)
	    return views

	def view_lnsplit(v):
	    """
	    Return a ddict with the same keys as v in which every text value has
	    been split into a list of lines (split on '\\n').
	    """
	    per_line = {}
	    for name, text in v.items():
	        per_line[name] = text.split('\n')
	    return ddict(**per_line)


	def isint(txt):
	    """
	    Return True when int(txt) succeeds, False when the conversion is
	    rejected.

	    Only the errors int() raises for unsuitable input are treated as
	    "not an int"; anything else (e.g. KeyboardInterrupt) propagates instead
	    of being silently swallowed.
	    """
	    try:
	        int(txt)
	    except (TypeError, ValueError, OverflowError):
	        return False
	    return True

	def split(txt):
	    """
	    Split txt on spaces, tabs and newlines and report where each piece sits.

	    Returns a list of ddicts, one per token, with
	      i   - index of the token's first character
	      j   - index one past the token's last character
	      str - the token text, txt[i:j]
	    """
	    whitespace = (' ', '\t', '\n')

	    splits = []
	    in_token = False
	    for i, char in enumerate(txt):
	        is_space = char in whitespace
	        if not in_token and not is_space:
	            in_token = True
	            splits.append(ddict(i=i))
	        elif in_token and is_space:
	            in_token = False
	            splits[-1].j = i

	    # A token that runs up to the end of the text is never closed by a
	    # whitespace character; close it here so every entry has its j.
	    if in_token:
	        splits[-1].j = len(txt)

	    for s in splits:
	        s.str = txt[s.i:s.j]

	    return splits