yoki/find_replace.py

## find_replace.py
# Substring search and replacement on a string s (s2 = substring, s3 = replacement).
s.index(s2, i, j) #Lowest index of s2 within the slice s[i:j] (raises ValueError if not found)
s.find(s2) #Lowest index of s2 in s, or -1 if not found
s.index(s2) #Lowest index of s2 in s (raises ValueError if not found)
s.replace(s2, s3) #Copy of s with every occurrence of s2 replaced by s3 (s itself is unchanged)
s.replace(s2, s3, count) #Copy of s with at most the first count occurrences of s2 replaced by s3
s.rfind(s2) #Highest index of s2 in s, or -1 if not found
s.rindex(s2) #Highest index of s2 in s (raises ValueError if not found)

#===================================================
#Regexp
#===================================================

#-----------------------
## match
#-----------------------
# Look for 'word:' followed by exactly three word characters; the
# parentheses capture those three characters as group 1.
# The subject is named `text` (not `str`) so the builtin str type is not shadowed.
text = 'an example word:cat!!'
match = re.search(r'word:(\w\w\w)', text)
if match:
  print('found', match.group()) ## 'found word:cat'
  # print() already inserts a space between arguments, so the label has no trailing space
  print('matched part is', match.group(1)) ## 'matched part is cat'
else:
  print('did not find')

#-----------------------
## search location
#-----------------------
# re.search() scans the whole subject string and yields the first match, or None.
subject = 'piiig'
match = re.search(r'iii', subject) #=> found: match.group() == "iii"
match = re.search(r'igs', subject) #=> not found: match is None


#-----------------------
# Find all
#-----------------------
## Suppose we have a text with many email addresses
# The subject is named `text` (not `str`) so the builtin str type is not shadowed.
text = 'purple alice@google.com, blah monkey bob@abc.com blah dishwasher'

## re.findall() returns a list of every non-overlapping match, as strings
emails = re.findall(r'[\w\.-]+@[\w\.-]+', text) ## ['alice@google.com', 'bob@abc.com']
for email in emails:
  # do something with each found email string
  # (print() call: the Python 2 print statement is a SyntaxError on Python 3)
  print(email)

#-----------------------
# replace
#-----------------------

# The subject is named `text` (not `str`) so the builtin str type is not shadowed.
text = 'purple alice@google.com, blah monkey bob@abc.com blah dishwasher'
## re.sub(pat, replacement, string) -- returns a new string with all replacements,
## \1 is group(1), \2 group(2) in the replacement
replaced = re.sub(r'([\w\.-]+)@([\w\.-]+)', r'\1@yo-yo-dyne.com', text)
# print() call: the Python 2 print statement is a SyntaxError on Python 3
print(replaced)
## purple alice@yo-yo-dyne.com, blah monkey bob@yo-yo-dyne.com blah dishwasher

## string.py
#############
# concat
############
# Array Concat
# str.join() glues the list items together, placing the separator between them.
parts = [
  'Is',
  'Chicago',
  'Not',
  'Chicago?',
]
' '.join(parts) #=> 'Is Chicago Not Chicago?'

# for print function
# Sample values so the three variants below can run; a, b and c are not
# defined in the visible part of these notes. All three lines print ACME:50:91.1
a, b, c = 'ACME', '50', '91.1'
print(a + ':' + b + ':' + c)       # Ugly: manual concatenation, works for strings only
print(':'.join([a, b, c]))         # Still ugly: join() also requires every item to be a string
print(a, b, c, sep=':')            # Better: print() inserts the separator itself


#############
# stripping
############
# Two sample strings: one padded with whitespace, one padded with punctuation.
s, t = '   hello world  \n', '-----hello====='

# With no argument, strip() trims whitespace (the newline included) from both ends.
s.strip() #=> 'hello world'

# lstrip(chars) trims only the left end, so the trailing '=' run is kept.
t.lstrip('-') #=> 'hello====='

## Sanitizing
# http://chimera.labs.oreilly.com/books/1230000000393/ch02.html#_discussion_31


#############
# literals
#############
# A triple-quoted string literal may span several lines and can contain
# single quotes and non-ASCII text without escaping.
# Here it is a bare expression: the SQL text is neither assigned nor executed.
"""select * from yuho_text
where element_id = 'BusinessRisksTextBlock'
and date > '2015-00-00'
and doc_name = '有価証券報告書'"""
	# Substring search and replacement on a string s (s2 = substring, s3 = replacement).
	s.index(s2, i, j) #Lowest index of s2 within the slice s[i:j] (raises ValueError if not found)
	s.find(s2) #Lowest index of s2 in s, or -1 if not found
	s.index(s2) #Lowest index of s2 in s (raises ValueError if not found)
	s.replace(s2, s3) #Copy of s with every occurrence of s2 replaced by s3 (s itself is unchanged)
	s.replace(s2, s3, count) #Copy of s with at most the first count occurrences of s2 replaced by s3
	s.rfind(s2) #Highest index of s2 in s, or -1 if not found
	s.rindex(s2) #Highest index of s2 in s (raises ValueError if not found)

	#===================================================
	#Regexp
	#===================================================

	#-----------------------
	## match
	#-----------------------
	# Look for 'word:' followed by exactly three word characters; the
	# parentheses capture those three characters as group 1.
	# The subject is named `text` (not `str`) so the builtin str type is not shadowed.
	text = 'an example word:cat!!'
	match = re.search(r'word:(\w\w\w)', text)
	if match:
		print('found', match.group()) ## 'found word:cat'
		# print() already inserts a space between arguments, so the label has no trailing space
		print('matched part is', match.group(1)) ## 'matched part is cat'
	else:
		print('did not find')

	#-----------------------
	## search location
	#-----------------------
	# re.search() scans the whole subject string and yields the first match, or None.
	subject = 'piiig'
	match = re.search(r'iii', subject) #=> found: match.group() == "iii"
	match = re.search(r'igs', subject) #=> not found: match is None


	#-----------------------
	# Find all
	#-----------------------
	## Suppose we have a text with many email addresses
	# The subject is named `text` (not `str`) so the builtin str type is not shadowed.
	text = 'purple alice@google.com, blah monkey bob@abc.com blah dishwasher'

	## re.findall() returns a list of every non-overlapping match, as strings
	emails = re.findall(r'[\w\.-]+@[\w\.-]+', text) ## ['alice@google.com', 'bob@abc.com']
	for email in emails:
		# do something with each found email string
		# (print() call: the Python 2 print statement is a SyntaxError on Python 3)
		print(email)

	#-----------------------
	# replace
	#-----------------------

	# The subject is named `text` (not `str`) so the builtin str type is not shadowed.
	text = 'purple alice@google.com, blah monkey bob@abc.com blah dishwasher'
	## re.sub(pat, replacement, string) -- returns a new string with all replacements,
	## \1 is group(1), \2 group(2) in the replacement
	replaced = re.sub(r'([\w\.-]+)@([\w\.-]+)', r'\1@yo-yo-dyne.com', text)
	# print() call: the Python 2 print statement is a SyntaxError on Python 3
	print(replaced)
	## purple alice@yo-yo-dyne.com, blah monkey bob@yo-yo-dyne.com blah dishwasher
	#############
	# concat
	############
	# Array Concat
	# str.join() glues the list items together, placing the separator between them.
	parts = [
		'Is',
		'Chicago',
		'Not',
		'Chicago?',
	]
	' '.join(parts) #=> 'Is Chicago Not Chicago?'

	# for print function
	# Sample values so the three variants below can run; a, b and c are not
	# defined in the visible part of these notes. All three lines print ACME:50:91.1
	a, b, c = 'ACME', '50', '91.1'
	print(a + ':' + b + ':' + c) # Ugly: manual concatenation, works for strings only
	print(':'.join([a, b, c])) # Still ugly: join() also requires every item to be a string
	print(a, b, c, sep=':') # Better: print() inserts the separator itself


	#############
	# stripping
	############
	# Two sample strings: one padded with whitespace, one padded with punctuation.
	s, t = ' hello world \n', '-----hello====='

	# With no argument, strip() trims whitespace (the newline included) from both ends.
	s.strip() #=> 'hello world'

	# lstrip(chars) trims only the left end, so the trailing '=' run is kept.
	t.lstrip('-') #=> 'hello====='

	## Sanitizing
	# http://chimera.labs.oreilly.com/books/1230000000393/ch02.html#_discussion_31


	#############
	# literals
	#############
	# A triple-quoted string literal may span several lines and can contain
	# single quotes and non-ASCII text without escaping.
	# Here it is a bare expression: the SQL text is neither assigned nor executed.
	# Note: the leading tabs on the continuation lines are part of the string value.
	"""select * from yuho_text
	where element_id = 'BusinessRisksTextBlock'
	and date > '2015-00-00'
	and doc_name = '有価証券報告書'"""