Skip to content

Instantly share code, notes, and snippets.

@yoki
Last active December 10, 2022 05:17
Show Gist options
  • Save yoki/e6911902d0fd410806ac72af96357182 to your computer and use it in GitHub Desktop.
Save yoki/e6911902d0fd410806ac72af96357182 to your computer and use it in GitHub Desktop.
Python String
s.index(s2, i, j) # Index (relative to s) of the first occurrence of s2 within s[i:j]; raises ValueError if not found
s.find(s2) # Lowest index of s2 in s, or -1 if s2 is not found
s.index(s2) # Lowest index of s2 in s (raises ValueError if not found)
s.replace(s2, s3) # Return a copy of s with every occurrence of s2 replaced by s3
s.replace(s2, s3, count) # Same, but replace at most the first count occurrences
s.rfind(s2) # Highest index of s2 in s, or -1 if s2 is not found
s.rindex(s2) # Highest index of s2 in s (raises ValueError if not found)
#===================================================
#Regexp
#===================================================
#-----------------------
## match
#-----------------------
import re

# Named `text` rather than `str` so the builtin str type is not shadowed.
text = 'an example word:cat!!'
# re.search() scans the whole string and returns a match object, or None
# when the pattern does not occur anywhere.
match = re.search(r'word:(\w\w\w)', text)
if match:
    # group() is the entire matched text; group(1) is the first (...) group.
    print('found', match.group())  ## 'found word:cat'
    # print() inserts its own separator after the trailing space in the label.
    print('matched part is ', match.group(1))  ## 'matched part is  cat'
else:
    print('did not find')
#-----------------------
## search location
#-----------------------
# re.search() looks for the pattern anywhere in the string, not only at the start.
match = re.search(r'iii', 'piiig') #=> found, match.group() == "iii"
match = re.search(r'igs', 'piiig') #=> not found, match is None
#-----------------------
# Find all
#-----------------------
## Suppose we have a text with many email addresses
## (named `text` rather than `str` so the builtin str type is not shadowed)
text = 'purple alice@google.com, blah monkey bob@abc.com blah dishwasher'
## re.findall() returns a list of all non-overlapping matches as strings
emails = re.findall(r'[\w\.-]+@[\w\.-]+', text)  ## ['alice@google.com', 'bob@abc.com']
for email in emails:
    # do something with each found email string
    print(email)
#-----------------------
# replace
#-----------------------
# Named `text` rather than `str` so the builtin str type is not shadowed.
text = 'purple alice@google.com, blah monkey bob@abc.com blah dishwasher'
## re.sub(pat, replacement, text) -- returns a new string with all replacements;
## \1 is group(1), \2 is group(2) in the replacement
replaced = re.sub(r'([\w\.-]+)@([\w\.-]+)', r'\1@yo-yo-dyne.com', text)
print(replaced)
## purple alice@yo-yo-dyne.com, blah monkey bob@yo-yo-dyne.com blah dishwasher
#############
# concat
############
# Joining a list of strings with a separator (every item must already be a str)
parts = ['Is', 'Chicago', 'Not', 'Chicago?']
' '.join(parts) #=> 'Is Chicago Not Chicago?'
# Joining values when printing
print(a + ':' + b + ':' + c) # Ugly; also requires a, b and c to all be str
print(':'.join([a, b, c])) # Still ugly; join() also requires str items
print(a, b, c, sep=':') # Better; print() converts each argument with str() itself
#############
# stripping
############
s = ' hello world \n'
s.strip() #=> 'hello world' (leading and trailing whitespace removed, including the newline)
t = '-----hello====='
t.lstrip('-') #=> 'hello=====' (only the leading '-' characters are removed)
## Sanitizing
# http://chimera.labs.oreilly.com/books/1230000000393/ch02.html#_discussion_31
#############
# literals
#############
# A triple-quoted string may span several lines; the line breaks become part
# of the string value, which is convenient for embedding SQL.
"""select * from yuho_text
where element_id = 'BusinessRisksTextBlock'
and date > '2015-00-00'
and doc_name = '有価証券報告書'"""
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment