Last active
September 8, 2016 20:15
-
-
Save ctufts/14e75172303dec0218ba to your computer and use it in GitHub Desktop.
Regular expression examples in R
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
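(?<!@|#)\b\w+ : Match every word that is not immediately preceded by @ or # (i.e. skip hashtags and user handles from Twitter). The lookbehind requires perl = TRUE in R, and backslashes must be doubled inside an R string.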
(?<!@|#)\b\w{2,} : Same as above, but only match words that are at least 2 characters long.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Output of different grep commands in R ####
# Using a single string
# Is a number present?
example <- "ABCD5EFG6"
# Check if a digit is present:
#   grep()  returns the indices of the elements that match (here 1),
#   grepl() returns one logical per element (here TRUE).
grep("[0-9]", example)
grepl("[0-9]", example)
# Position of the first digit (5); the "match.length" attribute of the
# result holds the length of that match.
regexpr("[0-9]", example)
# Positions of all digits (5 and 9), returned as a list with one entry
# per input string.
gregexpr("[0-9]", example)
# Vector of strings
# Is a number present?
example <- c("ABCD5EFG6", "A7")
# Check if a digit is present in each element:
#   grep()  returns the indices of the matching elements (1 and 2),
#   grepl() returns one logical per element (TRUE, TRUE).
grep("[0-9]", example)
grepl("[0-9]", example)
# Position of the first digit in each element (5 and 2)
regexpr("[0-9]", example)
# Positions of all digits; one list entry per element of `example`
gregexpr("[0-9]", example)
# Letters ####
# NOTE: how a range such as [a-z] is interpreted depends on the locale
# (see ?regex); [[:lower:]] / [[:upper:]] are the portable spellings.
# Look for lower case letters (-1 is reported where nothing matches)
gregexpr("[a-z]", example)
# Look for letters regardless of case
gregexpr("[a-z]", example, ignore.case = TRUE)
# Look for upper case letters
gregexpr("[A-Z]", example)
# Words ####
# Find word characters: \\w matches one letter, digit or underscore at a
# time, so every character of each word is reported separately. For this
# ASCII input the explicit class on the second line gives the same result.
example <- "two words"
gregexpr("\\w", example)
gregexpr("[0-9A-Za-z_]", example)
# Find non-word characters (here only the space at position 4)
gregexpr("\\W", example)
gregexpr("[^0-9A-Za-z_]", example)
# Spaces ####
# Find whitespace characters
gregexpr("\\s", example)
# Find non-whitespace characters
gregexpr("\\S", example)
# Find digits ####
example <- "I'm looking for I95"
# Find digits (positions 18 and 19)
gregexpr("\\d", example)
# Find non-digits (every other position)
gregexpr("\\D", example)
example <- "I95 or I676"
# Find two consecutive digits. Matches do not overlap, so "676" yields a
# single match ("67"); both spellings below are equivalent.
gregexpr("\\d\\d", example)
gregexpr("\\d{2}", example)
# Find three consecutive digits (only "676" qualifies)
gregexpr("\\d\\d\\d", example)
gregexpr("\\d{3}", example)
# Full stop "." ####
# "." matches exactly one arbitrary character, so "a.e" means:
# an "a", then any single character, then an "e".
example <- "What are your ages?"
# Matches "are" (position 6) and "age" (position 15)
gregexpr("a.e", example)
example <- "matter"
# No match (-1): two characters ("tt") sit between the "a" and the "e"
gregexpr("a.e", example)
# Multipliers ####
# {n} repeats the preceding item exactly n times; matches are found left
# to right and do not overlap.
example <- "cccccccc aaaaa"
# Eight c's hold two runs of three (positions 1 and 4) ...
gregexpr("c{3}", example)
# ... and four runs of two (positions 1, 3, 5 and 7)
gregexpr("c{2}", example)
# Range multipliers: {min,max}
example <- "Chris Cris Chhris, Chhhhhris"
# Zero or one "h": matches "Chris" and "Cris" only
gregexpr("Ch{0,1}ris", example)
# ? is the same as using {0,1}
gregexpr("Ch?ris", example)
# Zero or more occurrences of "h": matches all four spellings;
# * is the same as using {0,}
gregexpr("Ch{0,}ris", example)
gregexpr("Ch*ris", example)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment