Last active
May 29, 2017 10:18
-
-
Save luannguyenkhoa/84d096a14df4674f792536b5d59fe601 to your computer and use it in GitHub Desktop.
Get all properly formatted URLs from a text by combining NSDataDetector and NSRegularExpression
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/// Namespace for the shared URL matchers used by the link-extraction helpers.
struct Regex {
    /// Case-insensitive expression for URL-like text: an optional `http://` or `https://`
    /// scheme, an optional `www.` prefix, a 2–256 character label, a dot, a 2–6 letter
    /// suffix ending on a word boundary, then any run of path/query characters.
    /// `nil` when the pattern cannot be compiled.
    static let urlRegex: NSRegularExpression? = try? NSRegularExpression(
        pattern: "(http(s)?:\\/\\/)?(www\\.)?[-a-zA-Z0-9\\@:%_\\+~#=]{2,256}\\.[a-z]{2,6}\\b([-a-zA-Z0-9\\@:%_\\+.~#?&//=]*)",
        options: .caseInsensitive
    )

    /// Data detector configured for the `.link` checking type.
    /// `nil` when the detector cannot be created; the error is printed in that case.
    static let urlDetector: NSDataDetector? = {
        let linkType = NSTextCheckingResult.CheckingType.link.rawValue
        do {
            return try NSDataDetector(types: linkType)
        } catch {
            print(error)
            return nil
        }
    }()
}
/// Collects the ranges of the well-formed URLs found in `text`.
///
/// Candidates come from `Regex.urlDetector`. A candidate is dropped when it looks
/// incomplete (for example `www.google`): its first dot-separated component contains
/// "www" or ends in "/", and it has fewer than three components. Every other candidate
/// must also pass `validateUrl(url:)`, because the detector accepts some strings that
/// are not properly formed URLs.
///
/// - Parameter text: The text to scan.
/// - Returns: The `NSString`-based ranges of the accepted links, in the order the
///   detector reported them. Empty when the detector is unavailable.
@discardableResult static func regexUrl(text: String) -> [NSRange] {
    guard let detector = Regex.urlDetector else { return [] }

    // Bridge once; the detector's ranges are expressed in NSString (UTF-16) units.
    let nsText = text as NSString
    let fullRange = NSRange(location: 0, length: nsText.length)
    var matches = [NSRange]()

    for result in detector.matches(in: text, options: [], range: fullRange) {
        let candidate = nsText.substring(with: result.range)
        let components = candidate.components(separatedBy: ".")
        let prefix = components.first?.lowercased() ?? ""

        // `last` is nil for an empty prefix, so a candidate that starts with "." no longer
        // traps the way indexing at `endIndex - 1` did.
        let looksIncomplete = (prefix.contains("www") || prefix.last == "/") && components.count < 3

        if looksIncomplete {
            print("ignored: ", candidate)
        } else if validateUrl(url: candidate) {
            matches.append(result.range)
            print(candidate)
        }
    }
    return matches
}
/// Reports whether `url` contains at least one match of `Regex.urlRegex`.
///
/// The search covers the whole string but is not anchored, so a match anywhere in
/// `url` is enough.
///
/// - Parameter url: The candidate string to check.
/// - Returns: `true` when the expression matches somewhere in `url`; `false` when it
///   does not, or when `Regex.urlRegex` is `nil`.
static func validateUrl(url: String) -> Bool {
    guard let pattern = Regex.urlRegex else { return false }
    let wholeString = NSRange(location: 0, length: (url as NSString).length)
    return pattern.firstMatch(in: url, options: [], range: wholeString) != nil
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment