Skip to content

Instantly share code, notes, and snippets.

@cristhianleonli
Created January 8, 2019 21:08
Show Gist options
  • Save cristhianleonli/01c2961e4141a66279421fedd37e7699 to your computer and use it in GitHub Desktop.
Save cristhianleonli/01c2961e4141a66279421fedd37e7699 to your computer and use it in GitHub Desktop.
import Foundation
class EntitiesFinder {

    /// An entity candidate being assembled from consecutive words of the input.
    private struct Candidate {
        /// Lowercased words seen so far, joined by single spaces (punctuation kept).
        var text: String
        /// Position in the input where the candidate's first word begins.
        let start: String.Index
        /// Span of the longest term matched *exactly* so far, if any.
        var match: Range<String.Index>?
    }

    /**
     Finds every term of `array` that occurs in `string`.

     The text is split on the space character only (tabs and newlines are not
     separators) and compared case-insensitively by lowercasing each word, so
     the terms in `array` are expected to be lowercase. A single trailing
     punctuation character on a word is ignored when matching and is left out
     of the reported range. Multi-word terms are matched greedily, word by
     word, without backtracking.

     - Parameter array: set of strings to find within the string
     - Parameter string: target text where to find the entities
     - Returns: array of ranges with every found entity, in order of
       appearance. Ranges are expressed in UTF-16 units relative to `string`,
       as `NSString` / `NSAttributedString` expect.
     */
    class func findEntitiesRanges(for array: Set<String>, in string: String) -> [NSRange] {
        var ranges = [NSRange]()
        var candidate: Candidate?

        // Emits the pending candidate's exact match (if it has one) and resets it.
        func flush() {
            if let match = candidate?.match {
                ranges.append(NSRange(match, in: string))
            }
            candidate = nil
        }

        // Splitting the original string (not a lowercased copy) keeps every
        // token's indices valid in `string`, so leading or repeated spaces and
        // multi-unit characters cannot shift the reported offsets.
        for token in string.split(separator: " ") {
            let node = EntityNode(word: token.lowercased())
            let hasStop = node.stopCharacter
            let term = hasStop ? node.withoutLast : node.content
            // End of the word, excluding a trailing stop character.
            let termEnd = hasStop ? token.index(before: token.endIndex) : token.endIndex

            if var current = candidate {
                let extended = current.text + " " + term
                if array.contains(where: { $0.starts(with: extended) }) {
                    // Still a prefix of some term: keep growing the candidate.
                    // The stop character stays in `text`, so punctuation in
                    // the middle of a phrase ends the candidate on the next word.
                    current.text += " " + node.content
                    if array.contains(extended) {
                        current.match = current.start..<termEnd
                    }
                    candidate = current
                    continue
                }
                // The word does not continue the candidate; report what matched.
                flush()
            }

            // Start a fresh candidate at this word.
            candidate = Candidate(
                text: node.content,
                start: token.startIndex,
                match: array.contains(term) ? token.startIndex..<termEnd : nil
            )
        }

        // Last word.
        flush()
        return ranges
    }
}
import Foundation
/// One word of the scanned text together with the bookkeeping used while
/// assembling entity candidates.
struct EntityNode {
    /// The word as it appears in the text, trailing punctuation included.
    var content = ""
    /// Offset in the scanned text at which the candidate ending in this word starts.
    var startIndex = 0
    /// Whether this word continues a multi-word candidate.
    var isCompound = false
    /// Whether the word, without a trailing stop character, is itself a term.
    var isExact = false
    /// Text of the whole candidate that ends in this word.
    var allContent = ""

    /// Whether the word ends in a stop character, i.e. its last character is
    /// neither a letter nor a digit (a comma, a period, ...).
    ///
    /// An empty word has no stop character.
    var stopCharacter: Bool {
        // Looking at the last Unicode scalar avoids building an NSRange from a
        // Character count and compiling a regular expression on every access.
        // `alphanumerics` covers letters, marks and numbers, so digits and
        // accented letters are not mistaken for punctuation.
        guard let last = content.unicodeScalars.last else { return false }
        return !CharacterSet.alphanumerics.contains(last)
    }

    /// The word with its last character removed; empty when the word is empty.
    var withoutLast: String {
        return String(content.dropLast())
    }

    /// Creates an empty node.
    init() {
    }

    /// Creates a node for `word`.
    init(word: String) {
        self.content = word
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment