This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/python | |
from flashtext.keyword import KeywordProcessor | |
import random | |
import string | |
import re | |
import time | |
def get_word_of_length(str_length: int) -> str:
    """Return a random word made of lowercase ASCII letters.

    Args:
        str_length: Number of characters in the generated word. A value of
            zero (or less) yields an empty string.

    Returns:
        A string of ``str_length`` characters, each drawn independently
        from ``string.ascii_lowercase``.
    """
    # One independent draw per position; join avoids quadratic concatenation.
    return ''.join(random.choice(string.ascii_lowercase) for _ in range(str_length))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/python | |
from flashtext.keyword import KeywordProcessor | |
import random | |
import string | |
import re | |
import time | |
def get_word_of_length(str_length): | |
# generate a random word of given length |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import guidedlda

# Load the NYT sample dataset and its vocabulary that ship with guidedlda.
# NOTE(review): X is presumably a document-term count matrix — confirm
# against the guidedlda documentation.
X = guidedlda.datasets.load_data(guidedlda.datasets.NYT)
vocab = guidedlda.datasets.load_vocab(guidedlda.datasets.NYT)

# Reverse lookup: vocabulary entry -> its position in ``vocab``.
word2id = {word: idx for idx, word in enumerate(vocab)}

# Quick sanity check of the loaded data: its shape and the sum of all entries.
print(X.shape)
print(X.sum())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
class Trie(): | |
"""Regex::Trie in Python. Creates a Trie out of a list of words. The trie can be exported to a Regex pattern. | |
The corresponding Regex should match much faster than a simple Regex union.""" | |
def __init__(self): | |
self.data = {} | |
def add(self, word): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
class Trie(): | |
"""Regex::Trie in Python. Creates a Trie out of a list of words. The trie can be exported to a Regex pattern. | |
The corresponding Regex should match much faster than a simple Regex union.""" | |
def __init__(self): | |
self.data = {} | |
def add(self, word): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install flashtext
from flashtext.keyword import KeywordProcessor

keyword_processor = KeywordProcessor()

# Two-argument form: per the expected output below, a match on 'Big Apple'
# is reported as 'New York'.
keyword_processor.add_keyword('Big Apple', 'New York')
# One-argument form: the keyword is reported as itself.
keyword_processor.add_keyword('Bay Area')

keywords_found = keyword_processor.extract_keywords('I love Big Apple and Bay Area.')

# Bare expression: displays the result in a REPL/notebook; it has no effect
# when the file is run as a script.
keywords_found
# ['New York', 'Bay Area']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/python | |
from flashtext.keyword import KeywordProcessor | |
import random | |
import string | |
import re | |
from automaton import Automaton | |
import time | |
def get_word_of_length(str_length): | |
# generate a random word of given length |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/python | |
from flashtext.keyword import KeywordProcessor | |
import random | |
import string | |
import regex | |
import time | |
def get_word_of_length(str_length: int) -> str:
    """Return a random word made of lowercase ASCII letters.

    Args:
        str_length: Number of characters in the generated word. A value of
            zero (or less) yields an empty string.

    Returns:
        A string of ``str_length`` characters, each drawn independently
        from ``string.ascii_lowercase``.
    """
    # One independent draw per position; join avoids quadratic concatenation.
    return ''.join(random.choice(string.ascii_lowercase) for _ in range(str_length))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// compare the results with FlashText here https://gist.github.com/vi3k6i5/604eefd92866d081cfa19f862224e4a0 | |
import java.util.regex.*; | |
import java.lang.StringBuilder; | |
import java.util.*; | |
public class RegexBenchmark { | |
public static String getWordOfLength(int length) { | |
String SALTCHARS = "abcdefghijklmnopqrstuvwxyz1234567890"; | |
StringBuilder salt = new StringBuilder(); |
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Newer Older