Skip to content

Instantly share code, notes, and snippets.

Paul Meinshausen PMeinshausen

Block or report user

Report or block PMeinshausen

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
View chiangMaiCrawler.py
from selenium import webdriver
browser = webdriver.Firefox()
# The list of book titles is in a pandas dataframe
# named 'data', in the column 'title'
for row in range(len(data)):
browser.get("http://www.amazon.com")
time.sleep(1)
View TF_IDF.sql
--Syntax
SELECT * FROM TF_IDF(
ON TF
(
ON {table|view|(query)} PARTITION BY docid
[FORMULA('bool'|'log'|'augment'|'normal')]
) AS TF PARTITION BY term
[ON (SELECT term, COUNT(distinct docid) FROM input_table
GROUP BY term) AS docperterm PARTITION BY term]
View Sentenizer.sql
--Syntax (version 1.0)
select * from Sentenizer(
on input_table
TEXTCOLUMN('text_column_name')
[ACCUMULATE('accumulate_column_names')]);
View PoSTagger.sql
--Syntax (version 1.0)
SELECT * from PosTagger(
ON input_table
TEXTCOLUMN('text_column_name')
[ACCUMULATE('accumulate_column_names')]
);
View nGram.sql
--Syntax (version 1.3)
SELECT *
FROM nGram
(
ON {table_name | view_name | (query)}
TEXT_COLUMN('column_name')
[DELIMITER('delimiter_regular_expression')]
GRAMS(gram_number)
[OVERLAPPING({'true'|'false'})]
[CASE_INSENSITIVE({'true'|'false'})]
View NB_TextClassifier.sql
--Syntax (version 1.0)
CREATE TABLE model_table_name ( PARTITION KEY(token) ) AS
SELECT token, SUM( category_1 ) AS category_1, ... ,
SUM( category_n ) AS category_n FROM
NaiveBayesText(
ON input_table
TEXT_COLUMN( text_column )
CATEGORY_COLUMN( category_column )
CATEGORIES( category_1, ... , category_n )
View Levenshtein.sql
--Syntax (version 1.1)
--Use a SELECT statement to call the Levenshtein distance function:
SELECT *
FROM ldist
(
ON table_name
SOURCE (column1 [, column2,...])
TARGET(column1)
[THRESHOLD(value)]
View WMAVG.sql
--Syntax (version 1.1)
SELECT * FROM WMAVG(
ON {table_name|view_name|(query)}
PARTITION BY partition_column
ORDER BY order_by_column
COLUMNS('column_names')
RETURN_ALL('true'|'false')
WINDOW_SIZE('window_size')
);
View VWAP.sql
--Syntax (version 1.1)
SELECT * FROM VWAP(
ON {table_name | view_name | (query)}
PARTITION BY expression [, ...]
ORDER BY date_column
[PRICE('price_column')]
[VOLUME('volume_column')]
[TIMEINTERVAL('number_of_seconds')]
[DT('date_column')]
);
View Sample.sql
--Syntax (version 1.0)
--Unconditional sampling, single sample rate
select * from sample(
ON ...
SAMPLEFRACTION('fraction')
[Seed('seed')]
--Unconditional sampling, total approximate sample size
select * from sample (
You can’t perform that action at this time.