Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
selected_words = [ | |
'receipt', | |
'card', | |
'refund', | |
'month', | |
'monthly', | |
'plan', | |
'profit', | |
'charged', | |
'charge', |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import graphlab | |
# Strip HTML tags from every document body, keeping only the text between them.
docs['words'] = docs['body'].apply(lambda doc: re.sub(r"<[^>]*>", "", doc))
# Lowercase and trim each document, then replace every non-word character
# (punctuation, whitespace) and every digit with a single space.
# Raw-string patterns keep `\W` / `\d` from being parsed as string escapes.
docs['words'] = docs['words'].apply(lambda doc: re.sub(r"[\W\d]", " ", doc.lower().strip()))
# Convert to an SFrame so the graphlab text-analytics helpers can be applied.
docs = graphlab.SFrame(docs)
docs['word_counts'] = graphlab.text_analytics.count_words(docs['words'])
docs_tfidf = graphlab.text_analytics.tf_idf(docs['words'])
# Join the ten highest-scoring terms of each document into one string.
# The slice starts at 0: the previous `[1:10]` skipped the best-scoring term
# and produced only nine words for a column named `top10`.
docs['top10'] = docs_tfidf['docs'].apply(lambda t: " ".join(sorted(t, key=t.get, reverse=True)[:10]))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
create or replace function experiment_result_p_value(control_size float, control_conversion float, experiment_size float, experiment_conversion float) | |
returns float | |
stable | |
as $$ | |
from scipy.stats import chi2_contingency | |
from numpy import array | |
observed = array([ | |
[control_size - control_conversion, control_conversion], | |
[experiment_size - experiment_conversion, experiment_conversion] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import math | |
import re | |
import pandas as pd | |
from collections import Counter | |
from sklearn.datasets import fetch_20newsgroups | |
#get a subset of the dataset | |
categories = [ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
This example uses Scala. Please see the MLlib documentation for a Java example. | |
Try running this code in the Spark shell. It may produce different topics each time (since LDA includes some randomization), but it should give topics similar to those listed above. | |
This example is paired with a blog post on LDA in Spark: http://databricks.com/blog | |
Spark: http://spark.apache.org/ | |
*/ | |
import scala.collection.mutable |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import luigi | |
from buffer_redshift_etl import * | |
class ExtractActionsTakenTask(BufferRedshiftETLExtractTask):
    """Subclass of BufferRedshiftETLExtractTask bound to the 'actions-taken' ETL name."""

    def etl_name(self):
        """Return the ETL name string used by this task: 'actions-taken'."""
        name = 'actions-taken'
        return name
class TransformActionsTakenTask(BufferRedshiftETLTransformTask):
    """Subclass of BufferRedshiftETLTransformTask bound to the 'actions-taken' ETL name."""

    def etl_name(self):
        """Return the ETL name string used by this task: 'actions-taken'."""
        name = 'actions-taken'
        return name
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
REGISTER '../udfs/jython/actions_taken.py' USING jython AS actions_taken; | |
REGISTER '../udfs/python/actions_taken.py' USING streaming_python AS actions_taken1; | |
raw = load '$OUTPUT_PATH/extract-actions-taken' | |
using PigStorage() | |
as ( | |
user_id:chararray, | |
visitor_id:chararray, | |
client_id:chararray, | |
last_modified:chararray, |