Skip to content

Instantly share code, notes, and snippets.

View gcsfred's full-sized avatar

Gustavo Frederico gcsfred

View GitHub Profile
@gcsfred
gcsfred / config.conf
Created February 3, 2020 16:18
Sample integration between Elasticsearch and Amazon Personalize - entire file
[DEFAULT]
# Start value and step-down for product ranking. get_product_ranking converts
# both to float and passes them to _get_ranking as `start` and `steps_down`.
product_ranking_start = 10
product_ranking_steps_down = 0.2
# NOTE(review): presumably the category counterparts of the two values above,
# read by get_category_ranking -- confirm against that function.
cat_ranking_start = 10
cat_ranking_steps_down = 0.2
# Campaign ARNs. The two product_* entries are read by
# get_product_recommendations and get_product_ranking respectively.
product_recommendations_campaignArn=arn:aws:personalize:us-east-2:11123456:campaign/es-test03-hrnn
product_rankings_campaignArn=arn:aws:personalize:us-east-2:222789:campaign/es-test03-rank
# NOTE(review): the property1_* ARNs look like the category campaigns (their
# names end in cat-hrnn / cat-rank) -- confirm where they are read.
property1_recommendations_campaignArn=arn:aws:personalize:us-east-2:3333456:campaign/es-test03-cat-hrnn
property1_rankings_campaignArn=arn:aws:personalize:us-east-2:444789:campaign/es-test03-cat-rank
@gcsfred
gcsfred / query_es_fragment.py
Last active February 3, 2020 16:13
Query Elasticsearch with boosts and weights given by Amazon Personalize
def query_es(text_search, category_boost_pairs, product_id_weight_pairs):
client = Elasticsearch()
the_body = {
"query": {
"function_score": {
"query": {
"bool": {
"should": arrange_json_array(
transform_category_boost(category_boost_pairs), {
"match": {
@gcsfred
gcsfred / _get_ranking_fragment.py
Created February 3, 2020 16:07
Retrieve product ranking from Amazon (fragment)
def get_product_ranking(config, user, input_list):
    """Rank ``input_list`` for ``user`` with the product-ranking campaign.

    Args:
        config: Parsed configuration. Its ``DEFAULT`` section must provide
            ``product_rankings_campaignArn``, ``product_ranking_start`` and
            ``product_ranking_steps_down``.
        user: User identifier, forwarded unchanged to ``_get_ranking``.
        input_list: Items to rank, forwarded unchanged to ``_get_ranking``.

    Returns:
        Whatever ``_get_ranking`` returns for these arguments.
    """
    _log_info('Retrieving product ranking')
    defaults = config['DEFAULT']
    return _get_ranking(
        defaults['product_rankings_campaignArn'],
        user,
        input_list,
        # Both tuning values are converted to float before being passed on.
        float(defaults['product_ranking_start']),
        float(defaults['product_ranking_steps_down']),
    )
def _get_ranking(campaign, user, input_list, start, steps_down):
@gcsfred
gcsfred / _get_recommendations_fragment.py
Created February 3, 2020 16:01
Get product recommendations from Amazon Personalize (fragment)
def get_product_recommendations(config, user):
    """Fetch product recommendations for ``user``.

    Args:
        config: Parsed configuration. Its ``DEFAULT`` section must provide
            ``product_recommendations_campaignArn``.
        user: User identifier, forwarded unchanged to ``_get_recommendations``.

    Returns:
        Whatever ``_get_recommendations`` returns for this user and campaign.
    """
    _log_info('Retrieving product recommendations')
    campaign_arn = config['DEFAULT']['product_recommendations_campaignArn']
    return _get_recommendations(user, campaign_arn)
def _get_recommendations(user, campaign):
personalize = boto3.client('personalize-runtime', 'us-east-2')
response = personalize.get_recommendations(campaignArn=campaign, userId=user)
answer = []
_log_info('Recommended items:')
@gcsfred
gcsfred / search_with_personalization_fragment.py
Created February 3, 2020 15:49
Search with personalization (fragment)
def search_with_personalization(user, search):
    """Run ``search`` with category and product rankings computed for ``user``.

    The steps are: load ``config.conf``, fetch and rank category
    recommendations, fetch and rank product recommendations, then pass the
    search text and both ranked results to ``query_es``.

    Args:
        user: User identifier handed to every recommendation/ranking helper.
        search: Search text handed to ``query_es``.
    """
    config = configparser.ConfigParser()
    # ConfigParser.read() ignores files it cannot open, so a missing
    # 'config.conf' is not reported at this point.
    config.read('config.conf')

    categories = get_category_recommendations(config, user)
    ranked_categories = get_category_ranking(config, user, categories)

    products = get_product_recommendations(config, user)
    ranked_products = get_product_ranking(config, user, products)

    # NOTE(review): the result of query_es is discarded here -- confirm
    # whether callers are expected to receive it.
    query_es(search, ranked_categories, ranked_products)
@gcsfred
gcsfred / UDF_variable_num_columns.py
Last active November 19, 2018 15:37
Creating a new column in a DataFrame based on a variable number of other columns.
import pyspark.sql.functions as f
import pyspark.sql.types as t
# ...
# Add 'columnB' and 'columnC', each built from the current 'columnA' column.
data_frame = data_frame.withColumn(
    colName='columnB',
    col=data_frame['columnA'],
)
data_frame = data_frame.withColumn(
    colName='columnC',
    col=data_frame['columnA'],
)
# Names of the columns listed for the step that follows.
attrs = [
    'columnA',
    'columnB',
    'columnC',
]
@gcsfred
gcsfred / udf_two_columns_concat.py
Created November 17, 2018 19:32
Concatenate two columns of a DataFrame using UDF
import pyspark.sql.functions as f
import pyspark.sql.types as t
# ...
def udf_concat_vec(a, b):
    """Concatenate two vectors into one flat Python list.

    Args:
        a: First vector; must expose ``toArray()`` (the original author's
            note says both arguments are of type SparseVector).
        b: Second vector, same requirement as ``a``.

    Returns:
        A list holding the elements of ``a`` followed by the elements of ``b``.
    """
    dense_parts = (a.toArray(), b.toArray())
    # tolist() turns the joined NumPy array into a plain Python list.
    return np.concatenate(dense_parts).tolist()
# Wrap udf_concat_vec as a Spark UDF declared to return an array of floats.
my_udf_concat_vec = f.UserDefinedFunction(
    udf_concat_vec,
    returnType=t.ArrayType(t.FloatType()),
)
@gcsfred
gcsfred / dataframe_using_pandas_udf.py
Created November 15, 2018 13:02
Creating a DataFrame column using pandas_udf
# Add 'description_vec', computed by pandas_nlp over the 'description' column.
dataframe = dataframe.withColumn(
    colName='description_vec',
    col=pandas_nlp('description'),
)
@gcsfred
gcsfred / pandas_udf_nlp.py
Created November 15, 2018 12:55
Define a pandas_udf-annotated function that vectorizes a column of text from a DataFrame
import pandas as pd
from pyspark.sql.functions import pandas_udf, PandasUDFType
import spacy
#...
# Model in use: 'en_core_web_sm'. Alternative left by the author:
# nlp = spacy.load('en_core_web_lg')
nlp = spacy.load(name='en_core_web_sm')
#...
# Use pandas_udf to define a Pandas UDF
@gcsfred
gcsfred / dataframe_using_pandas_udf.py
Created November 15, 2018 12:43
DataFrame using pandas_udf and one-hot encoding
# Step 1: add 'ACOLUMN_not_null', computed by pandas_not_null over 'ACOLUMN'.
dataframe = dataframe.withColumn(
    colName='ACOLUMN_not_null',
    col=pandas_not_null('ACOLUMN'),
)
# Step 2: hand the new column name and the target name to one_hot_encode.
dataframe = one_hot_encode(
    dataframe,
    "ACOLUMN_not_null",
    "ACOLUMN_one_hot",
)