Felipe Lolas (flolas)
  • Santiago, Chile
@flolas
flolas / unpack_pandas.py
Last active July 25, 2016 22:27
Unpack a Pandas Series into multiple Pandas Series (unpacking columns)
import pandas as pd

def unpack_col(col_to_unpack, df_to_append=None, header='col', sep=',', na_value=''):
    # Split each value on the separator and expand the parts into one column per part.
    # (The original split on a hard-coded ',' and ignored the sep parameter; fixed here.)
    unpacked_cols = col_to_unpack.fillna(na_value).apply(
        lambda x: pd.Series(x.split(sep))).fillna(na_value)
    # Add dynamic column names based on the number of parts and the
    # header prefix passed in (header_#).
    col_names = []
    for i in unpacked_cols.columns:
        col_names.append(header + '_' + str(i))
    unpacked_cols.columns = col_names
    if isinstance(df_to_append, pd.DataFrame):
        # Return the given DataFrame concatenated with the unpacked columns.
        return pd.concat([df_to_append, unpacked_cols], axis=1)
    return unpacked_cols
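A quick usage sketch (the sample data here is illustrative, not from the gist): unpacking a comma-separated column into header_0, header_1, ... columns appended to the original frame.

import pandas as pd

df = pd.DataFrame({'id': [1, 2], 'tags': ['a,b,c', 'd,e']})
# Unpack the 'tags' column and append the new columns to df.
result = unpack_col(df['tags'], df_to_append=df, header='tags')
print(list(result.columns))  # ['id', 'tags', 'tags_0', 'tags_1', 'tags_2']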
@flolas
flolas / bteq.py
Last active January 12, 2021 02:05
"""
Code that goes along with the Airflow tutorial located at:
https://github.com/airbnb/airflow/blob/master/airflow/example_dags/tutorial.py
"""
from airflow import DAG
from datetime import datetime, timedelta
from airflow.operators.docker_operator import DockerOperator
# Anchor the DAG start date at midnight, two days ago.
start = datetime.combine(datetime.today() - timedelta(2), datetime.min.time())
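The gist preview ends at the start date; a minimal sketch of how such a DAG might continue with the DockerOperator imported above (the DAG id, image name, and bteq command are assumptions, not from the gist):

default_args = {
    'owner': 'airflow',
    'start_date': start,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('bteq_docker', default_args=default_args,
          schedule_interval=timedelta(days=1))

# Run a BTEQ script inside a container; image and command are illustrative.
run_bteq = DockerOperator(
    task_id='run_bteq',
    image='my-bteq-image',  # assumed image name
    command='bteq < /scripts/query.bteq',
    docker_url='unix://var/run/docker.sock',
    dag=dag,
)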
@flolas
flolas / multiproc_trim.py
Created August 24, 2016 03:40
Multiprocessing row trimming in Python
#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# multiproc_trim.py
"""A program that reads integer values from a CSV file and writes out their
sums to another CSV file, using multiple processes if desired.
"""
import csv
import multiprocessing
import optparse
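The preview stops at the imports; a minimal Python 3 sketch of the summing logic the docstring describes, assuming one row of integer values per CSV line (file and function names are illustrative):

def sum_row(row):
    # Sum the integer values in one CSV row.
    return sum(int(value) for value in row)

def main():
    with open('input.csv', newline='') as infile, \
         open('sums.csv', 'w', newline='') as outfile:
        reader = csv.reader(infile)
        writer = csv.writer(outfile)
        with multiprocessing.Pool() as pool:
            # Fan rows out to worker processes; write one sum per input row.
            for total in pool.map(sum_row, reader):
                writer.writerow([total])

if __name__ == '__main__':
    main()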
FROM ubuntu
ENV LANGUAGE en_US.UTF-8
ENV LANG en_US.UTF-8
ENV LC_ALL en_US.UTF-8
ENV LC_CTYPE en_US.UTF-8
ENV LC_MESSAGES en_US.UTF-8
RUN apt update && \
@flolas
flolas / Event.json
Created March 30, 2017 16:41
EventStreaming
{
  "context": {
    "session": <int>,
    "id": <int>
  },
  "timestamp": <timestamp(6)>,
  "event": <str>,
  "subevent": <str>,
  "fields": [
    {
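The template above uses placeholder types; a hedged Python sketch of one concrete event in this shape (the contents of 'fields' are an assumption, since the preview is truncated there):

import json
from datetime import datetime, timezone

event = {
    'context': {'session': 12345, 'id': 678},
    'timestamp': datetime.now(timezone.utc).isoformat(timespec='microseconds'),
    'event': 'page_view',
    'subevent': 'scroll',
    'fields': [
        {'name': 'depth', 'value': '80%'},  # assumed field structure
    ],
}
print(json.dumps(event, indent=2))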
from airflow.contrib.operators.teradata_operator import TeradataOperator
task = TeradataOperator(sql='query.sql', teradata_conn_id='teradata-prod', task_id='Ejecuta_query_sql', dag=dag)
sudo apt-get -y update
sudo apt-get -y install default-jre libc6-i386 libc6-dev-i386 curl unzip vim netcat telnet
sudo useradd james
sudo mkdir /home/james
sudo chown -R james:james /home/james
sudo adduser james sudo
sudo mkdir /opt/james
sudo curl -o /opt/james/james-server-app-3.0.0-app.zip http://www.apache.org/dist/james/server/3.0.0/james-server-app-3.0.0-app.zip
sudo unzip /opt/james/james-server-app-3.0.0-app.zip -d /opt/james
sudo chown -R james:james /opt/james
@flolas
flolas / index.html
Last active January 3, 2018 15:46
PBI (Power BI) embedding sample using ADAL.JS
<!DOCTYPE html>
<html>
<head>
<title>Minimal sample using ADAL.JS</title>
<meta charset="utf-8" />
<script src="https://secure.aadcdn.microsoftonline-p.com/lib/1.0.11/js/adal.min.js"></script>
<script src="powerbi.js"></script>
<script src="https://code.jquery.com/jquery-3.2.1.min.js" integrity="sha256-hwg4gsxgFZhOsEEamdOYGBf13FyQuiTwlAQgxVSNgt4=" crossorigin="anonymous"></script>
<script>window.jQuery || document.write('<script src="js/vendor/jquery-3.2.1.min.js"><\/script>')</script>
</head>
@flolas
flolas / teradata_hook.sql
Created January 12, 2018 12:55
Teradata Hook for Apache Airflow using PyTd (Teradata Python Module)
# -*- coding: utf-8 -*-
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
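The preview shows only the license header; a minimal sketch of what a PyTd-based Teradata hook might look like (class layout and connection details are assumptions based on the gist description, not its actual code):

import teradata
from airflow.hooks.dbapi_hook import DbApiHook

class TeradataHook(DbApiHook):
    # Attribute names follow the DbApiHook convention for connection lookup.
    conn_name_attr = 'teradata_conn_id'
    default_conn_name = 'teradata_default'

    def get_conn(self):
        conn = self.get_connection(getattr(self, self.conn_name_attr))
        uda_exec = teradata.UdaExec(appName='airflow', version='1.0',
                                    logConsole=False)
        # ODBC is one of PyTd's supported connection methods.
        return uda_exec.connect(method='odbc', system=conn.host,
                                username=conn.login, password=conn.password)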