Favio André Vázquez FavioVazquez

## .gitignore
*.md
*.html
*.pdf
libraries

## Optimus.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                FavioVazquez
                / Optimus.ipynb
            
            
              Created
              August 7, 2017 01:24
            
              
                Optimus Example
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## optimus1.py
# Import optimus
import optimus as op
# Instantiate DataFrameTransformer around `df` (`df` must already be defined
# before this snippet runs; it is not created here).
transformer = op.DataFrameTransformer(df)
# Show the original DataFrame.
transformer.show()
# Chain the transformations, each called with the "*" column selector.
# The surrounding parentheses are required: without them the continuation
# lines that start with "." are not valid Python.
(
    transformer.trim_col("*")
    .remove_special_chars("*")
    .clear_accents("*")
)

## optimus2.py
# Import optimus
import optimus as op
# Choose a column for analyzing (`df` must already be defined).
detector = op.OutlierDetector(df, "num")
# With the outliers() method you can use MAD to detect if there is an outlier in your column
detector.outliers()
# And with the run() method you can see which values are not outliers
detector.run()
# Finally with the delete_outliers() method you can delete existing outliers in your column.
# This will modify the dataframe we have used when instantiating the OutlierDetector
detector.delete_outliers()

## optimus3.py
# Import optimus
import optimus as op
# Wrap `df` in a DataFrameTransformer (`df` is not defined in this snippet).
transformer = op.DataFrameTransformer(df)
# Run impute_missing on input columns "a" and "b" with strategy="mean",
# naming the output columns "out_a" and "out_B", then show the result.
# NOTE(review): "out_B" is capitalized differently from "out_a" — confirm this is intended.
transformer.impute_missing(["a","b"],["out_a","out_B"],strategy="mean").show()
# Same call again, this time with strategy="median".
transformer.impute_missing(["a","b"],["out_a","out_B"],strategy="median").show()

## optimus3.py
# Import optimus
import optimus as op
# Instance of Utilities class
tools = op.Utilities()
# Read the CSV at `url` into `df` via Utilities.read_dataset_url
# (presumably returns a Spark DataFrame — confirm against the Optimus docs).
url = "https://raw.githubusercontent.com/ironmussa/Optimus-examples/master/examples/foo.csv"
df = tools.read_dataset_url(path=url)

## OpKd1.py
# Import optimus
import optimus as op
# Instance of Utilities class
tools = op.Utilities()
# Read the CSV at `url` into `df` via Utilities.read_dataset_url
# (presumably returns a Spark DataFrame — confirm against the Optimus docs).
url = "https://raw.githubusercontent.com/ironmussa/Optimus-examples/master/examples/foo.csv"
df = tools.read_dataset_url(path=url)

## OpKd2.py
# Instance of transformer class.
# NOTE: this snippet does not define `op` or `df` itself; both must already
# exist (e.g. from a previously run snippet) before this line executes.
transformer = op.DataFrameTransformer(df)

## simple_spark.py
# Point findspark at the Spark installation under /opt/spark. This call is
# made before the pyspark import below; keep that order
# (presumably findspark is what makes pyspark importable — confirm).
import findspark
findspark.init("/opt/spark")
import random
from pyspark import SparkContext
# Create the SparkContext used by the rest of the script, with the
# application name "EstimatePi".
sc = SparkContext(appName="EstimatePi")
def inside(p):
    """Return True when a freshly drawn random point lies inside the unit circle.

    Two values are drawn with ``random.random()`` (first the x coordinate,
    then the y coordinate) and the point counts as "inside" when
    ``x*x + y*y < 1``.

    The argument ``p`` is ignored; it exists only so the function can be
    passed to a per-element callback that supplies one argument.
    """
    x_coord = random.random()
    y_coord = random.random()
    squared_distance = x_coord * x_coord + y_coord * y_coord
    return squared_distance < 1
# Number of random points to sample.
NUM_SAMPLES = 1000000
# Distribute the sample indices, keep only those for which inside() returns
# True, and count them. The original line ended in a dangling "\" with no
# continuation, which left `count` bound to the un-filtered RDD and
# `inside` unused.
count = sc.parallelize(range(0, NUM_SAMPLES)).filter(inside).count()

## load_photos.py
!curl -O http://download.tensorflow.org/example_images/flower_photos.tgz
!tar xzf flower_photos.tgz
!mkdir flower_photos/sample
	# Import optimus
	import optimus as op
	# Instantiate DataFrameTransformer around `df` (`df` must already be defined
	# before this snippet runs; it is not created here).
	transformer = op.DataFrameTransformer(df)
	# Show the original DataFrame.
	transformer.show()
	# Chain the transformations, each called with the "*" column selector.
	# The surrounding parentheses are required: without them the continuation
	# lines that start with "." are not valid Python.
	(
	    transformer.trim_col("*")
	    .remove_special_chars("*")
	    .clear_accents("*")
	)
	# Import optimus
	import optimus as op
	# Choose a column for analyzing
	detector = op.OutlierDetector(df,"num")
	# With the outliers() method you can use MAD to detect if there is an outlier in your column
	detector.outliers()
	# And with the run() method you can see which values are not outliers
	detector.run()
	# Finally with the delete_outliers() method you can delete existing outliers in your column.
	# This will modify the dataframe we have used when instantiating the OutlierDetector
	# Import optimus
	import optimus as op
	# Instance of Utilities class
	tools = op.Utilities()
	# Reading df from web
	url = "https://raw.githubusercontent.com/ironmussa/Optimus-examples/master/examples/foo.csv"
	df = tools.read_dataset_url(path=url)
	# Instance of transformer class
	transformer = op.DataFrameTransformer(df)
	import findspark
	findspark.init("/opt/spark")
	import random
	from pyspark import SparkContext
	sc = SparkContext(appName="EstimatePi")
	def inside(p):
	    """Return True when a freshly drawn random point lies inside the unit circle.

	    Two values are drawn with ``random.random()`` and the point counts as
	    "inside" when ``x*x + y*y < 1``. The argument ``p`` is ignored.

	    This restores the function body's indentation and the ``*`` operators,
	    which had been stripped so the expression read ``xx + yy`` (two
	    undefined names).
	    """
	    x, y = random.random(), random.random()
	    return x * x + y * y < 1
	# Number of random points to sample.
	NUM_SAMPLES = 1000000
	# Distribute the sample indices, keep only those for which inside() returns
	# True, and count them. The original line ended in a dangling "\" that
	# spliced the following shell line into this expression.
	count = sc.parallelize(range(0, NUM_SAMPLES)).filter(inside).count()
	!curl -O http://download.tensorflow.org/example_images/flower_photos.tgz
	!tar xzf flower_photos.tgz
	!mkdir flower_photos/sample