Khalil micaleel
@blakewest
blakewest / mnist_example.ipynb
Created February 9, 2018 17:26
Example implementation of a Convolutional Neural Net for the MNIST data
# delete all containers
docker rm $(docker ps -a -q)
# delete images without tags
docker rmi $(docker images | grep '^<none>' | awk '{print $3}')
# clean up unused volumes
docker volume prune
@dusenberrymw
dusenberrymw / spark_tips_and_tricks.md
Last active June 28, 2024 12:37
Tips and tricks for Apache Spark.

Spark Tips & Tricks

Misc. Tips & Tricks

  • If values are integers in [0, 255], Parquet will automatically compress them to 1-byte unsigned integers, decreasing the size of the saved DataFrame by a factor of 8.
  • Partition DataFrames to have evenly-distributed, ~128MB partition sizes (empirical finding). Always err on the higher side w.r.t. number of partitions.
  • Pay particular attention to the number of partitions when using flatMap, especially if the following operation results in high memory usage. The flatMap op usually produces a DataFrame with a [much] larger number of rows, yet the number of partitions remains the same. Thus, if a subsequent op causes a large expansion of memory usage (e.g. converting a DataFrame of indices to a DataFrame of large Vectors), the memory usage per partition may become too high. In this case, it is beneficial to repartition the output of flatMap to a number of partitions that will safely allow for appropriate partition memory sizes, based upon the …
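A minimal PySpark sketch of that last tip, assuming a toy input, an illustrative expansion function, and an illustrative target partition count (none of these come from the gist):

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("flatmap-repartition-sketch").getOrCreate()

# toy stand-in for a DataFrame of indices (real inputs would be far larger)
indices = spark.range(1000)

# each index expands into many rows, so memory per partition grows after flatMap
expanded = indices.rdd.flatMap(lambda row: [(row.id, j) for j in range(100)])

# flatMap keeps the partition count unchanged; repartition before the heavy op
# (64 is illustrative -- pick a count that keeps partitions near ~128MB)
expanded = expanded.repartition(64)
print(expanded.getNumPartitions())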
package com.github.jongwook
import net.recommenders.rival.core.DataModel
import net.recommenders.rival.evaluation.metric.ranking.NDCG
import net.recommenders.rival.evaluation.metric.ranking.NDCG.TYPE
import org.apache.spark.SparkConf
import org.apache.spark.mllib.evaluation.RankingMetrics
import org.apache.spark.sql.SparkSession
import scala.util.{Failure, Success, Random, Try}
@DominicBreuker
DominicBreuker / sgd.py
Last active January 6, 2020 08:51
Simple SGD example for tensorflow
import tensorflow as tf
from random import randint, seed

seed(42)  # make the random sample draws reproducible

# TF1-style graph: the loss depends on a single fed-in sample,
# which is what makes the descent stochastic
current_x = tf.placeholder(tf.float32)  # the current training sample
x = tf.Variable(2.1, name='x', dtype=tf.float32)  # the parameter being optimized
log_x = tf.log(x)
result = current_x * tf.square(log_x)  # per-sample loss: current_x * log(x)^2
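The gist excerpt stops at the loss definition. A hedged completion under the same TF1 API (the 0.1 learning rate, 100 steps, and randint range are my assumptions, not from the gist):

# assumed completion: one SGD update per randomly drawn sample
train_step = tf.train.GradientDescentOptimizer(0.1).minimize(result)
init = tf.initialize_all_variables()
with tf.Session() as sess:
    sess.run(init)
    for _ in range(100):
        # feeding a single random sample makes each gradient step stochastic
        sess.run(train_step, feed_dict={current_x: randint(1, 10)})
    print(sess.run(x))  # x approaches 1, the minimum of log(x)^2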
import tensorflow as tf

# mini-batch variant: three samples are fed per step instead of one
x_1 = tf.placeholder(tf.float32)
x_2 = tf.placeholder(tf.float32)
x_3 = tf.placeholder(tf.float32)
x = tf.Variable(2, name='x', dtype=tf.float32)
log_x = tf.log(x)
result = (x_1 + x_2 + x_3) * tf.square(log_x)  # batch loss over the three samples
@DominicBreuker
DominicBreuker / gd_simple.py
Created June 16, 2016 16:30
Simple example of gradient descent in tensorflow
import tensorflow as tf

x = tf.Variable(2, name='x', dtype=tf.float32)  # the parameter being optimized
log_x = tf.log(x)
log_x_squared = tf.square(log_x)  # objective: log(x)^2, minimized at x = 1
optimizer = tf.train.GradientDescentOptimizer(0.5)  # learning rate 0.5
train = optimizer.minimize(log_x_squared)
init = tf.initialize_all_variables()  # TF1-era initializer (later renamed global_variables_initializer)
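The excerpt ends at the init op. A typical TF1 run loop for this graph might look like the following (the step count is an assumption):

with tf.Session() as session:
    session.run(init)
    for step in range(10):
        session.run(train)  # one gradient descent update on log(x)^2
        print("step", step, "x:", session.run(x))  # x converges toward 1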

A Few Useful Things to Know about Machine Learning

The paper presents key lessons and "folk wisdom" that machine learning researchers and practitioners have learned from experience and that are hard to find in textbooks.

1. Learning = Representation + Evaluation + Optimization

All machine learning algorithms have three components:

  • Representation for a learner is the set of classifiers/functions that can possibly be learned. This set is called the hypothesis space. If a function is not in the hypothesis space, it cannot be learned.
  • The evaluation function tells how good a candidate machine learning model is.
  • Optimization is the method used to search the hypothesis space for the highest-scoring model.
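A hedged scikit-learn sketch (mine, not the paper's) that makes the decomposition concrete: decision trees as the representation, accuracy as the evaluation function, and a grid search over tree depth as the optimization:

from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier                      # representation: hypothesis space = decision trees
from sklearn.metrics import accuracy_score                           # evaluation: scores a candidate model
from sklearn.model_selection import GridSearchCV, train_test_split   # optimization: search over the space

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# the optimizer here is a grid search over tree depth, scored by accuracy
search = GridSearchCV(DecisionTreeClassifier(random_state=0),
                      param_grid={"max_depth": [1, 2, 3, 5]},
                      scoring="accuracy")
search.fit(X_train, y_train)
print(accuracy_score(y_test, search.predict(X_test)))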
@bbengfort
bbengfort / Makefile
Created January 10, 2016 19:20
Basic Python Project files - my Makefile and the dependencies that I have in everything.
# Shell to use with Make
SHELL := /bin/bash
# Set important Paths
PROJECT := # Set to your project name
LOCALPATH := $(CURDIR)/$(PROJECT)
PYTHONPATH := $(LOCALPATH)/
PYTHON_BIN := $(VIRTUAL_ENV)/bin
# Export targets not associated with files
@glamp
glamp / customer-segmentation.py
Last active April 30, 2020 13:40
Analysis for customer segmentation blog post
import pandas as pd

# http://blog.yhathq.com/static/misc/data/WineKMC.xlsx
# note: older pandas used sheetname=; current releases use sheet_name=
df_offers = pd.read_excel("./WineKMC.xlsx", sheet_name=0)
df_offers.columns = ["offer_id", "campaign", "varietal", "min_qty", "discount", "origin", "past_peak"]
df_offers.head()

df_transactions = pd.read_excel("./WineKMC.xlsx", sheet_name=1)
df_transactions.columns = ["customer_name", "offer_id"]
df_transactions['n'] = 1  # indicator column: one row per (customer, offer) response
df_transactions.head()
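The snippet ends before the clustering itself. A plausible continuation in the spirit of the blog post (the pivot layout and k=5 are my assumptions):

from sklearn.cluster import KMeans

# customer-by-offer indicator matrix: 1 where a customer took up an offer
matrix = df_transactions.pivot_table(index="customer_name", columns="offer_id",
                                     values="n", fill_value=0)

# cluster customers by their offer-response patterns (k=5 is illustrative)
matrix["cluster"] = KMeans(n_clusters=5, random_state=0, n_init=10).fit_predict(matrix)
print(matrix["cluster"].value_counts())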