Kirill Pavlov (pavlov99)

@pavlov99
pavlov99 / gist:222f379ca275a2b3f512
Created March 24, 2015 05:03
Jira: set Remaining Estimate to zero on issue close (needed for an hour-based burndown setup)
https://confluence.atlassian.com/display/JIRA/Logging+Work+on+an+Issue#LoggingWorkonanIssue-Loggingworkwhileresolvingorclosinganissue
https://confluence.atlassian.com/display/JIRAKB/Set+Remaining+Estimate+to+0+on+post+function
@pavlov99
pavlov99 / README.txt
Last active August 29, 2015 14:17
Countries with their timezones.
The list of countries is generated using Python.
Dependencies
============
pip install pycountry pytz requests
@pavlov99
pavlov99 / gist:a9af5871a1db578de16e
Created October 23, 2015 02:03
create user directory in hadoop
The /user/ directory is owned by "hdfs" with 755 permissions, so only hdfs can write to that directory. Unlike Unix/Linux, the HDFS superuser is hdfs, not root. So you would need to do this:
sudo -u hdfs hadoop fs -mkdir /user/<username>
sudo -u hdfs hadoop fs -put myfile.txt /user/<username>/
If you want to create a home directory for root so you can store files in it, do:
sudo -u hdfs hadoop fs -mkdir /user/root
sudo -u hdfs hadoop fs -chown root /user/root
@pavlov99
pavlov99 / Graph.scala
Created January 27, 2016 06:30
Graph BFS DFS
class Graph[T] {
  type Vertex = T
  type GraphMap = Map[Vertex, List[Vertex]]
  var g: GraphMap = Map()

  // Breadth-first search: returns vertices grouped by distance from `start`.
  def BFS(start: Vertex): List[List[Vertex]] = {
    def BFS0(elems: List[Vertex], visited: List[List[Vertex]]): List[List[Vertex]] = {
      val newNeighbors = elems.flatMap(g(_)).filterNot(visited.flatten.contains).distinct
      if (newNeighbors.isEmpty) visited
      else BFS0(newNeighbors, newNeighbors :: visited)
    }
    BFS0(List(start), List(List(start))).reverse
  }
}
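A quick usage sketch (the example graph below is illustrative, not from the gist):

val graph = new Graph[Int]
graph.g = Map(1 -> List(2, 3), 2 -> List(4), 3 -> List(4), 4 -> List())
println(graph.BFS(1))  // List(List(1), List(2, 3), List(4))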
@pavlov99
pavlov99 / gist:369492916e44ddb1de06
Created February 22, 2016 09:57
fix-spark-union.scala
// Align df2's columns to df1's order: unionAll matches columns by position, not name.
df1.unionAll(df2.select(df1.columns.map(df2(_)): _*))
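A minimal sketch of why the reorder is needed (the DataFrames here are illustrative, not from the gist): unionAll in Spark 1.x pairs columns by position, so df2's columns must first be rearranged into df1's layout.

import sqlContext.implicits._
val df1 = Seq((1, "a")).toDF("id", "label")
val df2 = Seq(("b", 2)).toDF("label", "id")
// without the select, unionAll would pair "id" with "label" by position
df1.unionAll(df2.select(df1.columns.map(df2(_)): _*)).show()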
@pavlov99
pavlov99 / 0-apache-spark-presentation.md
Last active May 13, 2016 03:38
Apache Spark in data science presentation

This gist consists of Spark presentation examples.

def combine[T](s: Seq[T]): Seq[Seq[T]] =
  for {
    len <- 1 to s.length
    combinations <- s combinations len
  } yield combinations

println(combine(List('a', 'b', 'c')))
// Vector(List(a), List(b), List(c), List(a, b), List(a, c), List(b, c), List(a, b, c))
@pavlov99
pavlov99 / 1-schedule.scala
Last active September 17, 2016 08:27
lookup-table-maintenance-in-hive blog files
// assumes: import org.apache.spark.sql.functions._ and import sqlContext.implicits._
val schedule = sqlContext.read
  .format("com.databricks.spark.csv")
  .option("header", "true")
  .option("inferSchema", "true")
  .load("lookup-example/san-jose-schedule-2016-2017.csv")
  .select(
    to_date(
      unix_timestamp($"START_DATE", "MM/dd/yyyy").cast("timestamp")
    ) as "date",
    when(  // the gist preview is truncated here; a hypothetical completion:
      $"SUBJECT".contains("at San Jose"), lit("home")
    ).otherwise(lit("away")) as "venue"
  )
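A quick sanity check on the result (standard DataFrame calls; the column names follow the sketch above):

schedule.printSchema()
schedule.show(5)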
@pavlov99
pavlov99 / blog-lookup-table-1-schedule.scala
Last active September 17, 2016 08:38
Medium blog: lookup-table-maintenance-in-hive snippets
// assumes: import org.apache.spark.sql.functions._ and import sqlContext.implicits._
val schedule = sqlContext.read
  .format("com.databricks.spark.csv")
  .option("header", "true")
  .option("inferSchema", "true")
  .load("lookup-example/san-jose-schedule-2016-2017.csv")
  .select(
    to_date(
      unix_timestamp($"START_DATE", "MM/dd/yyyy").cast("timestamp")
    ) as "date",
    when(  // the gist preview is truncated here; a hypothetical completion:
      $"SUBJECT".contains("at San Jose"), lit("home")
    ).otherwise(lit("away")) as "venue"
  )

CREATE EXTERNAL TABLE IF NOT EXISTS lookup_example_nhl_ext(
  team STRING,
  division STRING,
  conference STRING)
COMMENT 'NHL teams'
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY ','
  LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION 'hdfs:///user/<user>/lookup-example/nhl-lookup';
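Once the external table exists, it can be queried from Spark like any other Hive table (a minimal sketch; a Hive-enabled sqlContext is assumed):

val nhl = sqlContext.sql("SELECT team, division, conference FROM lookup_example_nhl_ext")
nhl.show(5)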