Frank Dai soulmachine

@soulmachine
soulmachine / gist:a61b5d3f690b117b037c9654623bd451
Created September 9, 2016 06:16 — forked from lttlrck/gist:9628955
rename git branch locally and remotely
git branch -m old_branch new_branch # Rename branch locally
git push origin :old_branch # Delete the old branch
git push --set-upstream origin new_branch # Push the new branch, set local branch to track the new remote
@soulmachine
soulmachine / ubuntu-mount-new-disk.md
Created August 17, 2016 08:42 — forked from gaoyifan/ubuntu-mount-new-disk.md
Ubuntu: adding a new disk

Ubuntu: adding a new disk

List the disks:

# fdisk -l
...
Disk /dev/sdb: 274.9 GB, 274877906944 bytes
255 heads, 63 sectors/track, 33418 cylinders, total 536870912 sectors
Units = sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
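The preview stops after identifying the new disk. The usual next steps are to partition, format, and mount it; the commands below are a hedged sketch (the device name comes from the `fdisk -l` output above, but the ext4 filesystem and the `/data` mount point are assumptions for illustration):

```shell
# Partition the new disk (interactive: 'n' creates a partition, 'w' writes the table)
fdisk /dev/sdb

# Create a filesystem on the new partition (ext4 is an assumed choice)
mkfs.ext4 /dev/sdb1

# Mount it at an example mount point
mkdir -p /data
mount /dev/sdb1 /data

# To mount at boot, add a line like this to /etc/fstab:
# /dev/sdb1  /data  ext4  defaults  0  2
```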

Keybase proof

I hereby claim:

  • I am soulmachine on github.
  • I am soulmachine (https://keybase.io/soulmachine) on keybase.
  • I have a public key whose fingerprint is 1F53 41FB 3CD9 D064 FB2E 0D4D 5B13 D2E7 709F 1721

To claim this, I am signing this object:

package com.cmcm.bdp
import java.io.FileInputStream
import java.net.InetAddress
import com.databricks.spark.avro._
import com.maxmind.geoip2.DatabaseReader
import org.apache.spark._
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.expressions.Window
@soulmachine
soulmachine / MyConcurrentHashMap.java
Last active February 1, 2016 02:00
My implementations of HashMap and ConcurrentHashMap. The tricky part: MyConcurrentHashMap's get() works without taking a lock, which greatly improves read performance.
import java.util.concurrent.locks.ReentrantLock;
public class MyConcurrentHashMap<K,V> {
private static final int DEFAULT_CONCURRENCY_LEVEL = 16;
/**
* The default initial capacity - MUST be a power of two.
*/
static final int DEFAULT_INITIAL_CAPACITY = 256;
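The preview cuts off before the part the description highlights. Below is a minimal, hypothetical sketch of that idea (lock striping for writes, a lock-free get() relying on volatile reads and final fields); it is my reconstruction for illustration, not the gist's actual code:

```java
import java.util.concurrent.atomic.AtomicReferenceArray;
import java.util.concurrent.locks.ReentrantLock;

// Illustrative striped hash map in the spirit of pre-Java-8 ConcurrentHashMap.
// All names here are hypothetical.
public class StripedMap<K, V> {
    private static final int BUCKETS = 256;   // must be a power of two
    private static final int STRIPES = 16;    // concurrency level

    private static final class Node<K, V> {
        final K key;
        volatile V value;        // volatile so an unlocked get() sees updates
        final Node<K, V> next;   // final for safe publication to readers
        Node(K key, V value, Node<K, V> next) {
            this.key = key; this.value = value; this.next = next;
        }
    }

    // AtomicReferenceArray gives volatile semantics per bucket, so a reader
    // always observes a fully constructed node chain without locking.
    private final AtomicReferenceArray<Node<K, V>> table =
        new AtomicReferenceArray<>(BUCKETS);
    private final ReentrantLock[] locks = new ReentrantLock[STRIPES];

    public StripedMap() {
        for (int i = 0; i < STRIPES; i++) locks[i] = new ReentrantLock();
    }

    private int bucket(Object key) {
        return (key.hashCode() & 0x7fffffff) & (BUCKETS - 1);
    }

    // get() takes no lock: the volatile bucket read plus final next links
    // make the traversal safe against concurrent writers.
    public V get(Object key) {
        for (Node<K, V> n = table.get(bucket(key)); n != null; n = n.next)
            if (n.key.equals(key)) return n.value;
        return null;
    }

    // put() locks only the stripe guarding this bucket, so writes to
    // different stripes proceed in parallel.
    public V put(K key, V value) {
        int b = bucket(key);
        ReentrantLock lock = locks[b % STRIPES];
        lock.lock();
        try {
            for (Node<K, V> n = table.get(b); n != null; n = n.next) {
                if (n.key.equals(key)) {
                    V old = n.value;
                    n.value = value;
                    return old;
                }
            }
            table.set(b, new Node<>(key, value, table.get(b)));
            return null;
        } finally {
            lock.unlock();
        }
    }
}
```

The design choice worth noting: because the same bucket always maps to the same stripe, two writers can only conflict when they touch buckets guarded by the same lock, while readers never block at all.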
@soulmachine
soulmachine / MyArrayBlockingQueue.java
Last active January 31, 2016 06:38
My implementations of BlockingQueue
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;
// Reference: http://docs.oracle.com/javase/7/docs/api/java/util/concurrent/locks/Condition.html
public class MyArrayBlockingQueue<E> {
final ReentrantLock lock = new ReentrantLock();
final Condition notFull = lock.newCondition();
final Condition notEmpty = lock.newCondition();
final Object[] items;
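The fields above match the example in the Condition javadoc linked in the comment. A sketch of how put()/take() typically complete that pattern (the method bodies below are my reconstruction under that assumption, not necessarily the gist's code):

```java
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;

// Bounded buffer with two conditions, following the java.util.concurrent
// Condition javadoc example. Names are illustrative.
public class BoundedBuffer<E> {
    private final ReentrantLock lock = new ReentrantLock();
    private final Condition notFull = lock.newCondition();
    private final Condition notEmpty = lock.newCondition();
    private final Object[] items;
    private int putIdx, takeIdx, count;

    public BoundedBuffer(int capacity) {
        items = new Object[capacity];
    }

    public void put(E e) throws InterruptedException {
        lock.lock();
        try {
            // Wait in a loop: guards against spurious wakeups.
            while (count == items.length) notFull.await();
            items[putIdx] = e;
            putIdx = (putIdx + 1) % items.length;
            count++;
            notEmpty.signal();   // wake one waiting consumer
        } finally {
            lock.unlock();
        }
    }

    @SuppressWarnings("unchecked")
    public E take() throws InterruptedException {
        lock.lock();
        try {
            while (count == 0) notEmpty.await();
            E e = (E) items[takeIdx];
            items[takeIdx] = null;   // let the element be garbage collected
            takeIdx = (takeIdx + 1) % items.length;
            count--;
            notFull.signal();        // wake one waiting producer
            return e;
        } finally {
            lock.unlock();
        }
    }
}
```

Using two conditions instead of one means a put() only wakes consumers and a take() only wakes producers, avoiding useless wakeups of threads that still cannot proceed.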
@soulmachine
soulmachine / WordCountPercentage.java
Last active November 12, 2019 18:50
Calculate Word Count Percentage
package me.soulmachine;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
package me.soulmachine;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
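Stripped of the Hadoop scaffolding, the percentage step reduces to dividing each word's count by the grand total. A plain-Java sketch of that final step (class and method names are illustrative, not from the gist):

```java
import java.util.LinkedHashMap;
import java.util.Map;

public class WordPercentage {
    // Given absolute word counts, return each word's share of the total
    // as a percentage. Preserves the input's iteration order.
    static Map<String, Double> percentages(Map<String, Long> counts) {
        long total = counts.values().stream().mapToLong(Long::longValue).sum();
        Map<String, Double> result = new LinkedHashMap<>();
        for (Map.Entry<String, Long> e : counts.entrySet()) {
            result.put(e.getKey(), 100.0 * e.getValue() / total);
        }
        return result;
    }
}
```

In the MapReduce version this division typically happens in a second job (or in the reducer after the total is known), since a mapper cannot see the global total while counting.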
@soulmachine
soulmachine / Mutual.scala
Last active September 15, 2017 12:52
Compute mutual-follow relationships on Sina Weibo using Spark. The input is a text file where each line has the format userid1, userid2, userid3, ..., useridN, meaning that user userid1 follows userid2, userid3, ..., useridN. The output is also a text file, where each line has the format userid1, userid2, meaning these two users follow each other.
package com.yanjiuyanjiu.weibo
import org.apache.spark.SparkContext._
import org.apache.spark.{SparkContext, Logging}
/**
 * Find mutual-follow relationships.
 *
 * Input is a text file where each line has the format userId1, userId2, userId3, ..., userIdN, meaning userId1 follows userId2, userId3, ..., userIdN
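The core of the mutual-follow computation, minus Spark: treat each follow as a directed edge and keep the pairs whose reverse edge also exists. A small Java sketch of that logic (illustrative only; the gist itself does this distributed over RDDs):

```java
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class MutualFollows {
    // edges maps each follower to the set of users they follow.
    // Returns pairs (a, b) with a < b where a follows b AND b follows a.
    static List<long[]> mutual(Map<Long, Set<Long>> edges) {
        List<long[]> result = new ArrayList<>();
        for (Map.Entry<Long, Set<Long>> e : edges.entrySet()) {
            long a = e.getKey();
            for (long b : e.getValue()) {
                // The a < b check avoids emitting each mutual pair twice.
                if (a < b && edges.getOrDefault(b, Collections.emptySet()).contains(a)) {
                    result.add(new long[]{a, b});
                }
            }
        }
        return result;
    }
}
```

In the Spark version the same idea is usually expressed by emitting each edge as an ordered key, then grouping or joining so that an edge seen from both directions marks a mutual pair.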
@soulmachine
soulmachine / run.scala
Last active January 1, 2016 08:39
run()
def run(C: Int, D: Int, data: RDD[LabeledPoint]) = {
val partitionCounts = data.mapPartitions { iterator =>
val localCountPerLabel = mutable.Map.empty[Int, Int].withDefaultValue(0)
val localSummedObservations = mutable.Map.empty[Int, Array[Double]]
.withDefaultValue(Array.fill(D)(0.0))
iterator.foreach {
case LabeledPoint(label, features) =>
val y = label.toInt
localCountPerLabel(y) += 1
localSummedObservations(y) = localSummedObservations(y).zip(features)