List the disks:
# fdisk -l
...
Disk /dev/sdb: 274.9 GB, 274877906944 bytes
255 heads, 63 sectors/track, 33418 cylinders, total 536870912 sectors
Units = sectors of 1 * 512 = 512 bytes
Sector size (logical/physical): 512 bytes / 512 bytes
git branch -m old_branch new_branch        # Rename the branch locally
git push origin :old_branch                # Delete the old branch on the remote
git push --set-upstream origin new_branch  # Push the new branch and make the local branch track it
I hereby claim:
To claim this, I am signing this object:
package com.cmcm.bdp

import java.io.FileInputStream
import java.net.InetAddress

import com.databricks.spark.avro._
import com.maxmind.geoip2.DatabaseReader
import org.apache.spark._
import org.apache.spark.sql.{DataFrame, Row}
import org.apache.spark.sql.expressions.Window
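The preview shows only the imports. As a rough, hedged sketch of how they typically fit together: read the Avro logs with spark-avro, then perform the MaxMind GeoIP2 lookup inside mapPartitions so that a single DatabaseReader is built per partition (it is not serializable). All paths, the client_ip column, and the object name are assumptions; the Window import hints at a later ranking or de-duplication step that this sketch omits.

import java.io.FileInputStream
import java.net.InetAddress

import scala.util.Try

import com.databricks.spark.avro._
import com.maxmind.geoip2.DatabaseReader
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.SQLContext

// Hypothetical sketch; paths and column names are illustrative only.
object GeoIpEnrichment {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("GeoIpEnrichment"))
    val sqlContext = new SQLContext(sc)

    // spark-avro's implicits (imported above) add .avro to the DataFrameReader.
    val logs = sqlContext.read.avro("hdfs:///data/logs/dt=2016-01-01")

    val enriched = logs.rdd.mapPartitions { rows =>
      // DatabaseReader is not serializable, so build one per partition.
      val reader = new DatabaseReader.Builder(
        new FileInputStream("/opt/geoip/GeoLite2-City.mmdb")).build()
      rows.map { row =>
        val ip = row.getAs[String]("client_ip")
        // Unknown addresses throw AddressNotFoundException; fall back to "unknown".
        val country = Try(reader.country(InetAddress.getByName(ip)).getCountry.getIsoCode)
          .getOrElse("unknown")
        (ip, country)
      }
    }

    enriched.take(10).foreach(println)
    sc.stop()
  }
}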
import java.util.concurrent.locks.ReentrantLock;

public class MyConcurrentHashMap<K, V> {
    /** Estimated number of concurrent writers; typically the number of lock-striped segments. */
    private static final int DEFAULT_CONCURRENCY_LEVEL = 16;

    /** The default initial capacity - MUST be a power of two. */
    static final int DEFAULT_INITIAL_CAPACITY = 256;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.ReentrantLock;

// Reference: http://docs.oracle.com/javase/7/docs/api/java/util/concurrent/locks/Condition.html
public class MyArrayBlockingQueue<E> {
    /** Main lock guarding all access. */
    final ReentrantLock lock = new ReentrantLock();
    /** Producers wait on this condition while the queue is full. */
    final Condition notFull = lock.newCondition();
    /** Consumers wait on this condition while the queue is empty. */
    final Condition notEmpty = lock.newCondition();
    /** The queued items. */
    final Object[] items;
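The put/take methods are cut off in the preview. For reference, here is a compact sketch of the bounded-buffer pattern from the linked Condition javadoc, written in Scala against the same java.util.concurrent classes; the class and field names are illustrative, not the original gist's code.

import java.util.concurrent.locks.ReentrantLock

// Minimal bounded buffer illustrating the two-Condition pattern; not the original gist's code.
class BoundedBuffer[E](capacity: Int) {
  private val lock = new ReentrantLock()
  private val notFull = lock.newCondition()   // signalled when a slot frees up
  private val notEmpty = lock.newCondition()  // signalled when an item arrives
  private val items = new Array[Any](capacity)
  private var putIndex, takeIndex, count = 0

  def put(e: E): Unit = {
    lock.lock()
    try {
      while (count == items.length) notFull.await()   // wait for free space
      items(putIndex) = e
      putIndex = (putIndex + 1) % items.length
      count += 1
      notEmpty.signal()                               // wake one waiting consumer
    } finally lock.unlock()
  }

  def take(): E = {
    lock.lock()
    try {
      while (count == 0) notEmpty.await()             // wait for an item
      val e = items(takeIndex).asInstanceOf[E]
      items(takeIndex) = null
      takeIndex = (takeIndex + 1) % items.length
      count -= 1
      notFull.signal()                                // wake one waiting producer
      e
    } finally lock.unlock()
  }
}

Waiting in a while loop rather than an if guards against spurious wakeups, which the Condition documentation explicitly recommends.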
package me.soulmachine;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
package me.soulmachine;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
package com.yanjiuyanjiu.weibo

import org.apache.spark.SparkContext._
import org.apache.spark.{SparkContext, Logging}

/**
 * Finds pairs of users who follow each other.
 *
 * The input is a text file in which each line has the form userId1, userId2, userId3, ..., userIdN,
 * meaning that userId1 follows userId2, userId3, ..., userIdN.
 */
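Below is a minimal sketch of one way such a job can be written; the object name, paths, and tokenization are illustrative assumptions, not the original implementation. Each input line is expanded into (follower, followee) edges; putting each edge's endpoints in canonical order makes a mutual follow show up as the same key twice.

import org.apache.spark.SparkContext._
import org.apache.spark.{SparkConf, SparkContext}

// Hypothetical sketch; input/output paths are illustrative only.
object MutualFollowers {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("MutualFollowers"))

    // Each line: userId1, userId2, ..., userIdN  (userId1 follows all the others).
    val edges = sc.textFile("hdfs:///weibo/followings.txt").flatMap { line =>
      val ids = line.split("[,\\s]+").filter(_.nonEmpty).toSeq
      for (followee <- ids.drop(1)) yield (ids.head, followee)
    }.distinct()

    // Order each edge's endpoints canonically so a->b and b->a meet on the same key;
    // after distinct(), a count of 2 means the follow exists in both directions.
    val mutual = edges
      .map { case (a, b) => (if (a < b) (a, b) else (b, a), 1) }
      .reduceByKey(_ + _)
      .filter { case (_, n) => n == 2 }
      .keys

    mutual.saveAsTextFile("hdfs:///weibo/mutual-followers")
    sc.stop()
  }
}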
def run(C: Int, D: Int, data: RDD[LabeledPoint]) = {
  // C: number of label classes, D: number of features (dimensions).
  val partitionCounts = data.mapPartitions { iterator =>
    val localCountPerLabel = mutable.Map.empty[Int, Int].withDefaultValue(0)
    val localSummedObservations = mutable.Map.empty[Int, Array[Double]]
      .withDefaultValue(Array.fill(D)(0.0))
    iterator.foreach {
      case LabeledPoint(label, features) =>
        val y = label.toInt
        localCountPerLabel(y) += 1
        // Element-wise sum of the feature vectors seen so far for label y.
        localSummedObservations(y) =
          localSummedObservations(y).zip(features).map { case (acc, x) => acc + x }
    }
    // Emit one (label -> count, label -> feature sums) pair per partition.
    Iterator((localCountPerLabel, localSummedObservations))
  }
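The preview stops inside the per-partition pass. Assuming this is a multinomial Naive Bayes trainer (which C, D, the per-label counts, and the feature sums suggest), the rest of run might look roughly as follows; the merge step, the Laplace smoothing, and the returned (log prior, log conditional probability) pair are assumptions, not recovered from the gist.

  // Continuation sketch inside run(): merge the per-partition aggregates,
  // then turn the counts into log priors and Laplace-smoothed log conditional probabilities.
  val (countPerLabel, summedObservations) = partitionCounts.reduce {
    case ((counts1, sums1), (counts2, sums2)) =>
      counts2.foreach { case (y, c) => counts1(y) = counts1.getOrElse(y, 0) + c }
      sums2.foreach { case (y, s) =>
        sums1(y) = sums1.getOrElse(y, Array.fill(D)(0.0)).zip(s).map { case (a, b) => a + b }
      }
      (counts1, sums1)
  }

  val numExamples = countPerLabel.values.sum
  val logPrior = Array.tabulate(C) { y =>
    math.log((countPerLabel.getOrElse(y, 0) + 1.0) / (numExamples + C))
  }
  val logConditional = Array.tabulate(C) { y =>
    val sums = summedObservations.getOrElse(y, Array.fill(D)(0.0))
    val total = sums.sum
    sums.map(s => math.log((s + 1.0) / (total + D)))
  }

  (logPrior, logConditional)
}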