Han Ju darkjh

## alternative, using regex
    // Scans `input` for records made of two ';'-separated columns, each column being
    // either a double-quoted string or empty.
    //
    // Pattern flags: (?x) lets the pattern contain insignificant whitespace, (?s) lets
    // '.' match line terminators, so a quoted column may span several lines.
    //   group 1 / group 3 : the whole column including its quotes (or the empty string)
    //   group 2 / group 4 : the text between the quotes
    // A record ends at a line break (\n or \r\n) or at the end of the input (\z).
    val pattern = java.util.regex.Pattern.compile ("""(?xs) ("(.*?)"|) ; ("(.*?)"|) (?: \r?\n | \z ) """)
    val matcher = pattern.matcher (input)
    // Each successful find() advances to the next record in the input.
    while (matcher.find) {
      // NOTE: group(2) / group(4) are null when the corresponding column is empty,
      // because the quoted alternative did not take part in the match.
      val col1 = matcher.group (2)
      val col2 = matcher.group (4)
      // ...
    }

## crawler.scala
import io.Source
import scala.actors.Actor._

// Regex to pick up external links; very simplified, so it'll miss some.
// Matching is case-insensitive ((?i)). Group 1 captures the double-quoted href value,
// which must start with "http"; group 2 captures the text between the opening tag
// and the closing </a>. '.' does not match line terminators here, so anchors that
// span several lines are among the links it misses.
val linkRegex = "(?i)<a.+?href=\"(http.+?)\".*?>(.+?)</a>".r

object PageLoader {
  def load(url: String) = {
    try {
      Source.fromURL(url).mkString

## dump_stack.go
// Thanks to zeebo on #go-nuts
package main

import (
	"os"
	"os/signal"
	"runtime"
	"syscall"
)

## disable_dynamic_mapping.sh
# Demonstrates that indexing into an unmapped type fails once dynamic mapping is
# disabled on the index. Talks to a node on localhost:9200.

# Delete the "test" index so the following PUT creates it from scratch.
curl -XDELETE localhost:9200/test
# Create the "test" index with dynamic mapping switched off.
curl -XPUT localhost:9200/test -d '{
  "index.mapper.dynamic": false
}'
# Response recorded for the request above:
#{"ok":true,"acknowledged":true}
# Try to index document 1 into type "test", for which no mapping has been defined.
curl -XPUT localhost:9200/test/test/1 -d '{"foo":"bar"}'
# Response recorded for the request above (the type is not auto-created):
#{"error":"TypeMissingException[[test] type[test] missing: trying to auto create mapping, but dynamic mapping is disabled]","status":404}

## DataScienceInScala.scala
object BenchmarkCommon {
  import scala.util.Random

  val DatasetSize = 10000
  val Iterations = 10000
  val ArrayPoolSize = 1000
  val ArrayPool = {
    def randomArray(): Array[Int] = {
      val array = new Array[Int](DatasetSize)


## StreamingHLL.scala
import spark.streaming.StreamingContext._
import spark.streaming.{Seconds, StreamingContext}
import spark.SparkContext._
import spark.storage.StorageLevel
import spark.streaming.examples.twitter.TwitterInputDStream
import com.twitter.algebird.HyperLogLog._
import com.twitter.algebird._

/**
 * Example of using HyperLogLog monoid from Twitter's Algebird together with Spark Streaming's

## gist:5143799
// create an index with an analyzer "myindex"
curl -X PUT localhost:9200/myindex -d '
{
  "settings" : {
    "index":{
      "number_of_replicas":0,
      "number_of_shards":1,
      "analysis":{
        "analyzer":{
          "first":{

## spark-config.json
{
    "home": null,
    "local_dir": null,
    "buffer_size": 65536,
    "kryo": {
        "buffer_size_mb": 10,
        "registrator": null
    },
    "parallelism": null,
    "test": {

## kafka.md

      
              1 file
            
          
              67 forks
            
          
              5 comments
            
          
              151 stars
            
          
                ashrithr
                / kafka.md
            
            
              Last active
              March 14, 2024 21:16
            
              
                kafka introduction
              
          
    Introduction to Kafka

Kafka acts as a kind of write-ahead log (WAL): it records messages to a persistent store (disk) and lets subscribers read those messages and apply the changes to their own stores in a system-appropriate time frame.
Terminology:

- Producers send messages to brokers.
- Consumers read messages from brokers.
- Messages are sent to a topic.


## pypi-release-checklist.md

      
              1 file
            
          
              70 forks
            
          
              13 comments
            
          
              274 stars
            
          
                audreyfeldroy
                / pypi-release-checklist.md
            
            
              Last active
              February 23, 2023 15:03
            
              
                My PyPI Release Checklist
              
          
- [ ] Update HISTORY.md
- [ ] Commit the changes:

  ```
  git add HISTORY.md
  git commit -m "Changelog for upcoming release 0.1.1."
  ```

- [ ] Update version number (can also be minor or major):

  ```
  bumpversion patch
  ```
	// Scans `input` for records made of two ';'-separated columns, each column being
	// either a double-quoted string or empty.
	//
	// The pattern previously read `(.?)` and `\|`: that allowed at most one character
	// between the quotes and required a literal '|' after each column, so ordinary
	// records such as "a";"b" never matched. It is restored to the reluctant `(.*?)`
	// and to plain alternation.
	//
	// Pattern flags: (?x) lets the pattern contain insignificant whitespace, (?s) lets
	// '.' match line terminators, so a quoted column may span several lines.
	//   group 1 / group 3 : the whole column including its quotes (or the empty string)
	//   group 2 / group 4 : the text between the quotes
	// A record ends at a line break (\n or \r\n) or at the end of the input (\z).
	val pattern = java.util.regex.Pattern.compile ("""(?xs) ("(.*?)"|) ; ("(.*?)"|) (?: \r?\n | \z ) """)
	val matcher = pattern.matcher (input)
	// Each successful find() advances to the next record in the input.
	while (matcher.find) {
		// NOTE: group(2) / group(4) are null when the corresponding column is empty,
		// because the quoted alternative did not take part in the match.
		val col1 = matcher.group (2)
		val col2 = matcher.group (4)
		// ...
	}
	import io.Source
	import scala.actors.Actor._

	// Regex to pick up external links; very simplified, so it'll miss some
	val linkRegex = "(?i)<a.+?href=\"(http.+?)\".*?>(.+?)</a>".r

	object PageLoader {
	def load(url: String) = {
	try {
	Source.fromURL(url).mkString
	// Thanks to zeebo on #go-nuts
	package main

	import (
	"os"
	"os/signal"
	"runtime"
	"syscall"
	)
	curl -XDELETE localhost:9200/test
	curl -XPUT localhost:9200/test -d '{
	"index.mapper.dynamic": false
	}'
	#{"ok":true,"acknowledged":true}
	curl -XPUT localhost:9200/test/test/1 -d '{"foo":"bar"}'
	#{"error":"TypeMissingException[[test] type[test] missing: trying to auto create mapping, but dynamic mapping is disabled]","status":404}
	object BenchmarkCommon {
	import scala.util.Random

	val DatasetSize = 10000
	val Iterations = 10000
	val ArrayPoolSize = 1000
	val ArrayPool = {
	def randomArray(): Array[Int] = {
	val array = new Array[Int](DatasetSize)
	import spark.streaming.StreamingContext._
	import spark.streaming.{Seconds, StreamingContext}
	import spark.SparkContext._
	import spark.storage.StorageLevel
	import spark.streaming.examples.twitter.TwitterInputDStream
	import com.twitter.algebird.HyperLogLog._
	import com.twitter.algebird._

	/**
	* Example of using HyperLogLog monoid from Twitter's Algebird together with Spark Streaming's
	// create an index with an analyzer "myindex"
	curl -X PUT localhost:9200/myindex -d '
	{
	"settings" : {
	"index":{
	"number_of_replicas":0,
	"number_of_shards":1,
	"analysis":{
	"analyzer":{
	"first":{
	{
	"home": null,
	"local_dir": null,
	"buffer_size": 65536,
	"kryo": {
	"buffer_size_mb": 10,
	"registrator": null
	},
	"parallelism": null,
	"test": {