Skip to content

Instantly share code, notes, and snippets.

<div style="white-space:nowrap;font-family:Helvetica, Arial;font-size: 14px;vertical-align:middle;white-space:nowrap">
<h1 style="white-space:nowrap;font-size:18px; color:#36465d;line-height: 1;margin: 0 0 8px 0">Renaud Jollet De Lorenzo</h1>
<h4 style="white-space:nowrap;font-size:14px; color:#333;line-height: 1;margin: 0 0 8px 0;font-weight: 400;">Master Student | <a href="http://blog.renaudjollet.info" style="white-space:nowrap;text-decoration: none; color: #333">blog.renaudjollet.info</a></h4>
<h6 style="white-space:nowrap;font-size:14px; color:#999;line-height: 1;margin: 0;font-weight: 400;"><a href="https://keybase.io/rjollet" style="white-space:nowrap;text-decoration: none; color: #999">BE2A D008 9411 A141</a> | <span style="white-space:nowrap;text-decoration:none!important;color: #999!important">+886 9 780 184 30</span></h6>
<div style="white-space:nowrap;margin-top: 8px">
<a style="white-space:nowrap;display:inline-block;" href="http://blog.renaudjollet.info">
<img
@rjollet
rjollet / export_text.js
Created March 3, 2017 06:41
export tweet with text from mongo to csv
// Dump every document of the `sample_stream` collection to stdout as
// tab-separated rows: tweet id, language, text.
//
// Usage:
//   mongo <host>/<dbname> -u <username> -p <password> export_text.js > out.csv
//   mongo local --quiet export_text.js > tweets_text.csv

// Only the three exported fields are fetched; the Mongo _id is excluded.
var textProjection = { 'tweet.id_str': 1, 'tweet.lang': 1, 'tweet.text': 1, _id: 0 };

// Builds one output row. Tabs and line breaks inside the tweet text are
// replaced by spaces so that each tweet stays on a single line and the
// column separators remain unambiguous.
function formatTextRow(tweet) {
  var flatText = tweet.text.replace(/\t|\n|\r/g, ' ');
  return tweet.id_str + "\t" + tweet.lang + "\t" + flatText;
}

// Header row first, then one row per document.
print("id\tlang\ttext");
db.sample_stream.find({}, textProjection).forEach(function (doc) {
  print(formatTextRow(doc.tweet));
});
@rjollet
rjollet / Spark_jupyter_anaconda.sh
Last active February 12, 2017 07:04
conda environment to use spark with jupyter
# Create a dedicated conda environment named "spark" that uses Python 3.
conda create --name spark python=3
# Activate that environment in the current shell so the installs below go into it.
source activate spark
# Install Jupyter into the environment.
conda install jupyter
# Install the toree package with pip; --pre allows pip to select a pre-release version.
pip install --pre toree
# Run toree's Jupyter install step for the current user only (--user).
jupyter toree install --user
@rjollet
rjollet / export.js
Created February 8, 2017 05:48
export mongo to csv file
// Dump every document of the `sample_stream` collection to stdout as CSV with
// three columns taken from the fields tweet.id_str,
// tweet.in_reply_to_status_id_str and tweet.user.id_str.
//
// Usage:
//   mongo <host>/<dbname> -u <username> -p <password> export.js > out.csv
//   mongo local --quiet export.js > out.csv

// The header is written without spaces after the commas so that it uses the
// same delimiter as the data rows; otherwise strict CSV readers see column
// names with a leading space (" in_reply_to_status_id", " user_id").
print("id,in_reply_to_status_id,user_id");
db.sample_stream.find({}, { 'tweet.id_str': 1, 'tweet.in_reply_to_status_id_str': 1 , 'tweet.user.id_str': 1, _id: 0}).forEach(function(doc){
  // Values are concatenated as-is: a null in_reply_to_status_id_str is
  // written as the literal text "null".
  print(doc.tweet.id_str + "," + doc.tweet.in_reply_to_status_id_str + "," + doc.tweet.user.id_str);
});
require 'yaml'
require 'csv'
# Flattens a list of hashes into a table: a header row followed by one row
# of values per hash.
#
# The header is the key list of the first hash. Every row is looked up
# through that header, so the columns stay aligned even when later hashes
# list their keys in a different order; a key missing from a hash yields
# nil in that column, and keys absent from the header are not emitted.
#
# @param hashs [Array<Hash>] records to flatten
# @return [Array<Array>] header row plus one row per hash; [] for empty input
def hashs_to_array(hashs)
  header = nil
  hashs.each_with_object([]) do |hash, rows|
    if header.nil?
      # The first hash defines the column order for the whole table.
      header = hash.keys
      rows << header
    end
    rows << hash.values_at(*header)
  end
end
require 'yaml'
require 'csv'
# Turns a table (header row followed by data rows) into a list of hashes,
# one per data row, keyed by the header cells.
#
# A row shorter than the header gets nil for the missing columns; cells
# beyond the header width are ignored (Array#zip semantics).
#
# @param array [Array<Array>] table whose first row holds the keys
# @return [Array<Hash>] one hash per data row; [] when the table is empty
#   or contains only the header row
def array_to_hashs(array)
  # Destructuring leaves `rows` as [] for an empty table, whereas
  # `array[1..-1]` would be nil there and make the following `map` raise.
  keys, *rows = array
  rows.map { |values| keys.zip(values).to_h }
end
# Load the file named by the first command-line argument, splitting each line
# on tab characters; the result is an array of rows (arrays of cell strings).
array = CSV.read(ARGV[0], col_sep: "\t")
@rjollet
rjollet / fizzbuzz.rb
Last active September 23, 2016 14:27
fizzbuzz
# Produces the FizzBuzz sequence for the numbers 1 through +size+.
#
# Each number becomes 'Fizz' when divisible by 3, 'Buzz' when divisible by 5,
# 'FizzBuzz' when divisible by both, and stays the number itself otherwise.
# When a block is given, every entry is also yielded to it in order.
#
# @param size [Integer] last number of the sequence
# @yieldparam entry [String, Integer] the entry just computed
# @return [Array<String, Integer>] all entries, in order
def fizzbuzz(size, &output)
  (1..size).map do |n|
    # nil parts vanish in the join, leaving '' for plain numbers.
    word = [('Fizz' if (n % 3).zero?), ('Buzz' if (n % 5).zero?)].join
    entry = word.empty? ? n : word
    yield entry if output
    entry
  end
end

Keybase proof

I hereby claim:

  • I am rjollet on github.
  • I am rjollet (https://keybase.io/rjollet) on keybase.
  • I have a public key whose fingerprint is 602D 9B5A D59A BD02 419D DCF6 BE2A D008 9411 A141

To claim this, I am signing this object:

import json
import argparse
# Command-line interface: two optional flags, --corpus1 and --corpus2, each
# naming a text file that contains one word per line.
parser = argparse.ArgumentParser(description='Compare two corpus of words each corpus is a text file containing one word per line.')
parser.add_argument("--corpus1", help="file contening the corpus1")
parser.add_argument("--corpus2", help="file file contening the corpus2")
# Parse the process arguments; a flag that was not supplied is left as None.
args = parser.parse_args()
if(args.corpus1 and args.corpus2):
corpus1 = set(open(args.corpus1,'r'))
class ShortStringPacker
## Packs a short string into a Fixnum
# Arguments:
# str - String object
# Returns: a Fixnum object
def self.pack(str, nb_bits=5)
res = 0
str.each_char.with_index do |char, index|
res = res | ((char.ord - 'a'.ord + 1) << nb_bits*(str.length-(index+1)))
end