I hereby claim:
- I am rjollet on github.
- I am rjollet (https://keybase.io/rjollet) on keybase.
- I have a public key whose fingerprint is 602D 9B5A D59A BD02 419D DCF6 BE2A D008 9411 A141
To claim this, I am signing this object:
<div style="white-space:nowrap;font-family:Helvetica, Arial;font-size: 14px;vertical-align:middle;white-space:nowrap"> | |
<h1 style="white-space:nowrap;font-size:18px; color:#36465d;line-height: 1;margin: 0 0 8px 0">Renaud Jollet De Lorenzo</h1> | |
<h4 style="white-space:nowrap;font-size:14px; color:#333;line-height: 1;margin: 0 0 8px 0;font-weight: 400;">Master Student | <a href="http://blog.renaudjollet.info" style="white-space:nowrap;text-decoration: none; color: #333">blog.renaudjollet.info</a></h4> | |
<h6 style="white-space:nowrap;font-size:14px; color:#999;line-height: 1;margin: 0;font-weight: 400;"><a href="https://keybase.io/rjollet" style="white-space:nowrap;text-decoration: none; color: #999">BE2A D008 9411 A141</a> | <span style="white-space:nowrap;text-decoration:none!important;color: #999!important">+886 9 780 184 30</span></h6> | |
<div style="white-space:nowrap;margin-top: 8px"> | |
<a style="white-space:nowrap;display:inline-block;" href="http://blog.renaudjollet.info"> | |
<img |
// Export the id, language and text of every document in `sample_stream`
// as tab-separated rows on stdout (legacy `mongo` shell script).
//
// Usage:
//   mongo <host>/<dbname> -u <username> -p <password> export_text.js > tweets_text.csv
//   mongo local --quiet export_text.js > tweets_text.csv
print("id\tlang\ttext");
db.sample_stream.find({}, { 'tweet.id_str': 1, 'tweet.lang': 1, 'tweet.text': 1, _id: 0 }).forEach(function (doc) {
    var tweet = doc.tweet;
    // Skip documents that carry no tweet text instead of letting a
    // TypeError abort the export half-way through the collection.
    if (!tweet || typeof tweet.text !== 'string') {
        return;
    }
    // Tabs and line breaks inside the text would break the one-row-per-tweet layout.
    print(tweet.id_str + "\t" + tweet.lang + "\t" + tweet.text.replace(/[\t\n\r]/g, ' '));
});
# Create a dedicated Python 3 conda environment named "spark" and switch to it.
conda create --name spark python=3
source activate spark
# Install Jupyter, then the pre-release Toree package, and register Toree
# with Jupyter for the current user only.
conda install jupyter
pip install --pre toree
jupyter toree install --user
// Export, for every document in `sample_stream`, the tweet id, the id of the
// tweet it replies to and the author's user id as comma-separated rows on
// stdout (legacy `mongo` shell script).
//
// Usage:
//   mongo <host>/<dbname> -u <username> -p <password> export.js > out.csv
//   mongo local --quiet export.js > out.csv
//
// NOTE(review): the header row has a space after each comma while the data
// rows do not; kept as-is in case a consumer depends on the header text.
print("id, in_reply_to_status_id, user_id");
db.sample_stream.find({}, { 'tweet.id_str': 1, 'tweet.in_reply_to_status_id_str': 1, 'tweet.user.id_str': 1, _id: 0 }).forEach(function (doc) {
    var tweet = doc.tweet;
    // Skip documents without tweet/user sub-documents instead of letting a
    // TypeError abort the export half-way through the collection.
    if (!tweet || !tweet.user) {
        return;
    }
    // A null reply id is printed as the literal text "null" by string concatenation.
    print(tweet.id_str + "," + tweet.in_reply_to_status_id_str + "," + tweet.user.id_str);
});
require 'yaml' | |
require 'csv' | |
def hashs_to_array(hashs) | |
array = [] | |
hashs.each do |hash| | |
array << hash.keys if array == [] | |
array << hash.values | |
end | |
array |
require 'yaml' | |
require 'csv' | |
# Converts a table (array of rows) whose first row holds the column names
# into an array of hashes, one hash per remaining row.
#
# array - Array of Arrays; array[0] is the header row.
#
# Returns an Array of Hashes keyed by the header values. An empty (or nil)
# table yields an empty Array. Rows shorter than the header get nil for the
# missing columns; extra cells beyond the header are dropped.
def array_to_hashs(array)
  keys, *rows = array
  # Without a header row there is nothing to key the values by.
  return [] if keys.nil?

  rows.map { |values| keys.zip(values).to_h }
end
array = CSV.read(ARGV[0], col_sep: "\t") |
# Builds the FizzBuzz sequence for 1..size.
#
# size   - Integer upper bound (inclusive); values below 1 give an empty result.
# output - Optional block, called once per element with that element.
#
# Returns an Array where multiples of 3 are 'Fizz', multiples of 5 are 'Buzz',
# multiples of both are 'FizzBuzz', and every other entry is the number itself.
def fizzbuzz(size, &output)
  (1..size).map do |number|
    # Interpolation builds a fresh string, so no string literal is mutated
    # (appending with << would fail under frozen_string_literal).
    word = "#{'Fizz' if (number % 3).zero?}#{'Buzz' if (number % 5).zero?}"
    res = word.empty? ? number : word
    yield res if output
    res
  end
end
I hereby claim:
To claim this, I am signing this object:
import json | |
import argparse | |
parser = argparse.ArgumentParser(description='Compare two corpus of words each corpus is a text file containing one word per line.') | |
parser.add_argument("--corpus1", help="file contening the corpus1") | |
parser.add_argument("--corpus2", help="file file contening the corpus2") | |
args = parser.parse_args() | |
if(args.corpus1 and args.corpus2): | |
corpus1 = set(open(args.corpus1,'r')) |
class ShortStringPacker | |
## Packs a short string into a Fixnum | |
# Arguments: | |
# str - String object | |
# Returns: a Fixnum object | |
def self.pack(str, nb_bits=5) | |
res = 0 | |
str.each_char.with_index do |char, index| | |
res = res | ((char.ord - 'a'.ord + 1) << nb_bits*(str.length-(index+1))) | |
end |