Skip to content

Instantly share code, notes, and snippets.

@caulagi
Last active December 20, 2016 20:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save caulagi/ec063cd338310dcf922de82dd43a074a to your computer and use it in GitHub Desktop.
Save caulagi/ec063cd338310dcf922de82dd43a074a to your computer and use it in GitHub Desktop.
A simple word counter written in Rust and called from Python through cffi, benchmarked against a pure-Python version
$ cargo new words
$ cargo build --release
$ python benchmark.py
import os
import re
import time
from cffi import FFI
from collections import Counter
import sys

# Declare the C signature of the function exported by the Rust library so
# cffi knows how to marshal the arguments and the return value.
ffi = FFI()
ffi.cdef("""
int most_common(const char *, int);
""")

# Cargo names the shared library differently on each platform; the original
# hard-coded ".dylib" name only exists on macOS.
if sys.platform == "darwin":
    _lib_name = "libcounter.dylib"
elif sys.platform == "win32":
    _lib_name = "counter.dll"
else:
    _lib_name = "libcounter.so"

# Handle to the release build of the Rust library; `C.most_common` is the
# function declared in the cdef above.
C = ffi.dlopen(os.path.join("target", "release", _lib_name))
def benchmark_rs(path, n=10):
    """Time one call into the Rust library's ``most_common`` for ``path``.

    The path is passed to the native function as UTF-8 encoded bytes.
    Returns the elapsed wall-clock time in seconds as reported by
    ``time.time()``.
    """
    encoded_path = path.encode('utf-8')
    began = time.time()
    C.most_common(encoded_path, n)
    finished = time.time()
    return finished - began
def words(text):
    """Return every ``\\w+`` run found in the lower-cased ``text``, in order."""
    lowered = text.lower()
    return [match.group(0) for match in re.finditer(r'\w+', lowered)]
def benchmark(path, n=10):
    """Time the pure-Python word count for the file at ``path``.

    Reads the whole file, tallies its words with ``Counter`` and asks for the
    ``n`` most common ones; the result itself is discarded. Returns the
    elapsed wall-clock time in seconds, measured after the file is closed.
    """
    began = time.time()
    with open(path) as handle:
        contents = handle.read()
        tally = Counter(words(contents))
        tally.most_common(n)
    return time.time() - began
if __name__ == "__main__":
    # Accumulate the wall-clock time of each implementation over every input
    # file found directly inside the data directory.
    time_rs = time_py = 0
    base_dir = 'data'
    for f in os.listdir(base_dir):
        path = os.path.join(base_dir, f)
        # listdir also yields sub-directories; neither benchmark can read
        # those, so only regular files are measured.
        if not os.path.isfile(path):
            continue
        time_rs += benchmark_rs(path)
        time_py += benchmark(path)
    print("Time in rust: %s" % time_rs)
    print("Time in python: %s" % time_py)
[package]
name = "words"
version = "0.1.0"
authors = ["Pradip Caulagi <caulagi@gmail.com>"]

[dependencies]
# Pin to the 0.2 series: a "*" requirement accepts any future breaking
# release and is rejected by crates.io when publishing.
libc = "0.2"

[lib]
name = "counter"
# cdylib builds a C-compatible shared library intended to be loaded from
# other languages (here: Python through cffi).
crate-type = ["cdylib"]
extern crate libc;
use std::clone::Clone;
use std::collections::HashMap;
use std::ffi::CStr;
use std::fs::File;
use std::hash::Hash;
use std::io::Read;
use std::str;
use libc::c_char;
/// C-callable entry point: count the words in the file named by `c_buf` and
/// return the number of occurrences of the most frequent word.
///
/// * `c_buf` - NUL-terminated, UTF-8 encoded path of the file to read.
/// * `n` - how many of the most common words to compute. A negative value
///   places no limit on the number of words kept.
///
/// Returns the occurrence count of the most common word (saturated to
/// `i32::MAX`), `0` when there is nothing to report (no words in the file or
/// `n == 0`), and `-1` when `c_buf` is null, the path is not valid UTF-8, or
/// the file could not be processed.
///
/// # Safety
///
/// A non-null `c_buf` must point to a valid NUL-terminated string that stays
/// alive for the duration of the call.
#[no_mangle]
pub extern "C" fn most_common(c_buf: *const c_char, n: i32) -> i32 {
    // A null pointer must never reach `CStr::from_ptr`.
    if c_buf.is_null() {
        return -1;
    }
    // SAFETY: `c_buf` is non-null and, per the documented contract, points to
    // a NUL-terminated string that outlives this call.
    let buf = unsafe { CStr::from_ptr(c_buf).to_bytes() };
    let path = match str::from_utf8(buf) {
        Ok(path) => path,
        Err(_) => return -1,
    };

    // A plain `n as usize` cast turns negative values into huge limits;
    // spell that "no limit" meaning out instead of relying on the wrap.
    let limit = if n < 0 { usize::max_value() } else { n as usize };

    // `bucketize_words` panics on I/O failures. A panic must not unwind
    // across the `extern "C"` boundary, so turn it into an error code here.
    let ranked = match std::panic::catch_unwind(|| bucketize_words(path, limit)) {
        Ok(ranked) => ranked,
        Err(_) => return -1,
    };

    // `first()` instead of `[0]`: the list is empty for an empty file or
    // when `n == 0`.
    ranked.first().map_or(0, |entry| {
        let count = entry.1;
        if count > i32::max_value() as usize {
            i32::max_value()
        } else {
            count as i32
        }
    })
}
/// Count how often each whitespace-separated word occurs in the file at
/// `path`, ignoring case, and return the `n` most frequent words together
/// with their number of occurrences.
///
/// # Panics
///
/// Panics if the file cannot be opened or cannot be read as UTF-8 text.
fn bucketize_words(path: &str, n: usize) -> Vec<(String, usize)> {
    let mut contents = String::new();
    File::open(path)
        .unwrap()
        .read_to_string(&mut contents)
        .unwrap();

    // Words are keyed by their lower-cased form so "The" and "the" share a count.
    let mut tally = HashMap::new();
    for word in contents.split_whitespace().map(|w| w.to_lowercase()) {
        *tally.entry(word).or_insert(0) += 1;
    }

    n_most_common(tally, n)
}
/// Return at most `n` entries of `bag`, ordered from the highest occurrence
/// count to the lowest.
///
/// The sort is stable, so entries with equal counts keep the order in which
/// the map yielded them.
fn n_most_common<T>(bag: HashMap<T, usize>, n: usize) -> Vec<(T, usize)>
    where T: Eq + Hash + Clone
{
    let mut ranked: Vec<(T, usize)> = Vec::with_capacity(bag.len());
    ranked.extend(bag);
    // Compare ascending, then flip the result to get a descending order.
    ranked.sort_by(|lhs, rhs| lhs.1.cmp(&rhs.1).reverse());
    ranked.into_iter().take(n).collect()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment