Skip to content

Instantly share code, notes, and snippets.

@vxgmichel
Last active October 11, 2023 11:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vxgmichel/448f2c88d72ef74d8231e1057280ee0e to your computer and use it in GitHub Desktop.
Save vxgmichel/448f2c88d72ef74d8231e1057280ee0e to your computer and use it in GitHub Desktop.
Comparing rust and python on a specific set intersection problem

Comparing rust and python on a set intersection problem

We have a file with a list of space-separated words on each line, as produced by generate_example.py.

The solution program should output the words that appear at least once on each line, sorted.

Three implementations are compared, one in python and two in rust.

Results:

# Generate the example file
$ python generate_example.py > example.txt

# Compile the rust program with the release profile
$ cargo fmt && cargo clippy && cargo build --release
$ cp target/release/intersection intersection

# Compile the rust program with the release profile and the ahash feature
$ cargo fmt && cargo clippy && cargo build --release --features ahash
$ cp target/release/intersection intersection-ahash

# Time the python execution
$ \time -f "%U s" python intersection.py < example.txt
knight marines pat pg porter reed twain
1.07 s

# Time the rust execution
$ \time -f "%U s" target/release/intersection < example.txt
knight marines pat pg porter reed twain
1.17 s

# Time the rust execution
$ \time -f "%U s" target/release/intersection-ahash < example.txt
knight marines pat pg porter reed twain
0.67 s
[package]
name = "intersection"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[features]
ahash = ["dep:ahash"]
[dependencies]
ahash = { version = "0.8.3", optional = true }
import random
def main():
n = 4000
m = n + 7
k = n
with open("/usr/share/dict/words") as f:
words = f.read().splitlines()
words = [word.lower() for word in words if "'" not in word]
subset = random.sample(words, m)
for _ in range(k):
print(*random.sample(subset, n))
if __name__ == "__main__":
main()
import sys
def main():
intersection = set(input().strip().split())
for line in sys.stdin:
words = line.strip().split()
intersection.intersection_update(words)
print(*sorted(intersection))
if __name__ == "__main__":
main()
use std::io::{self, BufRead};
#[cfg(feature = "ahash")]
use ahash::AHashSet as HashSet;
#[cfg(not(feature = "ahash"))]
use std::collections::HashSet;
fn main() {
let stdin = io::stdin();
let line = stdin.lock().lines().next().unwrap().unwrap();
let mut intersection: HashSet<&str> = line.split_whitespace().collect();
for line in stdin.lock().lines().map(|l| l.unwrap()) {
let current_set: HashSet<&str> = line.split_whitespace().collect();
intersection.retain(|&word| current_set.contains(word));
}
let mut result = intersection.into_iter().collect::<Vec<&str>>();
result.sort();
println!("{}", result.join(" "));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment