Skip to content

Instantly share code, notes, and snippets.

@ksurent
Last active July 19, 2023 19:23
Show Gist options
  • Save ksurent/d68a14c223c2cee9caac16ce0e0ebc46 to your computer and use it in GitHub Desktop.
Save ksurent/d68a14c223c2cee9caac16ce0e0ebc46 to your computer and use it in GitHub Desktop.
A silly program that finds CPAN modules that could be deliberate typos. See https://threatpost.com/attackers-use-typo-squatting-to-steal-npm-credentials/127235/.
// Download index file: wget https://www.cpan.org/modules/02packages.details.txt.
use std::{
collections::HashSet,
fmt::Display,
fs::File,
io::{self, BufReader},
};
fn main() -> io::Result<()> {
let fh = File::open("02packages.details.txt")?;
let rd = BufReader::new(fh);
let modules = read_index(rd)?;
let candidates = find_squatters(modules.as_slice());
for c in candidates {
println!("{}", c);
}
Ok(())
}
#[derive(PartialEq, Debug)]
struct Module {
name: String,
author: String,
}
#[derive(PartialEq, Debug)]
struct Candidate<'a> {
left: &'a Module,
right: &'a Module,
distance: f32,
}
impl<'a> Display for Candidate<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(
f,
"{} {} {} {} {}",
self.left.name, self.left.author, self.right.name, self.right.author, self.distance
)
}
}
fn read_index<'a, R: io::BufRead>(rd: R) -> io::Result<Vec<Module>> {
const HEADER_AND_EMPTY_LINE: usize = 9;
const MOD_NAME: usize = 0;
const MOD_PATH: usize = 2;
let skip_prefixes = [
"Google::Ads::AdWords",
"Acme::CPANModulesBundle::Import::PerlAdvent",
"Acme::CPANModules::Import::PerlDancerAdvent",
"Acme::CPANModules::Import::PerlAdvent",
"Acme::CPANModules::Import::MojoliciousAdvent",
];
let mut modules = Vec::new();
let mut lines = rd.lines().skip(HEADER_AND_EMPTY_LINE);
loop {
match lines.next() {
Some(Ok(l)) => {
let pieces = l.split_ascii_whitespace().collect::<Vec<_>>();
assert_eq!(pieces.len(), 3);
let name = pieces[MOD_NAME];
if skip_prefixes.iter().any(|&x| name.starts_with(x)) {
continue;
}
let path = pieces[MOD_PATH];
let cutoff = path.rfind("/").unwrap_or(path.len());
modules.push(Module {
name: name.to_string(),
author: path[..cutoff].to_string(),
});
}
Some(Err(e)) => return Err(e),
None => break,
}
}
Ok(modules)
}
fn find_squatters<'a>(modules: &'a [Module]) -> Vec<Candidate<'a>> {
let mut seen = HashSet::with_capacity(modules.len());
let mut candidates = Vec::with_capacity(modules.len());
let kbd = Keyboard::new();
for m1 in modules.iter() {
let b1 = m1.name.as_bytes();
for m2 in modules.iter() {
if m1.name == m2.name || m1.author == m2.author {
continue;
}
let b2 = m2.name.as_bytes();
let distance = kbd.distance(b1, b2);
if distance < 2f32 && !seen.contains(b2) {
candidates.push(Candidate {
left: m1,
right: m2,
distance,
});
seen.insert(b1);
seen.insert(b2);
}
}
}
candidates
}
#[derive(Copy, Clone)]
struct Key {
x: u8,
y: u8,
}
impl Key {
fn distance(&self, other: Key) -> f32 {
let dx = other.x as f32 - self.x as f32;
let dy = other.y as f32 - self.y as f32;
(dx * dx + dy * dy).sqrt()
}
}
impl From<usize> for Key {
fn from(pos: usize) -> Self {
let x = (pos % 10) as u8;
let y = (pos / 10) as u8;
Self { x, y }
}
}
#[rustfmt::skip]
static LAYOUT: [u8; 40] = [
b'1', b'2', b'3', b'4', b'5', b'6', b'7', b'8', b'9', b'0',
b'q', b'w', b'e', b'r', b't', b'y', b'u', b'i', b'o', b'p',
b'a', b's', b'd', b'f', b'g', b'h', b'j', b'k', b'l', b';',
b'z', b'x', b'c', b'v', b'b', b'n', b'm', b',', b'.', b'/',
];
#[rustfmt::skip]
static LAYOUT_SHIFTED: [u8; 40] = [
b'!', b'@', b'#', b'$', b'%', b'^', b'&', b'*', b'(', b')',
b'Q', b'W', b'E', b'R', b'T', b'Y', b'U', b'I', b'O', b'P',
b'A', b'S', b'D', b'F', b'G', b'H', b'J', b'K', b'L', b':',
b'Z', b'X', b'C', b'V', b'B', b'N', b'M', b'<', b'>', b'?',
];
struct Keyboard {
keys: [Key; 255],
}
impl Keyboard {
fn new() -> Keyboard {
let mut keys = [Key { x: 0, y: 0 }; 255];
for (i, ch) in LAYOUT.iter().copied().enumerate() {
keys[ch as usize] = Key::from(i);
}
for (i, ch) in LAYOUT_SHIFTED.iter().copied().enumerate() {
keys[ch as usize] = Key::from(i);
}
Keyboard { keys }
}
#[inline]
fn as_key(&self, c: u8) -> Key {
unsafe { *self.keys.get_unchecked(c as usize) }
}
#[inline]
fn key_distance(&self, c1: u8, c2: u8) -> f32 {
if c1 == c2 {
return 0f32;
}
self.as_key(c1).distance(self.as_key(c2))
}
fn distance(&self, s1: &[u8], s2: &[u8]) -> f32 {
if s1.len() != s2.len() {
return std::f32::MAX;
}
if s1 == s2 {
return 0f32;
}
let mut dist = 0f32;
for (i, _) in s1.iter().enumerate() {
dist += self.key_distance(s1[i], s2[i]);
}
dist
// s1.iter()
// .copied()
// .enumerate()
// .fold(0f32, |acc, (i, c1)| acc + self.char_distance(c1, s2[i]))
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_keyboard_distance() {
let kbd = Keyboard::new();
assert_eq!(
kbd.distance("fooo".as_bytes(), "foo".as_bytes()),
std::f32::MAX,
"extra letter left"
);
assert_eq!(
kbd.distance("foo".as_bytes(), "fooo".as_bytes()),
std::f32::MAX,
"extra letter right"
);
assert_eq!(
kbd.distance("foo".as_bytes(), "foo".as_bytes()),
0f32,
"same"
);
assert_eq!(
kbd.distance("foo".as_bytes(), "FOO".as_bytes()),
0f32,
"same but shifted"
);
assert_eq!(
kbd.distance("foo".as_bytes(), "fop".as_bytes()),
1f32,
"typo squat"
);
assert_eq!(
kbd.distance("foo".as_bytes(), "foP".as_bytes()),
1f32,
"typo squat shifted"
);
}
#[test]
fn test_key_distance() {
assert_eq!(
(Key { x: 0, y: 0 }).distance(Key { x: 0, y: 0 }),
0f32,
"same"
);
assert_eq!(
(Key { x: 0, y: 0 }).distance(Key { x: 1, y: 0 }),
1f32,
"one to the right"
);
let distance = (Key { x: 0, y: 0 }).distance(Key { x: 1, y: 1 });
let expected = 1.414214f32;
let epsilon = 0.0001f32;
assert!(
(distance - expected).abs() < epsilon,
"one to the right diagonally"
);
}
#[test]
fn test_read_from_file() {
let lines = r#"
File: 02packages.details.txt
URL: http://www.perl.com/CPAN/modules/02packages.details.txt
Description: Package names found in directory $CPAN/authors/id/
Columns: package name, version, path
Intended-For: Automated fetch routines, namespace documentation.
Written-By: PAUSE version 1.005
Line-Count: 42
Last-Updated: Tue, 07 Jul 2020 12:56:01 GMT
A 0.01 A/AA/AAA/A-0.01.tar.gz
B 0.02 B/BB/BBB/B-0.02.tar.gz
C undef C/CC/CCC/C-0.03.tar.gz
"#;
let modules = read_index(lines.trim().as_bytes()).expect("parse");
assert_eq!(
modules,
Vec::from([
Module {
name: "A".to_string(),
author: "A/AA/AAA".to_string(),
},
Module {
name: "B".to_string(),
author: "B/BB/BBB".to_string(),
},
Module {
name: "C".to_string(),
author: "C/CC/CCC".to_string(),
},
]),
"correct modules",
)
}
#[test]
fn test_find_squatters() {
let modules = [
Module {
name: "Important::Stuff".to_string(),
author: "A/AA/AAA".to_string(),
},
Module {
name: "Imp0rtant::Stuff".to_string(),
author: "B/BB/BBB".to_string(),
},
Module {
name: "Acme::Whatever".to_string(),
author: "C/CC/CCC".to_string(),
},
];
let candidates = find_squatters(&modules);
assert_eq!(
candidates,
Vec::from([Candidate {
left: &modules[0],
right: &modules[1],
distance: 1.4142135,
}])
);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment