Created
January 5, 2018 16:04
-
-
Save robmiller/241ff8c5d11a3c2ba284d0659e320691 to your computer and use it in GitHub Desktop.
A Ruby implementation of SMOG (Simple Measure of Gobbledygook) scoring for measuring text complexity.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
#
# Calculates the SMOG (Simple Measure of Gobbledygook) score for a given
# piece of text. A CLI utility that could very easily be adapted into
# a library if I could be bothered.
#
# More on SMOG: https://en.wikipedia.org/wiki/SMOG
#
# Author: Rob Miller <r@robm.me.uk>
#
# Usage:
#
#   $ ./smog path/to/file.txt
#   SMOG score: 15.34
#   Sentences: 14
#   Words: 263
#   Polysyllables: 64
# Computes the SMOG (Simple Measure of Gobbledygook) grade of a piece of
# English text, together with the sentence, word and polysyllable counts the
# grade is derived from.
#
#   smog = Smog.new("Some text to measure. It has two sentences.")
#   smog.score             # => Float, rounded to two decimal places
#   smog.num_sentences     # => Integer
#   smog.num_words         # => Integer
#   smog.num_polysyllables # => Integer
class Smog
  # Estimates the number of syllables in a single English word.
  #
  # A Ruby port of Greg Fast's Perl original, from the
  # Lingua::EN::Syllable package:
  # http://search.cpan.org/dist/Lingua-EN-Syllable/
  #
  # That original is copyright (c) 1999 by Greg Fast and distributed
  # under the GPL.
  class Syllables
    # Each pattern that matches the word removes one syllable from the
    # vowel-group count.
    SUBTRACT = [
      /cial/,
      /tia/,
      /cius/,
      /cious/,
      /giu/,
      /ion/,
      /iou/,
      /sia$/,
      /.ely$/,
      /[^td]ed$/,
    ].freeze

    # Each pattern that matches the word adds one syllable to the
    # vowel-group count.
    ADD = [
      /ia/,
      /riet/,
      /dien/,
      /iu/,
      /io/,
      /ii/,
      /microor/,
      /[aeiouym]bl$/,
      /[aeiou]{3}/,
      /^mc/,
      /ism$/,
      /isms$/,
      /([^aeiouy])\1l$/,
      /[^l]lien/,
      /^coa[dglx]./,
      /[^gq]ua[^auieo]/,
      /dnt$/,
    ].freeze

    # word - the String to analyse; it is expected to be a bare word with no
    #        surrounding punctuation, because the patterns above anchor on
    #        the end of the word.
    def initialize(word)
      @word = word
    end

    # Returns the estimated syllable count as an Integer, never less than 1.
    def count
      word = @word.downcase
      return 2 if word == "w"
      return 1 if word.length == 1

      # Fold contractions ("didn't" -> "didnt") and drop a trailing "e",
      # which is treated as silent.
      word = word.delete("'").gsub(/e$/, "")

      # Every maximal run of vowels counts as one syllable to start with...
      vowel_groups = word.scan(/[aeiouy]+/)

      # ...then the special-case patterns adjust that figure up or down.
      adjustment = ADD.count { |pattern| word =~ pattern } -
                   SUBTRACT.count { |pattern| word =~ pattern }

      # A word always has at least one syllable, even if the adjustments
      # cancel out (or outweigh) the vowel groups.
      [vowel_groups.length + adjustment, 1].max
    end
  end

  # text - the String whose readability is to be measured.
  def initialize(text)
    @text = text
  end

  # Returns the SMOG grade as a Float rounded to two decimal places:
  #
  #   3.1291 + 1.043 * sqrt(polysyllables * 30 / sentences)
  #
  # Text with no sentences at all (empty, or punctuation/whitespace only)
  # cannot be graded; 0.0 is returned for it rather than dividing by zero.
  def score
    return 0.0 if num_sentences.zero?

    (3.1291 + 1.043 * Math.sqrt(num_polysyllables * (30 / num_sentences.to_f))).round(2)
  end

  # Returns the number of sentences found in the text.
  def num_sentences
    sentences.length
  end

  # Returns the number of words found in the text.
  def num_words
    words.length
  end

  # Returns the number of words with three or more syllables.
  def num_polysyllables
    polysyllables.length
  end

  private

  attr_reader :text

  # Splits the text on ".", "!", "?" and ";", keeping only the pieces that
  # contain at least one word character.
  def sentences
    @sentences ||= text.split(/[.!?;]/).select { |s| s =~ /\w/ }
  end

  # Splits the text on whitespace, em dashes and en dashes, keeping only the
  # tokens that contain at least one word character.
  #
  # Parentheses and commas are removed, and any other punctuation is trimmed
  # from both ends of each token ("sentence." -> "sentence"). Without the
  # trimming, the syllable counter's end-of-word rules (silent "e", "-ed",
  # "-ism", ...) would not fire for words followed by punctuation and such
  # words would be over-counted.
  def words
    @words ||= text.split(/\s+|—|–/)
                   .select { |w| w =~ /\w/ }
                   .map { |w| w.gsub(/[(),]/, "").gsub(/\A[^[:alnum:]]+|[^[:alnum:]]+\z/, "") }
  end

  # The words that have three or more syllables.
  def polysyllables
    @polysyllables ||= words.select { |word| Syllables.new(word).count >= 3 }
  end
end
# Command-line entry point. Reads the file named by the first argument and
# prints its SMOG score along with the counts the score is based on.
#
# Only runs when this file is executed directly, so the Smog class can also
# be loaded from other code without triggering any file reads or output.
if __FILE__ == $PROGRAM_NAME
  path = ARGV[0]
  # Without a path there is nothing to read; explain the usage rather than
  # letting File.read fail on nil.
  abort "Usage: #{$PROGRAM_NAME} path/to/file.txt" if path.nil?

  smog = Smog.new(File.read(path))
  puts "SMOG score: #{smog.score}"
  puts "Sentences: #{smog.num_sentences}"
  puts "Words: #{smog.num_words}"
  puts "Polysyllables: #{smog.num_polysyllables}"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment