Created
April 27, 2012 19:57
-
-
Save L3n41c/2512355 to your computer and use it in GitHub Desktop.
luard
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
lenaic@tatooine:~/doc/prog/luard$ head -n 9999 hello*.cpp | |
==> hello0.cpp <== | |
#include <cstdlib> | |
#include <iostream> | |
int main() | |
{ | |
std::cout << "Hello World!" << std::endl; | |
return EXIT_SUCCESS; | |
} | |
==> hello1.cpp <== | |
#include <cstdlib> | |
#include <iostream> | |
int main() | |
{ | |
std::cout << "Hello World!" << std::endl; | |
return EXIT_SUCCESS; | |
} | |
==> hello2.cpp <== | |
#include <cstdlib> | |
#include <iostream> | |
int main() | |
{ | |
std::cout << "Hello World!" << std::endl; | |
return EXIT_SUCCESS; | |
} | |
lenaic@tatooine:~/doc/prog/luard$ ./luard.el hello*.cpp 2> /dev/null | |
hello0.cpp: 0 | |
hello1.cpp: 2 | |
hello2.cpp: 9 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;;; levenshtein.el --- Edit distance between two strings. | |
;; Copyright (C) 2003, 2005 Aaron S. Hawley, Art Taylor | |
;; Author: Aaron S. Hawley <ashawley at uvm dot edu>, | |
;; Art Taylor | |
;; Keywords: lisp | |
;; This file is free software; you can redistribute it and/or modify | |
;; it under the terms of the GNU General Public License as published by | |
;; the Free Software Foundation; either version 2, or (at your option) | |
;; any later version. | |
;; This file is distributed in the hope that it will be useful, | |
;; but WITHOUT ANY WARRANTY; without even the implied warranty of | |
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
;; GNU General Public License for more details. | |
;; You should have received a copy of the GNU General Public License | |
;; along with GNU Emacs; see the file COPYING. If not, write to | |
;; the Free Software Foundation, Inc., 59 Temple Place - Suite 330, | |
;; Boston, MA 02111-1307, USA. | |
;;; Commentary: | |
;; See: http://wikipedia.org/wiki/Levenshtein_distance | |
;;; History: | |
;; Written by Art Taylor on 15 March 2003 in Washington, DC, USA and | |
;; released under the zlib license. Rewritten by Aaron Hawley on 12 | |
;; July 2005 in Burlington, VT, USA and released under the GNU GPL. | |
;; Posted to the EmacsWiki on 12 July 2005. | |
;;; Code: | |
(defun levenshtein-distance (str1 str2) | |
"Return the edit distance between strings STR1 and STR2." | |
(if (not (stringp str1)) | |
(error "Argument was not a string: %s" str1)) | |
(if (not (stringp str2)) | |
(error "Argument was not a string: %s" str2)) | |
(let* ((make-table ;; Multi-dimensional array object. | |
(function | |
(lambda (columns rows init) | |
(make-vector rows (make-vector columns init))))) | |
(tref ;; Table access method. | |
(function | |
(lambda (table x y) | |
(aref (aref table y) x)))) | |
(tset ;; Table write method. | |
(function | |
(lambda | |
(table x y object) | |
(let ((row (copy-sequence (aref table y)))) | |
(aset row x object) | |
(aset table y row) | |
object)))) | |
;; End table code. | |
(length-str1 (length str1)) | |
(length-str2 (length str2)) | |
;; d is a table with lenStr2+1 rows and lenStr2+1 columns | |
(d (funcall make-table (1+ length-str1) (1+ length-str2) | |
0))) ;; Initialize to zero. | |
;; i and j are used to iterate over str1 and str2 | |
(let ((i 0) | |
(j 0)) | |
(while (<= i length-str1) ;; for i from 0 to lenStr1 | |
(funcall tset d i 0 i) ;; d[i, 0] := i | |
(setq i (1+ i))) ;; i++ | |
(while (<= j length-str2) ;; for j from 0 to lenStr2 | |
(funcall tset d 0 j j) ;; d[0, j] := j | |
(setq j (1+ j)))) ;; j++ | |
(let ((i 1)) | |
(while (<= i length-str1) ;; for i from 1 to lenStr1 | |
(let ((j 1)) | |
(while (<= j length-str2) ;; for j from 1 to lenStr2 | |
(let* ((cost | |
;; if str[i] = str[j] then cost:= 0 else cost := 1 | |
(if (equal (aref str1 (1- i)) (aref str2 (1- j))) | |
0 | |
1)) | |
;; d[i-1, j] + 1 // deletion | |
(deletion (1+ (funcall tref d (1- i) j))) | |
;; d[i, j-1] + 1 // insertion | |
(insertion (1+ (funcall tref d i (1- j)))) | |
;; d[i-j,j-1] + cost // substitution | |
(substitution | |
(+ (funcall tref d (1- i) (1- j)) cost))) | |
(funcall tset d i j ;; d[i,j] := minimum( | |
(min insertion deletion substitution))) | |
(setq j (1+ j)))) ;; j++ | |
(setq i (1+ i)))) ;; i++ | |
;; return d[lenStr1, lenStr2] | |
(funcall tref d length-str1 length-str2))) | |
(provide 'levenshtein) | |
;;; levenshtein.el ends here |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/emacs --script | |
(add-to-list 'load-path (file-name-directory (elt command-line-args 2))) | |
(require 'levenshtein) | |
(defun luard (file) | |
(find-file file) | |
(setq original (buffer-string)) | |
(whitespace-cleanup) | |
(indent-region (point-min) (point-max)) | |
(setq proper (buffer-string)) | |
(princ (format "%s: %d\n" file (levenshtein-distance original proper))) | |
(kill-buffer)) | |
(mapc 'luard argv) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Disclaimer: I didn't think this would be an appropriate measurement in the first place. But abstracting that...
This looks like an unadjusted array boolean difference, where a single space difference near the beginning adds up O(filesize) the the luard count. Discounting the natural bias towards space indentation for not-so-good reasons (more repeated characters on average than with tab indentation), it's likely to be dominated by file size no matter what.
An edit distance, e.g. Levenshtein distance could be both easier to implement[*] and more relevant.
[*] I abide by the downtrending opinion that using a publicly available library is better than reinventing the wheel. But please don't quote me on this in a corporate context ;-)