Skip to content

Instantly share code, notes, and snippets.

@chawel
Last active June 7, 2018 21:50
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chawel/7e4e5b11544ab5a5ccb13e0a333d23d6 to your computer and use it in GitHub Desktop.
Save chawel/7e4e5b11544ab5a5ccb13e0a333d23d6 to your computer and use it in GitHub Desktop.
Google Spreadsheet script: Finds all similar texts in range (using difflib.js, percentage similarity)
/**
* To use this script you must also add the difflib module:
* https://github.com/qiao/difflib.js
* Add it to the project as difflib.gs (minified version):
* https://raw.githubusercontent.com/qiao/difflib.js/master/dist/difflib-browser.js
*/
/**
 * Adds this script's entry to the spreadsheet's add-on menu.
 *
 * The single menu item invokes the global function `findSimilar`.
 * Intended to be run by Apps Script as the `onOpen` simple trigger.
 *
 * NOTE(review): the label reads 'Copy to new sheet', but `findSimilar` only
 * highlights cells in the current selection — confirm the intended wording.
 */
function onOpen() {
  var ui = SpreadsheetApp.getUi();
  var addonMenu = ui.createAddonMenu();
  var menuWithItem = addonMenu.addItem('Copy to new sheet', 'findSimilar');
  menuWithItem.addToUi();
}
/**
 * Decides whether two values are textually similar.
 *
 * Both values are converted with String() and compared using difflib's
 * SequenceMatcher. quickRatio() is only an upper bound on ratio(): it ignores
 * character order, so anagrams such as "listen"/"silent" score 1.0. It is
 * therefore used purely as a cheap pre-filter, and pairs that pass it are
 * confirmed with the exact (slower) ratio().
 *
 * @param {*} left First value to compare.
 * @param {*} right Second value to compare.
 * @param {number} percent Similarity threshold; the ratio must be strictly
 *     greater than this value for the pair to count as similar.
 * @return {boolean} True when the similarity ratio exceeds `percent`.
 */
function isSimilar(left, right, percent) {
  var matcher = new difflib.SequenceMatcher(null, String(left), String(right));
  var ratio = matcher.quickRatio();
  if (ratio > percent) {
    // The upper bound passed; only now pay for the exact computation.
    ratio = matcher.ratio();
  }
  console.log(left + " " + right + " " + ratio);
  return ratio > percent;
}
/**
 * Inserts a new sheet into the active spreadsheet under a name that is not
 * yet taken.
 *
 * Starting from `name`, the literal suffix "(0)" is appended repeatedly
 * (e.g. "Data", "Data(0)", "Data(0)(0)", ...) until no sheet with the
 * candidate name exists; a sheet with that name is then inserted.
 *
 * @param {string} name Preferred sheet name.
 * @return {string} The name actually used for the inserted sheet.
 */
function createNewSheet(name) {
  var candidate = name;
  for (;;) {
    var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
    if (spreadsheet.getSheetByName(candidate) == null) {
      spreadsheet.insertSheet(candidate);
      return candidate;
    }
    // Name is taken: extend it and try again.
    candidate = candidate + "(0)";
  }
}
/**
 * Highlights the first occurrence of each distinct (non-similar) value in the
 * first column of the active selection.
 *
 * Rows are scanned top to bottom. A row's first-column value is compared,
 * via `isSimilar` with a 0.7 threshold, against every value already accepted
 * as unique. If none is similar, the value is recorded as unique and its cell
 * gets a red background. Cells judged similar to an earlier value are left
 * untouched. Only the first column of the selection is examined.
 *
 * Does nothing when there is no active range.
 */
function findSimilar() {
  var selectedRange = SpreadsheetApp.getActiveSheet().getActiveRange();
  if (selectedRange == null) {
    // getActiveRange() yields null when nothing is selected.
    return;
  }

  var selectedCells = selectedRange.getValues();
  var percentage = 0.7;
  var uniqueValues = [];

  for (var i = 0; i < selectedCells.length; ++i) {
    var firstValue = selectedCells[i][0];
    var duplicate = false;
    for (var j = 0; j < uniqueValues.length; ++j) {
      if (isSimilar(firstValue, uniqueValues[j], percentage)) {
        duplicate = true;
        break;
      }
    }
    if (!duplicate) {
      // getCell() is 1-based and relative to the selected range.
      selectedRange.getCell(i + 1, 1).setBackground("red");
      uniqueValues.push(firstValue);
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment