Last active
June 7, 2018 21:50
-
-
Save chawel/7e4e5b11544ab5a5ccb13e0a333d23d6 to your computer and use it in GitHub Desktop.
Google Spreadsheet script: Finds all similar texts in range (using difflib.js, percentage similarity)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * To use this script you must also add the difflib module:
 * https://github.com/qiao/difflib.js
 * Add it to the project as difflib.gs, using the minified version from:
 * https://raw.githubusercontent.com/qiao/difflib.js/master/dist/difflib-browser.js
 */
/**
 * Registers one entry in the spreadsheet's add-on menu.
 * Selecting the entry runs the function named 'findSimilar'.
 */
function onOpen() {
  var ui = SpreadsheetApp.getUi();
  var addonMenu = ui.createAddonMenu();
  // NOTE(review): the label says "Copy to new sheet" but the handler is
  // findSimilar — confirm the label is what users should see.
  var menuWithItem = addonMenu.addItem('Copy to new sheet', 'findSimilar');
  menuWithItem.addToUi();
}
/**
 * Decides whether two values are textually similar.
 *
 * Both values are converted to strings and compared with
 * difflib.SequenceMatcher.
 *
 * @param {*} left - First value to compare.
 * @param {*} right - Second value to compare.
 * @param {number} percent - Threshold the similarity ratio must exceed
 *     (callers in this file pass a fraction such as 0.7).
 * @returns {boolean} true when the similarity ratio is strictly greater than
 *     `percent`.
 */
function isSimilar(left, right, percent) {
  var matcher = new difflib.SequenceMatcher(null, String(left), String(right));
  // quickRatio() is only an upper bound on ratio(): it ignores character
  // order, so e.g. anagrams score 1.0. Use it as a cheap rejection test and
  // compute the real ratio() only for pairs that might pass.
  var ratio = matcher.quickRatio();
  if (ratio > percent) {
    ratio = matcher.ratio();
  }
  console.log(left + " " + right + " " + ratio);
  return ratio > percent;
}
/**
 * Inserts a new sheet into the active spreadsheet under a name that is not
 * yet taken.
 *
 * If `name` is already used by a sheet, "(0)" is appended repeatedly until
 * no sheet with the resulting name exists.
 *
 * @param {string} name - Desired sheet name.
 * @returns {string} The name actually given to the inserted sheet.
 */
function createNewSheet(name) {
  var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
  var candidate = name;
  // Keep extending the name while a sheet with that name is found.
  while (spreadsheet.getSheetByName(candidate) != null) {
    candidate = candidate + "(0)";
  }
  spreadsheet.insertSheet(candidate);
  return candidate;
}
/**
 * Scans the first column of the currently selected range and paints red the
 * first cell of every row whose value is not similar (per isSimilar, with a
 * 0.7 threshold) to any value kept from an earlier row.
 *
 * Rows whose value is similar to an already kept value are left untouched.
 * Does nothing when there is no active range.
 */
function findSimilar() {
  var selectedRange = SpreadsheetApp.getActiveSheet().getActiveRange();
  // getActiveRange() yields null when nothing is selected; without this guard
  // the getValues() call below would throw.
  if (selectedRange == null) {
    return;
  }

  var percentage = 0.7;
  var uniqueValues = [];
  var selectedCells = selectedRange.getValues();

  for (var i = 0; i < selectedCells.length; ++i) {
    var firstValue = selectedCells[i][0];
    // some() stops at the first similar value, like the original break.
    var duplicate = uniqueValues.some(function (kept) {
      return isSimilar(firstValue, kept, percentage);
    });
    if (!duplicate) {
      // getCell() is 1-based and relative to the selected range.
      selectedRange.getCell(i + 1, 1).setBackground("red");
      uniqueValues.push(firstValue);
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment