Skip to content

Instantly share code, notes, and snippets.

@karlkranich
Last active May 30, 2021 13:52
Show Gist options
  • Save karlkranich/aa2d21aad7edcfda1f802c7d172e6302 to your computer and use it in GitHub Desktop.
Save karlkranich/aa2d21aad7edcfda1f802c7d172e6302 to your computer and use it in GitHub Desktop.
Find duplicates across Google Sheets
/** Multisheet Duplicate Finder **
To use, paste this code into a Google Spreadsheets Apps Script (Tools - Script editor)
Loops through all of the sheets in the current spreadsheet.
Identifies duplicates in the chosen column.
Skips the chosen number of header rows.
Creates and alert box listing the duplicates, and colors the duplicate cells red.
The directive below limits the script to only be able to access this spreadsheet.
* @OnlyCurrentDoc
*/
function findDuplicatesAcrossSheets() {
// Set the following variables to change the script's behavior
const COLUMN_TO_CHECK = 8; // A=1, B=2, etc.
const HEADER_ROWS = 2; // script will skip this number of rows
dupeList = []; // an array to fill with duplicates
urlLocs = {}; // track which sheet(s) contain a url
var ss = SpreadsheetApp.getActiveSpreadsheet();
var sheets = ss.getSheets();
for (var i = 0; i < sheets.length; i++) {
numRows = sheets[i].getLastRow();
if (numRows > HEADER_ROWS) {
sheetName = sheets[i].getName();
var data = sheets[i].getRange(HEADER_ROWS+1, COLUMN_TO_CHECK, numRows-HEADER_ROWS, 1).getValues();
for (index in data) {
row = parseInt(index) + HEADER_ROWS + 1;
var url = data[index][0];
if (url == "") {continue;} // ignore empty url cells
// We know we are looking at URLs, so we will do some cleanup specific to that type of data
url = url.toLowerCase(); // convert to lowercase
url = url.replace("https://", ""); // remove "https://"
url = url.replace("http://", ""); // remove "http://"
url = url.replace(/^www\./, ""); // remove leading "www."
url = url.replace(/\?.*/, ""); // remove "?" and anything following
url = url.replace(/\/$/, ""); // remove trailing "/"
if (urlLocs.hasOwnProperty(url)) {
dupeList.push("duplicate: " + url + " in sheet " + sheetName + " and sheet " + urlLocs[url].sheet);
sheets[i].getRange(row,COLUMN_TO_CHECK,1,1).setBackground("red");
ss.getSheetByName(urlLocs[url].sheet).getRange(urlLocs[url].row,COLUMN_TO_CHECK,1,1).setBackground("red");
}
urlLocs[url] = {sheet: sheetName, row: row};
}
}
}
if (dupeList.length > 0) {
Browser.msgBox(dupeList.join("\\n"));
} else {
Browser.msgBox("No duplicates found")
}
}
/**
* Adds a custom menu to the active spreadsheet
*/
function onOpen() {
var sheet = SpreadsheetApp.getActiveSpreadsheet();
var entries = [{
name : "Find Duplicates Across Sheets",
functionName : "findDuplicatesAcrossSheets"
}];
sheet.addMenu("My Scripts", entries);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment