Find duplicates across Google Sheets
/** Multisheet Duplicate Finder ** | |
To use, paste this code into a Google Spreadsheets Apps Script (Tools - Script editor) | |
Loops through all of the sheets in the current spreadsheet. | |
Identifies duplicates in the chosen column. | |
Skips the chosen number of header rows. | |
Creates and alert box listing the duplicates, and colors the duplicate cells red. | |
The directive below limits the script to only be able to access this spreadsheet. | |
* @OnlyCurrentDoc | |
*/ | |
function findDuplicatesAcrossSheets() { | |
// Set the following variables to change the script's behavior | |
const COLUMN_TO_CHECK = 8; // A=1, B=2, etc. | |
const HEADER_ROWS = 2; // script will skip this number of rows | |
dupeList = []; // an array to fill with duplicates | |
urlLocs = {}; // track which sheet(s) contain a url | |
var ss = SpreadsheetApp.getActiveSpreadsheet(); | |
var sheets = ss.getSheets(); | |
for (var i = 0; i < sheets.length; i++) { | |
numRows = sheets[i].getLastRow(); | |
if (numRows > HEADER_ROWS) { | |
sheetName = sheets[i].getName(); | |
var data = sheets[i].getRange(HEADER_ROWS+1, COLUMN_TO_CHECK, numRows-HEADER_ROWS, 1).getValues(); | |
for (index in data) { | |
row = parseInt(index) + HEADER_ROWS + 1; | |
var url = data[index][0]; | |
if (url == "") {continue;} // ignore empty url cells | |
// We know we are looking at URLs, so we will do some cleanup specific to that type of data | |
url = url.toLowerCase(); // convert to lowercase | |
url = url.replace("https://", ""); // remove "https://" | |
url = url.replace("http://", ""); // remove "http://" | |
url = url.replace(/^www\./, ""); // remove leading "www." | |
url = url.replace(/\?.*/, ""); // remove "?" and anything following | |
url = url.replace(/\/$/, ""); // remove trailing "/" | |
if (urlLocs.hasOwnProperty(url)) { | |
dupeList.push("duplicate: " + url + " in sheet " + sheetName + " and sheet " + urlLocs[url].sheet); | |
sheets[i].getRange(row,COLUMN_TO_CHECK,1,1).setBackground("red"); | |
ss.getSheetByName(urlLocs[url].sheet).getRange(urlLocs[url].row,COLUMN_TO_CHECK,1,1).setBackground("red"); | |
} | |
urlLocs[url] = {sheet: sheetName, row: row}; | |
} | |
} | |
} | |
if (dupeList.length > 0) { | |
Browser.msgBox(dupeList.join("\\n")); | |
} else { | |
Browser.msgBox("No duplicates found") | |
} | |
} | |
/** | |
* Adds a custom menu to the active spreadsheet | |
*/ | |
function onOpen() { | |
var sheet = SpreadsheetApp.getActiveSpreadsheet(); | |
var entries = [{ | |
name : "Find Duplicates Across Sheets", | |
functionName : "findDuplicatesAcrossSheets" | |
}]; | |
sheet.addMenu("My Scripts", entries); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment