Created
June 15, 2023 00:04
-
-
Save allquantor/9edd1dc8a754649e79d7cce4a1a721c3 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function createSheetsByPromptId() { | |
// Configurable parameters | |
var minimumFilledEntries = 50; | |
var spreadsheet = SpreadsheetApp.getActiveSpreadsheet(); | |
var sheet = spreadsheet.getActiveSheet(); // get current sheet | |
// get entire data range | |
var data = sheet.getDataRange().getValues(); | |
var header = data[0].map(function(name) { | |
return name.replace(/"/g, '').replace(/\s+/g, ' ').trim(); // Removing the double quotation marks, extra spaces and trimming white spaces | |
}); | |
var promptIdIndex = header.indexOf("prompt_id"); | |
var tweetsNicerIndex = header.indexOf("tweets_nicer"); // index of 'tweets_nicer' column | |
Logger.log("Starting to group data based on prompt_id"); | |
// Group rows by promptId | |
var groupedData = data.slice(1).reduce(function(grouped, row) { | |
var promptId = row[promptIdIndex]; | |
if (!grouped[promptId]) { | |
grouped[promptId] = []; | |
} | |
grouped[promptId].push(row); | |
return grouped; | |
}, {}); | |
Logger.log("Grouped data by prompt_id. Total groups: " + Object.keys(groupedData).length); | |
// For each promptId group, process data and create a new sheet | |
for (var promptId in groupedData) { | |
Logger.log("Processing data for prompt_id: " + promptId); | |
// Initialize statistics for each promptId | |
var initialEntriesCount = groupedData[promptId].length; | |
var mergedColumnsCount = 0; | |
var mergedColumnNames = []; | |
var deletedEmptyColumns = []; | |
var deletedLessThanMinimumColumns = []; | |
var keptColumns = []; | |
var groupData = groupedData[promptId]; | |
// Add header row | |
groupData.unshift(header); | |
// Check if groupData has any entries before attempting to transpose | |
if (groupData.length > 0) { | |
// transpose the data for easier iteration over columns | |
var transposedData = groupData[0].map((_, i) => groupData.map(row => row[i])); | |
Logger.log("Transposed data for easier column operations"); | |
// Merge columns with the same name | |
var mergedData = transposedData.reduce(function(accum, currentCol, index) { | |
var colName = currentCol[0]; | |
if (accum && typeof accum.map === 'object' && accum.map[colName] != null) { | |
// column with this name already exists, merge data | |
var existingColIndex = accum.map[colName]; | |
accum.data[existingColIndex] = accum.data[existingColIndex].map(function(cell, i) { | |
if (i === 0) { | |
return cell; | |
} else { | |
// if both cell and currentCol[i] are strings of integers, keep the larger one | |
if (/^\d+$/.test(cell) && /^\d+$/.test(currentCol[i])) { | |
return Math.max(parseInt(cell), parseInt(currentCol[i])).toString(); | |
} else { | |
return (cell.trim() || currentCol[i].trim()) ? (cell + currentCol[i]).trim() : ''; | |
} | |
} | |
}); | |
mergedColumnsCount++; | |
if (!mergedColumnNames.includes(colName)) { | |
mergedColumnNames.push(colName); | |
} | |
} else { | |
// new column, just add it | |
accum.data.push(currentCol); | |
accum.map[colName] = accum.data.length - 1; | |
} | |
return accum; | |
}, {data: [], map: {}}).data; | |
Logger.log("Merged columns with the same name"); | |
// filter columns with enough filled entries and not 'tweets_nicer' | |
var validColumnsData = mergedData.filter((colData, index) => { | |
var colName = colData[0]; | |
if (colName === header[tweetsNicerIndex]) { | |
return false; | |
} | |
var filledEntriesCount = colData.slice(1).filter(cell => cell !== '' && (typeof cell !== 'string' || cell.trim() !== '')).length; | |
var isEnoughEntries = filledEntriesCount >= minimumFilledEntries; | |
if (!isEnoughEntries) { | |
deletedLessThanMinimumColumns.push(colName); | |
} else { | |
keptColumns.push(colName); | |
} | |
return isEnoughEntries; | |
}); | |
Logger.log("Filtered out columns with less than " + minimumFilledEntries + " entries and 'tweets_nicer'"); | |
// Check if validColumnsData has any entries before attempting to transpose | |
if (validColumnsData.length > 0) { | |
// transpose back to original format | |
var finalData = validColumnsData[0].map((_, i) => validColumnsData.map(row => row[i])); | |
// Create new sheet with final data | |
var newSheet = spreadsheet.insertSheet(promptId); | |
newSheet.getRange(1, 1, finalData.length, finalData[0].length).setValues(finalData); | |
Logger.log("Created new sheet for prompt_id: " + promptId); | |
// Print out statistics for each promptId | |
Logger.log("==================================="); | |
Logger.log("Statistics for promptId: " + promptId); | |
Logger.log("==================================="); | |
Logger.log("Initial entries: " + initialEntriesCount); | |
Logger.log("Number of merged columns: " + mergedColumnsCount); | |
Logger.log("Merged column names: " + mergedColumnNames.join(', ')); | |
Logger.log("Number of empty columns deleted: " + deletedEmptyColumns.length); | |
Logger.log("Deleted columns with less than 50 entries: " + deletedLessThanMinimumColumns.join(', ')); | |
Logger.log("Final columns kept: " + keptColumns.join(', ')); | |
} else { | |
Logger.log(`No valid columns found for prompt_id: ${promptId}`); | |
} | |
} else { | |
Logger.log(`No data found for prompt_id: ${promptId}`); | |
} | |
} | |
Logger.log("New sheets created and data is copied. Process completed"); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment