Skip to content

Instantly share code, notes, and snippets.

@allquantor
Created June 15, 2023 00:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save allquantor/9edd1dc8a754649e79d7cce4a1a721c3 to your computer and use it in GitHub Desktop.
Save allquantor/9edd1dc8a754649e79d7cce4a1a721c3 to your computer and use it in GitHub Desktop.
function createSheetsByPromptId() {
// Configurable parameters
var minimumFilledEntries = 50;
var spreadsheet = SpreadsheetApp.getActiveSpreadsheet();
var sheet = spreadsheet.getActiveSheet(); // get current sheet
// get entire data range
var data = sheet.getDataRange().getValues();
var header = data[0].map(function(name) {
return name.replace(/"/g, '').replace(/\s+/g, ' ').trim(); // Removing the double quotation marks, extra spaces and trimming white spaces
});
var promptIdIndex = header.indexOf("prompt_id");
var tweetsNicerIndex = header.indexOf("tweets_nicer"); // index of 'tweets_nicer' column
Logger.log("Starting to group data based on prompt_id");
// Group rows by promptId
var groupedData = data.slice(1).reduce(function(grouped, row) {
var promptId = row[promptIdIndex];
if (!grouped[promptId]) {
grouped[promptId] = [];
}
grouped[promptId].push(row);
return grouped;
}, {});
Logger.log("Grouped data by prompt_id. Total groups: " + Object.keys(groupedData).length);
// For each promptId group, process data and create a new sheet
for (var promptId in groupedData) {
Logger.log("Processing data for prompt_id: " + promptId);
// Initialize statistics for each promptId
var initialEntriesCount = groupedData[promptId].length;
var mergedColumnsCount = 0;
var mergedColumnNames = [];
var deletedEmptyColumns = [];
var deletedLessThanMinimumColumns = [];
var keptColumns = [];
var groupData = groupedData[promptId];
// Add header row
groupData.unshift(header);
// Check if groupData has any entries before attempting to transpose
if (groupData.length > 0) {
// transpose the data for easier iteration over columns
var transposedData = groupData[0].map((_, i) => groupData.map(row => row[i]));
Logger.log("Transposed data for easier column operations");
// Merge columns with the same name
var mergedData = transposedData.reduce(function(accum, currentCol, index) {
var colName = currentCol[0];
if (accum && typeof accum.map === 'object' && accum.map[colName] != null) {
// column with this name already exists, merge data
var existingColIndex = accum.map[colName];
accum.data[existingColIndex] = accum.data[existingColIndex].map(function(cell, i) {
if (i === 0) {
return cell;
} else {
// if both cell and currentCol[i] are strings of integers, keep the larger one
if (/^\d+$/.test(cell) && /^\d+$/.test(currentCol[i])) {
return Math.max(parseInt(cell), parseInt(currentCol[i])).toString();
} else {
return (cell.trim() || currentCol[i].trim()) ? (cell + currentCol[i]).trim() : '';
}
}
});
mergedColumnsCount++;
if (!mergedColumnNames.includes(colName)) {
mergedColumnNames.push(colName);
}
} else {
// new column, just add it
accum.data.push(currentCol);
accum.map[colName] = accum.data.length - 1;
}
return accum;
}, {data: [], map: {}}).data;
Logger.log("Merged columns with the same name");
// filter columns with enough filled entries and not 'tweets_nicer'
var validColumnsData = mergedData.filter((colData, index) => {
var colName = colData[0];
if (colName === header[tweetsNicerIndex]) {
return false;
}
var filledEntriesCount = colData.slice(1).filter(cell => cell !== '' && (typeof cell !== 'string' || cell.trim() !== '')).length;
var isEnoughEntries = filledEntriesCount >= minimumFilledEntries;
if (!isEnoughEntries) {
deletedLessThanMinimumColumns.push(colName);
} else {
keptColumns.push(colName);
}
return isEnoughEntries;
});
Logger.log("Filtered out columns with less than " + minimumFilledEntries + " entries and 'tweets_nicer'");
// Check if validColumnsData has any entries before attempting to transpose
if (validColumnsData.length > 0) {
// transpose back to original format
var finalData = validColumnsData[0].map((_, i) => validColumnsData.map(row => row[i]));
// Create new sheet with final data
var newSheet = spreadsheet.insertSheet(promptId);
newSheet.getRange(1, 1, finalData.length, finalData[0].length).setValues(finalData);
Logger.log("Created new sheet for prompt_id: " + promptId);
// Print out statistics for each promptId
Logger.log("===================================");
Logger.log("Statistics for promptId: " + promptId);
Logger.log("===================================");
Logger.log("Initial entries: " + initialEntriesCount);
Logger.log("Number of merged columns: " + mergedColumnsCount);
Logger.log("Merged column names: " + mergedColumnNames.join(', '));
Logger.log("Number of empty columns deleted: " + deletedEmptyColumns.length);
Logger.log("Deleted columns with less than 50 entries: " + deletedLessThanMinimumColumns.join(', '));
Logger.log("Final columns kept: " + keptColumns.join(', '));
} else {
Logger.log(`No valid columns found for prompt_id: ${promptId}`);
}
} else {
Logger.log(`No data found for prompt_id: ${promptId}`);
}
}
Logger.log("New sheets created and data is copied. Process completed");
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment