Skip to content

Instantly share code, notes, and snippets.

@vorotech
Created December 15, 2021 20:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vorotech/d50f82b3ffd2c1328e50e093e969351b to your computer and use it in GitHub Desktop.
Save vorotech/d50f82b3ffd2c1328e50e093e969351b to your computer and use it in GitHub Desktop.
Find duplicates in Google Drive with TypeScript
// Original example was written with Next.js, but any framework can be used.
// The authorization part is skipped; see the docs of your framework.
// Important notice: when authenticating with Google, the extended scope
// that includes 'https://www.googleapis.com/auth/drive' must be requested.
import { NextApiRequest, NextApiResponse } from 'next'
import jwt from 'next-auth/jwt'
import { drive_v3, google } from 'googleapis'
// Secret handed to next-auth's `jwt.getToken` in the request handler.
// Read from the NEXTAUTH_JWT_SECRET environment variable; undefined when it is not set.
const secret = process.env.NEXTAUTH_JWT_SECRET
/**
* Shape the decoded session token is cast to in the request handler.
*/
type JwtToken = {
// Not read anywhere in this file.
name: string;
// Used as the owner filter when listing Drive files.
email: string;
// Passed to the Google OAuth2 client as `access_token`.
accessToken: string;
}
/**
* Subset of Drive file metadata this module works with.
* Every field is optional; the `fields` mask in `listFiles` requests all of
* them except `owners`.
*/
type File = {
id?: string;
name?: string;
size?: string;
// Key used to group files into duplicate sets.
md5Checksum?: string;
createdTime?: string;
modifiedTime?: string;
// Only the first entry is followed when building a file's ancestor path.
parents?: string[];
ownedByMe?: boolean;
// Files whose value is the string "0" are excluded from duplicate grouping.
quotaBytesUsed?: string;
// Declared here but not part of the `fields` mask used by `listFiles`.
owners?: drive_v3.Schema$User[];
}
/**
* One ancestor entry in a file's path, as produced by `traverse`.
*/
type FileParent = {
id: string;
name: string;
// True when `id` equals the id of the root folder passed to `traverse`.
root: boolean;
}
/**
* A Drive file together with its computed ancestor path.
*/
type FileInfo = {
// Same value as `file.id`; also the key in the id -> FileInfo map.
id: string;
// Starts as an empty array and is filled from the result of `traverse`.
parents: FileParent[];
// The raw file metadata.
file: File;
}
/**
 * Collect every non-trashed Google Drive file owned by `email`.
 *
 * Pages through `files.list` until no `nextPageToken` is returned and appends
 * each page to `files` in the order the API returns it (the request asks for
 * ordering by `quotaBytesUsed`).
 *
 * @param email Owner user email; escaped before being embedded in the query
 * @param drive Google Drive client
 * @param pageToken Token of the page to start from; pass '' to start at the first page
 * @param files Output array the listed files are appended to (mutated in place)
 * @returns Resolves once the last page has been appended
 * @throws Whatever `drive.files.list` rejects with
 */
async function listFiles(email: string, drive: drive_v3.Drive, pageToken: string, files: File[]): Promise<void> {
  // Backslashes and single quotes must be escaped inside a quoted query value,
  // otherwise an address such as o'brien@example.com produces an invalid query.
  const escapedEmail = email.replace(/\\/g, '\\\\').replace(/'/g, "\\'");

  // An empty token means "first page": omit the parameter instead of sending ''.
  let nextPageToken: string | undefined = pageToken || undefined;

  do {
    const res = await drive.files.list({
      orderBy: 'quotaBytesUsed',
      pageSize: 1000,
      pageToken: nextPageToken,
      // https://developers.google.com/drive/api/v3/search-files#node.js
      q: `not trashed and '${escapedEmail}' in owners`,
      spaces: 'drive',
      // https://developers.google.com/drive/api/v3/reference/files
      fields: 'nextPageToken, files(id, name, size, md5Checksum, createdTime, modifiedTime, parents, ownedByMe, quotaBytesUsed)',
    });

    if (res.data.files) {
      files.push(...res.data.files);
    }

    // Iterating instead of recursing keeps the stack flat for very large drives.
    nextPageToken = res.data.nextPageToken || undefined;
  } while (nextPageToken);
}
/**
 * Build the ancestor path of a file, nearest parent first.
 *
 * Only the first entry of each `parents` list is followed. The walk stops when
 * an item has no parents, when an ancestor already carries a non-empty computed
 * path (which is appended as-is), or when an id repeats.
 *
 * @param fileInfo File whose ancestors should be resolved
 * @param filesMap Known files keyed by id
 * @param rootFolder The Drive root folder; the entry with its id is flagged `root: true`
 * @returns Ancestors from the direct parent upwards; empty when the file has no parents
 */
function traverse(fileInfo: FileInfo, filesMap: Map<string, FileInfo>, rootFolder: File): FileParent[] {
  const chain: FileParent[] = [];
  // Guards against a malformed, cyclic parent chain.
  const visited = new Set<string>();
  let nextId: string | undefined = fileInfo.file.parents?.[0];

  while (nextId !== undefined && !visited.has(nextId)) {
    const id: string = nextId;
    visited.add(id);

    const isRoot = id === rootFolder.id;
    // An ancestor missing from the map is either the root folder or an item
    // that was filtered out by the original listing query.
    const parent: FileInfo = filesMap.get(id) ?? {
      id,
      file: isRoot ? rootFolder : { id, name: '???' },
      parents: [],
    };

    chain.push({ id, name: parent.file.name ?? '???', root: isRoot });

    // A non-empty `parents` means this ancestor's path was already resolved.
    // An empty one is ambiguous (every entry starts as []), so keep walking
    // through the raw parent ids instead of stopping after one level.
    if (parent.parents.length > 0) {
      chain.push(...parent.parents);
      break;
    }

    nextId = parent.file.parents?.[0];
  }

  return chain;
}
/**
 * Translate a failed Drive call into an HTTP response.
 * "Invalid Credentials" becomes 401; anything else becomes 500 with the error
 * message and, when present, the `errors` payload attached to the error.
 */
function sendDriveError(res: NextApiResponse, error: unknown) {
  const info = (typeof error === 'object' && error !== null ? error : {}) as {
    message?: unknown;
    errors?: unknown;
  };
  const message = typeof info.message === 'string' ? info.message : String(error);
  if (message === 'Invalid Credentials') {
    return res.status(401).json({ error: 'Unauthorized' });
  }
  return res.status(500).json({ error: message, details: info.errors });
}

/**
 * API route: find duplicate files in the signed-in user's Google Drive.
 *
 * Lists every non-trashed file owned by the user, groups the ones that use
 * quota by MD5 checksum and responds with the groups that contain more than
 * one file. Each reported file carries its ancestor path in `parents`.
 *
 * Responses:
 * - 401 `{ error: 'Unauthorized' }` when there is no session token or Google
 *   rejects the credentials
 * - 500 `{ error, details }` for any other Drive failure
 * - 200 with the duplicate groups
 */
export default async (req: NextApiRequest, res: NextApiResponse) => {
  const jwtToken = (await jwt.getToken({ req, secret, encryption: true })) as JwtToken | null;
  if (!jwtToken) {
    return res.status(401).json({ error: 'Unauthorized' });
  }

  const auth = new google.auth.OAuth2();
  auth.setCredentials({ access_token: jwtToken.accessToken });
  const drive = google.drive({ version: 'v3', auth });

  const files: File[] = [];
  let rootFolder: File;
  try {
    // Get root folder (needed to flag the top of each ancestor path)
    const rootRes = await drive.files.get({ fileId: 'root' });
    rootFolder = rootRes.data;
    // List files
    await listFiles(jwtToken.email, drive, '', files);
  } catch (error) {
    return sendDriveError(res, error);
  }

  // Map files by id with empty full path. Entries without an id cannot be
  // addressed as parents or reported meaningfully, so they are skipped.
  const filesMap = new Map<string, FileInfo>();
  for (const file of files) {
    if (file.id) {
      filesMap.set(file.id, { id: file.id, file, parents: [] });
    }
  }

  // Group files by md5 checksum. Items without a checksum (e.g. folders and
  // Drive-native documents) are skipped: grouping them under a shared
  // `undefined` key would report unrelated files as duplicates of each other.
  const groups = new Map<string, FileInfo[]>();
  filesMap.forEach((info) => {
    const checksum = info.file.md5Checksum;
    if (!checksum || info.file.quotaBytesUsed === '0') {
      return;
    }
    const group = groups.get(checksum);
    if (group) {
      group.push(info);
    } else {
      groups.set(checksum, [info]);
    }
  });

  // Filter groups with more than one item
  const filteredGroups = Array.from(groups.values()).filter((g: FileInfo[]) => g.length > 1);

  // Resolve ancestor paths only for the files that are actually reported.
  for (const group of filteredGroups) {
    for (const info of group) {
      info.parents = traverse(info, filesMap, rootFolder);
    }
  }

  // NOTE(review): the payload is stringified before being handed to `json`,
  // so clients receive a JSON-encoded string. Kept as-is because existing
  // consumers may rely on it; confirm before changing.
  return res.status(200).json(JSON.stringify(filteredGroups, null, 2));
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment