From 30ea4ad79d8983f27232ac25781ae1336fc03320 Mon Sep 17 00:00:00 2001 From: Naomi Carrigan Date: Tue, 30 Dec 2025 18:17:27 -0800 Subject: [PATCH] feat: more s3 scripts --- src/s3/bulkUpload.ts | 213 ++++++++++++++++++++++++++++ src/s3/correctContentType.ts | 259 +++++++++++++++++++++++++++++++++++ 2 files changed, 472 insertions(+) create mode 100644 src/s3/bulkUpload.ts create mode 100644 src/s3/correctContentType.ts diff --git a/src/s3/bulkUpload.ts b/src/s3/bulkUpload.ts new file mode 100644 index 0000000..ce9b45c --- /dev/null +++ b/src/s3/bulkUpload.ts @@ -0,0 +1,213 @@ +/** + * @copyright NHCarrigan + * @license Naomi's Public License + * @author Naomi Carrigan + */ +import { readFile, readdir } from "node:fs/promises"; +import { join, relative } from "node:path"; +import { S3Client, PutObjectCommand } from "@aws-sdk/client-s3"; +import { confirm } from "@inquirer/prompts"; +import { SingleBar, Presets } from "cli-progress"; + +const accessKeyId = process.env.AWS_ACCESS_KEY_ID; +const secretAccessKey = process.env.AWS_SECRET_ACCESS_KEY; + +if (accessKeyId === undefined || secretAccessKey === undefined) { + throw new Error("AWS_ACCESS_KEY_ID or AWS_SECRET_ACCESS_KEY is not set"); +} + +const dataDirectory = join(import.meta.dirname, "..", "..", "data"); + +/** + * Recursively gets all files in a directory. + * @param directory - The directory to scan. + * @param baseDirectory - The base directory for relative paths. + * @returns An array of file paths relative to baseDirectory. 
/**
 * Recursively gets all files in a directory.
 * @param directory - The directory to scan.
 * @param baseDirectory - The base directory for relative paths.
 * @returns An array of file paths relative to baseDirectory. Paths always
 * use "/" as the separator, including on Windows.
 */
const getAllFiles = async(
  directory: string,
  baseDirectory: string,
): Promise<Array<string>> => {
  const files: Array<string> = [];
  const entries = await readdir(directory, { withFileTypes: true });

  for (const entry of entries) {
    const fullPath = join(directory, entry.name);

    if (entry.isDirectory()) {
      const subFiles = await getAllFiles(fullPath, baseDirectory);
      for (const subFile of subFiles) {
        files.push(subFile);
      }
    } else if (entry.isFile()) {
      const relativePath = relative(baseDirectory, fullPath);

      /*
       * relative() produces "\" separators on Windows. The tree builder
       * splits on "/" and the same strings are used as object keys, so
       * normalise them here once.
       */
      files.push(
        process.platform === "win32"
          ? relativePath.split("\\").join("/")
          : relativePath,
      );
    }
    // Entries that are neither directories nor regular files are skipped.
  }

  return files;
};

/**
 * Type guard to check if a value is a record.
 * @param value - The value to check.
 * @returns Whether the value is a non-null, non-array object.
 */
const isRecord = (value: unknown): value is Record<string, unknown> => {
  return typeof value === "object" && value !== null && !Array.isArray(value);
};

/**
 * Formats a tree node into a string representation.
 * Directories (nested records) are listed before files, and each group is
 * ordered with localeCompare.
 * @param node - The tree node to format. Record values are directories,
 * anything else is rendered as a file.
 * @param prefix - The prefix for the current level.
 * @param _isLast - Whether this is the last entry (unused but kept for API consistency).
 * @returns The formatted tree string, one entry per line, each line
 * terminated by a newline. An empty node yields an empty string.
 */
const formatTree = (
  node: Record<string, unknown>,
  prefix = "",
  _isLast = true,
): string => {
  /*
   * Compare the entry values directly instead of re-reading node[name]:
   * a lookup such as node["__proto__"] on an ordinary object would return
   * the prototype rather than the stored value.
   */
  const entries = Object.entries(node).sort(
    ([ nameA, valueA ], [ nameB, valueB ]) => {
      const aIsDirectory = isRecord(valueA);
      const bIsDirectory = isRecord(valueB);

      // Directories come first
      if (aIsDirectory !== bIsDirectory) {
        return aIsDirectory
          ? -1
          : 1;
      }
      return nameA.localeCompare(nameB);
    },
  );

  const lines: Array<string> = [];

  entries.forEach(([ name, value ], index) => {
    const isLastEntry = index === entries.length - 1;
    const connector = isLastEntry
      ? "└── "
      : "├── ";

    lines.push(`${prefix}${connector}${name}\n`);

    if (isRecord(value)) {
      // Continuation prefixes are as wide as the connectors (4 columns).
      const nextPrefix = isLastEntry
        ? "    "
        : "│   ";
      lines.push(formatTree(value, `${prefix}${nextPrefix}`, isLastEntry));
    }
  });

  return lines.join("");
};

/**
 * Builds a tree structure from file paths.
 * @param files - Array of relative file paths using "/" as the separator.
 * Empty path segments are ignored.
 * @returns A tree structure as a string.
 */
const buildFileTree = (files: Array<string>): string => {
  /*
   * Nodes are prototype-less objects so that path segments such as
   * "__proto__" or "constructor" are stored as plain keys and can never
   * resolve to (or write through to) Object.prototype.
   */
  const createNode = (): Record<string, unknown> => {
    const node: Record<string, unknown> = Object.create(null);
    return node;
  };

  const tree = createNode();

  for (const file of files) {
    const parts = file.split("/").filter((part) => {
      return part !== "";
    });
    let current = tree;

    parts.forEach((part, index) => {
      const existing = current[part];

      if (index === parts.length - 1) {
        // Last part is a file; never clobber a directory already recorded.
        if (!isRecord(existing)) {
          current[part] = null;
        }
        return;
      }

      // It's a directory: reuse the node, or create/replace a non-directory.
      if (isRecord(existing)) {
        current = existing;
        return;
      }
      const child = createNode();
      current[part] = child;
      current = child;
    });
  }

  return formatTree(tree);
};
/*
 * Main upload flow: list every file under the data directory, show the
 * tree, ask for confirmation, then upload the files one at a time.
 */
const files = await getAllFiles(dataDirectory, dataDirectory);

if (files.length === 0) {
  console.log("No files found in the data directory.");
  process.exit(0);
}

console.log(`Found ${files.length.toString()} file(s) to upload:\n`);
console.log(buildFileTree(files));
console.log(`\nTotal: ${files.length.toString()} file(s)\n`);

// Defaults to "no" so that pressing enter never starts an upload by accident.
const shouldProceed = await confirm({
  default: false,
  message: "Do you want to proceed with uploading all these files?",
});

if (!shouldProceed) {
  console.log("Upload cancelled.");
  process.exit(0);
}

const s3 = new S3Client({
  credentials: { accessKeyId, secretAccessKey },
  endpoint: "https://hel1.your-objectstorage.com",
  region: "hel1",
});

const bar = new SingleBar({}, Presets.shades_classic);
bar.start(files.length, 0);

let successCount = 0;
let errorCount = 0;

// Uploads run sequentially; a failure is logged and counted, not fatal.
for (const file of files) {
  try {
    const filePath = join(dataDirectory, file);
    const fileContent = await readFile(filePath);

    const command = new PutObjectCommand({
      // eslint-disable-next-line @typescript-eslint/naming-convention -- AWS SDK
      Body: fileContent,
      // eslint-disable-next-line @typescript-eslint/naming-convention -- AWS SDK
      Bucket: "nhcarrigan",
      // The path relative to the data directory is used as the object key.
      // eslint-disable-next-line @typescript-eslint/naming-convention -- AWS SDK
      Key: file,
    });

    await s3.send(command);
    successCount = successCount + 1;
  } catch (error) {
    console.error(`\nError uploading ${file}:`, error);
    errorCount = errorCount + 1;
  }

  // The bar advances for failures too, so it always reaches the total.
  bar.increment();
}

bar.stop();

console.log(
  `\nUpload complete! ${successCount.toString()} succeeded, ${errorCount.toString()} failed.`,
);

// Report partial failure to the caller (shell scripts, CI) via the exit code.
if (errorCount > 0) {
  process.exitCode = 1;
}
/**
 * MIME type mapping for file extensions.
 * Keys are lower-case extensions including the leading dot; values are
 * the bare media type without parameters. Extensions that are not listed
 * resolve to undefined when looked up.
 */
/* eslint-disable @typescript-eslint/naming-convention -- File extensions */
/* eslint-disable stylistic/key-spacing -- Alignment for readability */
const mimeTypes: Record<string, string> = {
  ".7z":    "application/x-7z-compressed",
  ".aac":   "audio/aac",
  ".avi":   "video/x-msvideo",
  ".avif":  "image/avif",
  ".bmp":   "image/bmp",
  ".css":   "text/css",
  ".csv":   "text/csv",
  ".doc":   "application/msword",
  ".docx":
    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  ".eot":   "application/vnd.ms-fontobject",
  ".flac":  "audio/flac",
  ".gif":   "image/gif",
  ".gz":    "application/gzip",
  ".htm":   "text/html",
  ".html":  "text/html",
  ".ico":   "image/x-icon",
  ".jpeg":  "image/jpeg",
  ".jpg":   "image/jpeg",
  ".js":    "text/javascript",
  ".json":  "application/json",
  ".md":    "text/markdown",
  ".mjs":   "text/javascript",
  ".mkv":   "video/x-matroska",
  ".mov":   "video/quicktime",
  ".mp3":   "audio/mpeg",
  ".mp4":   "video/mp4",
  ".ogg":   "audio/ogg",
  ".otf":   "font/otf",
  ".pdf":   "application/pdf",
  ".png":   "image/png",
  ".ppt":   "application/vnd.ms-powerpoint",
  ".pptx":
    "application/vnd.openxmlformats-officedocument.presentationml.presentation",
  ".rar":   "application/x-rar-compressed",
  ".svg":   "image/svg+xml",
  ".tar":   "application/x-tar",
  ".tif":   "image/tiff",
  ".tiff":  "image/tiff",
  ".ttf":   "font/ttf",
  ".txt":   "text/plain",
  ".wasm":  "application/wasm",
  ".wav":   "audio/wav",
  ".webm":  "video/webm",
  ".webp":  "image/webp",
  ".woff":  "font/woff",
  ".woff2": "font/woff2",
  ".xls":   "application/vnd.ms-excel",
  ".xlsx":
    "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  ".xml":   "application/xml",
  ".yaml":  "application/yaml",
  ".yml":   "application/yaml",
  ".zip":   "application/zip",
};
/* eslint-enable @typescript-eslint/naming-convention -- File extensions */
/* eslint-enable stylistic/key-spacing -- Alignment for readability */
/**
 * Gets the MIME type for a file based on its extension.
 * The extension is lower-cased before the lookup, so "PHOTO.JPG" and
 * "photo.jpg" resolve to the same type.
 * @param fileName - The file name or path.
 * @returns The MIME type, or undefined if unknown.
 */
const getMimeType = (fileName: string): string | undefined => {
  const extension = extname(fileName).toLowerCase();
  return mimeTypes[extension];
};

/**
 * Lists all objects in the S3 bucket recursively.
 * Follows NextContinuationToken until the listing is exhausted.
 * @returns An array of object keys.
 */
const listAllObjects = async(): Promise<Array<string>> => {
  const objects: Array<string> = [];
  let continuationToken: string | undefined = undefined;

  do {
    const command = new ListObjectsV2Command({
      // eslint-disable-next-line @typescript-eslint/naming-convention -- AWS SDK
      Bucket: bucket,
      // eslint-disable-next-line @typescript-eslint/naming-convention -- AWS SDK
      ContinuationToken: continuationToken,
    });

    /*
     * The explicit annotation breaks the inference cycle between
     * `response` and `continuationToken` without a type assertion.
     */
    const response: ListObjectsV2CommandOutput = await s3.send(command);

    for (const object of response.Contents ?? []) {
      if (object.Key !== undefined) {
        objects.push(object.Key);
      }
    }

    continuationToken = response.NextContinuationToken;
  } while (continuationToken !== undefined);

  return objects;
};

/**
 * Gets the content type of an object.
 * Issues a HEAD request; any error from the request propagates to the caller.
 * @param key - The S3 object key to check.
 * @returns The content type, or undefined if not found.
 */
const getObjectContentType = async(
  key: string,
): Promise<string | undefined> => {
  const command = new HeadObjectCommand({
    // eslint-disable-next-line @typescript-eslint/naming-convention -- AWS SDK
    Bucket: bucket,
    // eslint-disable-next-line @typescript-eslint/naming-convention -- AWS SDK
    Key: key,
  });

  const response = await s3.send(command);
  return response.ContentType;
};
/**
 * Updates the content type of an object.
 * The object is copied onto itself with the REPLACE metadata directive.
 * REPLACE discards metadata that is not re-sent, so the current headers
 * and user metadata are read first and passed along with the copy.
 * @param key - The S3 object key to update.
 * @param contentType - The new content type to set.
 */
const updateObjectContentType = async(
  key: string,
  contentType: string,
): Promise<void> => {
  /* eslint-disable @typescript-eslint/naming-convention -- AWS SDK */
  const existing = await s3.send(
    new HeadObjectCommand({ Bucket: bucket, Key: key }),
  );

  /*
   * CopySource must be URL-encoded. Encode each path segment so the "/"
   * separators survive while spaces, "+", "#", "?" and non-ASCII
   * characters are escaped.
   */
  const encodedKey = key.
    split("/").
    map((segment) => {
      return encodeURIComponent(segment);
    }).
    join("/");

  // NOTE(review): object ACLs are presumably not carried over by a copy —
  // confirm whether this bucket relies on per-object ACLs.
  const command = new CopyObjectCommand({
    Bucket:             bucket,
    CacheControl:       existing.CacheControl,
    ContentDisposition: existing.ContentDisposition,
    ContentEncoding:    existing.ContentEncoding,
    ContentLanguage:    existing.ContentLanguage,
    ContentType:        contentType,
    CopySource:         `${bucket}/${encodedKey}`,
    Key:                key,
    Metadata:           existing.Metadata,
    MetadataDirective:  "REPLACE",
  });
  /* eslint-enable @typescript-eslint/naming-convention -- AWS SDK */

  await s3.send(command);
};

console.log("Listing all objects in S3 bucket...");
const allObjects = await listAllObjects();
console.log(`Found ${allObjects.length.toString()} object(s) to check.\n`);

let correctedCount = 0;
let skippedCount = 0;
let errorCount = 0;

for (const objectKey of allObjects) {
  // Skip directory markers (keys ending with /)
  if (objectKey.endsWith("/")) {
    console.log(`Skipping ${objectKey} (directory marker)`);
    skippedCount = skippedCount + 1;
    continue;
  }

  // Resolve the expected type first: unknown extensions need no HEAD request.
  const expectedContentType = getMimeType(objectKey);

  if (expectedContentType === undefined) {
    console.log(`Skipping ${objectKey} (unknown file type)`);
    skippedCount = skippedCount + 1;
    continue;
  }

  let currentContentType: string | undefined;
  try {
    currentContentType = await getObjectContentType(objectKey);
  } catch (error) {
    const errorMessage = error instanceof Error
      ? error.message
      : String(error);
    console.error(
      `Error getting content type for ${objectKey}: ${errorMessage}`,
    );
    errorCount = errorCount + 1;
    continue;
  }

  /*
   * Compare only the media type itself. A stored value such as
   * "text/html; charset=utf-8" is already correct and must not be
   * replaced by the bare type, which would drop the charset.
   */
  const currentMediaType = currentContentType?.
    split(";")[0]?.
    trim().
    toLowerCase();

  // A missing type or "application/octet-stream" never equals a mapped type.
  if (currentMediaType === expectedContentType) {
    continue;
  }

  const message = `\nFile: ${objectKey}\nCurrent type: ${
    currentContentType ?? "undefined"
  }\nProposed type: ${expectedContentType}\n\nUpdate this file's content type?`;

  const shouldUpdate = await confirm({
    default: true,
    message: message,
  });

  if (!shouldUpdate) {
    console.log(`✗ Skipped ${objectKey}`);
    skippedCount = skippedCount + 1;
    continue;
  }

  try {
    await updateObjectContentType(objectKey, expectedContentType);
    console.log(`✓ Updated ${objectKey}`);
    correctedCount = correctedCount + 1;
  } catch (error) {
    const errorMessage = error instanceof Error
      ? error.message
      : String(error);
    console.error(
      `Error updating ${objectKey}: ${errorMessage}`,
    );
    errorCount = errorCount + 1;
  }
}

console.log(
  `\nComplete! ${correctedCount.toString()} file(s) corrected, ${
    skippedCount.toString()
  } file(s) skipped, ${errorCount.toString()} error(s).`,
);

// Report failures to the caller (shell scripts, CI) via the exit code.
if (errorCount > 0) {
  process.exitCode = 1;
}