From 8461b31642e41ced0d53e9ec94b698c051753e5e Mon Sep 17 00:00:00 2001
From: Laura Hausmann
Date: Tue, 16 May 2023 17:33:37 +0200
Subject: [PATCH] Add scripts

---
Review note: the "index" lines are omitted because the file contents were
revised after the original commit, so the recorded blob ids no longer match.

 cleanup-media.zsh                | 44 ++++++++++++++++++++++++++++++++++++++++++++
 cleanup-stale-minio-files.zsh    | 21 +++++++++++++++++++++
 fix-broken-media-cache-links.zsh | 30 ++++++++++++++++++++++++++++++
 3 files changed, 95 insertions(+)
 create mode 100755 cleanup-media.zsh
 create mode 100755 cleanup-stale-minio-files.zsh
 create mode 100755 fix-broken-media-cache-links.zsh

diff --git a/cleanup-media.zsh b/cleanup-media.zsh
new file mode 100755
--- /dev/null
+++ b/cleanup-media.zsh
@@ -0,0 +1,44 @@
+#!/bin/zsh
+#
+# Turns cached media from other hosts that is older than 30 days back into
+# plain links to the remote URI and deletes the cached copies from object
+# storage. Files that are in use as an avatar or banner are left alone.
+
+date=$(date -d "-30 days" "+%F %T.000") || exit 1
+
+# One filter for both the SELECT and the UPDATE so they target the same rows
+filter="\"isLink\" = 'FALSE' AND \"storedInternal\" = 'FALSE' AND \"userHost\" IS NOT NULL AND \"userHost\" <> 'estrogen.network' AND \"createdAt\" < '$date' AND NOT EXISTS (SELECT 1 FROM \"user\" WHERE \"user\".\"avatarId\" = \"drive_file\".\"id\") AND NOT EXISTS (SELECT 1 FROM \"user\" WHERE \"user\".\"bannerId\" = \"drive_file\".\"id\")"
+
+# Get all files so we can delete them later
+rows=$(sudo -iu postgres psql -d calckey -c "SELECT \"url\", \"thumbnailUrl\" FROM \"public\".\"drive_file\" WHERE $filter" -t -A --csv) || {
+  echo "cleanup-media: listing files failed, nothing was changed" >&2
+  exit 1
+}
+
+# Collect the URLs in an array; building a "\n"-joined string for echo would
+# let echo rewrite any backslash sequence that occurs inside a URL
+files=()
+for row in ${(f)rows}; do
+  parts=("${(@s:,:)row}")
+  files+=("${parts[1]}")
+  if [[ -n ${parts[2]} ]]; then
+    files+=("${parts[2]}")
+  fi
+done
+
+if (( ${#files} == 0 )); then
+  echo "cleanup-media: nothing to clean up" >&2
+  exit 0
+fi
+
+# Replace file URLs with the remote URI. ON_ERROR_STOP makes psql exit
+# non-zero if the statement fails, so objects are only deleted once the
+# database no longer points at them
+cleanup_query="UPDATE \"public\".\"drive_file\" SET \"isLink\" = 'TRUE', \"thumbnailUrl\" = NULL, \"url\" = \"uri\", \"accessKey\" = replace(\"accessKey\", 'drive/', ''), \"thumbnailAccessKey\" = replace(\"thumbnailAccessKey\", 'drive/', '') WHERE $filter"
+if ! printf '%s\n' "$cleanup_query" | sudo -iu postgres psql -d calckey -v ON_ERROR_STOP=1; then
+  echo "cleanup-media: database update failed, object storage left untouched" >&2
+  exit 1
+fi
+
+# Cleanup object storage
+printf '%s\n' "${files[@]}" | awk NF | sed 's|^https://e2net\.cdn\.ztn\.sh|garage/e2net|' | grep -P '^garage/e2net/drive/.{16,}' | mcli rm --stdin --force
diff --git a/cleanup-stale-minio-files.zsh b/cleanup-stale-minio-files.zsh
new file mode 100755
--- /dev/null
+++ b/cleanup-stale-minio-files.zsh
@@ -0,0 +1,21 @@
+#!/bin/zsh
+#
+# Deletes objects below garage/e2net/drive that no drive_file row references
+# any more. Objects named webpublic-* are never considered for deletion.
+
+# Get all files the database still references
+rows=$(sudo -iu postgres psql -d calckey -c "SELECT \"url\" FROM \"public\".\"drive_file\" WHERE \"isLink\" = 'FALSE' AND \"storedInternal\" = 'FALSE' UNION ALL SELECT \"thumbnailUrl\" FROM \"public\".\"drive_file\" WHERE \"isLink\" = 'FALSE' AND \"storedInternal\" = 'FALSE'" -t -A --csv) || {
+  echo "cleanup-stale-minio-files: listing files failed, nothing was deleted" >&2
+  exit 1
+}
+files=$(printf '%s\n' "$rows" | sed 's|^https://e2net\.cdn\.ztn\.sh/drive/||' | awk NF | sort)
+
+# With an empty reference list every object would look stale and be deleted
+if [[ -z $files ]]; then
+  echo "cleanup-stale-minio-files: database returned no files, refusing to delete anything" >&2
+  exit 1
+fi
+
+miniofiles=$(mcli ls garage/e2net/drive | grep -Po '[^\s]*$' | grep -Pv '^webpublic-' | sort)
+
+comm -23 <(printf '%s\n' "$miniofiles") <(printf '%s\n' "$files") | sed 's|^|garage/e2net/drive/|' | grep -P '^garage/e2net/drive/.{16,}' | mcli rm --stdin --force
diff --git a/fix-broken-media-cache-links.zsh b/fix-broken-media-cache-links.zsh
new file mode 100755
--- /dev/null
+++ b/fix-broken-media-cache-links.zsh
@@ -0,0 +1,30 @@
+#!/bin/zsh
+#
+# Finds drive_file rows from other hosts whose cached object is missing from
+# object storage and turns them back into plain links to the remote URI.
+
+# Get the cached files from other hosts that the database knows about
+rows=$(sudo -iu postgres psql -d calckey -c "SELECT \"url\" FROM \"public\".\"drive_file\" WHERE \"isLink\" = 'FALSE' AND \"storedInternal\" = 'FALSE' AND \"userHost\" IS NOT NULL AND \"userHost\" <> 'estrogen.network'" -t -A --csv) || {
+  echo "fix-broken-media-cache-links: listing files failed, nothing was changed" >&2
+  exit 1
+}
+files=$(printf '%s\n' "$rows" | sed 's|^https://e2net\.cdn\.ztn\.sh/drive/||' | awk NF | sort)
+
+# With an empty object listing every cached file would look like a dead link
+miniofiles=$(mcli ls garage/e2net/drive | grep -Po '[^\s]*$' | grep -Pv '^webpublic-' | sort)
+if [[ -z $miniofiles ]]; then
+  echo "fix-broken-media-cache-links: object listing is empty, refusing to rewrite any rows" >&2
+  exit 1
+fi
+
+deadlinks=$(comm -13 <(printf '%s\n' "$miniofiles") <(printf '%s\n' "$files") | grep -P '.{16,}' | sed 's|^|https://e2net.cdn.ztn.sh/drive/|')
+if [[ -z $deadlinks ]]; then
+  exit 0
+fi
+
+# Send all statements through a single psql session instead of one per link.
+# Single quotes are doubled so a quote inside a URL cannot break the statement
+quote="'"
+while IFS= read -r file; do
+  printf '%s\n' "UPDATE \"public\".\"drive_file\" SET \"isLink\" = 'TRUE', \"thumbnailUrl\" = NULL, \"url\" = \"uri\", \"accessKey\" = replace(\"accessKey\", 'drive/', ''), \"thumbnailAccessKey\" = replace(\"thumbnailAccessKey\", 'drive/', '') WHERE \"url\" = '${file//$quote/$quote$quote}';"
+done <<< "$deadlinks" | sudo -iu postgres psql -d calckey