penpot/backend/src/app/tasks/file_media_gc.clj
2022-02-12 16:08:46 +01:00

140 lines
4.2 KiB
Clojure

;; This Source Code Form is subject to the terms of the Mozilla Public
;; License, v. 2.0. If a copy of the MPL was not distributed with this
;; file, You can obtain one at http://mozilla.org/MPL/2.0/.
;;
;; Copyright (c) UXBOX Labs SL
(ns app.tasks.file-media-gc
"A maintenance task that is responsible to purge the unused media
objects from files. A file is eligible to be garbage collected
after some period of inactivity (the default threshold is 72h)."
(:require
[app.common.logging :as l]
[app.common.pages.helpers :as cph]
[app.common.pages.migrations :as pmg]
[app.db :as db]
[app.util.blob :as blob]
[app.util.time :as dt]
[clojure.spec.alpha :as s]
[integrant.core :as ig]))
(declare process-file)
(declare retrieve-candidates)
(s/def ::max-age ::dt/duration)
(defmethod ig/pre-init-spec ::handler [_]
(s/keys :req-un [::db/pool ::max-age]))
(defmethod ig/init-key ::handler
[_ {:keys [pool] :as cfg}]
(fn [_]
(db/with-atomic [conn pool]
(let [cfg (assoc cfg :conn conn)]
(loop [n 0]
(let [files (retrieve-candidates cfg)]
(if (seq files)
(do
(run! (partial process-file cfg) files)
(recur (+ n (count files))))
(do
(l/debug :msg "finished processing files" :processed n)
{:processed n}))))))))
(def ^:private
sql:retrieve-candidates-chunk
"select f.id,
f.data,
extract(epoch from (now() - f.modified_at))::bigint as age
from file as f
where f.has_media_trimmed is false
and f.modified_at < now() - ?::interval
order by f.modified_at asc
limit 10
for update skip locked")
(defn- retrieve-candidates
[{:keys [conn max-age] :as cfg}]
(let [interval (db/interval max-age)]
(->> (db/exec! conn [sql:retrieve-candidates-chunk interval])
(mapv (fn [{:keys [age] :as row}]
(assoc row :age (dt/duration {:seconds age})))))))
(def ^:private
collect-media-xf
(comp
(map :objects)
(mapcat vals)
(keep (fn [{:keys [type] :as obj}]
(case type
:path (get-in obj [:fill-image :id])
:image (get-in obj [:metadata :id])
nil)))))
(defn- collect-used-media
[data]
(let [pages (concat
(vals (:pages-index data))
(vals (:components data)))]
(-> #{}
(into collect-media-xf pages)
(into (keys (:media data))))))
(def ^:private
collect-frames-xf
(comp
(map :objects)
(mapcat vals)
(filter cph/frame-shape?)
(keep :id)))
(defn- collect-frames
[data]
(let [pages (concat
(vals (:pages-index data))
(vals (:components data)))]
(into #{} collect-frames-xf pages)))
(defn- process-file
[{:keys [conn] :as cfg} {:keys [id data age] :as file}]
(let [data (-> (blob/decode data)
(assoc :id id)
(pmg/migrate-data))]
(let [used (collect-used-media data)
unused (->> (db/query conn :file-media-object {:file-id id})
(remove #(contains? used (:id %))))]
(l/debug :action "processing file"
:id id
:age age
:to-delete (count unused))
;; Mark file as trimmed
(db/update! conn :file
{:has-media-trimmed true}
{:id id})
(doseq [mobj unused]
(l/debug :action "deleting media object"
:id (:id mobj)
:media-id (:media-id mobj)
:thumbnail-id (:thumbnail-id mobj))
;; NOTE: deleting the file-media-object in the database
;; automatically marks as touched the referenced storage
;; objects. The touch mechanism is needed because many files can
;; point to the same storage objects and we can't just delete
;; them.
(db/delete! conn :file-media-object {:id (:id mobj)})))
(let [sql (str "delete from file_frame_thumbnail "
" where file_id = ? and not (frame_id = ANY(?))")
ids (->> (collect-frames data)
(db/create-array conn "uuid"))]
;; delete the unused frame thumbnails
(db/exec! conn [sql (:id file) ids]))
nil))