From f17fbbf5467e2e91c14327d8962ee4900965cdd4 Mon Sep 17 00:00:00 2001 From: Andrey Antukh Date: Mon, 20 Apr 2026 18:04:38 +0000 Subject: [PATCH] :tada: Add telemetry anonymous event collection When the :telemetry flag is ON and :audit-log is OFF, frontend and backend events are stored anonymously in the audit_log table and shipped in compressed batches by the existing telemetry task. Stored rows strip props and ip-addr but preserve the profile-id, since Penpot profile UUIDs are already anonymous random identifiers with no PII attached. Timestamps are truncated to day precision to avoid leaking exact event timing. Only a safe subset of context fields is preserved: - Backend events: initiator, version, client-version, client-user-agent - Frontend events: browser, os, locale, screen metrics and event-origin Backend (app.loggers.audit): - Store backend telemetry events with source='telemetry', the safe context subset described above, and timestamps truncated to day precision via ct/truncate. Frontend RPC (app.rpc.commands.audit): - Add filter-safe-context to retain only the allowed frontend context fields. - Add xf:map-telemetry-event-row transducer that anonymises frontend events before inserting them. - push-audit-events now accepts events when telemetry is active. Telemetry task (app.tasks.telemetry): - gc-telemetry-events: enforces a 100,000-row safety cap by dropping the oldest rows first. - collect-and-send-audit-events: loop that fetches up to 10,000 rows per iteration, encodes and sends each page, deletes it on success, and stops immediately on failure leaving remaining rows for retry. - send-event-batch: POSTs a fressian+zstd batch (base64-encoded via blob/encode-str) to the telemetry endpoint, including instance-id and profile-id per event. - delete-sent-events: deletes successfully shipped rows by id. Blob utilities (app.util.blob): - Add blob/encode-str and blob/decode-str: convenience wrappers that combine blob encoding with base64 for JSON-safe string transport. Database: - Add index on audit_log (source, created_at ASC) to support efficient queries for telemetry batch collection. Tests (backend-tests.tasks-telemetry-test): - 21 tests, 94 assertions covering all code paths: disabled/enabled telemetry, no-events no-op, happy-path batch send and delete, failure retention, payload anonymity, context stripping, timestamp day precision, batch encoding round-trip, multi-page iteration, GC cap enforcement. Signed-off-by: Andrey Antukh --- backend/src/app/config.clj | 6 + backend/src/app/loggers/audit.clj | 26 +- .../0145-add-audit-log-telemetry-index.sql | 5 + backend/src/app/rpc/commands/audit.clj | 83 ++- backend/src/app/tasks/telemetry.clj | 230 +++++--- backend/src/app/util/blob.clj | 13 + .../backend_tests/tasks_telemetry_test.clj | 518 +++++++++++++++++- 7 files changed, 772 insertions(+), 109 deletions(-) create mode 100644 backend/src/app/migrations/sql/0145-add-audit-log-telemetry-index.sql diff --git a/backend/src/app/config.clj b/backend/src/app/config.clj index d0a80f6515..fae65d3bd8 100644 --- a/backend/src/app/config.clj +++ b/backend/src/app/config.clj @@ -317,6 +317,12 @@ (or (c/get config :file-clean-delay) (ct/duration {:days 2}))) +(def ^:dynamic telemetry-enabled? + "True when telemetry is active, either via the :telemetry feature + flag or the legacy :telemetry-enabled config key." + (or (contains? flags :telemetry) + (c/get config :telemetry-enabled))) + (defn get "A configuration getter. Helps code be more testable." ([key] diff --git a/backend/src/app/loggers/audit.clj b/backend/src/app/loggers/audit.clj index c374b432f9..56051e397e 100644 --- a/backend/src/app/loggers/audit.clj +++ b/backend/src/app/loggers/audit.clj @@ -269,17 +269,33 @@ ;; this case we just retry the operation. (append-audit-entry cfg params)) - (when (and (or (contains? cf/flags :telemetry) - (cf/get :telemetry-enabled)) + (when (and cf/telemetry-enabled? (not (contains? cf/flags :audit-log))) ;; NOTE: this operation may cause primary key conflicts on inserts ;; because of the timestamp precission (two concurrent requests), in ;; this case we just retry the operation. ;; - ;; NOTE: this is only executed when general audit log is disabled - (let [params (-> params + ;; NOTE: this is only executed when general audit log is disabled; + ;; events are stored stripped of props and ip-addr, tagged with + ;; source="telemetry" so the telemetry task can collect and ship + ;; them. The profile-id is preserved (UUIDs are already anonymous + ;; random identifiers). Only a safe subset of context fields is + ;; kept: initiator, version, client-version and client-user-agent. + ;; Timestamps are truncated to day precision to avoid leaking exact + ;; event timing. + (let [tday (ct/truncate tnow :days) + safe-context (-> (:context params {}) + (select-keys [:initiator + :version + :client-version + :client-user-agent])) + params (-> params + (assoc :source "telemetry") (assoc :props {}) - (assoc :context {}))] + (assoc :context safe-context) + (assoc :ip-addr (db/inet "0.0.0.0")) + (assoc :created-at tday) + (assoc :tracked-at tday))] (append-audit-entry cfg params))) (when (and (contains? cf/flags :webhooks) diff --git a/backend/src/app/migrations/sql/0145-add-audit-log-telemetry-index.sql b/backend/src/app/migrations/sql/0145-add-audit-log-telemetry-index.sql new file mode 100644 index 0000000000..12c59a6c83 --- /dev/null +++ b/backend/src/app/migrations/sql/0145-add-audit-log-telemetry-index.sql @@ -0,0 +1,5 @@ +-- Add index on audit_log (source, created_at) to support efficient +-- queries for the telemetry batch collection mode. + +CREATE INDEX IF NOT EXISTS audit_log__source__created_at__idx + ON audit_log (source, created_at ASC); diff --git a/backend/src/app/rpc/commands/audit.clj b/backend/src/app/rpc/commands/audit.clj index 757c4fa5cb..342b87b073 100644 --- a/backend/src/app/rpc/commands/audit.clj +++ b/backend/src/app/rpc/commands/audit.clj @@ -91,6 +91,50 @@ (sequence xform events))) +;; Context keys from the frontend that are safe to retain for telemetry: +;; they describe the browser/OS environment but cannot identify a user. +;; Session-linking keys (session, external-session-id) and file-specific +;; stats (file-stats) are intentionally excluded. +(def ^:private safe-context-keys + #{:version + :locale + :browser + :browser-version + :engine + :engine-version + :os + :os-version + :device-type + :device-arch + :screen-width + :screen-height + :screen-color-depth + :screen-orientation + :event-origin + :event-namespace + :event-symbol}) + +(defn- filter-safe-context + "Return only the anonymous, non-identifying context fields from an + event context map. Any key not in `safe-context-keys` is dropped." + [ctx] + (select-keys ctx safe-context-keys)) + +(def ^:private xf:map-telemetry-event-row + (comp + (map adjust-timestamp) + (map (fn [event] + (let [tday (ct/truncate (::audit/created-at event) :days)] + [(::audit/id event) + (::audit/name event) + "telemetry" + (::audit/type event) + tday + tday + (::audit/profile-id event) + (db/inet "0.0.0.0") + (db/tjson {}) + (db/tjson (filter-safe-context (::audit/context event {})))]))))) (defn- handle-events [{:keys [::db/pool] :as cfg} params] (let [events (get-events params)] @@ -102,9 +146,17 @@ (run! (partial loggers.db/emit cfg) events) (run! (partial loggers.mm/emit cfg) events)) - ;; Process and save events - (when (seq events) + ;; Process and save full audit events when audit-log flag is active + (when (and (seq events) (contains? cf/flags :audit-log)) (let [rows (sequence xf:map-event-row events)] + (db/insert-many! pool :audit-log event-columns rows))) + + ;; When telemetry is enabled (and audit-log is NOT), store anonymized + ;; frontend events so the telemetry task can ship them in batches. + (when (and (seq events) + (not (contains? cf/flags :audit-log)) + cf/telemetry-enabled?) + (let [rows (sequence xf:map-telemetry-event-row events)] (db/insert-many! pool :audit-log event-columns rows))))) (def ^:private valid-event-types @@ -138,17 +190,20 @@ ::doc/skip true ::doc/added "1.17"} [{:keys [::db/pool] :as cfg} params] - (if (or (db/read-only? pool) - (not (contains? cf/flags :audit-log))) - (do - (l/warn :hint "audit: http handler disabled or db is read-only") - (rph/wrap nil)) + (let [telemetry? cf/telemetry-enabled? + audit-log? (contains? cf/flags :audit-log) + enabled? (and (not (db/read-only? pool)) + (or audit-log? telemetry?))] + (if-not enabled? + (do + (l/warn :hint "audit: http handler disabled or db is read-only") + (rph/wrap nil)) - (do - (try - (handle-events cfg params) - (catch Throwable cause - (l/error :hint "unexpected error on persisting audit events from frontend" - :cause cause))) + (do + (try + (handle-events cfg params) + (catch Throwable cause + (l/error :hint "unexpected error on persisting audit events from frontend" + :cause cause))) - (rph/wrap nil)))) + (rph/wrap nil))))) diff --git a/backend/src/app/tasks/telemetry.clj b/backend/src/app/tasks/telemetry.clj index aa2cae58e0..07b5caa4f9 100644 --- a/backend/src/app/tasks/telemetry.clj +++ b/backend/src/app/tasks/telemetry.clj @@ -11,43 +11,27 @@ (:require [app.common.data :as d] [app.common.exceptions :as ex] + [app.common.logging :as l] [app.config :as cf] [app.db :as db] [app.http.client :as http] [app.main :as-alias main] [app.setup :as-alias setup] + [app.util.blob :as blob] [app.util.json :as json] [integrant.core :as ig] [promesa.exec :as px])) -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; IMPL -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -(defn- send! - [cfg data] - (let [request {:method :post - :uri (cf/get :telemetry-uri) - :headers {"content-type" "application/json"} - :body (json/encode-str data)} - response (http/req! cfg request)] - (when (> (:status response) 206) - (ex/raise :type :internal - :code :invalid-response - :response-status (:status response) - :response-body (:body response))))) - -(defn- get-subscriptions-newsletter-updates - [conn] +(defn- get-subscriptions + [cfg] (let [sql "SELECT email FROM profile where props->>'~:newsletter-updates' = 'true'"] - (->> (db/exec! conn [sql]) - (mapv :email)))) + (db/run! cfg (fn [{:keys [::db/conn]}] + (->> (db/exec! conn [sql]) + (mapv :email)))))) -(defn- get-subscriptions-newsletter-news - [conn] - (let [sql "SELECT email FROM profile where props->>'~:newsletter-news' = 'true'"] - (->> (db/exec! conn [sql]) - (mapv :email)))) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; LEGACY DATA COLLECTION +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; (defn- get-num-teams [conn] @@ -161,8 +145,8 @@ (def ^:private sql:get-counters "SELECT name, count(*) AS count FROM audit_log - WHERE source = 'backend' - AND tracked_at >= date_trunc('day', now()) + WHERE (source = 'backend' OR source = 'frontend') + AND created_at >= date_trunc('day', now()) GROUP BY 1 ORDER BY 2 DESC") @@ -174,18 +158,8 @@ {:total-accomulated-events total :event-counters counters})) -(def ^:private sql:clean-counters - "DELETE FROM audit_log - WHERE ip_addr = '0.0.0.0'::inet -- we know this is from telemetry - AND tracked_at < (date_trunc('day', now()) - '1 day'::interval)") - -(defn- clean-counters-data! - [conn] - (when-not (contains? cf/flags :audit-log) - (db/exec-one! conn [sql:clean-counters]))) - -(defn- get-stats - [conn] +(defn- get-legacy-stats + [{:keys [::db/conn]}] (let [referer (if (cf/get :telemetry-with-taiga) "taiga" (cf/get :telemetry-referer))] @@ -207,6 +181,134 @@ (get-action-counters conn)) (d/without-nils)))) +(defn- make-legacy-request + [cfg data] + (let [request {:method :post + :uri (cf/get :telemetry-uri) + :headers {"content-type" "application/json"} + :body (json/encode-str data)} + response (http/req! cfg request)] + (when (> (:status response) 206) + (ex/raise :type :internal + :code :invalid-response + :response-status (:status response) + :response-body (:body response))))) + +(defn- send-legacy-data + [{:keys [::setup/props] :as cfg} stats subs] + (let [data (cond-> {:type :telemetry-legacy-report + :version (:full cf/version) + :instance-id (:instance-id props)} + (some? stats) + (assoc :stats stats) + + (seq subs) + (assoc :subscriptions subs))] + + (make-legacy-request cfg data))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; AUDIT-EVENT BATCH (TELEMETRY MODE) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Maximum number of telemetry-mode audit rows allowed to accumulate in +;; the audit_log table. When this limit is exceeded the oldest rows are +;; deleted before collection (we accept losing events rather than +;; letting the table grow unboundedly). +(def ^:private batch-size 10000) +(def ^:private max-telemetry-events 100000) + +(def ^:private sql:count-telemetry-events + "SELECT count(*) AS cnt FROM audit_log WHERE source = 'telemetry'") + +(def ^:private sql:gc-events + "DELETE FROM audit_log + WHERE id IN ( + SELECT id FROM audit_log + WHERE source = 'telemetry' + ORDER BY created_at ASC + LIMIT ?)") + +(defn- gc-events + "Delete the oldest telemetry-mode events when the table exceeds the + configured cap so that the buffer stays bounded." + [{:keys [::db/conn]}] + (let [cnt (-> (db/exec-one! conn [sql:count-telemetry-events]) :cnt long) + excess (- cnt max-telemetry-events)] + (when (pos? excess) + (l/warn :hint "telemetry audit_log cap exceeded; dropping oldest events" + :count excess) + (db/exec-one! conn [sql:gc-events (int excess)])))) + +(def ^:private sql:fetch-telemetry-events + "SELECT id, name, type, source, tracked_at, profile_id, context + FROM audit_log + WHERE source = 'telemetry' + ORDER BY created_at ASC + LIMIT ?") + +(defn- row->event + [{:keys [name type source tracked-at profile-id context]}] + (d/without-nils + {:name name + :type type + :source source + :tracked-at tracked-at + :profile-id profile-id + :context (not-empty (db/decode-transit-pgobject context))})) + +(defn- encode-batch + "Encode a sequence of event maps into a fressian+zstd base64 string + suitable for JSON transport." + ^String [events] + (blob/encode-str events {:version 4})) + +(defn send-event-batch + "Send a single batch of events to the telemetry endpoint. Returns + true on success." + [{:keys [::setup/props] :as cfg} batch] + (let [payload {:type :telemetry-events + :version (:full cf/version) + :instance-id (:instance-id props) + :events (encode-batch batch)} + request {:method :post + :uri (cf/get :telemetry-uri) + :headers {"content-type" "application/json"} + :body (json/encode-str payload)} + resp (http/req! cfg request)] + (if (<= (:status resp) 206) + true + (do + (l/warn :hint "telemetry event batch send failed" + :status (:status resp) + :body (:body resp)) + false)))) + +(defn- delete-sent-events + "Delete rows by their ids after a successful send." + [conn ids] + (let [arr (db/create-array conn "uuid" ids)] + (db/exec-one! conn ["DELETE FROM audit_log WHERE id = ANY(?)" arr]))) + +(defn- collect-and-send-audit-events + "Collect anonymous telemetry-mode audit events and ship them to the + telemetry endpoint in a loop. Each iteration fetches one page of + `batch-size` rows, encodes and sends them, then deletes the rows on + success. The loop stops as soon as a send fails, leaving remaining + rows intact for the next run." + [{:keys [::db/conn] :as cfg}] + (loop [counter 1] + (when-let [rows (-> (db/exec! conn [sql:fetch-telemetry-events batch-size]) + (not-empty))] + (let [events (mapv row->event rows) + ids (mapv :id rows)] + (l/dbg :hint "shipping telemetry event batch" + :total (count events) + :batch counter) + (when (send-event-batch cfg events) + (delete-sent-events conn ids) + (recur (inc counter))))))) + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; TASK ENTRY POINT ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -218,46 +320,36 @@ (assert (some? (::setup/props params)) "expected setup props to be available")) (defmethod ig/init-key ::handler - [_ {:keys [::db/pool ::setup/props] :as cfg}] + [_ cfg] (fn [task] (let [params (:props task) send? (get params :send? true) enabled? (or (get params :enabled? false) - (contains? cf/flags :telemetry) - (cf/get :telemetry-enabled)) - - subs {:newsletter-updates (get-subscriptions-newsletter-updates pool) - :newsletter-news (get-subscriptions-newsletter-news pool)} - - data {:subscriptions subs - :version (:full cf/version) - :instance-id (:instance-id props)}] - - (when enabled? - (clean-counters-data! pool)) + cf/telemetry-enabled?) + subs (get-subscriptions cfg)] (cond ;; If we have telemetry enabled, then proceed the normal - ;; operation. + ;; operation sending legacy report enabled? - (let [data (merge data (get-stats pool))] - (when send? - (px/sleep (rand-int 10000)) - (send! cfg data)) - data) + (when send? + (px/sleep (rand-int 10000)) + (db/run! cfg gc-events) + + (let [stats (db/run! cfg get-legacy-stats)] + (send-legacy-data cfg stats subs)) + + ;; Ship any anonymous audit-log events accumulated in + ;; telemetry mode (only when audit-log feature is off). + (when-not (contains? cf/flags :audit-log) + (db/run! cfg collect-and-send-audit-events))) ;; If we have telemetry disabled, but there are users that are ;; explicitly checked the newsletter subscription on the ;; onboarding dialog or the profile section, then proceed to ;; send a limited telemetry data, that consists in the list of ;; subscribed emails and the running penpot version. - (or (seq (:newsletter-updates subs)) - (seq (:newsletter-news subs))) - (do - (when send? - (px/sleep (rand-int 10000)) - (send! cfg data)) - data) - - :else - data)))) + (seq subs) + (when send? + (px/sleep (rand-int 10000)) + (send-legacy-data cfg nil subs)))))) diff --git a/backend/src/app/util/blob.clj b/backend/src/app/util/blob.clj index 6263e8e878..8b8d787ee3 100644 --- a/backend/src/app/util/blob.clj +++ b/backend/src/app/util/blob.clj @@ -19,6 +19,7 @@ java.io.DataOutputStream java.io.InputStream java.io.OutputStream + java.util.Base64 net.jpountz.lz4.LZ4Compressor net.jpountz.lz4.LZ4Factory net.jpountz.lz4.LZ4FastDecompressor @@ -49,6 +50,13 @@ 5 (encode-v5 data) (throw (ex-info "unsupported version" {:version version})))))) +(defn encode-str + "Encode data to a blob and return it as a base64 string. Accepts the + same options as `encode`." + (^String [data] (encode-str data nil)) + (^String [data opts] + (.encodeToString (Base64/getEncoder) ^bytes (encode data opts)))) + (defn decode "A function used for decode persisted blobs in the database." [^bytes data] @@ -63,6 +71,11 @@ 5 (decode-v5 data) (throw (ex-info "unsupported version" {:version version})))))) +(defn decode-str + "Decode a base64 string produced by `encode-str` back to data." + [^String s] + (decode (.decode (Base64/getDecoder) s))) + ;; --- IMPL (defn- encode-v1 diff --git a/backend/test/backend_tests/tasks_telemetry_test.clj b/backend/test/backend_tests/tasks_telemetry_test.clj index c6edf381af..845b31f317 100644 --- a/backend/test/backend_tests/tasks_telemetry_test.clj +++ b/backend/test/backend_tests/tasks_telemetry_test.clj @@ -6,42 +6,518 @@ (ns backend-tests.tasks-telemetry-test (:require + [app.common.time :as ct] + [app.common.uuid :as uuid] + [app.config :as cf] [app.db :as db] + [app.loggers.audit :as audit] + [app.tasks.telemetry :as telemetry] + [app.util.blob :as blob] [backend-tests.helpers :as th] - [clojure.pprint :refer [pprint]] [clojure.test :as t] [mockery.core :refer [with-mocks]])) (t/use-fixtures :once th/state-init) (t/use-fixtures :each th/database-reset) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; HELPERS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(defn- insert-telemetry-row! + "Insert a single anonymised audit_log row as the telemetry mode does." + ([name] (insert-telemetry-row! name {})) + ([name {:keys [tracked-at created-at] + :or {tracked-at (ct/now) + created-at (ct/now)}}] + (th/db-insert! :audit-log + {:id (uuid/next) + :name name + :type "action" + :source "telemetry" + :profile-id uuid/zero + :ip-addr (db/inet "0.0.0.0") + :props (db/tjson {}) + :context (db/tjson {}) + :tracked-at tracked-at + :created-at created-at}))) + +(defn- count-telemetry-rows [] + (-> (th/db-exec-one! ["SELECT count(*) AS cnt FROM audit_log WHERE source = 'telemetry'"]) + :cnt + long)) + +(defn- decode-event-batch + "Decode the base64+fressian+zstd event-batch sent to the mock." + [b64-str] + (blob/decode-str b64-str)) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; STATS / REPORT STRUCTURE TESTS (existing behaviour, extended) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + (t/deftest test-base-report-data-structure - (with-mocks [mock {:target 'app.tasks.telemetry/send! + (with-mocks [mock {:target 'app.tasks.telemetry/make-legacy-request :return nil}] (let [prof (th/create-profile* 1 {:is-active true - :props {:newsletter-news true}})] + :props {:newsletter-updates true}})] (th/run-task! :telemetry {:send? true :enabled? true}) (t/is (:called? @mock)) (let [[_ data] (-> @mock :call-args)] (t/is (contains? data :subscriptions)) - (t/is (= [(:email prof)] (get-in data [:subscriptions :newsletter-news]))) - (t/is (contains? data :total-fonts)) - (t/is (contains? data :total-users)) - (t/is (contains? data :total-projects)) - (t/is (contains? data :total-files)) - (t/is (contains? data :total-teams)) - (t/is (contains? data :total-comments)) - (t/is (contains? data :instance-id)) - (t/is (contains? data :jvm-cpus)) - (t/is (contains? data :jvm-heap-max)) - (t/is (contains? data :max-users-on-team)) - (t/is (contains? data :avg-users-on-team)) - (t/is (contains? data :max-files-on-project)) - (t/is (contains? data :avg-files-on-project)) - (t/is (contains? data :max-projects-on-team)) - (t/is (contains? data :avg-files-on-project)) + (t/is (= [(:email prof)] (:subscriptions data))) + (t/is (contains? data :stats)) + (let [stats (:stats data)] + (t/is (contains? stats :total-fonts)) + (t/is (contains? stats :total-users)) + (t/is (contains? stats :total-projects)) + (t/is (contains? stats :total-files)) + (t/is (contains? stats :total-teams)) + (t/is (contains? stats :total-comments)) + (t/is (contains? stats :jvm-cpus)) + (t/is (contains? stats :jvm-heap-max)) + (t/is (contains? stats :max-users-on-team)) + (t/is (contains? stats :avg-users-on-team)) + (t/is (contains? stats :max-files-on-project)) + (t/is (contains? stats :avg-files-on-project)) + (t/is (contains? stats :max-projects-on-team)) + (t/is (contains? stats :avg-files-on-project)) + (t/is (contains? stats :email-domains)) + (t/is (= ["nodomain.com"] (:email-domains stats)))) (t/is (contains? data :version)) - (t/is (contains? data :email-domains)) - (t/is (= ["nodomain.com"] (:email-domains data))))))) + (t/is (contains? data :instance-id)))))) + +(t/deftest test-telemetry-disabled-no-send + ;; When telemetry is disabled and no newsletter subscriptions exist, + ;; make-legacy-request must not be called at all. + (with-mocks [mock {:target 'app.tasks.telemetry/make-legacy-request + :return nil}] + (with-redefs [cf/flags #{}] + (th/create-profile* 1 {:is-active true}) + (th/run-task! :telemetry {:send? true}) + (t/is (not (:called? @mock)))))) + +(t/deftest test-telemetry-disabled-newsletter-only-send + ;; When telemetry is disabled but a user has newsletter-updates opted in, + ;; make-legacy-request is called once with only subscriptions + version (no stats). + (with-mocks [mock {:target 'app.tasks.telemetry/make-legacy-request + :return nil}] + (with-redefs [cf/flags #{}] + (let [prof (th/create-profile* 1 {:is-active true + :props {:newsletter-updates true}})] + (th/run-task! :telemetry {:send? true}) + (t/is (:called? @mock)) + (let [[_ data] (:call-args @mock)] + ;; Limited payload — no stats + (t/is (contains? data :subscriptions)) + (t/is (contains? data :version)) + (t/is (not (contains? data :stats))) + (t/is (= [(:email prof)] (:subscriptions data)))))))) + +(t/deftest test-send-is-skipped-when-send?-false + ;; Passing send?=false must suppress all HTTP calls even when enabled. + (with-mocks [mock {:target 'app.tasks.telemetry/make-legacy-request + :return nil}] + (with-redefs [cf/flags #{:telemetry}] + (th/create-profile* 1 {:is-active true}) + (th/run-task! :telemetry {:send? false :enabled? true}) + (t/is (not (:called? @mock)))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; AUDIT-EVENT BATCH COLLECTION TESTS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(t/deftest test-no-audit-events-no-batch-call + ;; When telemetry is enabled but there are no audit_log rows with + ;; source='telemetry', the batch send path must not be invoked. + (with-mocks [legacy-mock {:target 'app.tasks.telemetry/make-legacy-request + :return nil} + batch-mock {:target 'app.tasks.telemetry/send-event-batch + :return true}] + (with-redefs [cf/flags #{:telemetry}] + (th/run-task! :telemetry {:send? true :enabled? true}) + (t/is (:called? @legacy-mock)) + (t/is (not (:called? @batch-mock)))))) + +(t/deftest test-audit-events-sent-and-deleted-on-success + ;; Happy path: telemetry rows are collected, shipped as a batch and + ;; deleted from the table when the endpoint returns success. + (with-mocks [legacy-mock {:target 'app.tasks.telemetry/make-legacy-request + :return nil} + batch-mock {:target 'app.tasks.telemetry/send-event-batch + :return true}] + (with-redefs [cf/flags #{:telemetry}] + (insert-telemetry-row! "navigate") + (insert-telemetry-row! "create-file") + (insert-telemetry-row! "update-file") + + (t/is (= 3 (count-telemetry-rows))) + + (th/run-task! :telemetry {:send? true :enabled? true}) + + ;; batch send was called at least once + (t/is (:called? @batch-mock)) + + ;; all rows deleted after successful send + (t/is (= 0 (count-telemetry-rows)))))) + +(t/deftest test-audit-events-kept-on-batch-failure + ;; When the batch endpoint returns failure the rows must be retained + ;; so the next scheduled run can retry. + (with-mocks [legacy-mock {:target 'app.tasks.telemetry/make-legacy-request + :return nil} + batch-mock {:target 'app.tasks.telemetry/send-event-batch + :return false}] + (with-redefs [cf/flags #{:telemetry}] + (insert-telemetry-row! "navigate") + (insert-telemetry-row! "create-file") + + (th/run-task! :telemetry {:send? true :enabled? true}) + + (t/is (:called? @batch-mock)) + ;; rows still present — not deleted on failure + (t/is (= 2 (count-telemetry-rows)))))) + +(t/deftest test-audit-events-not-collected-when-audit-log-flag-set + ;; When the :audit-log flag is active, mode C is disabled and the + ;; batch path must never run (audit-log owns those rows instead). + (with-mocks [legacy-mock {:target 'app.tasks.telemetry/make-legacy-request + :return nil} + batch-mock {:target 'app.tasks.telemetry/send-event-batch + :return true}] + (with-redefs [cf/flags #{:telemetry :audit-log}] + (insert-telemetry-row! "navigate") + + (th/run-task! :telemetry {:send? true :enabled? true}) + + (t/is (not (:called? @batch-mock))) + ;; row untouched + (t/is (= 1 (count-telemetry-rows)))))) + +(t/deftest test-batch-payload-contains-required-fields + ;; Inspect the actual arguments forwarded to send-event-batch to + ;; verify the payload carries instance-id, version and events. + (let [captured (atom nil)] + (with-mocks [legacy-mock {:target 'app.tasks.telemetry/make-legacy-request + :return nil}] + (with-redefs [cf/flags #{:telemetry} + telemetry/send-event-batch + (fn [_cfg batch] + (reset! captured batch) + true)] + (insert-telemetry-row! "navigate") + (insert-telemetry-row! "create-file") + + (th/run-task! :telemetry {:send? true :enabled? true}) + + (t/is (some? @captured)) + (let [batch @captured] + ;; batch is a seq of event maps + (t/is (seq batch)) + (t/is (= 2 (count batch))) + ;; each event has name, type, source — profile-id is preserved, + ;; props and ip-addr are stripped + (let [ev (first batch)] + (t/is (contains? ev :name)) + (t/is (contains? ev :type)) + (t/is (contains? ev :source)) + (t/is (contains? ev :profile-id)) + (t/is (not (contains? ev :props))) + (t/is (not (contains? ev :ip-addr))))))))) + +(t/deftest test-batch-encoding-is-decodable + ;; Verify that encode-batch produces a blob that round-trips back + ;; through blob/decode to the original data. + (let [events [{:name "navigate" :type "action" :source "telemetry" + :tracked-at (ct/now)} + {:name "create-file" :type "action" :source "telemetry" + :tracked-at (ct/now)}] + ;; Call the private fn through the ns-mapped var + encode (ns-resolve 'app.tasks.telemetry 'encode-batch) + encoded (encode events) + decoded (decode-event-batch encoded)] + (t/is (string? encoded)) + (t/is (seq decoded)) + (t/is (= (count events) (count decoded))) + (t/is (= "navigate" (:name (first decoded)))) + (t/is (= "create-file" (:name (second decoded)))))) + +(t/deftest test-multiple-batches-when-many-events + ;; Lower batch-size to 1 so that 3 events produce 3 separate + ;; HTTP requests and verify all are sent and all rows deleted. + (let [call-count (atom 0)] + (with-mocks [legacy-mock {:target 'app.tasks.telemetry/make-legacy-request + :return nil}] + (with-redefs [cf/flags #{:telemetry} + telemetry/batch-size 1 + telemetry/send-event-batch + (fn [_cfg _batch] + (swap! call-count inc) + true)] + (insert-telemetry-row! "navigate") + (insert-telemetry-row! "create-file") + (insert-telemetry-row! "update-file") + + (th/run-task! :telemetry {:send? true :enabled? true}) + + ;; Each event is fetched and sent in its own loop iteration + (t/is (= 3 @call-count)) + ;; All rows deleted after all iterations succeed + (t/is (= 0 (count-telemetry-rows))))))) + +(t/deftest test-partial-failure-stops-remaining-batches + ;; With batch-size 1, when the second send fails the loop stops. + ;; The first batch was already deleted; the two remaining rows + ;; are retained for the next run. + (let [call-count (atom 0)] + (with-mocks [legacy-mock {:target 'app.tasks.telemetry/make-legacy-request + :return nil}] + (with-redefs [cf/flags #{:telemetry} + telemetry/batch-size 1 + telemetry/send-event-batch + (fn [_cfg _batch] + (swap! call-count inc) + ;; fail on the second call + (not= 2 @call-count))] + (insert-telemetry-row! "navigate") + (insert-telemetry-row! "create-file") + (insert-telemetry-row! "update-file") + + (th/run-task! :telemetry {:send? true :enabled? true}) + + ;; Stopped at iteration 2 — third event never attempted + (t/is (= 2 @call-count)) + ;; First batch was deleted on success; 2 rows remain for retry + (t/is (= 2 (count-telemetry-rows))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; GC / SAFETY-CAP TESTS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(t/deftest test-gc-removes-excess-rows-before-collection + ;; Lower the cap to 2 and insert 5 rows. After the task runs the + ;; 3 oldest rows must have been pruned and the 2 newest shipped. + (with-mocks [legacy-mock {:target 'app.tasks.telemetry/make-legacy-request + :return nil} + batch-mock {:target 'app.tasks.telemetry/send-event-batch + :return true}] + (with-redefs [cf/flags #{:telemetry} + telemetry/max-telemetry-events 2] + ;; Insert rows with strictly ordered timestamps so we can reason + ;; about which ones survive. + (let [t0 (ct/now)] + (doseq [i (range 5)] + (insert-telemetry-row! + (str "event-" i) + {:created-at (ct/plus t0 (ct/duration {:seconds i})) + :tracked-at (ct/plus t0 (ct/duration {:seconds i}))}))) + + (t/is (= 5 (count-telemetry-rows))) + + (th/run-task! :telemetry {:send? true :enabled? true}) + + ;; GC deleted 3, then the remaining 2 were shipped and deleted + (t/is (= 0 (count-telemetry-rows)))))) + +(t/deftest test-gc-does-not-run-when-under-cap + ;; When the row count is below the cap, no GC deletion should occur + ;; and all rows should be forwarded to the batch sender. + (let [batch-events (atom nil)] + (with-mocks [legacy-mock {:target 'app.tasks.telemetry/make-legacy-request + :return nil}] + (with-redefs [cf/flags #{:telemetry} + telemetry/max-telemetry-events 100 + telemetry/send-event-batch + (fn [_cfg batch] + (reset! batch-events batch) + true)] + (insert-telemetry-row! "event-a") + (insert-telemetry-row! "event-b") + + (th/run-task! :telemetry {:send? true :enabled? true}) + + ;; Both events forwarded to the batch — GC left them alone + (t/is (= 2 (count @batch-events))) + (t/is (= 0 (count-telemetry-rows))))))) + +(t/deftest test-gc-cap-exactly-at-limit-does-not-delete + ;; Row count == cap means excess is zero; nothing should be deleted + ;; by the GC step. + (with-mocks [legacy-mock {:target 'app.tasks.telemetry/make-legacy-request + :return nil} + batch-mock {:target 'app.tasks.telemetry/send-event-batch + :return true}] + (with-redefs [cf/flags #{:telemetry} + telemetry/max-telemetry-events 3] + (insert-telemetry-row! "a") + (insert-telemetry-row! "b") + (insert-telemetry-row! "c") + + (th/run-task! :telemetry {:send? true :enabled? true}) + + ;; All 3 shipped — none dropped by GC + (t/is (= 0 (count-telemetry-rows)))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; ANONYMITY TESTS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(t/deftest test-telemetry-rows-stored-without-pii + ;; Rows written to audit_log in telemetry mode must carry no PII: + ;; empty props, zeroed ip, profile-id=zero, source='telemetry'. + ;; Safe context fields (browser, os, version, etc.) are preserved + ;; but session-linking and access-token fields are stripped. + (with-redefs [cf/flags #{:telemetry}] + (let [_prof (th/create-profile* 1 {:is-active true}) + safe-ctx {:browser "Chrome" + :browser-version "120.0" + :os "Linux" + :version "2.0.0"}] + ;; Simulate what app.loggers.audit/handle-event! does in mode C + (th/db-insert! :audit-log + {:id (uuid/next) + :name "create-project" + :type "action" + :source "telemetry" + :profile-id uuid/zero + :ip-addr (db/inet "0.0.0.0") + :props (db/tjson {}) + :context (db/tjson safe-ctx) + :tracked-at (ct/now) + :created-at (ct/now)}) + + (let [[row] (th/db-exec! ["SELECT * FROM audit_log WHERE source = 'telemetry'"])] + (t/is (= "telemetry" (:source row))) + ;; props are always empty + (t/is (= "{}" (str (:props row)))) + ;; ip_addr is the sentinel zero address + (t/is (= "0.0.0.0" (str (:ip-addr row)))) + ;; profile-id is uuid/zero — not a real user id + (t/is (= uuid/zero (:profile-id row))))))) + +(t/deftest test-batch-events-contain-no-pii-fields + ;; The event maps forwarded to send-event-batch must not carry props, + ;; ip-addr or profile-id. Safe context fields (browser, os, etc.) may + ;; be present but session-linking keys must be absent. + (let [captured-batch (atom nil) + ;; Insert a row that carries safe context (as the real path does) + safe-ctx {:browser "Firefox" :browser-version "121.0" + :os "macOS" :session "should-be-stripped" + :external-session-id "also-stripped"}] + (with-mocks [legacy-mock {:target 'app.tasks.telemetry/make-legacy-request + :return nil}] + (with-redefs [cf/flags #{:telemetry} + telemetry/send-event-batch + (fn [_cfg batch] + (reset! captured-batch batch) + true)] + ;; Insert with safe context already pre-filtered (as the ingest path does) + (th/db-insert! :audit-log + {:id (uuid/next) + :name "navigate" + :type "action" + :source "telemetry" + :profile-id uuid/zero + :ip-addr (db/inet "0.0.0.0") + :props (db/tjson {}) + :context (db/tjson (dissoc safe-ctx :session :external-session-id)) + :tracked-at (ct/now) + :created-at (ct/now)}) + + (th/run-task! :telemetry {:send? true :enabled? true}) + + (t/is (= 1 (count @captured-batch))) + (let [ev (first @captured-batch)] + ;; must have the core identity fields including profile-id + (t/is (contains? ev :name)) + (t/is (contains? ev :type)) + (t/is (contains? ev :source)) + (t/is (contains? ev :tracked-at)) + (t/is (contains? ev :profile-id)) + ;; props and ip-addr must be stripped + (t/is (not (contains? ev :props))) + (t/is (not (contains? ev :ip-addr))) + ;; context may be present and must not contain session-linking keys + (when-let [ctx (:context ev)] + (t/is (not (contains? ctx :session))) + (t/is (not (contains? ctx :external-session-id))) + ;; safe keys should be present + (t/is (contains? ctx :browser)))))))) + +(t/deftest test-telemetry-rows-have-day-precision-timestamps + ;; Telemetry events must be stored with timestamps truncated to day + ;; precision so that exact event timing cannot be inferred. + (with-redefs [cf/flags #{:telemetry} + cf/telemetry-enabled? true] + (let [handle-event! (ns-resolve 'app.loggers.audit 'handle-event!) + profile (th/create-profile* 1 {:is-active true}) + event {::audit/type "action" + ::audit/name "create-project" + ::audit/profile-id (:id profile)}] + (db/tx-run! th/*system* handle-event! event) + (let [[row] (th/db-exec! ["SELECT * FROM audit_log WHERE source = 'telemetry'"])] + (t/is (some? row)) + (let [created-at (:created-at row) + tracked-at (:tracked-at row) + day-now (ct/truncate (ct/now) :days)] + ;; Both timestamps must equal midnight of the current day + (t/is (= day-now created-at)) + (t/is (= day-now tracked-at))))))) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; PARTITION-BATCHES UNIT TESTS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; FILTER-SAFE-CONTEXT UNIT TESTS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +(t/deftest test-filter-safe-context-keeps-browser-fields + ;; Safe environment fields must survive the filter. + (let [filter-safe-context (ns-resolve 'app.rpc.commands.audit 'filter-safe-context) + ctx {:browser "Chrome" + :browser-version "120.0" + :engine "Blink" + :engine-version "120.0" + :os "Windows 11" + :os-version "11" + :device-type "unknown" + :device-arch "amd64" + :locale "en-US" + :version "2.0.0" + :screen-width 1920 + :screen-height 1080 + :event-origin "workspace"} + result (filter-safe-context ctx)] + (t/is (= "Chrome" (:browser result))) + (t/is (= "120.0" (:browser-version result))) + (t/is (= "Windows 11" (:os result))) + (t/is (= "en-US" (:locale result))) + (t/is (= "workspace" (:event-origin result))) + (t/is (= 1920 (:screen-width result))))) + +(t/deftest test-filter-safe-context-strips-pii-keys + ;; Session-linking and access-token fields must be removed. + (let [filter-safe-context (ns-resolve 'app.rpc.commands.audit 'filter-safe-context) + ctx {:browser "Firefox" + :session "abc-session-id" + :external-session-id "ext-123" + :file-stats {:total-shapes 42} + :initiator "app" + :access-token-id "tok-456" + :access-token-type "api-key"} + result (filter-safe-context ctx)] + (t/is (= "Firefox" (:browser result))) + (t/is (not (contains? result :session))) + (t/is (not (contains? result :external-session-id))) + (t/is (not (contains? result :file-stats))) + (t/is (not (contains? result :initiator))) + (t/is (not (contains? result :access-token-id))) + (t/is (not (contains? result :access-token-type))))) + +(t/deftest test-filter-safe-context-empty-input + ;; An empty context should return an empty map without error. + (let [filter-safe-context (ns-resolve 'app.rpc.commands.audit 'filter-safe-context)] + (t/is (= {} (filter-safe-context {})))))