Skip to content

You are viewing documentation for Immuta version 2023.4.

For the latest version, view our documentation for Immuta SaaS or the latest self-hosted version.

Determine Background Job Batches

Audience: System Administrators

Content Summary: This page provides a SQL query that determines batches of background jobs.

Query Overview

Below is a SQL query, which will help determine batches of background jobs.

Note: This query unions the job and archive table. Adjust the top interval appropriately for your needs.

Immuta >= 2022.1

This query is for all 2022.1 versions of Immuta and above.

WITH
vars AS (
    SELECT
    NOW() AS end_dt,
    --TO_TIMESTAMP('2021-11-23 12:05:00', 'YYYY-MM-DD hh24:mi:ss')::timestamp without time zone at time zone 'EDT' AS start_dt
    NOW() - INTERVAL '7 days' AS start_dt
),
all_jobs_raw AS (
    SELECT name, state, startedon, createdon, completedon, data FROM pgboss.job
    UNION SELECT name, state, startedon, createdon, completedon, data FROM pgboss.archive -- COMMENT OUT THIS TABLE IF LESS THAN 12 HOURS AND RUNNING SLOW
),
all_jobs AS (
    SELECT
        CASE
            WHEN name ILIKE 'nativeSqlDataSourceSync_Redshift%' THEN 'Redshift Native Update'
            WHEN name = 'nativeSqlAutomaticSubscriptionNotifications_Redshift' THEN 'Immuta Upd Redshift DS'
            WHEN name = 'nativeSqlAutomaticSubscriptionNotifications_Snowflake' THEN 'Immuta Upd Snowflake DS'
            WHEN name = 'updatePolicies' THEN 'updatePolicies'
            WHEN name ILIKE 'nativeSqlProfileRefresh_Redshift%' THEN 'nativeSqlProfileRefresh_Redshift'
            WHEN name ILIKE 'nativeSqlProfileRefresh_Snowflake%' THEN 'nativeSqlProfileRefresh_Snowflake'
            ELSE name
        END AS name, state, startedon, completedon, date_trunc('hour', createdon) AS createdon_trunc -- Change the date_trunc value to 'hour' or 'day'
    FROM all_jobs_raw INNER JOIN vars ON TRUE
    WHERE all_jobs_raw.createdon > vars.start_dt and all_jobs_raw.createdon < vars.end_dt
    AND name NOT LIKE '\_\_%' AND name NOT ILIKE 'cleanUp%' AND name NOT ILIKE 'initiateScheduled%'
    AND name NOT IN (
        'bootstrapImmutaQueryEngine', 'checkForRollOver', 'checkInWithLicenseServer','clearMaxTTLTokens', 'columnEvolutionCheck', 'createPolicySearchRecords',
        'dataSourceTest', 'entitlements_cache_job_root', 'entitlements_cache_job_worker','executeCustomerMetricsQueries', 'expirePolicyAdjustments',
        'expirePolicyCertifications', 'loadIamGroups', 'nativeSqlMigration_Redshift','recomputeDslFromJsonPolicies', 'removeIamAuths', 'scheduleDbtUpdates',
        'scheduleSchemaEvolutionChecks', 'schemaProjectsQueryableMigration', 'visibilitySchemaUpdate') -- ignore list
    --AND (name ILIKE '%snowflake%') -- adjust to filter down to specific jobs
),
batches AS (
    SELECT DISTINCT name, createdon_trunc, COUNT(*) AS total_count,
        MIN(createdon_trunc) AS createdon, MAX(completedon) AS completedon
    FROM all_jobs GROUP BY name, createdon_trunc),
created_jobs AS (
    SELECT name, createdon_trunc, COUNT(*) AS created_count
    FROM all_jobs WHERE state = 'created' GROUP BY name, createdon_trunc),
retry_jobs AS (
    SELECT name, createdon_trunc, COUNT(*) AS retry_count
    FROM all_jobs WHERE state = 'retry' GROUP BY name, createdon_trunc),
active_jobs AS (
    SELECT name, createdon_trunc, COUNT(*) AS active_count
    FROM all_jobs WHERE state = 'active' GROUP BY name, createdon_trunc),
completed_jobs AS (
    SELECT name, createdon_trunc, COUNT(*) AS completed_count,
        AVG(completedon - startedon) AS average_time
    FROM all_jobs WHERE state = 'completed' GROUP BY name, createdon_trunc),
expired_jobs AS (
    SELECT name, createdon_trunc, COUNT(*) AS expired_count
    FROM all_jobs WHERE state = 'expired' GROUP BY name, createdon_trunc),
cancelled_jobs AS (
    SELECT name, createdon_trunc, COUNT(*) AS cancelled_count
    FROM all_jobs WHERE state = 'cancelled' GROUP BY name, createdon_trunc),
failed_jobs AS (
    SELECT name, createdon_trunc, COUNT(*) AS failed_count
    FROM all_jobs WHERE state = 'failed'  GROUP BY name, createdon_trunc)
SELECT
    batches.*,
    completed_jobs.average_time,
    batches.completedon - batches.createdon AS duration,
    created_jobs.created_count,
    retry_jobs.retry_count,
    active_jobs.active_count,
    completed_jobs.completed_count,
    expired_jobs.expired_count,
    cancelled_jobs.cancelled_count,
    failed_jobs.failed_count
FROM batches
LEFT OUTER JOIN created_jobs ON batches.name = created_jobs.name AND batches.createdon_trunc = created_jobs.createdon_trunc
LEFT OUTER JOIN retry_jobs ON batches.name = retry_jobs.name AND batches.createdon_trunc = retry_jobs.createdon_trunc
LEFT OUTER JOIN active_jobs ON batches.name = active_jobs.name AND batches.createdon_trunc = active_jobs.createdon_trunc
LEFT OUTER JOIN completed_jobs ON batches.name = completed_jobs.name AND batches.createdon_trunc = completed_jobs.createdon_trunc
LEFT OUTER JOIN expired_jobs ON batches.name = expired_jobs.name AND batches.createdon_trunc = expired_jobs.createdon_trunc
LEFT OUTER JOIN cancelled_jobs ON batches.name = cancelled_jobs.name AND batches.createdon_trunc = cancelled_jobs.createdon_trunc
LEFT OUTER JOIN failed_jobs ON batches.name = failed_jobs.name AND batches.createdon_trunc = failed_jobs.createdon_trunc
ORDER BY name, createdon_trunc DESC;