Skip to content

Commit

Permalink
Merge pull request #286 from cybertec-postgresql/invalid_indexes_metric
Browse files Browse the repository at this point in the history
[+] add new metrics for invalid and unused indexes
  • Loading branch information
pashagolub authored Sep 28, 2023
2 parents 401f12b + 95fe46f commit 45ed90e
Show file tree
Hide file tree
Showing 11 changed files with 260 additions and 16 deletions.
146 changes: 138 additions & 8 deletions src/db/sql/config/config_definitions.sql
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,16 @@ select
extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s,
extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s,
case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int,
system_identifier::text as tag_sys_id
system_identifier::text as tag_sys_id,
(select count(*) from pg_index i
where not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)) as invalid_indexes
from
pg_stat_database, pg_control_system()
where
Expand Down Expand Up @@ -165,7 +174,16 @@ select
extract(epoch from (now() - coalesce((pg_stat_file('postmaster.pid', true)).modification, pg_postmaster_start_time())))::int8 as postmaster_uptime_s,
extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s,
case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int,
system_identifier::text as tag_sys_id
system_identifier::text as tag_sys_id,
(select count(*) from pg_index i
where not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)) as invalid_indexes
from
pg_stat_database, pg_control_system()
where
Expand Down Expand Up @@ -202,7 +220,16 @@ select
checksum_failures,
extract(epoch from (now() - checksum_last_failure))::int8 as checksum_last_failure_s,
case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int,
system_identifier::text as tag_sys_id
system_identifier::text as tag_sys_id,
(select count(*) from pg_index i
where not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)) as invalid_indexes
from
pg_stat_database, pg_control_system()
where
Expand Down Expand Up @@ -232,7 +259,16 @@ select
checksum_failures,
extract(epoch from (now() - checksum_last_failure))::int8 as checksum_last_failure_s,
case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int,
system_identifier::text as tag_sys_id
system_identifier::text as tag_sys_id,
(select count(*) from pg_index i
where not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)) as invalid_indexes
from
pg_stat_database, pg_control_system()
where
Expand Down Expand Up @@ -276,7 +312,16 @@ select
sessions,
sessions_abandoned,
sessions_fatal,
sessions_killed
sessions_killed,
(select count(*) from pg_index i
where not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)) as invalid_indexes
from
pg_stat_database, pg_control_system()
where
Expand Down Expand Up @@ -313,7 +358,16 @@ select
sessions,
sessions_abandoned,
sessions_fatal,
sessions_killed
sessions_killed,
(select count(*) from pg_index i
where not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)) as invalid_indexes
from
pg_stat_database, pg_control_system()
where
Expand Down Expand Up @@ -356,7 +410,16 @@ select
sessions,
sessions_abandoned,
sessions_fatal,
sessions_killed
sessions_killed,
(select count(*) from pg_index i
where not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)) as invalid_indexes
from
pg_stat_database, pg_control_system()
where
Expand Down Expand Up @@ -392,7 +455,16 @@ select
sessions,
sessions_abandoned,
sessions_fatal,
sessions_killed
sessions_killed,
(select count(*) from pg_index i
where not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)) as invalid_indexes
from
pg_stat_database, pg_control_system()
where
Expand Down Expand Up @@ -6077,6 +6149,64 @@ GROUP BY
$sql$);


insert into pgwatch2.metric(m_name, m_pg_version_from, m_sql)
values (
'unused_indexes',
11,
$sql$
select /* pgwatch3_generated */
(extract(epoch from now()) * 1e9)::int8 as epoch_ns,
*
from (
select
format('%I.%I', sui.schemaname, sui.indexrelname) as tag_index_full_name,
sui.idx_scan,
coalesce(pg_relation_size(sui.indexrelid), 0) as index_size_b,
system_identifier::text as tag_sys_id /* to easily check also all replicas as could be still used there */
from
pg_stat_user_indexes sui
join pg_index i on i.indexrelid = sui.indexrelid
join pg_control_system() on true
where not sui.schemaname like E'pg\\_temp%'
and idx_scan = 0
and not (indisprimary or indisunique or indisexclusion)
and not exists (select * from pg_locks where relation = sui.relid and mode = 'AccessExclusiveLock')
) x
where index_size_b > 100*1024^2 /* list >100MB only */
order by index_size_b desc
limit 25;
$sql$);


insert into pgwatch2.metric(m_name, m_pg_version_from, m_sql)
values (
'invalid_indexes',
11,
$sql$
select /* pgwatch3_generated */
(extract(epoch from now()) * 1e9)::int8 as epoch_ns,
format('%I.%I', n.nspname , ci.relname) as tag_index_full_name,
coalesce(pg_relation_size(indexrelid), 0) as index_size_b
from
pg_index i
join pg_class ci on ci.oid = i.indexrelid
join pg_class cr on cr.oid = i.indrelid
join pg_namespace n on n.oid = ci.relnamespace
where not n.nspname like E'pg\\_temp%'
and not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)
and not exists (select * from pg_locks where relation = indexrelid and mode = 'AccessExclusiveLock') /* can't get size then */
order by index_size_b desc
limit 100;
$sql$);


/* Metric attributes */
-- truncate pgwatch3.metric_attribute;

Expand Down
11 changes: 10 additions & 1 deletion src/metrics/db_stats/11/metric.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,16 @@ select /* pgwatch3_generated */
extract(epoch from (now() - pg_postmaster_start_time()))::int8 as postmaster_uptime_s,
extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s,
case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int,
system_identifier::text as tag_sys_id
system_identifier::text as tag_sys_id,
(select count(*) from pg_index i
where not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)) as invalid_indexes
from
pg_stat_database, pg_control_system()
where
Expand Down
11 changes: 10 additions & 1 deletion src/metrics/db_stats/11/metric_su.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,16 @@ select /* pgwatch3_generated */
extract(epoch from (now() - coalesce((pg_stat_file('postmaster.pid', true)).modification, pg_postmaster_start_time())))::int8 as postmaster_uptime_s,
extract(epoch from (now() - pg_backup_start_time()))::int8 as backup_duration_s,
case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int,
system_identifier::text as tag_sys_id
system_identifier::text as tag_sys_id,
(select count(*) from pg_index i
where not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)) as invalid_indexes
from
pg_stat_database, pg_control_system()
where
Expand Down
11 changes: 10 additions & 1 deletion src/metrics/db_stats/12/metric.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,16 @@ select /* pgwatch3_generated */
checksum_failures,
extract(epoch from (now() - checksum_last_failure))::int8 as checksum_last_failure_s,
case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int,
system_identifier::text as tag_sys_id
system_identifier::text as tag_sys_id,
(select count(*) from pg_index i
where not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)) as invalid_indexes
from
pg_stat_database, pg_control_system()
where
Expand Down
11 changes: 10 additions & 1 deletion src/metrics/db_stats/12/metric_su.sql
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,16 @@ select /* pgwatch3_generated */
checksum_failures,
extract(epoch from (now() - checksum_last_failure))::int8 as checksum_last_failure_s,
case when pg_is_in_recovery() then 1 else 0 end as in_recovery_int,
system_identifier::text as tag_sys_id
system_identifier::text as tag_sys_id,
(select count(*) from pg_index i
where not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)) as invalid_indexes
from
pg_stat_database, pg_control_system()
where
Expand Down
11 changes: 10 additions & 1 deletion src/metrics/db_stats/14/metric.sql
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,16 @@ select /* pgwatch3_generated */
sessions,
sessions_abandoned,
sessions_fatal,
sessions_killed
sessions_killed,
(select count(*) from pg_index i
where not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)) as invalid_indexes
from
pg_stat_database, pg_control_system()
where
Expand Down
11 changes: 10 additions & 1 deletion src/metrics/db_stats/14/metric_su.sql
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,16 @@ select /* pgwatch3_generated */
sessions,
sessions_abandoned,
sessions_fatal,
sessions_killed
sessions_killed,
(select count(*) from pg_index i
where not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)) as invalid_indexes
from
pg_stat_database, pg_control_system()
where
Expand Down
11 changes: 10 additions & 1 deletion src/metrics/db_stats/15/metric.sql
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,16 @@ select /* pgwatch3_generated */
sessions,
sessions_abandoned,
sessions_fatal,
sessions_killed
sessions_killed,
(select count(*) from pg_index i
where not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)) as invalid_indexes
from
pg_stat_database, pg_control_system()
where
Expand Down
11 changes: 10 additions & 1 deletion src/metrics/db_stats/15/metric_su.sql
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,16 @@ select /* pgwatch3_generated */
sessions,
sessions_abandoned,
sessions_fatal,
sessions_killed
sessions_killed,
(select count(*) from pg_index i
where not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)) as invalid_indexes
from
pg_stat_database, pg_control_system()
where
Expand Down
21 changes: 21 additions & 0 deletions src/metrics/invalid_indexes/11/metric.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
select /* pgwatch3_generated */
(extract(epoch from now()) * 1e9)::int8 as epoch_ns,
format('%I.%I', n.nspname , ci.relname) as tag_index_full_name,
coalesce(pg_relation_size(indexrelid), 0) as index_size_b
from
pg_index i
join pg_class ci on ci.oid = i.indexrelid
join pg_class cr on cr.oid = i.indrelid
join pg_namespace n on n.oid = ci.relnamespace
where not n.nspname like E'pg\\_temp%'
and not indisvalid
and not exists ( /* leave out ones that are being actively rebuilt */
select * from pg_locks l
join pg_stat_activity a using (pid)
where l.relation = i.indexrelid
and a.state = 'active'
and a.query ~* 'concurrently'
)
and not exists (select * from pg_locks where relation = indexrelid and mode = 'AccessExclusiveLock') /* can't get size then */
order by index_size_b desc
limit 100;
21 changes: 21 additions & 0 deletions src/metrics/unused_indexes/11/metric.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
select /* pgwatch3_generated */
(extract(epoch from now()) * 1e9)::int8 as epoch_ns,
*
from (
select
format('%I.%I', sui.schemaname, sui.indexrelname) as tag_index_full_name,
sui.idx_scan,
coalesce(pg_relation_size(sui.indexrelid), 0) as index_size_b,
system_identifier::text as tag_sys_id /* to easily check also all replicas as could be still used there */
from
pg_stat_user_indexes sui
join pg_index i on i.indexrelid = sui.indexrelid
join pg_control_system() on true
where not sui.schemaname like E'pg\\_temp%'
and idx_scan = 0
and not (indisprimary or indisunique or indisexclusion)
and not exists (select * from pg_locks where relation = sui.relid and mode = 'AccessExclusiveLock')
) x
where index_size_b > 100*1024^2 /* list >100MB only */
order by index_size_b desc
limit 25;

0 comments on commit 45ed90e

Please sign in to comment.