Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions database/MIGRATION.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Database Migration Guide

## `cml_data_1h` continuous aggregate

**Branch:** `feature/performance-and-grafana-improvements`

`init.sql` only runs on a fresh database volume, so when deploying this branch
to a machine that already has data you must apply the migration manually.

### Steps

**1. Pull and redeploy the application**

```bash
git pull origin main
docker compose up -d --build
```

**2. Create the continuous aggregate**

```bash
# Create the 1-hour continuous aggregate and register its refresh policy.
# Both statements are idempotent (IF NOT EXISTS / if_not_exists), so this step
# can be re-run safely if the view or the policy already exists.
docker compose exec database psql -U myuser -d mydatabase -c "
-- Hourly min/max/avg of rsl and tsl per (cml_id, sublink_id).
-- WITH NO DATA: the view is created empty; step 3 backfills it.
CREATE MATERIALIZED VIEW IF NOT EXISTS cml_data_1h
WITH (timescaledb.continuous) AS
SELECT
    time_bucket('1 hour', time) AS bucket,
    cml_id,
    sublink_id,
    MIN(rsl) AS rsl_min,
    MAX(rsl) AS rsl_max,
    AVG(rsl) AS rsl_avg,
    MIN(tsl) AS tsl_min,
    MAX(tsl) AS tsl_max,
    AVG(tsl) AS tsl_avg
FROM cml_data
GROUP BY bucket, cml_id, sublink_id
WITH NO DATA;

-- Every hour, refresh the window from 2 days ago up to 1 hour ago.
-- if_not_exists turns a repeated registration into a notice instead of an error.
SELECT add_continuous_aggregate_policy('cml_data_1h',
    start_offset      => INTERVAL '2 days',
    end_offset        => INTERVAL '1 hour',
    schedule_interval => INTERVAL '1 hour',
    if_not_exists     => true
);
"
```

**3. Backfill historical data (one-time)**

```bash
docker compose exec database psql -U myuser -d mydatabase -c "
CALL refresh_continuous_aggregate('cml_data_1h', NULL, NULL);
"
```

This may take a few seconds or considerably longer, depending on how much data
is present. After it completes, the refresh policy keeps the view up to date
automatically.
33 changes: 32 additions & 1 deletion database/init.sql
Original file line number Diff line number Diff line change
Expand Up @@ -81,4 +81,35 @@ SELECT create_hypertable('cml_data', 'time');

-- Index is created by the archive_loader service after bulk data load (faster COPY).
-- If no archive data is loaded, create it manually:
-- CREATE INDEX idx_cml_data_cml_id ON cml_data (cml_id, time DESC);
-- CREATE INDEX idx_cml_data_cml_id ON cml_data (cml_id, time DESC);

-- ---------------------------------------------------------------------------
-- cml_data_1h: hourly roll-up of cml_data for queries over long time ranges.
--
-- Each row holds the min / max / avg of rsl and tsl for one
-- (bucket, cml_id, sublink_id) combination. Grafana and the webserver switch
-- to this view automatically once the requested range exceeds 3 days, which
-- cuts the scanned row count by roughly 360x (10-second raw samples collapsed
-- into 1-hour buckets).
--
-- The view is created empty (WITH NO DATA); it is filled by the refresh
-- policy below or by an explicit refresh_continuous_aggregate() call.
-- ---------------------------------------------------------------------------
CREATE MATERIALIZED VIEW cml_data_1h
WITH (timescaledb.continuous) AS
SELECT
    time_bucket('1 hour', time) AS bucket,
    cml_id,
    sublink_id,
    MIN(rsl) AS rsl_min,
    MAX(rsl) AS rsl_max,
    AVG(rsl) AS rsl_avg,
    MIN(tsl) AS tsl_min,
    MAX(tsl) AS tsl_max,
    AVG(tsl) AS tsl_avg
FROM cml_data
GROUP BY
    bucket,
    cml_id,
    sublink_id
WITH NO DATA;

-- Refresh policy: run once per hour over the window from 2 days ago up to
-- 1 hour ago. Stopping 1 hour short of "now" keeps in-progress buckets from
-- being materialised prematurely; very recent data is meant to be read from
-- raw cml_data instead.
-- NOTE(review): whether the view itself also serves not-yet-materialised rows
-- depends on timescaledb.materialized_only, which is left at the server
-- default here -- confirm against the deployed TimescaleDB version.
SELECT add_continuous_aggregate_policy(
    'cml_data_1h',
    start_offset      => INTERVAL '2 days',
    end_offset        => INTERVAL '1 hour',
    schedule_interval => INTERVAL '1 hour'
);
9 changes: 9 additions & 0 deletions database/init_archive_data.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,3 +84,12 @@ EOSQL

echo "Archive data successfully loaded!"
# Note: cml_stats is populated by the parser's background stats thread on startup.

# Populate the cml_data_1h continuous aggregate right after the archive load,
# so Grafana and the webserver can serve pre-aggregated data for large time
# ranges immediately instead of scanning the full raw cml_data table.
printf '%s\n' "Refreshing 1h continuous aggregate (cml_data_1h)..."
# NULL, NULL leaves both ends of the refresh window open (whole time range).
# $PSQL_FLAGS is defined earlier in this script; it is left unquoted,
# presumably so it word-splits into separate psql arguments -- verify there.
psql $PSQL_FLAGS <<-EOSQL
CALL refresh_continuous_aggregate('cml_data_1h', NULL, NULL);
EOSQL
printf '%s\n' "Continuous aggregate refresh complete."
196 changes: 196 additions & 0 deletions grafana/provisioning/dashboards/definitions/cml-archive.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
{
"id": null,
"uid": "cml-archive",
"title": "CML Archive",
"tags": [],
"timezone": "browser",
"schemaVersion": 36,
"version": 1,
"refresh": "",
"time": {
"from": "now-1M",
"to": "now"
},
"panels": [
{
"id": 1,
"title": "Active Sublinks per Hour",
"type": "timeseries",
"datasource": {
"type": "grafana-postgresql-datasource",
"uid": "PostgreSQL"
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 0
},
"targets": [
{
"datasource": {
"type": "grafana-postgresql-datasource",
"uid": "PostgreSQL"
},
"format": "time_series",
"rawQuery": true,
"rawSql": "SELECT\n bucket AS \"time\",\n 'sublinks' AS metric,\n COUNT(*) AS value\nFROM cml_data_1h\nWHERE bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nGROUP BY bucket\nORDER BY 1 ASC",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "blue",
"mode": "fixed"
},
"custom": {
"drawStyle": "bars",
"barAlignment": 0,
"lineWidth": 1,
"fillOpacity": 60,
"gradientMode": "none",
"spanNulls": false,
"showPoints": "never",
"stacking": {
"mode": "none",
"group": "A"
},
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "Sublinks",
"axisPlacement": "auto",
"scaleDistribution": {
"type": "linear"
},
"hideFrom": {
"tooltip": false,
"viz": false,
"legend": false
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short",
"decimals": 0,
"displayName": "Active sublinks"
},
"overrides": []
},
"options": {
"tooltip": {
"mode": "single",
"sort": "none"
},
"legend": {
"displayMode": "hidden",
"placement": "bottom"
}
}
},
{
"id": 2,
"title": "Approximate Data Points per Hour",
"type": "timeseries",
"datasource": {
"type": "grafana-postgresql-datasource",
"uid": "PostgreSQL"
},
"gridPos": {
"h": 9,
"w": 24,
"x": 0,
"y": 9
},
"targets": [
{
"datasource": {
"type": "grafana-postgresql-datasource",
"uid": "PostgreSQL"
},
"format": "time_series",
"rawQuery": true,
"rawSql": "SELECT\n bucket AS \"time\",\n 'data points' AS metric,\n COUNT(*) * 360 AS value\nFROM cml_data_1h\nWHERE bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nGROUP BY bucket\nORDER BY 1 ASC",
"refId": "A"
}
],
"fieldConfig": {
"defaults": {
"color": {
"fixedColor": "semi-dark-green",
"mode": "fixed"
},
"custom": {
"drawStyle": "bars",
"barAlignment": 0,
"lineWidth": 1,
"fillOpacity": 60,
"gradientMode": "none",
"spanNulls": false,
"showPoints": "never",
"stacking": {
"mode": "none",
"group": "A"
},
"axisCenteredZero": false,
"axisColorMode": "text",
"axisLabel": "Data points",
"axisPlacement": "auto",
"scaleDistribution": {
"type": "linear"
},
"hideFrom": {
"tooltip": false,
"viz": false,
"legend": false
},
"thresholdsStyle": {
"mode": "off"
}
},
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
}
]
},
"unit": "short",
"decimals": 0,
"displayName": "Approx. data points"
},
"overrides": []
},
"options": {
"tooltip": {
"mode": "single",
"sort": "none"
},
"legend": {
"displayMode": "hidden",
"placement": "bottom"
}
}
}
],
"templating": {
"list": []
},
"annotations": {
"list": []
}
}
Loading
Loading