From a88ebb2781cca9a71506b352a3dcd81cf0b38242 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 25 Mar 2026 17:33:50 +0100 Subject: [PATCH 1/5] Add 1h pre-aggregated data and improve real-time dashboard - Add TimescaleDB continuous aggregate cml_data_1h (1h min/max/avg) - Remove Altair time-series from real-time route; keep for archive only - Grafana: show 1h min/max band always, avg/raw switching on zoom level - Grafana: per-sublink colors, interval selector (Auto/Raw) --- database/init.sql | 33 +- database/init_archive_data.sh | 9 + .../dashboards/definitions/cml-realtime.json | 549 ++++++++++++++++-- webserver/main.py | 76 --- 4 files changed, 550 insertions(+), 117 deletions(-) diff --git a/database/init.sql b/database/init.sql index 3483d87..ce79455 100644 --- a/database/init.sql +++ b/database/init.sql @@ -81,4 +81,35 @@ SELECT create_hypertable('cml_data', 'time'); -- Index is created by the archive_loader service after bulk data load (faster COPY). -- If no archive data is loaded, create it manually: --- CREATE INDEX idx_cml_data_cml_id ON cml_data (cml_id, time DESC); \ No newline at end of file +-- CREATE INDEX idx_cml_data_cml_id ON cml_data (cml_id, time DESC); + +-- --------------------------------------------------------------------------- +-- 1-hour continuous aggregate for fast queries over large time ranges. +-- Grafana and the webserver automatically switch to this view when the +-- requested time range exceeds 3 days, reducing the scanned row count +-- by ~360x (10-second raw data → 1-hour buckets). 
+-- --------------------------------------------------------------------------- +CREATE MATERIALIZED VIEW cml_data_1h +WITH (timescaledb.continuous, timescaledb.materialized_only = false) AS +SELECT + time_bucket('1 hour', time) AS bucket, + cml_id, + sublink_id, + MIN(rsl) AS rsl_min, + MAX(rsl) AS rsl_max, + AVG(rsl) AS rsl_avg, + MIN(tsl) AS tsl_min, + MAX(tsl) AS tsl_max, + AVG(tsl) AS tsl_avg +FROM cml_data +GROUP BY bucket, cml_id, sublink_id +WITH NO DATA; + +-- Automatically refresh every hour, covering up to 2 days of history. +-- The 1-hour end_offset prevents partial (in-progress) buckets from being +-- materialised prematurely; materialized_only = false (the default only before TimescaleDB 2.13) makes very recent data read through to raw cml_data. +SELECT add_continuous_aggregate_policy('cml_data_1h', + start_offset => INTERVAL '2 days', + end_offset => INTERVAL '1 hour', + schedule_interval => INTERVAL '1 hour' +); \ No newline at end of file diff --git a/database/init_archive_data.sh b/database/init_archive_data.sh index 9b09177..f4b60f6 100755 --- a/database/init_archive_data.sh +++ b/database/init_archive_data.sh @@ -84,3 +84,12 @@ EOSQL echo "Archive data successfully loaded!" # Note: cml_stats is populated by the parser's background stats thread on startup. + +# Refresh the 1-hour continuous aggregate so that Grafana and the webserver can +# immediately serve pre-aggregated data for large time ranges without scanning +# the full raw cml_data table. +echo "Refreshing 1h continuous aggregate (cml_data_1h)..." +psql $PSQL_FLAGS <<-EOSQL + CALL refresh_continuous_aggregate('cml_data_1h', NULL, NULL); +EOSQL +echo "Continuous aggregate refresh complete." 
diff --git a/grafana/provisioning/dashboards/definitions/cml-realtime.json b/grafana/provisioning/dashboards/definitions/cml-realtime.json index 6059cf5..831d2ca 100644 --- a/grafana/provisioning/dashboards/definitions/cml-realtime.json +++ b/grafana/provisioning/dashboards/definitions/cml-realtime.json @@ -101,7 +101,7 @@ }, "lineInterpolation": "linear", "lineWidth": 2, - "pointSize": 5, + "pointSize": 4, "scaleDistribution": { "type": "linear" }, @@ -127,7 +127,224 @@ }, "unit": "dBm" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "sublink_1 min" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.pointSize", + "value": 0 + }, + { + "id": "custom.hideFrom", + "value": { + "tooltip": true, + "viz": false, + "legend": true + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "sublink_2 min" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-orange", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.pointSize", + "value": 0 + }, + { + "id": "custom.hideFrom", + "value": { + "tooltip": true, + "viz": false, + "legend": true + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "sublink_1 max" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 20 + }, + { + "id": "custom.fillBelowTo", + "value": "sublink_1 min" + }, + { + "id": "custom.hideFrom", + "value": { + "tooltip": true, + "viz": false, + "legend": true + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "sublink_2 max" + }, + "properties": [ + { + "id": "color", + "value": { 
+ "fixedColor": "semi-dark-orange", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 20 + }, + { + "id": "custom.fillBelowTo", + "value": "sublink_2 min" + }, + { + "id": "custom.hideFrom", + "value": { + "tooltip": true, + "viz": false, + "legend": true + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "sublink_1 avg" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "sublink_2 avg" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-orange", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "sublink_1" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "sublink_2" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-orange", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, @@ -144,7 +361,7 @@ "showLegend": true }, "tooltip": { - "mode": "single", + "mode": "multi", "sort": "none" } }, @@ -156,8 +373,38 @@ }, "format": "time_series", "rawQuery": true, - "rawSql": "SELECT\n time AS \"time\",\n sublink_id AS \"metric\",\n rsl AS \"value\"\nFROM cml_data\nWHERE cml_id = '${cml_id}'\n AND '${aggregation}' = 'RAW'\n AND time >= $__timeFrom()::timestamptz\n AND time <= $__timeTo()::timestamptz\nUNION ALL\nSELECT\n time_bucket((CASE WHEN '${interval}' = 'auto' THEN (${__interval_ms} || ' milliseconds') ELSE '${interval}' END)::interval, time) AS \"time\",\n sublink_id AS \"metric\",\n CASE\n WHEN '${aggregation}' = 'MEDIAN' THEN 
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY rsl)\n WHEN '${aggregation}' = 'AVG' THEN AVG(rsl)\n WHEN '${aggregation}' = 'MIN' THEN MIN(rsl)\n WHEN '${aggregation}' = 'MAX' THEN MAX(rsl)\n WHEN '${aggregation}' = 'STDDEV' THEN STDDEV(rsl)\n END AS \"value\"\nFROM cml_data\nWHERE cml_id = '${cml_id}'\n AND '${aggregation}' <> 'RAW'\n AND time >= $__timeFrom()::timestamptz\n AND time <= $__timeTo()::timestamptz\nGROUP BY time_bucket((CASE WHEN '${interval}' = 'auto' THEN (${__interval_ms} || ' milliseconds') ELSE '${interval}' END)::interval, time), sublink_id\nORDER BY \"time\" ASC", + "rawSql": "SELECT\n bucket AS \"time\",\n sublink_id || ' min' AS metric,\n rsl_min AS value\nFROM cml_data_1h\nWHERE cml_id = '${cml_id}'\n AND bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nORDER BY 1 ASC", "refId": "A" + }, + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PostgreSQL" + }, + "format": "time_series", + "rawQuery": true, + "rawSql": "SELECT\n bucket AS \"time\",\n sublink_id || ' max' AS metric,\n rsl_max AS value\nFROM cml_data_1h\nWHERE cml_id = '${cml_id}'\n AND bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nORDER BY 1 ASC", + "refId": "B" + }, + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PostgreSQL" + }, + "format": "time_series", + "rawQuery": true, + "rawSql": "SELECT\n bucket AS \"time\",\n sublink_id || ' avg' AS metric,\n rsl_avg AS value\nFROM cml_data_1h\nWHERE cml_id = '${cml_id}'\n AND '${interval}' = 'auto'\n AND EXTRACT(EPOCH FROM ($__timeTo()::timestamptz - $__timeFrom()::timestamptz)) > 259200\n AND bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nORDER BY 1 ASC", + "refId": "C" + }, + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PostgreSQL" + }, + "format": "time_series", + "rawQuery": true, + "rawSql": "SELECT\n time AS \"time\",\n sublink_id AS metric,\n rsl AS value\nFROM 
cml_data\nWHERE cml_id = '${cml_id}'\n AND (\n ('${interval}' = 'auto' AND EXTRACT(EPOCH FROM ($__timeTo()::timestamptz - $__timeFrom()::timestamptz)) <= 259200)\n OR '${interval}' = 'raw'\n )\n AND time >= $__timeFrom()::timestamptz\n AND time <= $__timeTo()::timestamptz\nORDER BY 1 ASC", + "refId": "D" } ], "title": "CML Time Series - Received Signal Level", @@ -189,7 +436,7 @@ }, "lineInterpolation": "linear", "lineWidth": 2, - "pointSize": 5, + "pointSize": 4, "scaleDistribution": { "type": "linear" }, @@ -215,7 +462,224 @@ }, "unit": "dB" }, - "overrides": [] + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "sublink_1 min" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.pointSize", + "value": 0 + }, + { + "id": "custom.hideFrom", + "value": { + "tooltip": true, + "viz": false, + "legend": true + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "sublink_2 min" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-orange", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 0 + }, + { + "id": "custom.pointSize", + "value": 0 + }, + { + "id": "custom.hideFrom", + "value": { + "tooltip": true, + "viz": false, + "legend": true + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "sublink_1 max" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 20 + }, + { + "id": "custom.fillBelowTo", + "value": "sublink_1 min" + }, + { + "id": "custom.hideFrom", + "value": { + "tooltip": true, + "viz": false, + "legend": true + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "sublink_2 max" 
+ }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-orange", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 0 + }, + { + "id": "custom.fillOpacity", + "value": 20 + }, + { + "id": "custom.fillBelowTo", + "value": "sublink_2 min" + }, + { + "id": "custom.hideFrom", + "value": { + "tooltip": true, + "viz": false, + "legend": true + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "sublink_1 avg" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "sublink_2 avg" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-orange", + "mode": "fixed" + } + }, + { + "id": "custom.lineWidth", + "value": 2 + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "sublink_1" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "blue", + "mode": "fixed" + } + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "sublink_2" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "semi-dark-orange", + "mode": "fixed" + } + } + ] + } + ] }, "gridPos": { "h": 8, @@ -232,7 +696,7 @@ "showLegend": true }, "tooltip": { - "mode": "single", + "mode": "multi", "sort": "none" } }, @@ -244,8 +708,38 @@ }, "format": "time_series", "rawQuery": true, - "rawSql": "SELECT\n time AS \"time\",\n sublink_id AS \"metric\",\n tsl AS \"value\"\nFROM cml_data\nWHERE cml_id = '${cml_id}'\n AND '${aggregation}' = 'RAW'\n AND time >= $__timeFrom()::timestamptz\n AND time <= $__timeTo()::timestamptz\nUNION ALL\nSELECT\n time_bucket((CASE WHEN '${interval}' = 'auto' THEN (${__interval_ms} || ' milliseconds') ELSE '${interval}' END)::interval, time) AS \"time\",\n sublink_id AS \"metric\",\n CASE\n 
WHEN '${aggregation}' = 'MEDIAN' THEN PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY tsl)\n WHEN '${aggregation}' = 'AVG' THEN AVG(tsl)\n WHEN '${aggregation}' = 'MIN' THEN MIN(tsl)\n WHEN '${aggregation}' = 'MAX' THEN MAX(tsl)\n WHEN '${aggregation}' = 'STDDEV' THEN STDDEV(tsl)\n END AS \"value\"\nFROM cml_data\nWHERE cml_id = '${cml_id}'\n AND '${aggregation}' <> 'RAW'\n AND time >= $__timeFrom()::timestamptz\n AND time <= $__timeTo()::timestamptz\nGROUP BY time_bucket((CASE WHEN '${interval}' = 'auto' THEN (${__interval_ms} || ' milliseconds') ELSE '${interval}' END)::interval, time), sublink_id\nORDER BY \"time\" ASC", + "rawSql": "SELECT\n bucket AS \"time\",\n sublink_id || ' min' AS metric,\n tsl_min AS value\nFROM cml_data_1h\nWHERE cml_id = '${cml_id}'\n AND bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nORDER BY 1 ASC", "refId": "A" + }, + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PostgreSQL" + }, + "format": "time_series", + "rawQuery": true, + "rawSql": "SELECT\n bucket AS \"time\",\n sublink_id || ' max' AS metric,\n tsl_max AS value\nFROM cml_data_1h\nWHERE cml_id = '${cml_id}'\n AND bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nORDER BY 1 ASC", + "refId": "B" + }, + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PostgreSQL" + }, + "format": "time_series", + "rawQuery": true, + "rawSql": "SELECT\n bucket AS \"time\",\n sublink_id || ' avg' AS metric,\n tsl_avg AS value\nFROM cml_data_1h\nWHERE cml_id = '${cml_id}'\n AND '${interval}' = 'auto'\n AND EXTRACT(EPOCH FROM ($__timeTo()::timestamptz - $__timeFrom()::timestamptz)) > 259200\n AND bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nORDER BY 1 ASC", + "refId": "C" + }, + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PostgreSQL" + }, + "format": "time_series", + "rawQuery": true, + "rawSql": "SELECT\n time AS \"time\",\n 
sublink_id AS metric,\n tsl AS value\nFROM cml_data\nWHERE cml_id = '${cml_id}'\n AND (\n ('${interval}' = 'auto' AND EXTRACT(EPOCH FROM ($__timeTo()::timestamptz - $__timeFrom()::timestamptz)) <= 259200)\n OR '${interval}' = 'raw'\n )\n AND time >= $__timeFrom()::timestamptz\n AND time <= $__timeTo()::timestamptz\nORDER BY 1 ASC", + "refId": "D" } ], "title": "CML Time Series - Transmitted Signal Level", @@ -402,8 +896,8 @@ { "current": { "selected": true, - "text": "1min", - "value": "1 minute" + "text": "Auto", + "value": "auto" }, "description": "Time aggregation interval for downsampling data", "hide": 0, @@ -413,42 +907,17 @@ "name": "interval", "options": [ { - "selected": false, + "selected": true, "text": "Auto", "value": "auto" }, - { - "selected": true, - "text": "1min", - "value": "1 minute" - }, - { - "selected": false, - "text": "5min", - "value": "5 minutes" - }, - { - "selected": false, - "text": "15min", - "value": "15 minutes" - }, - { - "selected": false, - "text": "1h", - "value": "1 hour" - }, { "selected": false, - "text": "6h", - "value": "6 hours" - }, - { - "selected": false, - "text": "1d", - "value": "1 day" + "text": "Raw", + "value": "raw" } ], - "query": "Auto : auto,1min : 1 minute,5min : 5 minutes,15min : 15 minutes,1h : 1 hour,6h : 6 hours,1d : 1 day", + "query": "Auto : auto,Raw : raw", "queryValue": "", "skipUrlSync": true, "type": "custom" @@ -460,7 +929,7 @@ "value": "AVG" }, "description": "Aggregation function for downsampling", - "hide": 0, + "hide": 2, "includeAll": false, "label": "Aggregation", "multi": false, diff --git a/webserver/main.py b/webserver/main.py index b0487d0..567ffb1 100644 --- a/webserver/main.py +++ b/webserver/main.py @@ -280,80 +280,18 @@ def get_available_cmls(): return [] -def generate_time_series_plot(cml_id, sublink_id="sublink_1", hours=168): - """Generate a time series plot for a specific CML (last 7 days by default)""" - try: - conn = get_db_connection() - if not conn: - return None - - # Query 
data for the last 7 days (168 hours) relative to the latest data point - query = """ - SELECT time, rsl - FROM cml_data - WHERE cml_id = %s AND sublink_id = %s - AND time >= (SELECT MAX(time) FROM cml_data WHERE cml_id = %s) - INTERVAL '7 days' - ORDER BY time - """ - df = pd.read_sql_query(query, conn, params=(cml_id, sublink_id, cml_id)) - conn.close() - - if df.empty: - return None - - # Ensure Altair uses the default (light) theme and create plot with light styling - try: - alt.themes.enable("default") - except Exception: - pass - - df["time"] = pd.to_datetime(df["time"]) - chart = ( - alt.Chart(df) - .mark_line(color="#1f77b4", point=True) - .encode(x="time:T", y="rsl:Q", tooltip=["time:T", "rsl:Q"]) - .properties( - width=800, height=400, title=f"Received Signal Level - CML {cml_id}" - ) - .configure_view( - stroke="transparent", - ) - .configure_title(fontSize=16, anchor="start", color="#111") - .configure_axis(labelColor="#333", titleColor="#333", gridColor="#e6e6e6") - .configure_legend(labelColor="#333", titleColor="#333") - .configure_background("#ffffff") - .interactive() - ) - - chart_html = chart.to_html() - # Ensure the embedded HTML uses a light background when inserted into pages - try: - if "" in chart_html: - inject = "" - chart_html = chart_html.replace("", inject + "") - except Exception: - pass - - return chart_html - except Exception as e: - print(f"Error generating time series plot: {e}") - return None - - @app.route("/realtime") def realtime(): """Real-time data page""" map_html = generate_cml_map() cmls = get_available_cmls() default_cml = cmls[0] if cmls else None - plot_html = generate_time_series_plot(default_cml) if default_cml else None return render_template( "realtime.html", map_html=map_html, cmls=cmls, selected_cml=default_cml, - plot_html=plot_html, ) @@ -465,20 +403,6 @@ def api_cml_map(): return jsonify([]) -@app.route("/api/timeseries/") -def api_timeseries(cml_id): - """API endpoint for fetching time series data""" - hours = 
request.args.get("hours", 24, type=int) - plot_html = generate_time_series_plot(cml_id, hours=hours) - if not plot_html: - return jsonify( - { - "html": "
No data available for this CML
" - } - ) - return jsonify({"html": plot_html}) - - @app.route("/api/cml-stats") def api_cml_stats(): """API endpoint for fetching per-CML statistics for data quality visualization""" From 2bfa86b62913537fe7776078232360f3e7cf1020 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 25 Mar 2026 17:57:16 +0100 Subject: [PATCH 2/5] Avoid full table scans on cml_data for stats queries - Replace COUNT(*) with approximate_row_count() for record totals - Replace MIN/MAX(time) FROM cml_data with MIN/MAX(bucket) FROM cml_data_1h - Replace per-CML COUNT GROUP BY with cml_data_1h aggregate estimate --- webserver/main.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/webserver/main.py b/webserver/main.py index 567ffb1..f4b5dca 100644 --- a/webserver/main.py +++ b/webserver/main.py @@ -76,12 +76,12 @@ def overview(): cur.execute("SELECT COUNT(DISTINCT cml_id) FROM cml_metadata") stats["total_cmls"] = cur.fetchone()[0] - # Get count of data records - cur.execute("SELECT COUNT(*) FROM cml_data") + # Get approximate count of data records (fast on large tables) + cur.execute("SELECT approximate_row_count('cml_data')") stats["total_records"] = cur.fetchone()[0] - # Get data date range - cur.execute("SELECT MIN(time), MAX(time) FROM cml_data") + # Get data date range (from 1h aggregate — fast, indexed) + cur.execute("SELECT MIN(bucket), MAX(bucket) FROM cml_data_1h") result = cur.fetchone() if result: stats["data_start_date"] = result[0] @@ -430,7 +430,7 @@ def api_cml_stats(): LEFT JOIN ( SELECT cml_id, rsl FROM cml_data - WHERE time >= (SELECT MAX(time) FROM cml_data) - INTERVAL '60 minutes' + WHERE time >= (SELECT MAX(bucket) FROM cml_data_1h) - INTERVAL '60 minutes' ) cd ON cs.cml_id = cd.cml_id GROUP BY cs.cml_id, cs.total_records, cs.valid_records, cs.null_records, cs.completeness_percent, cs.min_rsl, cs.max_rsl, cs.mean_rsl, @@ -473,7 +473,7 @@ def api_data_time_range(): return jsonify({"earliest": None, "latest": 
None}) cur = conn.cursor() - cur.execute("SELECT MIN(time), MAX(time) FROM cml_data") + cur.execute("SELECT MIN(bucket), MAX(bucket) FROM cml_data_1h") result = cur.fetchone() cur.close() conn.close() @@ -509,28 +509,28 @@ def get_archive_statistics(): cur = conn.cursor() - # Total records - cur.execute("SELECT COUNT(*) FROM cml_data") + # Total records (approximate, fast on large tables) + cur.execute("SELECT approximate_row_count('cml_data')") stats["total_records"] = cur.fetchone()[0] # CML count cur.execute("SELECT COUNT(DISTINCT cml_id) FROM cml_metadata") stats["cml_count"] = cur.fetchone()[0] - # Date range - cur.execute("SELECT MIN(time), MAX(time) FROM cml_data") + # Date range (from 1h aggregate — fast, indexed) + cur.execute("SELECT MIN(bucket), MAX(bucket) FROM cml_data_1h") result = cur.fetchone() if result: stats["date_range"]["start"] = result[0] stats["date_range"]["end"] = result[1] - # Records per CML + # Records per CML (from 1h aggregate — fast, no full table scan) cur.execute( """ - SELECT cml_id, COUNT(*) as count - FROM cml_data - GROUP BY cml_id - ORDER BY count DESC + SELECT cml_id, COUNT(*) * 360 as count + FROM cml_data_1h + GROUP BY cml_id + ORDER BY count DESC LIMIT 10 """ ) From c205f3d40a5a6c72240a52910e84848d5dbaee55 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 25 Mar 2026 18:22:51 +0100 Subject: [PATCH 3/5] Replace Altair archive chart with embedded Grafana dashboard - Remove generate_archive_charts() and pandas/altair imports - Add cml-archive Grafana dashboard (active sublinks + data points per hour) - Simplify archive page: remove header, metric cards and top-CML table - Add compact summary bar with record count, CML count, date range --- .../dashboards/definitions/cml-archive.json | 196 ++++++++++++++++++ webserver/main.py | 62 +----- webserver/templates/archive.html | 193 +++-------------- 3 files changed, 225 insertions(+), 226 deletions(-) create mode 100644 
grafana/provisioning/dashboards/definitions/cml-archive.json diff --git a/grafana/provisioning/dashboards/definitions/cml-archive.json b/grafana/provisioning/dashboards/definitions/cml-archive.json new file mode 100644 index 0000000..b76ad5c --- /dev/null +++ b/grafana/provisioning/dashboards/definitions/cml-archive.json @@ -0,0 +1,196 @@ +{ + "id": null, + "uid": "cml-archive", + "title": "CML Archive", + "tags": [], + "timezone": "browser", + "schemaVersion": 36, + "version": 1, + "refresh": "", + "time": { + "from": "now-1M", + "to": "now" + }, + "panels": [ + { + "id": 1, + "title": "Active Sublinks per Hour", + "type": "timeseries", + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PostgreSQL" + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 0 + }, + "targets": [ + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PostgreSQL" + }, + "format": "time_series", + "rawQuery": true, + "rawSql": "SELECT\n bucket AS \"time\",\n 'sublinks' AS metric,\n COUNT(*) AS value\nFROM cml_data_1h\nWHERE bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nGROUP BY bucket\nORDER BY 1 ASC", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "blue", + "mode": "fixed" + }, + "custom": { + "drawStyle": "bars", + "barAlignment": 0, + "lineWidth": 1, + "fillOpacity": 60, + "gradientMode": "none", + "spanNulls": false, + "showPoints": "never", + "stacking": { + "mode": "none", + "group": "A" + }, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Sublinks", + "axisPlacement": "auto", + "scaleDistribution": { + "type": "linear" + }, + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short", + "decimals": 0, + "displayName": "Active sublinks" + }, + 
"overrides": [] + }, + "options": { + "tooltip": { + "mode": "single", + "sort": "none" + }, + "legend": { + "displayMode": "hidden", + "placement": "bottom" + } + } + }, + { + "id": 2, + "title": "Approximate Data Points per Hour", + "type": "timeseries", + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PostgreSQL" + }, + "gridPos": { + "h": 9, + "w": 24, + "x": 0, + "y": 9 + }, + "targets": [ + { + "datasource": { + "type": "grafana-postgresql-datasource", + "uid": "PostgreSQL" + }, + "format": "time_series", + "rawQuery": true, + "rawSql": "SELECT\n bucket AS \"time\",\n 'data points' AS metric,\n COUNT(*) * 360 AS value\nFROM cml_data_1h\nWHERE bucket >= $__timeFrom()::timestamptz\n AND bucket <= $__timeTo()::timestamptz\nGROUP BY bucket\nORDER BY 1 ASC", + "refId": "A" + } + ], + "fieldConfig": { + "defaults": { + "color": { + "fixedColor": "semi-dark-green", + "mode": "fixed" + }, + "custom": { + "drawStyle": "bars", + "barAlignment": 0, + "lineWidth": 1, + "fillOpacity": 60, + "gradientMode": "none", + "spanNulls": false, + "showPoints": "never", + "stacking": { + "mode": "none", + "group": "A" + }, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "Data points", + "axisPlacement": "auto", + "scaleDistribution": { + "type": "linear" + }, + "hideFrom": { + "tooltip": false, + "viz": false, + "legend": false + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short", + "decimals": 0, + "displayName": "Approx. 
data points" + }, + "overrides": [] + }, + "options": { + "tooltip": { + "mode": "single", + "sort": "none" + }, + "legend": { + "displayMode": "hidden", + "placement": "bottom" + } + } + } + ], + "templating": { + "list": [] + }, + "annotations": { + "list": [] + } +} \ No newline at end of file diff --git a/webserver/main.py b/webserver/main.py index f4b5dca..9cfb53b 100644 --- a/webserver/main.py +++ b/webserver/main.py @@ -2,9 +2,7 @@ import time import math import psycopg2 -import pandas as pd import folium -import altair as alt import requests from flask import Flask, render_template, request, jsonify, Response, redirect from datetime import datetime, timedelta @@ -498,8 +496,6 @@ def get_archive_statistics(): "total_records": 0, "cml_count": 0, "date_range": {"start": None, "end": None}, - "records_per_cml": [], - "uptime_stats": {"online": 0, "offline": 0}, } try: @@ -524,20 +520,6 @@ def get_archive_statistics(): stats["date_range"]["start"] = result[0] stats["date_range"]["end"] = result[1] - # Records per CML (from 1h aggregate — fast, no full table scan) - cur.execute( - """ - SELECT cml_id, COUNT(*) * 360 as count - FROM cml_data_1h - GROUP BY cml_id - ORDER BY count DESC - LIMIT 10 - """ - ) - stats["records_per_cml"] = [ - {"cml_id": row[0], "count": row[1]} for row in cur.fetchall() - ] - cur.close() conn.close() except Exception as e: @@ -546,53 +528,11 @@ def get_archive_statistics(): return stats -def generate_archive_charts(): - """Generate charts for archive statistics""" - try: - conn = get_db_connection() - if not conn: - return {"data_distribution": None} - - # Get data distribution by minute - query = """ - SELECT DATE_TRUNC('minute', time) as minute, COUNT(*) as count - FROM cml_data - GROUP BY DATE_TRUNC('minute', time) - ORDER BY minute - """ - df = pd.read_sql_query(query, conn) - conn.close() - - if df.empty: - return {"data_distribution": None} - - # Convert minute column to datetime for proper sorting - df["minute"] = 
pd.to_datetime(df["minute"]) - - # Create bar chart - chart = ( - alt.Chart(df) - .mark_bar() - .encode(x="minute:T", y="count:Q", tooltip=["minute:T", "count:Q"]) - .properties(width=900, height=400, title="Data Records per Minute") - .interactive() - ) - - return {"data_distribution": chart.to_html()} - except Exception as e: - print(f"Error generating archive charts: {e}") - return {"data_distribution": None} - - @app.route("/archive") def archive(): """Archive statistics page""" stats = get_archive_statistics() - charts = generate_archive_charts() - - return render_template( - "archive.html", stats=stats, chart_html=charts["data_distribution"] - ) + return render_template("archive.html", stats=stats) # ==================== DATA UPLOADS ROUTES ==================== diff --git a/webserver/templates/archive.html b/webserver/templates/archive.html index ae9bd8a..e350d35 100644 --- a/webserver/templates/archive.html +++ b/webserver/templates/archive.html @@ -4,179 +4,42 @@ {% block content %}
- -
-

- Data Archive -

-

Historical data analysis and statistics

-
- - -
-
-
-
{{ "{:,}".format(stats.total_records) }}
-
- Total Records -
- Records in archive -
- -
-
{{ stats.cml_count }}
-
- Active CMLs -
- Monitored links -
-
-
- {% if stats.date_range and stats.date_range.start and stats.date_range.end %} - {{ (stats.date_range.end - stats.date_range.start).days }} - {% else %} - 0 - {% endif %} -
-
- Days Archived -
- Data collection span -
-
-
- - -
-

- Archive Timeline -

- {% if stats.date_range and stats.date_range.start and stats.date_range.end %} -
-
-
-
-
- Archive - Start -
- {{ stats.date_range.start.strftime('%Y-%m-%d %H:%M:%S') }} -
-
-
-
-
- Latest - Record -
- {{ stats.date_range.end.strftime('%Y-%m-%d %H:%M:%S') }} -
-
-
-
-
-
-
-
-
- {% else %} -
- - No archived data available yet + +
+
+ + + ~{{ "{:,}".format(stats.total_records) }} records + + + + {{ stats.cml_count }} CMLs + + {% if stats.date_range and stats.date_range.start and stats.date_range.end %} + + + {{ (stats.date_range.end - stats.date_range.start).days }} days archived + + + {{ stats.date_range.start.strftime('%Y-%m-%d') }} + → + {{ stats.date_range.end.strftime('%Y-%m-%d') }} + + {% endif %}
- {% endif %}
- -
-

- Data Distribution -

- - {% if chart_html %} + +
-
- {{ chart_html | safe }} +
+
- {% else %} -
- - No data available to display charts -
- {% endif %}
- -
-

- Top 10 Most Active CMLs -

- - {% if stats.records_per_cml %} -
-
- - - - - - - - - - - {% set total = stats.total_records %} - {% for item in stats.records_per_cml %} - - - - - - - {% endfor %} - -
Rank - CML - ID - Records - Share
- - {{ loop.index }} - - - {{ item.cml_id }} - - - {{ "{:,}".format(item.count) }} - - -
-
- {% if total > 0 %}{{ "%.1f" | format((item.count / total) * 100) }}%{% else - %}N/A{% endif %} -
-
-
-
-
- {% else %} -
- - No CML data available -
- {% endif %} -
{% endblock %} \ No newline at end of file From 105e8015e2d4e99f44fbba0c345125c0167ba9af Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 25 Mar 2026 18:38:15 +0100 Subject: [PATCH 4/5] Add database migration guide for `cml_data_1h` continuous aggregate --- database/MIGRATION.md | 56 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 database/MIGRATION.md diff --git a/database/MIGRATION.md b/database/MIGRATION.md new file mode 100644 index 0000000..b899064 --- /dev/null +++ b/database/MIGRATION.md @@ -0,0 +1,56 @@ +# Database Migration Guide + +## `cml_data_1h` continuous aggregate + +**Branch:** `feature/performance-and-grafana-improvements` + +`init.sql` only runs on a fresh database volume, so when deploying this branch +to a machine that already has data you must apply the migration manually. + +### Steps + +**1. Pull and redeploy the application** + +```bash +git pull origin main +docker compose up -d --build +``` + +**2. Create the continuous aggregate** + +```bash +docker compose exec database psql -U myuser -d mydatabase -c " +CREATE MATERIALIZED VIEW cml_data_1h +WITH (timescaledb.continuous, timescaledb.materialized_only = false) AS +SELECT + time_bucket('1 hour', time) AS bucket, + cml_id, + sublink_id, + MIN(rsl) AS rsl_min, + MAX(rsl) AS rsl_max, + AVG(rsl) AS rsl_avg, + MIN(tsl) AS tsl_min, + MAX(tsl) AS tsl_max, + AVG(tsl) AS tsl_avg +FROM cml_data +GROUP BY bucket, cml_id, sublink_id +WITH NO DATA; + +SELECT add_continuous_aggregate_policy('cml_data_1h', + start_offset => INTERVAL '2 days', + end_offset => INTERVAL '1 hour', + schedule_interval => INTERVAL '1 hour' +); +" +``` + +**3. Backfill historical data (one-time)** + +```bash +docker compose exec database psql -U myuser -d mydatabase -c " +CALL refresh_continuous_aggregate('cml_data_1h', NULL, NULL); +" +``` + +This may take a few seconds depending on how much data is present. After it +completes, the refresh policy keeps the view up to date automatically. 
From 5067a8a37142488c63f7d911d20e80cb40db45e2 Mon Sep 17 00:00:00 2001 From: Christian Chwala Date: Wed, 25 Mar 2026 18:38:35 +0100 Subject: [PATCH 5/5] Remove unused dependencies from requirements and tests --- webserver/requirements.txt | 3 --- webserver/tests/test_api_cml_stats.py | 1 - 2 files changed, 4 deletions(-) diff --git a/webserver/requirements.txt b/webserver/requirements.txt index 7efb6d7..0111892 100644 --- a/webserver/requirements.txt +++ b/webserver/requirements.txt @@ -1,9 +1,6 @@ Flask==2.3.3 psycopg2-binary==2.9.7 -pandas==2.0.3 -numpy==1.24.3 folium==0.14.0 -altair==5.0.1 gunicorn==22.0.0 requests==2.31.0 diff --git a/webserver/tests/test_api_cml_stats.py b/webserver/tests/test_api_cml_stats.py index 5838380..36b85e7 100644 --- a/webserver/tests/test_api_cml_stats.py +++ b/webserver/tests/test_api_cml_stats.py @@ -6,7 +6,6 @@ # Ensure optional heavy imports won't fail at import time sys.modules.setdefault("folium", Mock()) -sys.modules.setdefault("altair", Mock()) sys.modules.setdefault("requests", Mock())