diff --git a/pipelines/snt_dhis2_formatting/reporting/snt_dhis2_formatting_report.ipynb b/pipelines/snt_dhis2_formatting/reporting/snt_dhis2_formatting_report.ipynb
index 8fdf547..eddb1c4 100644
--- a/pipelines/snt_dhis2_formatting/reporting/snt_dhis2_formatting_report.ipynb
+++ b/pipelines/snt_dhis2_formatting/reporting/snt_dhis2_formatting_report.ipynb
@@ -103,7 +103,8 @@
"required_packages <- c(\n",
" \"tidyverse\", \n",
" \"arrow\", \n",
- " \"sf\", \n",
+ " \"sf\",\n",
+ " \"IRdisplay\",\n",
" \"reticulate\",\n",
" \"patchwork\"\n",
") \n",
@@ -171,18 +172,6 @@
"ADM_2 <- toupper(config_json$SNT_CONFIG$DHIS2_ADMINISTRATION_2)"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "4b96fa16-25cc-4420-9ad8-332af4a59fdf",
- "metadata": {
- "vscode": {
- "languageId": "r"
- }
- },
- "outputs": [],
- "source": []
- },
{
"cell_type": "code",
"execution_count": null,
@@ -272,24 +261,6 @@
"printdim(shapes_data)"
]
},
- {
- "cell_type": "markdown",
- "id": "c881f748-e391-46c9-a36a-ed11c238a6ce",
- "metadata": {},
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "65ea60f5-99e9-46d1-89f0-03245d9efd0b",
- "metadata": {
- "vscode": {
- "languageId": "r"
- }
- },
- "outputs": [],
- "source": []
- },
{
"cell_type": "markdown",
"id": "e3d5b582-a38f-4ce0-a9a2-9a53ab5eb233",
@@ -847,326 +818,6 @@
"}\n"
]
},
- {
- "cell_type": "markdown",
- "id": "eb276692",
- "metadata": {},
- "source": [
- "## ⚠️ 3.2. Carte de la Population Désagrégée (spécifique au pays)\n",
- "Le code suivant est spécifique à chaque pays et repose sur une population désagrégée. "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "4d33724e",
- "metadata": {
- "vscode": {
- "languageId": "r"
- }
- },
- "outputs": [],
- "source": [
- "population_data_filtered <- population_data\n",
- "if (COUNTRY_CODE == \"NER\") {\n",
- " print(\"🇳🇪 Executing NER specific code ... \")\n",
- "\n",
- " IRdisplay::display_markdown(\"\n",
- " ### 🇳🇪 NER specific code \n",
- " Made ad hoc to allow comparison with data from other or previous analyses. Namely:\n",
- " * only year 2022 to 2024\n",
- " * specific palette (yellowish to brick red)\n",
- " * specific intervals\n",
- " * looks at **disaggregated** population <- this is sometimes contry-specific!\n",
- "\")\n",
- "\n",
- " # --- Filter data to keep only 2022-2024 ... ---\n",
- " years_to_keep <- 2022:2024\n",
- " population_data_filtered <- population_data |> filter(YEAR %in% years_to_keep)\n",
- "\n",
- " # --- Read data from SNT_metadata.json ---\n",
- " metadata_json <- tryCatch({ jsonlite::fromJSON(file.path(CONFIG_PATH, \"SNT_metadata.json\"))},\n",
- " error = function(e) {\n",
- " msg <- paste0(\"Error while loading metadata\", conditionMessage(e)) \n",
- " cat(msg) \n",
- " stop(msg) \n",
- " })\n",
- "\n",
- " # --- Assign population breaks from metadata ---\n",
- " value_breaks_tot <- jsonlite::fromJSON(metadata_json$POPULATION_TOTAL$SCALE)\n",
- " value_breaks_u5 <- jsonlite::fromJSON(metadata_json$POPULATION_U5$SCALE)\n",
- " value_breaks_fe <- jsonlite::fromJSON(metadata_json$POPULATION_PREGNANT$SCALE)\n",
- "\n",
- " # --- Define function to create dyanic labels based on breaks for pop category ---\n",
- " create_dynamic_labels <- function(breaks) {\n",
- " fmt <- function(x) {\n",
- " format(x / 1000, big.mark = \"'\", scientific = FALSE, trim = TRUE)\n",
- " }\n",
- " \n",
- " labels <- c(\n",
- " paste0(\"< \", fmt(breaks[1]), \"k\"), # First label\n",
- " paste0(fmt(breaks[-length(breaks)]), \" - \", fmt(breaks[-1]), \"k\"), # Middle\n",
- " paste0(\"> \", fmt(breaks[length(breaks)]), \"k\") # Last label\n",
- " ) \n",
- " return(labels)\n",
- " }\n",
- "\n",
- " # --- Create dynamic labels based on breaks ---\n",
- " labels_tot <- create_dynamic_labels(value_breaks_tot)\n",
- " labels_u5 <- create_dynamic_labels(value_breaks_u5)\n",
- " labels_fe <- create_dynamic_labels(value_breaks_fe)\n",
- "\n",
- "}"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "0fdb96a0-873d-4f85-9c34-23c89c204c30",
- "metadata": {
- "vscode": {
- "languageId": "r"
- }
- },
- "outputs": [],
- "source": [
- "NER_palette_population <- c(\n",
- " \"1\" = \"#fae6db\",\n",
- " \"2\" = \"#f1b195\",\n",
- " \"3\" = \"#ea7354\",\n",
- " \"4\" = \"#cc3f32\",\n",
- " \"5\" = \"#972620\"\n",
- ")\n"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "95892df7-e5b8-4d7a-bf96-88673e633370",
- "metadata": {},
- "source": [
- "### Population Totales"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "a0a196b8-2db5-478d-899a-48985d1735f0",
- "metadata": {
- "vscode": {
- "languageId": "r"
- }
- },
- "outputs": [],
- "source": [
- "if (COUNTRY_CODE == \"NER\") {\n",
- "\n",
- " # IMPORTNAT: palette vector MUST be RENAMED with the (dynamic) descriptive labels\n",
- "names(NER_palette_population) <- labels_tot\n",
- "\n",
- "plot <- population_data_filtered %>%\n",
- " mutate(\n",
- " CATEGORY_POPULATION = cut(\n",
- " POPULATION,\n",
- " breaks = c(0, value_breaks_tot, Inf),\n",
- " labels = labels_tot, \n",
- " right = TRUE,\n",
- " include.lowest = TRUE\n",
- " )\n",
- " ) %>% \n",
- " left_join(shapes_data, \n",
- " by = join_by(ADM1_NAME, ADM1_ID, ADM2_NAME, ADM2_ID)) %>% \n",
- " ggplot() +\n",
- " geom_sf(aes(geometry = geometry,\n",
- " fill = CATEGORY_POPULATION),\n",
- " color = \"black\",\n",
- " linewidth = 0.25, \n",
- " show.legend = TRUE\n",
- " ) +\n",
- " labs(\n",
- " title = \"Population totale par district sanitaire (DS)\",\n",
- " subtitle = \"Source: NMDR / DHIS2\"\n",
- " ) +\n",
- " scale_fill_manual(\n",
- " values = NER_palette_population, \n",
- " limits = labels_tot, \n",
- " drop = FALSE \n",
- " ) +\n",
- " facet_wrap(~YEAR, ncol = 3) +\n",
- " theme_void() +\n",
- " theme(\n",
- " plot.title = element_text(face = \"bold\"),\n",
- " plot.subtitle = element_text(margin = margin(5, 0, 20, 0)),\n",
- " legend.position = \"bottom\",\n",
- " legend.title = element_blank(),\n",
- " strip.text = element_text(face = \"bold\"),\n",
- " legend.key.height = unit(0.5, \"line\"),\n",
- " legend.margin = margin(10, 0, 0, 0)\n",
- " )\n",
- "\n",
- "print(plot)\n",
- "\n",
- "# Export to see better in high resolution\n",
- "ggsave(\n",
- " filename = file.path(REPORTING_NB_PATH, \"outputs\", \"figures\", paste0(COUNTRY_CODE, \"_choropleth_population_totals.png\")),\n",
- " width = 14,\n",
- " height = 8,\n",
- " dpi = 300\n",
- ")\n",
- "}\n"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "aca477aa-4d93-4a74-ad8c-32a30f85a552",
- "metadata": {},
- "source": [
- "### Population Femmes Enceintes (FE)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "9324a56b",
- "metadata": {
- "vscode": {
- "languageId": "r"
- }
- },
- "outputs": [],
- "source": [
- "if (COUNTRY_CODE == \"NER\") {\n",
- "\n",
- "names(NER_palette_population) <- labels_fe\n",
- "\n",
- "plot <- population_data_filtered %>%\n",
- " mutate(\n",
- " CATEGORY_POPULATION = cut(\n",
- " POPULATION_FE,\n",
- " breaks = c(0, value_breaks_fe, Inf),\n",
- " labels = labels_fe, \n",
- " right = TRUE,\n",
- " include.lowest = TRUE\n",
- " )\n",
- " ) %>% \n",
- " left_join(shapes_data, \n",
- " by = join_by(ADM1_NAME, ADM1_ID, ADM2_NAME, ADM2_ID)) %>% \n",
- " ggplot() +\n",
- " geom_sf(aes(geometry = geometry,\n",
- " fill = CATEGORY_POPULATION),\n",
- " color = \"black\",\n",
- " linewidth = 0.25, \n",
- " show.legend = TRUE\n",
- " ) +\n",
- " labs(\n",
- " title = \"Population des femmes enceintes par district sanitaire (DS)\",\n",
- " subtitle = \"Source: NMDR / DHIS2\"\n",
- " ) +\n",
- " scale_fill_manual(\n",
- " values = NER_palette_population, \n",
- " limits = labels_fe, \n",
- " drop = FALSE # Prevents dropping empty levels from legend\n",
- " ) +\n",
- " facet_wrap(~YEAR, ncol = 3) +\n",
- " theme_void() +\n",
- " theme(\n",
- " plot.title = element_text(face = \"bold\"),\n",
- " plot.subtitle = element_text(margin = margin(5, 0, 20, 0)),\n",
- " legend.position = \"bottom\",\n",
- " legend.title = element_blank(),\n",
- " strip.text = element_text(face = \"bold\"),\n",
- " legend.key.height = unit(0.5, \"line\"),\n",
- " legend.margin = margin(10, 0, 0, 0)\n",
- " )\n",
- "\n",
- "print(plot)\n",
- "\n",
- "# Export to see better in high resolution\n",
- "ggsave(\n",
- " filename = file.path(REPORTING_NB_PATH, \"outputs\", \"figures\", paste0(COUNTRY_CODE, \"_choropleth_population_fe.png\")),\n",
- " width = 14, \n",
- " height = 8,\n",
- " dpi = 300\n",
- ")\n",
- "\n",
- "}\n"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "bd5fe86d-591a-4f5a-bc42-58180a413d5d",
- "metadata": {},
- "source": [
- "### Population Enfants moins de 5 ans (U5)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "4046761f",
- "metadata": {
- "vscode": {
- "languageId": "r"
- }
- },
- "outputs": [],
- "source": [
- "if (COUNTRY_CODE == \"NER\") {\n",
- "\n",
- "names(NER_palette_population) <- labels_u5\n",
- "\n",
- "plot <- population_data_filtered %>%\n",
- " mutate(\n",
- " CATEGORY_POPULATION = cut(\n",
- " POPULATION_U5,\n",
- " breaks = c(0, value_breaks_u5, Inf),\n",
- " labels = labels_u5, \n",
- " right = TRUE,\n",
- " include.lowest = TRUE\n",
- " )\n",
- " ) %>% \n",
- " left_join(shapes_data, \n",
- " by = join_by(ADM1_NAME, ADM1_ID, ADM2_NAME, ADM2_ID)) %>% \n",
- " ggplot() +\n",
- " geom_sf(aes(geometry = geometry,\n",
- " fill = CATEGORY_POPULATION),\n",
- " color = \"black\",\n",
- " linewidth = 0.25, \n",
- " show.legend = TRUE\n",
- " ) +\n",
- " labs(\n",
- " title = \"Population des enfants de moins de 5 ans par district sanitaire (DS)\",\n",
- " subtitle = \"Source: NMDR / DHIS2\"\n",
- " ) +\n",
- " scale_fill_manual(\n",
- " values = NER_palette_population, \n",
- " limits = labels_u5, \n",
- " drop = FALSE \n",
- " ) +\n",
- " facet_wrap(~YEAR, ncol = 3) +\n",
- " theme_void() +\n",
- " theme(\n",
- " plot.title = element_text(face = \"bold\"),\n",
- " plot.subtitle = element_text(margin = margin(5, 0, 20, 0)),\n",
- " legend.position = \"bottom\",\n",
- " legend.title = element_blank(),\n",
- " strip.text = element_text(face = \"bold\"),\n",
- " legend.key.height = unit(0.5, \"line\"),\n",
- " legend.margin = margin(10, 0, 0, 0)\n",
- " )\n",
- "\n",
- "print(plot)\n",
- "\n",
- "# Export PNG\n",
- "ggsave(\n",
- " filename = file.path(REPORTING_NB_PATH, \"outputs\", \"figures\", paste0(COUNTRY_CODE, \"_choropleth_population_u5.png\")),\n",
- " width = 14, \n",
- " height = 8,\n",
- " dpi = 300\n",
- ")\n",
- "\n",
- "}"
- ]
- },
{
"cell_type": "markdown",
"id": "61e5ac12-c973-48e0-8c97-1af90e4b59a5",
@@ -1195,7 +846,7 @@
"outputs": [],
"source": [
"# hist(population_data$POPULATION)\n",
- "hist(population_data_filtered$POPULATION)"
+ "hist(population_data$POPULATION)"
]
},
{
@@ -1209,7 +860,7 @@
},
"outputs": [],
"source": [
- "ggplot(population_data_filtered) +\n",
+ "ggplot(population_data) +\n",
" geom_point(aes(x = POPULATION,\n",
" y = fct_reorder(ADM2_NAME, POPULATION),\n",
" color = factor(YEAR))\n",
@@ -1243,11 +894,20 @@
]
},
{
- "cell_type": "markdown",
- "id": "d6ab387a-cc9e-42b9-a634-12af21bef0f5",
- "metadata": {},
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "82ff591f",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
"source": [
- "#### Population Femmes Enceintes (FE)"
+ "# Availability flags for the disaggregated population columns;\n",
+ "# the FE and U5 sections further down only run when their flag is TRUE.\n",
+ "condition_pregnant_women <- is.element(\"POPULATION_FE\", names(population_data))\n",
+ "condition_u5_children <- is.element(\"POPULATION_U5\", names(population_data))"
]
},
{
@@ -1261,16 +921,19 @@
},
"outputs": [],
"source": [
- "# Wrap in if statement to avoid errors if POPULATION_FE is missing\n",
- "if (\"POPULATION_FE\" %in% names(population_data_filtered)) { \n",
- " hist(population_data_filtered$POPULATION_FE)\n",
+ "# If the \"POPULATION_FE\" disaggregation is available\n",
+ "\n",
+ "if (condition_pregnant_women) {\n",
+ "\n",
+ "    # Render the section heading as HTML (only when the condition is true)\n",
+ "    IRdisplay::display_html(\"<h4>Population Femmes Enceintes (FE)</h4>\")\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "4200afa2-e2f0-4876-9842-141b96f32fe8",
+ "id": "5f721048",
"metadata": {
"vscode": {
"languageId": "r"
@@ -1278,48 +941,43 @@
},
"outputs": [],
"source": [
- "if (\"POPULATION_FE\" %in% names(population_data_filtered)) { \n",
- " \n",
- "ggplot(population_data_filtered) +\n",
- " geom_point(aes(x = POPULATION_FE,\n",
- " y = fct_reorder(ADM2_NAME, POPULATION_FE),\n",
- " color = factor(YEAR))\n",
- " ) +\n",
- " facet_grid(rows = \"ADM1_NAME\", \n",
- " scale = \"free_y\", \n",
- " space = \"free_y\", \n",
- " switch = \"y\") +\n",
- " scale_x_continuous(breaks = c(0, 2e+04, 4e+04, 6e+04, 8e+05, 1e+06, 1.5e+06),\n",
- " labels = scales::comma) +\n",
- " scale_color_viridis_d(option = \"mako\", end = 0.8) +\n",
- " labs(\n",
- " # title = \"\"\n",
- " color = \"Année\") +\n",
- " theme_minimal() +\n",
- " theme(\n",
- " axis.text = element_text(size = 7),\n",
- " axis.title.x = element_text(size = 7),\n",
- " axis.title.y = element_blank(),\n",
- " strip.placement = \"outside\",\n",
- " panel.grid.minor.x = element_blank(),\n",
- " legend.position = \"bottom\"\n",
- " )\n",
- "\n",
- "} "
- ]
- },
- {
- "cell_type": "markdown",
- "id": "e39305c0-3700-48c3-967a-b9c6af3e737f",
- "metadata": {},
- "source": [
- "#### Population Enfants moins de 5 ans (U5)"
+ "if (condition_pregnant_women) {\n",
+ "\n",
+ "    # 1. Histogram of the pregnant women population\n",
+ "    hist(population_data$POPULATION_FE)\n",
+ "\n",
+ "    # 2. Pregnant women population per ADM2 (coloured by year), one facet row per ADM1\n",
+ "    ggplot(population_data) +\n",
+ "        geom_point(aes(x = POPULATION_FE,\n",
+ "                       y = fct_reorder(ADM2_NAME, POPULATION_FE),\n",
+ "                       color = factor(YEAR))\n",
+ "        ) +\n",
+ "        facet_grid(rows = \"ADM1_NAME\",\n",
+ "                   scales = \"free_y\", # full argument name; `scale` only worked via partial matching\n",
+ "                   space = \"free_y\",\n",
+ "                   switch = \"y\") +\n",
+ "        scale_x_continuous(breaks = c(0, 2e+04, 4e+04, 6e+04, 8e+04, 1e+05, 1.5e+05), # evenly stepped ticks, same ladder as the U5 plot\n",
+ "                           labels = scales::comma) +\n",
+ "        scale_color_viridis_d(option = \"mako\", end = 0.8) +\n",
+ "        labs(\n",
+ "            # title = \"\"\n",
+ "            color = \"Année\") +\n",
+ "        theme_minimal() +\n",
+ "        theme(\n",
+ "            axis.text = element_text(size = 7),\n",
+ "            axis.title.x = element_text(size = 7),\n",
+ "            axis.title.y = element_blank(),\n",
+ "            strip.placement = \"outside\",\n",
+ "            panel.grid.minor.x = element_blank(),\n",
+ "            legend.position = \"bottom\"\n",
+ "        )\n",
+ "}"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "bbda9b88-9b91-4845-83a8-795a12124999",
+ "id": "742116ab-fef7-46ea-8c4b-0aa2a166005d",
"metadata": {
"vscode": {
"languageId": "r"
@@ -1327,15 +985,19 @@
},
"outputs": [],
"source": [
- "if (\"POPULATION_U5\" %in% names(population_data_filtered)) {\n",
- " hist(population_data_filtered$POPULATION_U5)\n",
- "}"
+ "# If the \"POPULATION_U5\" disaggregation is available\n",
+ "\n",
+ "if (condition_u5_children) {\n",
+ "\n",
+ "    # Render the section heading as HTML (only when the condition is true)\n",
+ "    IRdisplay::display_html(\"<h4>Population Enfants moins de 5 ans (U5)</h4>\")\n",
+ "}\n"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "742116ab-fef7-46ea-8c4b-0aa2a166005d",
+ "id": "10552bb1",
"metadata": {
"vscode": {
"languageId": "r"
@@ -1343,32 +1005,36 @@
},
"outputs": [],
"source": [
- "if (\"POPULATION_U5\" %in% names(population_data_filtered)) {\n",
- "\n",
- "ggplot(population_data_filtered) +\n",
- " geom_point(aes(x = POPULATION_U5,\n",
- " y = fct_reorder(ADM2_NAME, POPULATION_U5, .na_rm = FALSE),\n",
- " color = factor(YEAR))\n",
- " ) +\n",
- " facet_grid(rows = \"ADM1_NAME\", \n",
- " scale = \"free_y\", \n",
- " space = \"free_y\", \n",
- " switch = \"y\") +\n",
- " scale_x_continuous(breaks = c(0, 2e+04, 4e+04, 6e+04, 8e+04, 1e+05, 1.5e+05),\n",
- " labels = scales::comma) +\n",
- " scale_color_viridis_d(option = \"mako\", end = 0.8) +\n",
- " labs(\n",
- " # title = \"\"\n",
- " color = \"Année\") +\n",
- " theme_minimal() +\n",
- " theme(\n",
- " axis.text = element_text(size = 7),\n",
- " axis.title.x = element_text(size = 7),\n",
- " axis.title.y = element_blank(),\n",
- " strip.placement = \"outside\",\n",
- " panel.grid.minor.x = element_blank(),\n",
- " legend.position = \"bottom\"\n",
- " )\n",
+ "if (condition_u5_children) {\n",
+ "\n",
+ " # 1. Histogram of the u5 children population\n",
+ " hist(population_data$POPULATION_U5)\n",
+ "\n",
+ "    # 2. U5 children population per ADM2 (coloured by year), one facet row per ADM1\n",
+ "    ggplot(population_data) +\n",
+ "        geom_point(aes(x = POPULATION_U5,\n",
+ "                       y = fct_reorder(ADM2_NAME, POPULATION_U5, .na_rm = FALSE),\n",
+ "                       color = factor(YEAR))\n",
+ "        ) +\n",
+ "        facet_grid(rows = \"ADM1_NAME\",\n",
+ "                   scales = \"free_y\", # full argument name; `scale` only worked via partial matching\n",
+ "                   space = \"free_y\",\n",
+ "                   switch = \"y\") +\n",
+ "        scale_x_continuous(breaks = c(0, 2e+04, 4e+04, 6e+04, 8e+04, 1e+05, 1.5e+05),\n",
+ "                           labels = scales::comma) +\n",
+ "        scale_color_viridis_d(option = \"mako\", end = 0.8) +\n",
+ "        labs(\n",
+ "            # title = \"\"\n",
+ "            color = \"Année\") +\n",
+ "        theme_minimal() +\n",
+ "        theme(\n",
+ "            axis.text = element_text(size = 7),\n",
+ "            axis.title.x = element_text(size = 7),\n",
+ "            axis.title.y = element_blank(),\n",
+ "            strip.placement = \"outside\",\n",
+ "            panel.grid.minor.x = element_blank(),\n",
+ "            legend.position = \"bottom\"\n",
+ "        )\n",
"\n",
"}"
]
diff --git a/pipelines/snt_dhis2_formatting/reporting/snt_dhis2_formatting_report_NER.ipynb b/pipelines/snt_dhis2_formatting/reporting/snt_dhis2_formatting_report_NER.ipynb
new file mode 100644
index 0000000..9c9fa1b
--- /dev/null
+++ b/pipelines/snt_dhis2_formatting/reporting/snt_dhis2_formatting_report_NER.ipynb
@@ -0,0 +1,1461 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "47551f88-b40b-449f-9dc1-59db71183611",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# 💡 Comments / Questions & To Do's:\n",
+ "# - filter by YEAR (keep only 2022-2024): \n",
+ "# 1. Why these years? Arbitrary choice? Based on what? linked to what?\n",
+ "# 2. Is this a parameter in some other pipeline? If so, it should be integrated here somehow \n",
+ "# - Missing data: why do we have NA values for population? Are these real NA (missing data) or 0?\n",
+ "# - OUTLIERS: there are clear outliers (i.e., DS AGADEZ): shall we do some simple data cleaning here?\n",
+ "# - Population categories (breaks): do we have a specific scale in mind \n",
+ "# (i.e., use same as another country) or can I set it based on the data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "342b6b54-4812-4b07-b408-68a034b4014e",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# TO DO / FINISH:\n",
+ "# - add safety \"if\" logic so nb does not fail if data is missing or wrong path ...\n",
+ "# - (maybe) also add meaningful messages\n",
+ "# - Add code to export PNG files of relevant figures\n",
+ "# - Set dynamic boundaries for POPULATION categories? (so can use same code in different countries)\n",
+ "# - Clean code to avoid redundancies (especially ggplot stuff, a lot of copy pasted ...)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5b72f828-4fc1-462d-babc-f8f6c9c96ff5",
+ "metadata": {},
+ "source": [
+ "## 0. Paths and Config"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7d3285c7-1a60-46ad-9541-36a703d51924",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Workspace layout: every path below derives from SNT_ROOT_PATH\n",
+ "SNT_ROOT_PATH <- \"~/workspace\"\n",
+ "CONFIG_PATH <- file.path(SNT_ROOT_PATH, \"configuration\")\n",
+ "CODE_PATH <- file.path(SNT_ROOT_PATH, \"code\")\n",
+ "\n",
+ "REPORTING_NB_PATH <- file.path(SNT_ROOT_PATH, \"pipelines/snt_dhis2_formatting/reporting\")\n",
+ "\n",
+ "# Make sure the figures folder exists up front (before loading utils)\n",
+ "figures_dir <- file.path(REPORTING_NB_PATH, \"outputs\", \"figures\")\n",
+ "if (isFALSE(dir.exists(figures_dir))) {\n",
+ "    dir.create(figures_dir, recursive = TRUE)\n",
+ "    print(paste0(\"Created figures directory: \", figures_dir))\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "732733e7-8890-4c3e-be64-496fd4a2c800",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Load util functions\n",
+ "source(file.path(CODE_PATH, \"snt_utils.r\"))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3f26728d-10a0-42d6-a7ff-368cc38e60b9",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "required_packages <- c(\n",
+ " \"tidyverse\", \n",
+ " \"arrow\", \n",
+ " \"sf\",\n",
+ " \"IRdisplay\",\n",
+ " \"reticulate\",\n",
+ " \"patchwork\"\n",
+ ") \n",
+ "\n",
+ "# Execute function\n",
+ "install_and_load(required_packages)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "20475dd9-5091-4f87-9ae2-d0235921fe94",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Set environment to load openhexa.sdk from the right environment\n",
+ "Sys.setenv(PROJ_LIB = \"/opt/conda/share/proj\")\n",
+ "Sys.setenv(GDAL_DATA = \"/opt/conda/share/gdal\")\n",
+ "Sys.setenv(RETICULATE_PYTHON = \"/opt/conda/bin/python\")\n",
+ "\n",
+ "# Load openhexa.sdk\n",
+ "reticulate::py_config()$python\n",
+ "openhexa <- import(\"openhexa.sdk\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9f70d726-1c34-47dc-b963-bb23e42994bb",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Load SNT config; on failure print and re-raise the error together with its cause\n",
+ "config_json <- tryCatch({ jsonlite::fromJSON(file.path(CONFIG_PATH, \"SNT_config.json\"))},\n",
+ "    error = function(e) {\n",
+ "        msg <- paste0(\"Error while loading configuration: \", conditionMessage(e))\n",
+ "        cat(msg)\n",
+ "        stop(msg)\n",
+ "    })"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "90d58c60-fb4e-40e4-add8-5f258f541843",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Configuration variables\n",
+ "dataset_name <- config_json$SNT_DATASET_IDENTIFIERS$DHIS2_DATASET_FORMATTED\n",
+ "COUNTRY_CODE <- config_json$SNT_CONFIG$COUNTRY_CODE\n",
+ "COUNTRY_NAME <- config_json$SNT_CONFIG$COUNTRY_NAME\n",
+ "ADM_2 <- toupper(config_json$SNT_CONFIG$DHIS2_ADMINISTRATION_2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8eece9e0-2544-48c1-8579-a5a721af4ff8",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Print the dimensions of a data frame; states it explicitly when the object is NULL instead of printing blanks\n",
+ "printdim <- function(df, name = deparse(substitute(df))) {\n",
+ "    if (is.null(df)) cat(name, \"is NULL (nothing was loaded)\\n\\n\") else cat(\"Dimensions of\", name, \":\", nrow(df), \"rows x\", ncol(df), \"columns\\n\\n\")\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "643abe28-da3b-4bd2-9ecc-126b18b85c69",
+ "metadata": {},
+ "source": [
+ "## 1. Import data"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "43bbbcdf-c1d1-4631-980c-2c4465cf7a55",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Import the DHIS2 routine data (<COUNTRY_CODE>_routine.parquet); a loading error is re-raised with stop(), so the report halts here\n",
+ "routine_data <- tryCatch({ get_latest_dataset_file_in_memory(dataset_name, paste0(COUNTRY_CODE, \"_routine.parquet\")) }, \n",
+ " error = function(e) {\n",
+ " msg <- paste0(\"[WARNING] Error while loading DHIS2 Routine data for: \" , COUNTRY_CODE, \n",
+ " \" the report cannot be executed. [ERROR DETAILS] \", conditionMessage(e))\n",
+ " stop(msg)\n",
+ " })\n",
+ "\n",
+ "printdim(routine_data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d53274c5-965e-4a11-bb77-c9b899d5cb9c",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Load the population file; a failure is logged as a warning (with its cause) and does not abort the notebook\n",
+ "population_data <- tryCatch({ get_latest_dataset_file_in_memory(dataset_name, paste0(COUNTRY_CODE, \"_population.parquet\")) },\n",
+ "    error = function(e) {\n",
+ "        msg <- paste0(COUNTRY_NAME , \" Population data is not available in dataset : \" , dataset_name, \" last version. [ERROR DETAILS] \", conditionMessage(e))\n",
+ "        log_msg(msg, \"warning\")\n",
+ "        NULL # the handler's value becomes population_data; assigning inside the handler would only create a local\n",
+ "    })\n",
+ "printdim(population_data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c1be5372-cbc1-4343-ab11-01eae0fa9d60",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Load the shapes file; a failure is logged as a warning (with its cause) and does not abort the notebook\n",
+ "shapes_data <- tryCatch({ get_latest_dataset_file_in_memory(dataset_name, paste0(COUNTRY_CODE, \"_shapes.geojson\")) },\n",
+ "    error = function(e) {\n",
+ "        msg <- paste0(COUNTRY_NAME , \" Shapes data is not available in dataset : \" , dataset_name, \" last version. [ERROR DETAILS] \", conditionMessage(e))\n",
+ "        log_msg(msg, \"warning\")\n",
+ "        NULL # the handler's value becomes shapes_data; assigning inside the handler would only create a local\n",
+ "    })\n",
+ "printdim(shapes_data)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e3d5b582-a38f-4ce0-a9a2-9a53ab5eb233",
+ "metadata": {},
+ "source": [
+ "# **Complétude des indicateurs composites**\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ca84fce1-0407-433a-a98a-e65ed15ab8de",
+ "metadata": {},
+ "source": [
+ "# 1. Complétude du rapportage des indicateurs composites / Reporting Completeness of Composite Indicators"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c109e82d-8c72-41f0-857a-322163cf213e",
+ "metadata": {},
+ "source": [
+ "## 1.1 Proportion de formations sanitaires ayant rapporté des valeurs nulles, manquantes (NULL) ou positives pour chaque indicateur"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0f54505a-2dcc-429e-a900-46d4fae6fd31",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Step 0: Work on a copy named `data`, so routine_data itself is left untouched\n",
+ "data <- routine_data\n",
+ "\n",
+ "# Step 1: Build a DATE column by appending \"01\" to PERIOD and parsing the result with ymd()\n",
+ "data <- data %>%\n",
+ " mutate(\n",
+ " DATE = ymd(paste0(PERIOD, \"01\"))\n",
+ " )\n",
+ "\n",
+ "# Step 2: Every column outside the identifier/date list is treated as an indicator; reshape wide to long (INDICATOR = column name, VALUE = value)\n",
+ "indicator_vars <- setdiff(names(data), c(\n",
+ " \"PERIOD\", \"YEAR\", \"MONTH\", \"OU_ID\", \"OU_NAME\", \"ADM1_NAME\", \"ADM1_ID\", \"ADM2_NAME\", \"ADM2_ID\", \"DATE\"\n",
+ "))\n",
+ "\n",
+ "long_data <- data %>%\n",
+ " pivot_longer(cols = all_of(indicator_vars),\n",
+ " names_to = \"INDICATOR\",\n",
+ " values_to = \"VALUE\") %>%\n",
+ " rename(OU = OU_ID)\n",
+ "\n",
+ "# Step 3: Build the expected full grid: every OU × INDICATOR × DATE combination of the values seen in the data\n",
+ "full_grid <- expand_grid(\n",
+ " OU = unique(long_data$OU),\n",
+ " INDICATOR = unique(long_data$INDICATOR),\n",
+ " DATE = unique(long_data$DATE)\n",
+ ")\n",
+ "\n",
+ "# Step 4: Join observed values onto the grid; combinations without a row get VALUE = NA and are flagged as missing\n",
+ "reporting_check <- full_grid %>%\n",
+ " left_join(\n",
+ " long_data %>% select(OU, INDICATOR, DATE, VALUE),\n",
+ " by = c(\"OU\", \"INDICATOR\", \"DATE\")\n",
+ " ) %>%\n",
+ " mutate(\n",
+ " is_missing = is.na(VALUE),\n",
+ " is_zero = VALUE == 0 & !is.na(VALUE),\n",
+ " is_positive = VALUE > 0 & !is.na(VALUE)\n",
+ " )\n",
+ "\n",
+ "# Step 5: Per indicator and date, count the rows in each status and express them as a percentage of n_total (distinct OUs)\n",
+ "reporting_summary <- reporting_check %>%\n",
+ " group_by(INDICATOR, DATE) %>%\n",
+ " summarise(\n",
+ " n_total = n_distinct(OU),\n",
+ " n_missing = sum(is_missing),\n",
+ " n_zero = sum(is_zero),\n",
+ " n_positive = sum(is_positive),\n",
+ " pct_missing = ifelse(n_total > 0, 100 * n_missing / n_total, 0),\n",
+ " pct_zero = ifelse(n_total > 0, 100 * n_zero / n_total, 0),\n",
+ " pct_positive = ifelse(n_total > 0, 100 * n_positive / n_total, 0),\n",
+ " .groups = \"drop\"\n",
+ " )\n",
+ "\n",
+ "# Step 6: Long format with French status labels for plotting; complete() fills absent combinations with Percentage = 0\n",
+ "plot_data <- reporting_summary %>%\n",
+ " pivot_longer(\n",
+ " cols = starts_with(\"pct_\"),\n",
+ " names_to = \"Status\",\n",
+ " values_to = \"Percentage\"\n",
+ " ) %>%\n",
+ " mutate(\n",
+ " Status = recode(Status,\n",
+ " pct_missing = \"Valeur manquante\",\n",
+ " pct_zero = \"Valeur nulle rapportée\",\n",
+ " pct_positive = \"Valeur positive rapportée\")\n",
+ " ) %>%\n",
+ " complete(INDICATOR, DATE, Status, fill = list(Percentage = 0))\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cfd115e7-176d-4beb-9ab9-2e6990cb16af",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "options(repr.plot.width = 17, repr.plot.height = 10) # plot size option (repr) set before drawing the faceted chart\n",
+ "ggplot(plot_data, aes(x = DATE, y = Percentage, fill = Status)) +\n",
+ " geom_col(position = \"stack\") +\n",
+ " facet_wrap(~ INDICATOR, scales = \"free_y\", ncol = 4) + # one panel per indicator, 4 panels per row\n",
+ " scale_y_continuous() +\n",
+ " scale_fill_manual(values = c(\n",
+ " \"Valeur manquante\" = \"tomato\",\n",
+ " \"Valeur nulle rapportée\" = \"skyblue\",\n",
+ " \"Valeur positive rapportée\" = \"green\"\n",
+ " )) +\n",
+ " labs(\n",
+ " title = \"Taux de rapportage par indicateur (niveau formation sanitaire)\",\n",
+ " subtitle = \"Proportion des valeurs rapportées par mois et par indicateur\",\n",
+ " x = \"Mois\", y = \"% des formations sanitaires\",\n",
+ " fill = \"Statut du rapportage\"\n",
+ " ) +\n",
+ " theme_minimal(base_size = 16) +\n",
+ " theme(\n",
+ " plot.title = element_text(face = \"bold\", size = 20),\n",
+ " strip.text = element_text(size = 16),\n",
+ " axis.title = element_text(size = 16),\n",
+ " axis.text = element_text(size = 16)\n",
+ " )\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e6871759-714b-437a-8b9c-5a5a06656567",
+ "metadata": {},
+ "source": [
+ "## 1.2 Proportion des districts ayant rapporté des valeurs nulles, manquantes (NULL) ou positives pour chaque indicateur."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c89f6c77-dd42-4616-8eb5-1642d5b51157",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Step 0: Work on a copy named `data`, so routine_data itself is left untouched\n",
+ "data <- routine_data\n",
+ "\n",
+ "# Step 1: Build a Date column by appending \"01\" to PERIOD and parsing the result with ymd()\n",
+ "data <- data %>%\n",
+ " mutate(Date = ymd(paste0(PERIOD, \"01\")))\n",
+ "\n",
+ "# Step 2: Identify indicator columns (everything outside the identifier/date list)\n",
+ "indicator_cols <- setdiff(names(data), c(\n",
+ " \"PERIOD\", \"YEAR\", \"MONTH\", \"OU_ID\", \"OU_NAME\",\n",
+ " \"ADM1_NAME\", \"ADM1_ID\", \"ADM2_NAME\", \"ADM2_ID\", \"Date\"\n",
+ "))\n",
+ "\n",
+ "# Step 3: Reshape to long format (one row per ADM2_ID / OU_ID / Date / Indicator) and coerce values to numeric\n",
+ "data_long <- data %>%\n",
+ " select(ADM2_ID, OU_ID, Date, all_of(indicator_cols)) %>%\n",
+ " pivot_longer(cols = all_of(indicator_cols),\n",
+ " names_to = \"Indicator\", values_to = \"value\") %>%\n",
+ " mutate(value = as.numeric(value))\n",
+ "\n",
+ "# Step 4: Full expected grid at ADM2 level (every ADM2_ID × Indicator × Date combination of the values seen)\n",
+ "full_grid <- expand_grid(\n",
+ " ADM2_ID = unique(data_long$ADM2_ID),\n",
+ " Indicator = unique(data_long$Indicator),\n",
+ " Date = unique(data_long$Date)\n",
+ ")\n",
+ "\n",
+ "# Step 5: Classify each district × indicator × date: missing = all values NA; zero = every non-NA value is 0 (vacuously TRUE when all are NA); positive = at least one value > 0\n",
+ "reporting_check <- data_long %>%\n",
+ " group_by(ADM2_ID, Indicator, Date) %>%\n",
+ " summarise(\n",
+ " is_missing = all(is.na(value)),\n",
+ " is_zero = all(value == 0, na.rm = TRUE),\n",
+ " is_positive = any(value > 0, na.rm = TRUE),\n",
+ " .groups = \"drop\"\n",
+ " )\n",
+ "\n",
+ "# Step 6: Join with the full grid; combinations absent from the data are flagged as missing (not zero, not positive)\n",
+ "reporting_full <- full_grid %>%\n",
+ " left_join(reporting_check, by = c(\"ADM2_ID\", \"Indicator\", \"Date\")) %>%\n",
+ " mutate(\n",
+ " is_missing = replace_na(is_missing, TRUE),\n",
+ " is_zero = replace_na(is_zero, FALSE),\n",
+ " is_positive = replace_na(is_positive, FALSE)\n",
+ " )\n",
+ "\n",
+ "# Step 7: Summarise by Indicator and Date; n_zero excludes all-NA groups, where is_zero is only vacuously TRUE\n",
+ "reporting_summary <- reporting_full %>%\n",
+ " group_by(Indicator, Date) %>%\n",
+ " summarise(\n",
+ " n_total = n_distinct(ADM2_ID),\n",
+ " n_missing = sum(is_missing),\n",
+ " n_zero = sum(is_zero & !is_missing),\n",
+ " n_positive = sum(is_positive),\n",
+ " pct_missing = ifelse(n_total > 0, 100 * n_missing / n_total, 0),\n",
+ " pct_zero = ifelse(n_total > 0, 100 * n_zero / n_total, 0),\n",
+ " pct_positive = ifelse(n_total > 0, 100 * n_positive / n_total, 0),\n",
+ " .groups = \"drop\"\n",
+ " )\n",
+ "\n",
+ "# Step 8: Reshape for plotting (French status labels; complete() fills absent combinations with Percentage = 0)\n",
+ "plot_data <- reporting_summary %>%\n",
+ " pivot_longer(cols = starts_with(\"pct_\"),\n",
+ " names_to = \"Status\", values_to = \"Percentage\") %>%\n",
+ " mutate(Status = recode(Status,\n",
+ " pct_missing = \"Valeur manquante\",\n",
+ " pct_zero = \"Valeur nulle rapportée\",\n",
+ " pct_positive = \"Valeur positive rapportée\")) %>%\n",
+ " complete(Indicator, Date, Status, fill = list(Percentage = 0))\n",
+ "\n",
+ "# Step 9: Plot\n",
+ "# Stacked bars per month, split by reporting status, one panel per indicator.\n",
+ "# The 0-100 range is applied with coord_cartesian() (a zoom) instead of scale\n",
+ "# limits: scale limits discard any stacked segment whose top lands even\n",
+ "# marginally above 100 (floating-point rounding), leaving holes in the bars.\n",
+ "ggplot(plot_data, aes(x = Date, y = Percentage, fill = Status)) +\n",
+ "  geom_col(position = \"stack\") +\n",
+ "  facet_wrap(~ Indicator, scales = \"free_y\") +\n",
+ "  coord_cartesian(ylim = c(0, 100)) +\n",
+ "  scale_fill_manual(values = c(\n",
+ "    \"Valeur manquante\" = \"tomato\",\n",
+ "    \"Valeur nulle rapportée\" = \"skyblue\",\n",
+ "    \"Valeur positive rapportée\" = \"green\"\n",
+ "  )) +\n",
+ "  labs(\n",
+ "    title = \"Taux de rapportage par indicateur (niveau district)\",\n",
+ "    subtitle = \"Proportion des districts (ADM2_ID) rapportant chaque mois\",\n",
+ "    x = \"Mois\", y = \"% des districts\",\n",
+ "    fill = \"Statut du rapportage\"\n",
+ "  ) +\n",
+ "  theme_minimal(base_size = 14) +\n",
+ "  theme(\n",
+ "    plot.title = element_text(face = \"bold\", size = 18),\n",
+ "    strip.text = element_text(size = 14),\n",
+ "    axis.title = element_text(size = 14),\n",
+ "    axis.text = element_text(size = 12)\n",
+ "  )\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "5cda3985",
+ "metadata": {},
+ "source": [
+ "# 2. Cohérence interne des indicateurs composites"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c131a633",
+ "metadata": {},
+ "source": [
+ "## 2.1 Filtrage préliminaire des valeurs aberrantes pour l’analyse de cohérence\n",
+ "\n",
+ "Avant d’évaluer la cohérence entre les indicateurs composites, nous éliminons d’abord les valeurs aberrantes les plus extrêmes. Cette étape ne modifie pas définitivement le jeu de données et ne vise pas à détecter toutes les valeurs aberrantes ; elle permet simplement d’exclure les cas extrêmes afin de faciliter une évaluation plus fiable de la cohérence entre les indicateurs."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "936268f4",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Flag values lying outside a median +/- deviation * MAD band.\n",
+ "# The band is computed separately for every OU x indicator x YEAR group.\n",
+ "#\n",
+ "# data_long:      long table with columns OU, indicator, YEAR and value.\n",
+ "# deviation:      multiplier applied to the MAD to set the band half-width.\n",
+ "# outlier_column: name of the logical flag column to create.\n",
+ "# Returns the ungrouped table with median_val, mad_val and the flag column added.\n",
+ "detect_mad_outliers <- function(data_long, deviation = 15, outlier_column = \"mad_flag\") {\n",
+ "  by_series <- group_by(data_long, OU, indicator, YEAR)\n",
+ "  flagged <- mutate(\n",
+ "    by_series,\n",
+ "    median_val = median(value, na.rm = TRUE),\n",
+ "    mad_val = mad(value, na.rm = TRUE),\n",
+ "    \"{outlier_column}\" := value < (median_val - deviation * mad_val) |\n",
+ "      value > (median_val + deviation * mad_val)\n",
+ "  )\n",
+ "  ungroup(flagged)\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "881f9625",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Step 0: Select relevant core indicators\n",
+ "target_indicators <- c(\"SUSP\", \"TEST\", \"CONF\", \"MALTREAT\", \"PRES\")\n",
+ "\n",
+ "# Step 1: Convert wide to long format\n",
+ "routine_long <- routine_data %>%\n",
+ " pivot_longer(\n",
+ " cols = all_of(target_indicators),\n",
+ " names_to = \"indicator\",\n",
+ " values_to = \"value\"\n",
+ " ) %>%\n",
+ " mutate(\n",
+ " PERIOD = as.character(PERIOD), # Ensure PERIOD is character for join\n",
+ " OU = OU_ID # Alias for join clarity\n",
+ " )\n",
+ "\n",
+ "# Step 2: Filter to indicators of interest\n",
+ "routine_long_filtered <- routine_long %>%\n",
+ " filter(indicator %in% target_indicators)\n",
+ "\n",
+ "# Step 3: Calculate MAD15\n",
+ "mad15_data <- detect_mad_outliers(\n",
+ " routine_long_filtered,\n",
+ " deviation = 15,\n",
+ " outlier_column = \"mad15\"\n",
+ ")\n",
+ "\n",
+ "# Step 4: Calculate MAD10 (only where mad15 not flagged or missing)\n",
+ "mad10_flags <- mad15_data %>%\n",
+ " filter(is.na(mad15) | mad15 == FALSE, !is.na(value)) %>%\n",
+ " detect_mad_outliers(deviation = 10, outlier_column = \"mad10\")\n",
+ "\n",
+ "# Step 5: Combine MAD15 and MAD10 results\n",
+ "mad_combined <- mad15_data %>%\n",
+ " left_join(\n",
+ " mad10_flags %>% select(PERIOD, OU, indicator, mad10),\n",
+ " by = c(\"PERIOD\", \"OU\", \"indicator\")\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "04d41ed1",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Step 6: Identify outliers (MAD15 or MAD10 flagged as TRUE)\n",
+ "outlier_flags <- mad_combined %>%\n",
+ " filter(mad15 == TRUE | mad10 == TRUE) %>%\n",
+ " mutate(PERIOD = as.numeric(PERIOD)) %>%\n",
+ " select(PERIOD, OU, indicator)\n",
+ "\n",
+ "# Step 7: Reshape routine_data to long format for filtering\n",
+ "routine_long_all <- routine_data %>%\n",
+ " pivot_longer(\n",
+ " cols = all_of(target_indicators),\n",
+ " names_to = \"indicator\",\n",
+ " values_to = \"value\"\n",
+ " ) %>%\n",
+ " mutate(OU = OU_ID)\n",
+ "\n",
+ "# Step 8: Remove outliers\n",
+ "routine_long_clean <- routine_long_all %>%\n",
+ " anti_join(outlier_flags, by = c(\"PERIOD\", \"OU\", \"indicator\"))\n",
+ "\n",
+ "# Step 9: Reshape back to wide format if needed\n",
+ "routine_data_clean <- routine_long_clean %>%\n",
+ " select(-OU) %>%\n",
+ " pivot_wider(names_from = indicator, values_from = value)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c6a5a77b",
+ "metadata": {},
+ "source": [
+ "## 2.2 Cohérence des indicateurs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6cfeb18e",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Step 1: Extract year and month from PERIOD\n",
+ "routine_hd_month <- routine_data_clean %>%\n",
+ " mutate(\n",
+ " YEAR = substr(PERIOD, 1, 4),\n",
+ " MONTH = substr(PERIOD, 5, 6)\n",
+ " ) %>%\n",
+ " group_by(ADM2_ID, YEAR, MONTH) %>%\n",
+ " summarise(\n",
+ " SUSP = sum(SUSP, na.rm = TRUE),\n",
+ " TEST = sum(TEST, na.rm = TRUE),\n",
+ " CONF = sum(CONF, na.rm = TRUE),\n",
+ " MALTREAT = sum(MALTREAT, na.rm = TRUE),\n",
+ " PRES = sum(PRES, na.rm = TRUE),\n",
+ " .groups = \"drop\"\n",
+ " )\n",
+ "\n",
+ "# Step 2: Create scatter plots\n",
+ "options(repr.plot.width = 14, repr.plot.height = 6)\n",
+ "\n",
+ "p1 <- ggplot(routine_hd_month, aes(x = SUSP, y = TEST)) +\n",
+ " geom_point(alpha = 0.5, color = \"blue\") +\n",
+ " geom_abline(slope = 1, intercept = 0, linetype = \"dashed\", color = \"red\") +\n",
+ " labs(title = \"Suspectés vs Testés\", x = \"Cas suspectés\", y = \"Cas testés\") +\n",
+ " theme_minimal(base_size = 16)\n",
+ "\n",
+ "p2 <- ggplot(routine_hd_month, aes(x = TEST, y = CONF)) +\n",
+ " geom_point(alpha = 0.5, color = \"darkgreen\") +\n",
+ " geom_abline(slope = 1, intercept = 0, linetype = \"dashed\", color = \"red\") +\n",
+ " labs(title = \"Testés vs Confirmés\", x = \"Cas testés\", y = \"Cas confirmés\") +\n",
+ " theme_minimal(base_size = 16)\n",
+ "\n",
+ "p3 <- ggplot(routine_hd_month, aes(x = CONF, y = MALTREAT)) +\n",
+ " geom_point(alpha = 0.5, color = \"purple\") +\n",
+ " geom_abline(slope = 1, intercept = 0, linetype = \"dashed\", color = \"red\") +\n",
+ " labs(title = \"Confirmés vs Traités\", x = \"Cas confirmés\", y = \"Cas traités\") +\n",
+ " theme_minimal(base_size = 16)\n",
+ "\n",
+ "# Step 3: Combine plots\n",
+ "(p1 | p2 | p3) + plot_layout(guides = \"collect\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0df24272",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Step 1: Aggregate monthly values\n",
+ "rds_clean_month <- routine_data_clean %>%\n",
+ " mutate(\n",
+ " YEAR = substr(PERIOD, 1, 4),\n",
+ " MONTH = substr(PERIOD, 5, 6),\n",
+ " DATE = as.Date(paste(YEAR, MONTH, \"01\", sep = \"-\"))\n",
+ " ) %>%\n",
+ " group_by(YEAR, MONTH, DATE) %>%\n",
+ " summarise(\n",
+ " SUSP = sum(SUSP, na.rm = TRUE),\n",
+ " TEST = sum(TEST, na.rm = TRUE),\n",
+ " CONF = sum(CONF, na.rm = TRUE),\n",
+ " PRES = sum(PRES, na.rm = TRUE),\n",
+ " .groups = \"drop\"\n",
+ " )\n",
+ "\n",
+ "# Step 2: Plot monthly national trends\n",
+ "options(repr.plot.width = 14, repr.plot.height = 6)\n",
+ "rds_clean_month %>%\n",
+ " pivot_longer(cols = c(SUSP, TEST, CONF, PRES), names_to = \"Indicator\") %>%\n",
+ " ggplot(aes(x = DATE, y = value, color = Indicator)) +\n",
+ " geom_line(linewidth = 1.2) +\n",
+ " labs(\n",
+ " title = \"Tendances mensuelles nationales des indicateurs composites (après suppression des outliers)\",\n",
+ " x = \"Mois\", y = \"Nombre de cas\", color = \"Indicateur\"\n",
+ " ) +\n",
+ " theme_minimal(base_size = 16) +\n",
+ " theme(\n",
+ " plot.title = element_text(face = \"bold\", size = 20),\n",
+ " axis.title = element_text(size = 16),\n",
+ " axis.text = element_text(size = 16),\n",
+ " legend.title = element_text(size = 16),\n",
+ " legend.text = element_text(size = 16)\n",
+ " )\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "780fc9f8-6c67-4328-85f1-6bdefcd15b48",
+ "metadata": {},
+ "source": [
+ "# 3. Carte des populations par district sanitaire (DS)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "da58bbd3",
+ "metadata": {},
+ "source": [
+ "## 3.1. Carte de la Population pour ADM2 "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6965155d",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Code from previous version of the notebook\n",
+ "# Uses a continuous colour scale for population\n",
+ "\n",
+ "# Run only if both population_data and shapes_data are available.\n",
+ "# `&&` is the scalar, short-circuiting operator intended for `if` conditions.\n",
+ "if (!is.null(population_data) && !is.null(shapes_data)) {\n",
+ "  # Join population to spatial shapes\n",
+ "  map_data <- shapes_data %>%\n",
+ "    left_join(population_data, by = \"ADM2_ID\")\n",
+ "\n",
+ "  # Plot population per district (DS).\n",
+ "  # Border width is set with `linewidth`, as in the other maps of this notebook\n",
+ "  # (`size` is deprecated for line widths in recent ggplot2).\n",
+ "  plot <- ggplot(map_data) +\n",
+ "    geom_sf(aes(fill = POPULATION), color = \"white\", linewidth = 0.2) +\n",
+ "    scale_fill_viridis_c(option = \"C\", name = \"Population\") +\n",
+ "    labs(\n",
+ "      title = \"Population totale par district sanitaire (DS)\",\n",
+ "      subtitle = \"Données DHIS2\",\n",
+ "      caption = \"Source: NMDR / DHIS2\"\n",
+ "    ) +\n",
+ "    theme_minimal(base_size = 14)\n",
+ "\n",
+ "  print(plot)\n",
+ "\n",
+ "} else {\n",
+ "  print(\"Population or shapes data not available.\")\n",
+ "}\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "eb276692",
+ "metadata": {},
+ "source": [
+ "## ⚠️ 3.2. Carte de la Population Désagrégée (spécifique au pays)\n",
+ "Le code suivant est spécifique à chaque pays et repose sur une population désagrégée. "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "62ec70f5",
+ "metadata": {},
+ "source": [
+ "### 🇳🇪 NER specific code \n",
+ "Made ad hoc to allow comparison with data from other or previous analyses. Namely:\n",
+ "* only year 2022 to 2024\n",
+ "* specific palette (yellowish to brick red)\n",
+ "* specific intervals\n",
+ "* looks at **disaggregated** population <- this is sometimes country-specific!"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4d33724e",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "if (!is.null(population_data)) {\n",
+ " print(\"🇳🇪 Executing NER specific code ... \")\n",
+ "\n",
+ " # --- Filter data to keep only 2022-2024 ... ---\n",
+ " years_to_keep <- 2022:2024\n",
+ " population_data <- population_data |> filter(YEAR %in% years_to_keep)\n",
+ "\n",
+ "  # --- Read data from SNT_metadata.json ---\n",
+ "  # On failure the message is echoed to the cell output and the error is re-raised,\n",
+ "  # so execution stops here instead of continuing without metadata.\n",
+ "  metadata_json <- tryCatch(\n",
+ "    jsonlite::fromJSON(file.path(CONFIG_PATH, \"SNT_metadata.json\")),\n",
+ "    error = function(e) {\n",
+ "      # Separator added so the prefix and the underlying message do not run together\n",
+ "      msg <- paste0(\"Error while loading metadata: \", conditionMessage(e))\n",
+ "      cat(msg)\n",
+ "      stop(msg)\n",
+ "    }\n",
+ "  )\n",
+ "\n",
+ " # --- Assign population breaks from metadata ---\n",
+ " parse_scale <- function(scale_obj) {\n",
+ " if (is.character(scale_obj)) {\n",
+ " return(jsonlite::fromJSON(scale_obj))\n",
+ " }\n",
+ " if (is.atomic(scale_obj) || is.list(scale_obj)) {\n",
+ " return(as.numeric(unlist(scale_obj)))\n",
+ " }\n",
+ " stop(\"Invalid SCALE format in SNT_metadata.json\")\n",
+ " }\n",
+ "\n",
+ " get_scale <- function(meta, candidates) {\n",
+ " for (name in candidates) {\n",
+ " if (!is.null(meta[[name]]) && !is.null(meta[[name]]$SCALE)) {\n",
+ " return(meta[[name]]$SCALE)\n",
+ " }\n",
+ " }\n",
+ " return(NULL)\n",
+ " }\n",
+ "\n",
+ " scale_tot <- get_scale(metadata_json, c(\"POPULATION_TOTAL\", \"POPULATION\"))\n",
+ " scale_u5 <- get_scale(metadata_json, c(\"POPULATION_U5\", \"POPULATION\"))\n",
+ " scale_fe <- get_scale(metadata_json, c(\"POPULATION_PREGNANT\", \"POPULATION\"))\n",
+ "\n",
+ " if (is.null(scale_tot) || is.null(scale_u5) || is.null(scale_fe)) {\n",
+ " stop(\"Missing population SCALE in SNT_metadata.json\")\n",
+ " }\n",
+ "\n",
+ " value_breaks_tot <- parse_scale(scale_tot)\n",
+ " value_breaks_u5 <- parse_scale(scale_u5)\n",
+ " value_breaks_fe <- parse_scale(scale_fe)\n",
+ "\n",
+ " if (length(value_breaks_tot) < 1 || length(value_breaks_u5) < 1 || length(value_breaks_fe) < 1) {\n",
+ " stop(\"Population SCALE in SNT_metadata.json must contain at least one break value.\")\n",
+ " }\n",
+ "\n",
+ "  # --- Define function to create dynamic labels based on breaks for pop category ---\n",
+ "  # breaks: numeric vector of break values (at least one), in population units.\n",
+ "  # Returns length(breaks) + 1 labels expressed in thousands (\"k\"):\n",
+ "  # one \"< first\" label, one \"a - b\" label per consecutive pair, one \"> last\" label.\n",
+ "  # This matches the number of intervals of cut(x, breaks = c(0, breaks, Inf)).\n",
+ "  create_dynamic_labels <- function(breaks) {\n",
+ "    fmt <- function(x) {\n",
+ "      format(x / 1000, big.mark = \"'\", scientific = FALSE, trim = TRUE)\n",
+ "    }\n",
+ "\n",
+ "    n_breaks <- length(breaks)\n",
+ "\n",
+ "    # paste0() recycles zero-length inputs to \"\", so with a single break the\n",
+ "    # middle part would become a bogus \" - k\" label and cut() would fail on the\n",
+ "    # label count; build the middle labels only when there is at least one pair.\n",
+ "    middle <- if (n_breaks > 1) {\n",
+ "      paste0(fmt(breaks[-n_breaks]), \" - \", fmt(breaks[-1]), \"k\")\n",
+ "    } else {\n",
+ "      character(0)\n",
+ "    }\n",
+ "\n",
+ "    c(\n",
+ "      paste0(\"< \", fmt(breaks[1]), \"k\"),        # First label\n",
+ "      middle,                                   # Middle labels\n",
+ "      paste0(\"> \", fmt(breaks[n_breaks]), \"k\")  # Last label\n",
+ "    )\n",
+ "  }\n",
+ "\n",
+ " # --- Create dynamic labels based on breaks ---\n",
+ " labels_tot <- create_dynamic_labels(value_breaks_tot)\n",
+ " labels_u5 <- create_dynamic_labels(value_breaks_u5)\n",
+ " labels_fe <- create_dynamic_labels(value_breaks_fe)\n",
+ "\n",
+ " # --- NER palette: build with same length as labels (yellowish -> brick red) ---\n",
+ " NER_palette_base <- c(\"#fae6db\", \"#f1b195\", \"#ea7354\", \"#cc3f32\", \"#972620\")\n",
+ " make_ner_palette <- function(labels) {\n",
+ " setNames(colorRampPalette(NER_palette_base)(length(labels)), labels)\n",
+ " }\n",
+ "\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "0fdb96a0-873d-4f85-9c34-23c89c204c30",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# NER palette: base colors (yellowish -> brick red). Actual palette is built\n",
+ "# per plot with make_ner_palette(labels_*) so length always matches labels."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9fa4cded",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "# Conditions for display of total and disaggregated population plots\n",
+ "\n",
+ "pregnant_women_col <- \"POP_PREGNANT_WOMAN\"\n",
+ "u5_children_col <- \"POP_UNDER_5\"\n",
+ "\n",
+ "# existence of general population data and labels for plot\n",
+ "condition_population_data <- !is.null(population_data) && nrow(population_data) > 0\n",
+ "condition_plot_total_population <- condition_population_data && exists(\"labels_tot\") && !is.null(shapes_data)\n",
+ "\n",
+ "# existence of pregnant women data and labels for plot\n",
+ "condition_pregnant_women <- condition_population_data && pregnant_women_col %in% names(population_data)\n",
+ "condition_plot_pregnant_women <- condition_pregnant_women && exists(\"labels_fe\") && !is.null(shapes_data)\n",
+ "\n",
+ "# existence of under5 children disaggregation and labels for plot\n",
+ "condition_u5_children <- condition_population_data && u5_children_col %in% names(population_data)\n",
+ "condition_plot_u5_children <- condition_u5_children && exists(\"labels_u5\") && !is.null(shapes_data)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a0a196b8-2db5-478d-899a-48985d1735f0",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "if (condition_plot_total_population) {\n",
+ "\n",
+ " # Display formatted markdown (only if the condition is true)\n",
+ " IRdisplay::display_html(\"Population Totale
\")\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d4263ea8",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "if (condition_plot_total_population) {\n",
+ " \n",
+ " # Palette length must match number of labels (dynamic from metadata breaks)\n",
+ " NER_palette_population <- make_ner_palette(labels_tot)\n",
+ "\n",
+ " plot <- population_data %>%\n",
+ " mutate(\n",
+ " CATEGORY_POPULATION = cut(\n",
+ " POPULATION,\n",
+ " breaks = c(0, value_breaks_tot, Inf),\n",
+ " labels = labels_tot, \n",
+ " right = TRUE,\n",
+ " include.lowest = TRUE\n",
+ " )\n",
+ " ) %>% \n",
+ " left_join(shapes_data, \n",
+ " by = join_by(ADM1_NAME, ADM1_ID, ADM2_NAME, ADM2_ID)) %>% \n",
+ " ggplot() +\n",
+ " geom_sf(aes(geometry = geometry,\n",
+ " fill = CATEGORY_POPULATION),\n",
+ " color = \"black\",\n",
+ " linewidth = 0.25, \n",
+ " show.legend = TRUE\n",
+ " ) +\n",
+ " labs(\n",
+ " title = \"Population totale par district sanitaire (DS)\",\n",
+ " subtitle = \"Source: NMDR / DHIS2\"\n",
+ " ) +\n",
+ " scale_fill_manual(\n",
+ " values = NER_palette_population, \n",
+ " limits = labels_tot, \n",
+ " drop = FALSE \n",
+ " ) +\n",
+ " facet_wrap(~YEAR, ncol = 3) +\n",
+ " theme_void() +\n",
+ " theme(\n",
+ " plot.title = element_text(face = \"bold\"),\n",
+ " plot.subtitle = element_text(margin = margin(5, 0, 20, 0)),\n",
+ " legend.position = \"bottom\",\n",
+ " legend.title = element_blank(),\n",
+ " strip.text = element_text(face = \"bold\"),\n",
+ " legend.key.height = unit(0.5, \"line\"),\n",
+ " legend.margin = margin(10, 0, 0, 0)\n",
+ " )\n",
+ "\n",
+ " print(plot)\n",
+ "\n",
+ " # Export to see better in high resolution\n",
+ " ggsave(\n",
+ " filename = file.path(REPORTING_NB_PATH, \"outputs\", \"figures\", paste0(COUNTRY_CODE, \"_choropleth_population_totals.png\")),\n",
+ " width = 14,\n",
+ " height = 8,\n",
+ " dpi = 300\n",
+ " )\n",
+ "}\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9324a56b",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "if (condition_plot_pregnant_women) {\n",
+ "\n",
+ " # Display formatted markdown (only if the condition is true)\n",
+ " IRdisplay::display_html(\"Population Femmes Enceintes (FE)
\")\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b83cf3c0",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "if (condition_plot_pregnant_women) {\n",
+ "\n",
+ " NER_palette_population <- make_ner_palette(labels_fe)\n",
+ "\n",
+ " plot <- population_data %>%\n",
+ " mutate(\n",
+ " CATEGORY_POPULATION = cut(\n",
+ " # POPULATION_FE,\n",
+ " !!sym(pregnant_women_col), # avoid hard coding column, because column names in data changed\n",
+ " breaks = c(0, value_breaks_fe, Inf),\n",
+ " labels = labels_fe, \n",
+ " right = TRUE,\n",
+ " include.lowest = TRUE\n",
+ " )\n",
+ " ) %>% \n",
+ " left_join(shapes_data, \n",
+ " by = join_by(ADM1_NAME, ADM1_ID, ADM2_NAME, ADM2_ID)) %>% \n",
+ " ggplot() +\n",
+ " geom_sf(aes(geometry = geometry,\n",
+ " fill = CATEGORY_POPULATION),\n",
+ " color = \"black\",\n",
+ " linewidth = 0.25, \n",
+ " show.legend = TRUE\n",
+ " ) +\n",
+ " labs(\n",
+ " title = \"Population des femmes enceintes par district sanitaire (DS)\",\n",
+ " subtitle = \"Source: NMDR / DHIS2\"\n",
+ " ) +\n",
+ " scale_fill_manual(\n",
+ " values = NER_palette_population, \n",
+ " limits = labels_fe, \n",
+ " drop = FALSE # Prevents dropping empty levels from legend\n",
+ " ) +\n",
+ " facet_wrap(~YEAR, ncol = 3) +\n",
+ " theme_void() +\n",
+ " theme(\n",
+ " plot.title = element_text(face = \"bold\"),\n",
+ " plot.subtitle = element_text(margin = margin(5, 0, 20, 0)),\n",
+ " legend.position = \"bottom\",\n",
+ " legend.title = element_blank(),\n",
+ " strip.text = element_text(face = \"bold\"),\n",
+ " legend.key.height = unit(0.5, \"line\"),\n",
+ " legend.margin = margin(10, 0, 0, 0)\n",
+ " )\n",
+ "\n",
+ " print(plot)\n",
+ "\n",
+ " # Export to see better in high resolution\n",
+ " ggsave(\n",
+ " filename = file.path(REPORTING_NB_PATH, \"outputs\", \"figures\", paste0(COUNTRY_CODE, \"_choropleth_population_fe.png\")),\n",
+ " width = 14, \n",
+ " height = 8,\n",
+ " dpi = 300\n",
+ " )\n",
+ "\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4046761f",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "if (condition_plot_u5_children) {\n",
+ "\n",
+ " # Display formatted markdown (only if the condition is true)\n",
+ " IRdisplay::display_html(\"Population Enfants moins de 5 ans (U5)
\")\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bc703bc5",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "if (condition_plot_u5_children) {\n",
+ " \n",
+ " NER_palette_population <- make_ner_palette(labels_u5)\n",
+ "\n",
+ " plot <- population_data %>%\n",
+ " mutate(\n",
+ " CATEGORY_POPULATION = cut(\n",
+ " # POPULATION_U5,\n",
+ " !!sym(u5_children_col), # avoid hard coding column, because column names in data changed\n",
+ " breaks = c(0, value_breaks_u5, Inf),\n",
+ " labels = labels_u5, \n",
+ " right = TRUE,\n",
+ " include.lowest = TRUE\n",
+ " )\n",
+ " ) %>% \n",
+ " left_join(shapes_data, \n",
+ " by = join_by(ADM1_NAME, ADM1_ID, ADM2_NAME, ADM2_ID)) %>% \n",
+ " ggplot() +\n",
+ " geom_sf(aes(geometry = geometry,\n",
+ " fill = CATEGORY_POPULATION),\n",
+ " color = \"black\",\n",
+ " linewidth = 0.25, \n",
+ " show.legend = TRUE\n",
+ " ) +\n",
+ " labs(\n",
+ " title = \"Population des enfants de moins de 5 ans par district sanitaire (DS)\",\n",
+ " subtitle = \"Source: NMDR / DHIS2\"\n",
+ " ) +\n",
+ " scale_fill_manual(\n",
+ " values = NER_palette_population, \n",
+ " limits = labels_u5, \n",
+ " drop = FALSE \n",
+ " ) +\n",
+ " facet_wrap(~YEAR, ncol = 3) +\n",
+ " theme_void() +\n",
+ " theme(\n",
+ " plot.title = element_text(face = \"bold\"),\n",
+ " plot.subtitle = element_text(margin = margin(5, 0, 20, 0)),\n",
+ " legend.position = \"bottom\",\n",
+ " legend.title = element_blank(),\n",
+ " strip.text = element_text(face = \"bold\"),\n",
+ " legend.key.height = unit(0.5, \"line\"),\n",
+ " legend.margin = margin(10, 0, 0, 0)\n",
+ " )\n",
+ "\n",
+ " print(plot)\n",
+ "\n",
+ " # Export PNG\n",
+ " ggsave(\n",
+ " filename = file.path(REPORTING_NB_PATH, \"outputs\", \"figures\", paste0(COUNTRY_CODE, \"_choropleth_population_u5.png\")),\n",
+ " width = 14, \n",
+ " height = 8,\n",
+ " dpi = 300\n",
+ " )\n",
+ "\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "61e5ac12-c973-48e0-8c97-1af90e4b59a5",
+ "metadata": {},
+ "source": [
+ "## 3.3. Complétude et qualité des données de la Population"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bec2759d-9ac4-42e1-9f7e-7076780bd7d6",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "if (condition_population_data) {\n",
+ "\n",
+ " # Display formatted markdown (only if the condition is true)\n",
+ " IRdisplay::display_html(\"Population Totale
\")\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2f2e74b5",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "if (condition_population_data) {\n",
+ "\n",
+ "  # 1. Histogram of the total population\n",
+ "  hist(population_data$POPULATION)\n",
+ "\n",
+ "  # 2. Dot plot of the total population per ADM2, one panel row per ADM1,\n",
+ "  # coloured by year. The plot is stored and printed explicitly: an unassigned\n",
+ "  # ggplot that is not the last expression of the block is never rendered.\n",
+ "  plot <- ggplot(population_data) +\n",
+ "    geom_point(\n",
+ "      aes(\n",
+ "        x = POPULATION,\n",
+ "        y = fct_reorder(ADM2_NAME, POPULATION),\n",
+ "        color = factor(YEAR)\n",
+ "      )\n",
+ "    ) +\n",
+ "    facet_grid(\n",
+ "      rows = \"ADM1_NAME\",\n",
+ "      scales = \"free_y\",\n",
+ "      space = \"free_y\",\n",
+ "      switch = \"y\"\n",
+ "    ) +\n",
+ "    scale_x_continuous(\n",
+ "      breaks = c(0, 2e+05, 4e+05, 6e+05, 8e+05, 1e+06, 1.5e+06),\n",
+ "      labels = scales::comma\n",
+ "    ) +\n",
+ "    scale_color_viridis_d(option = \"mako\", end = 0.8) +\n",
+ "    labs(color = \"Année\") +\n",
+ "    theme_minimal() +\n",
+ "    theme(\n",
+ "      axis.text = element_text(size = 7),\n",
+ "      axis.title.x = element_text(size = 7),\n",
+ "      axis.title.y = element_blank(),\n",
+ "      strip.placement = \"outside\",\n",
+ "      panel.grid.minor.x = element_blank(),\n",
+ "      legend.position = \"bottom\"\n",
+ "    )\n",
+ "\n",
+ "  print(plot)\n",
+ "\n",
+ "  # Save the same plot object rather than relying on the implicit last plot\n",
+ "  ggsave(\n",
+ "    filename = file.path(REPORTING_NB_PATH, \"outputs\", \"figures\", \"hist_population_totale.png\"),\n",
+ "    plot = plot,\n",
+ "    units = \"cm\",\n",
+ "    width = 15,\n",
+ "    height = 23,\n",
+ "    bg = \"white\"\n",
+ "  )\n",
+ "\n",
+ "} else {\n",
+ "  print(\"Population data not available.\")\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c6bb79dd-2d8a-4cd1-bf91-3e0e48c14eda",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "if (condition_pregnant_women) { \n",
+ " \n",
+ " # Display formatted markdown (only if the condition is true)\n",
+ " IRdisplay::display_html(\"Population Femmes Enceintes (FE)
\")\n",
+ "\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5d4ca310",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "\n",
+ "if (condition_pregnant_women) {\n",
+ "\n",
+ "  # 1. Histogram of the pregnant women population\n",
+ "  hist(population_data[[pregnant_women_col]]) # avoid hard coding\n",
+ "\n",
+ "  # 2. Dot plot of the pregnant women population per ADM2, one panel row per\n",
+ "  # ADM1, coloured by year. As the last expression of this branch, the plot is\n",
+ "  # the visible value of the `if` and is rendered by the notebook.\n",
+ "  ggplot(\n",
+ "    population_data,\n",
+ "    aes(\n",
+ "      x = .data[[pregnant_women_col]],\n",
+ "      y = fct_reorder(.data[[\"ADM2_NAME\"]], .data[[pregnant_women_col]]), # these should all be dynamic; avoid hardcoding\n",
+ "      color = factor(.data[[\"YEAR\"]])\n",
+ "    )\n",
+ "  ) +\n",
+ "    geom_point() +\n",
+ "    facet_grid(\n",
+ "      rows = \"ADM1_NAME\",\n",
+ "      scales = \"free_y\",\n",
+ "      space = \"free_y\",\n",
+ "      switch = \"y\"\n",
+ "    ) +\n",
+ "    # Evenly spaced breaks, same sequence as the under-5 plot; the previous\n",
+ "    # vector jumped from 6e+04 straight to 8e+05.\n",
+ "    scale_x_continuous(\n",
+ "      breaks = c(0, 2e+04, 4e+04, 6e+04, 8e+04, 1e+05, 1.5e+05),\n",
+ "      labels = scales::comma\n",
+ "    ) +\n",
+ "    scale_color_viridis_d(option = \"mako\", end = 0.8) +\n",
+ "    labs(\n",
+ "      # title = \"\"\n",
+ "      color = \"Année\"\n",
+ "    ) +\n",
+ "    theme_minimal() +\n",
+ "    theme(\n",
+ "      axis.text = element_text(size = 7),\n",
+ "      axis.title.x = element_text(size = 7),\n",
+ "      axis.title.y = element_blank(),\n",
+ "      strip.placement = \"outside\",\n",
+ "      panel.grid.minor.x = element_blank(),\n",
+ "      legend.position = \"bottom\"\n",
+ "    )\n",
+ "\n",
+ "} else {\n",
+ "  print(\"Data for pregnant women not available.\")\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bbda9b88-9b91-4845-83a8-795a12124999",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "if (condition_u5_children) {\n",
+ " \n",
+ " # Display formatted markdown (only if the condition is true)\n",
+ " IRdisplay::display_html(\"Population Enfants moins de 5 ans (U5)
\")\n",
+ "\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d2512e79",
+ "metadata": {
+ "vscode": {
+ "languageId": "r"
+ }
+ },
+ "outputs": [],
+ "source": [
+ "if (condition_u5_children) {\n",
+ "\n",
+ "  # 1. Histogram of the under-5 population\n",
+ "  hist(population_data[[u5_children_col]]) # avoid hard coding\n",
+ "\n",
+ "  # 2. Dot plot of the under-5 population per ADM2, one panel row per ADM1,\n",
+ "  # coloured by year. As the last expression of this branch, the plot is the\n",
+ "  # visible value of the `if` and is rendered by the notebook.\n",
+ "  ggplot(\n",
+ "    population_data,\n",
+ "    aes(\n",
+ "      x = .data[[u5_children_col]],\n",
+ "      y = fct_reorder(.data[[\"ADM2_NAME\"]], .data[[u5_children_col]]), # these should all be dynamic; avoid hardcoding\n",
+ "      color = factor(.data[[\"YEAR\"]])\n",
+ "    )\n",
+ "  ) +\n",
+ "    geom_point() +\n",
+ "    # `scales` spelled out in full instead of relying on partial argument matching\n",
+ "    facet_grid(\n",
+ "      rows = \"ADM1_NAME\",\n",
+ "      scales = \"free_y\",\n",
+ "      space = \"free_y\",\n",
+ "      switch = \"y\"\n",
+ "    ) +\n",
+ "    scale_x_continuous(\n",
+ "      breaks = c(0, 2e+04, 4e+04, 6e+04, 8e+04, 1e+05, 1.5e+05),\n",
+ "      labels = scales::comma\n",
+ "    ) +\n",
+ "    scale_color_viridis_d(option = \"mako\", end = 0.8) +\n",
+ "    labs(\n",
+ "      # title = \"\"\n",
+ "      color = \"Année\"\n",
+ "    ) +\n",
+ "    theme_minimal() +\n",
+ "    theme(\n",
+ "      axis.text = element_text(size = 7),\n",
+ "      axis.title.x = element_text(size = 7),\n",
+ "      axis.title.y = element_blank(),\n",
+ "      strip.placement = \"outside\",\n",
+ "      panel.grid.minor.x = element_blank(),\n",
+ "      legend.position = \"bottom\"\n",
+ "    )\n",
+ "\n",
+ "} else {\n",
+ "  print(\"Data for children under five not available.\")\n",
+ "}"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "R",
+ "language": "R",
+ "name": "ir"
+ },
+ "language_info": {
+ "codemirror_mode": "r",
+ "file_extension": ".r",
+ "mimetype": "text/x-r-source",
+ "name": "R",
+ "pygments_lexer": "r",
+ "version": "4.4.3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}