From dc95324abe57773ea7a6289ae8bd2b52a97b6bd2 Mon Sep 17 00:00:00 2001 From: lhoesly Date: Wed, 25 Feb 2026 14:38:11 -0500 Subject: [PATCH 1/3] add code and documentation to grant schemas to groups --- python/grant_schemas/README.md | 133 ++++++++++++++++ .../grant_schemas/automate_schema_grants.py | 142 ++++++++++++++++++ 2 files changed, 275 insertions(+) create mode 100644 python/grant_schemas/README.md create mode 100644 python/grant_schemas/automate_schema_grants.py diff --git a/python/grant_schemas/README.md b/python/grant_schemas/README.md new file mode 100644 index 0000000..34248dd --- /dev/null +++ b/python/grant_schemas/README.md @@ -0,0 +1,133 @@ +# Schema-Based Granting + +## Overview + +Many developed databases hold thousands of different tables and views, which makes organization and discovery of data difficult and greatly complicates questions of data sharing, granting, and access. + +One remedy is to organize tables into schemas that align with different audiences and use cases, and then make and enforce sharing decisions accordingly. Even when there are exceptions, it is a significant improvement to think of dozens of schemas instead of thousands of individual tables. + +However, schemas are a relatively under-supported and unintuitive database feature for sharing decisions, for a few main reasons: + +- Schema and table permissions work together hierarchically in Redshift - users need **both** USAGE permission on the schema **and** SELECT permission on the tables. 
Granting one without the other is insufficient, making permission management more complex than simple folder-based sharing +- Schema purposes cannot always be clearly intuited from their names, requiring the use of thorough external documentation for users to know where certain kinds of data should go +- Newly created data tables inherit no permissions from their schema and are only accessible to the table owner and superusers by default, regardless of what groups have access to the schema itself + +## Purpose + +This script automates database sharing to work more like shared folders in collaborative file sharing software, such that: + +1. Individuals primarily have access to different topic schemas based on their **group** membership, with minimal cases of person-level exceptions +2. Tables put into a schema are understood to be made **automatically** available to members of those groups (not a Redshift/SQL default behavior) +3. Behavior is documented and explained directly in context, so that users are not surprised by cases where: + - Data is **not** shared with other users as expected, or + - Data **is** shared with other users when it was **not** expected (data leakage) + +## Configuration + +### Step 1: Set Environment Variables + +```bash +export DATABASE="your_database_name" # Required: The name of your database +export DRY_RUN="True" # Optional: Set to False to execute changes +export GRANT_USAGE="False" # Optional: Set to True to also grant USAGE on schemas +export GRANT_FUTURE="True" # Optional: Set to False to skip future table grants (default: True) +``` + +### Step 2: Configure Schema Grants + +Edit the `SCHEMA_GRANTS_CONFIG` list in `automate_schema_grants.py` to define which groups should have access to which schemas: + +```python +SCHEMA_GRANTS_CONFIG = [ + { + 'schema_name': 'reporting', + 'groups': ['analysts', 'managers', 'executives'], + 'table_creators': ['etl_user', 'data_engineer_bot'], # Users who create tables + 'notes': 'Reporting tables 
for business intelligence' + }, + { + 'schema_name': 'raw_data', + 'groups': ['data_engineers', 'etl_users'], + 'table_creators': ['etl_service_account'], + 'notes': 'Raw data ingestion schema' + }, +] +``` + +**IMPORTANT - `table_creators` Configuration:** + +In Redshift, `ALTER DEFAULT PRIVILEGES` only applies to objects created by **specific users**. You must list all users who might create tables in the `table_creators` field. Common users to include: +- Service accounts (e.g., `etl_service_account`, `airflow_user`) +- Application users that create tables +- Data engineers or analysts with CREATE privileges + +If you omit `table_creators`, default privileges will only apply to tables created by the user running this script, meaning tables created by other users won't automatically inherit the correct permissions. + +## Usage + +### Dry Run (Preview Changes) + +```bash +python automate_schema_grants.py +``` + +This will log the SQL GRANT statements that would be executed without actually making any changes. + +### Execute Changes + +```bash +export DRY_RUN="False" +python automate_schema_grants.py +``` + +This will execute the SQL statements to grant permissions. + +### Grant USAGE Permissions + +By default, the script only grants SELECT permissions on tables. To also grant USAGE permissions on the schemas themselves: + +```bash +export GRANT_USAGE="True" +python automate_schema_grants.py +``` + +### Disable Future Object Grants + +By default, the script grants permissions on both existing and future tables/views. To only grant on existing objects: + +```bash +export GRANT_FUTURE="False" +python automate_schema_grants.py +``` + +## Tables vs Views + +In Redshift, the command `GRANT SELECT ON ALL TABLES IN SCHEMA` covers: +- Regular tables +- Views +- External tables +- Late-binding views + +Similarly, `ALTER DEFAULT PRIVILEGES` applies to both tables and views created in the future. 
This means you don't need separate commands for views - they're automatically included. + +## How It Works + +1. **Reads Configuration**: Loads the schema-to-groups mapping from `SCHEMA_GRANTS_CONFIG` +2. **Generates GRANT Statements**: Creates SQL statements to grant SELECT (and optionally USAGE) permissions + - Grants SELECT on all existing tables and views in each schema + - Optionally grants USAGE on the schema itself (required for accessing tables/views) + - Sets default privileges for future tables and views created by specified users +3. **Executes or Logs**: Either executes the changes (when `DRY_RUN=False`) or logs them for review + +**Note**: In Redshift, "ALL TABLES" includes tables, views, and external tables that currently exist in the schema. + +**Critical Limitation**: The `ALTER DEFAULT PRIVILEGES` command only applies to objects created by specific users. The script uses `FOR USER username` to grant privileges on future objects created by each user in the `table_creators` list. If a user not in this list creates a table, the permissions will **not** be automatically applied, and you'll need to either: +- Re-run this script to grant on the newly created tables +- Add that user to the `table_creators` list and re-run the script + +## Requirements + +- Python 3.x +- `civis` Python package +- Superuser/admin access to the target database +- Appropriate Civis Platform API credentials diff --git a/python/grant_schemas/automate_schema_grants.py b/python/grant_schemas/automate_schema_grants.py new file mode 100644 index 0000000..bf577ba --- /dev/null +++ b/python/grant_schemas/automate_schema_grants.py @@ -0,0 +1,142 @@ +""" +This script automates granting SELECT access (and optionally USAGE) on database schemas to specified groups. +For each configured schema, it grants permissions to all associated groups on all tables and views within that schema. + +This script must be run with authorized superuser account credential on the affected database.
+ +Configuration: +- Set the DATABASE environment variable to specify which database to use +- Edit the SCHEMA_GRANTS_CONFIG list below to map schemas to their authorized groups +- Set DRY_RUN=True to preview changes without executing them +- Set GRANT_USAGE=True to also grant USAGE permissions on schemas +- Set GRANT_FUTURE=True to grant permissions on future tables/views (default: True) + +IMPORTANT: ALTER DEFAULT PRIVILEGES in Redshift only applies to objects created by specific users. +You must specify the 'table_creators' list for each schema to include all users who might create tables. +Otherwise, default privileges will only apply to tables created by the user running this script. + +Note: In Redshift, "ALL TABLES" includes tables, views, and external tables. +""" + +import civis +import os +import logging +from distutils.util import strtobool + +# Setting up logging +LOG = logging.getLogger(__name__) +FORMAT = "%(asctime)-15s %(levelname)s:%(name)s.%(funcName)s:%(lineno)s %(message)s" +logging.basicConfig(level=logging.INFO, format=FORMAT) + +# ======================================== +# CONFIGURATION: Edit this list for your specific use case +# ======================================== +SCHEMA_GRANTS_CONFIG = [ + { + 'schema_name': 'example_schema', + 'groups': ['example_group_1', 'example_group_2', 'read_only_users'], + 'table_creators': [], # Optional: usernames who can create tables in this schema + 'notes': 'Example schema - replace with your actual schemas and groups' + }, + # Add more schema-to-groups mappings here as needed + # { + # 'schema_name': 'analytics_schema', + # 'groups': ['analysts', 'data_engineers', 'reporting_users'], + # 'table_creators': ['etl_user', 'data_engineer_bot'], # Users who create tables + # 'notes': 'Analytics schema for reporting team' + # }, +] + + +def get_schema_grants_config(): + """ + Returns the schema grants configuration from the code-based SCHEMA_GRANTS_CONFIG. 
+ Returns a dict mapping schema names to their authorized groups: + { + 'schema_name': { + 'schema': 'schema_name', + 'groups': ['group1', 'group2', ...], + 'table_creators': ['user1', 'user2', ...] + }, + ... + } + """ + mapping = {} + + for config in SCHEMA_GRANTS_CONFIG: + schema = config.get('schema_name') + groups = config.get('groups', []) + table_creators = config.get('table_creators', []) + + if schema and groups: + mapping[schema] = { + "schema": schema, + "groups": tuple(groups), + "table_creators": table_creators, + } + + return mapping + + +def main(database, dry_run=True, grant_usage=False, grant_future=True): + grant_commands = [] + schema_grants = get_schema_grants_config() + + for schema_name in schema_grants: + schema = schema_grants[schema_name]["schema"] + groups = schema_grants[schema_name]["groups"] + + if grant_usage: + usage_command = f"GRANT USAGE ON SCHEMA {schema} TO GROUP {', GROUP '.join(groups)};" + grant_commands.append(usage_command) + + # Grant on existing tables and views + # Note: In Redshift, "ALL TABLES" includes tables, views, and external tables + select_command = f"GRANT SELECT ON ALL TABLES IN SCHEMA {schema} TO GROUP {', GROUP '.join(groups)};" + grant_commands.append(select_command) + + # Grant on future tables and views (if enabled) + # IMPORTANT: ALTER DEFAULT PRIVILEGES only applies to objects created by specific users + if grant_future: + table_creators = schema_grants[schema_name].get("table_creators", []) + + if table_creators: + # Grant for each specified table creator + for creator in table_creators: + for group in groups: + future_command = f"ALTER DEFAULT PRIVILEGES FOR USER {creator} IN SCHEMA {schema} GRANT SELECT ON TABLES TO GROUP {group};" + grant_commands.append(future_command) + else: + # No table_creators specified - grant for the current user running the script + # This will only apply to tables created by this user! 
+ for group in groups: + future_command = f"ALTER DEFAULT PRIVILEGES IN SCHEMA {schema} GRANT SELECT ON TABLES TO GROUP {group};" + grant_commands.append(future_command) + LOG.warning(f"No table_creators specified for schema '{schema}'. Default privileges will only apply to tables created by the user running this script.") + + query = "\n".join(grant_commands) + + if dry_run: + LOG.info( + "Running in dry run mode. The following SQL generated but not executed:\n\n" + ) + LOG.info(query) + else: + LOG.info("Running in full mode. The following SQL will be executed:\n\n") + LOG.info(query) + future = civis.io.query_civis(query, database=database, hidden=False) + LOG.info(future.result()) + + +if __name__ == "__main__": + # Different Platform/cloud environments use slightly different formats for Boolean parameters; + # This provides some assurance that "truthy" values are assigned properly. + DRY_RUN_PARAM = strtobool(str(os.environ.get('DRY_RUN', 'True'))) + GRANT_USAGE = strtobool(str(os.environ.get('GRANT_USAGE', 'False'))) + GRANT_FUTURE = strtobool(str(os.environ.get('GRANT_FUTURE', 'True'))) + DATABASE = os.environ.get('DATABASE') + + if not DATABASE: + raise ValueError("DATABASE environment variable must be set") + + main(database=DATABASE, dry_run=DRY_RUN_PARAM, grant_usage=GRANT_USAGE, grant_future=GRANT_FUTURE) From ea4adc1185fa42bca15f81783f4a6aef74cb223b Mon Sep 17 00:00:00 2001 From: lhoesly Date: Wed, 25 Feb 2026 15:06:57 -0500 Subject: [PATCH 2/3] flake8 --- .../grant_schemas/automate_schema_grants.py | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/python/grant_schemas/automate_schema_grants.py b/python/grant_schemas/automate_schema_grants.py index bf577ba..03b10c7 100644 --- a/python/grant_schemas/automate_schema_grants.py +++ b/python/grant_schemas/automate_schema_grants.py @@ -62,26 +62,26 @@ def get_schema_grants_config(): } """ mapping = {} - + for config in SCHEMA_GRANTS_CONFIG: schema = 
config.get('schema_name') groups = config.get('groups', []) table_creators = config.get('table_creators', []) - + if schema and groups: mapping[schema] = { "schema": schema, "groups": tuple(groups), "table_creators": table_creators, } - + return mapping def main(database, dry_run=True, grant_usage=False, grant_future=True): grant_commands = [] schema_grants = get_schema_grants_config() - + for schema_name in schema_grants: schema = schema_grants[schema_name]["schema"] groups = schema_grants[schema_name]["groups"] @@ -94,25 +94,34 @@ def main(database, dry_run=True, grant_usage=False, grant_future=True): # Note: In Redshift, "ALL TABLES" includes tables, views, and external tables select_command = f"GRANT SELECT ON ALL TABLES IN SCHEMA {schema} TO GROUP {', GROUP '.join(groups)};" grant_commands.append(select_command) - + # Grant on future tables and views (if enabled) # IMPORTANT: ALTER DEFAULT PRIVILEGES only applies to objects created by specific users if grant_future: table_creators = schema_grants[schema_name].get("table_creators", []) - + if table_creators: # Grant for each specified table creator for creator in table_creators: for group in groups: - future_command = f"ALTER DEFAULT PRIVILEGES FOR USER {creator} IN SCHEMA {schema} GRANT SELECT ON TABLES TO GROUP {group};" + future_command = ( + f"ALTER DEFAULT PRIVILEGES FOR USER {creator} IN SCHEMA {schema} " + f"GRANT SELECT ON TABLES TO GROUP {group};" + ) grant_commands.append(future_command) else: # No table_creators specified - grant for the current user running the script # This will only apply to tables created by this user! for group in groups: - future_command = f"ALTER DEFAULT PRIVILEGES IN SCHEMA {schema} GRANT SELECT ON TABLES TO GROUP {group};" + future_command = ( + f"ALTER DEFAULT PRIVILEGES IN SCHEMA {schema} " + f"GRANT SELECT ON TABLES TO GROUP {group};" + ) grant_commands.append(future_command) - LOG.warning(f"No table_creators specified for schema '{schema}'. 
Default privileges will only apply to tables created by the user running this script.") + LOG.warning( + f"No table_creators specified for schema '{schema}'. " + f"Default privileges will only apply to tables created by the user running this script." + ) query = "\n".join(grant_commands) @@ -135,8 +144,8 @@ def main(database, dry_run=True, grant_usage=False, grant_future=True): GRANT_USAGE = strtobool(str(os.environ.get('GRANT_USAGE', 'False'))) GRANT_FUTURE = strtobool(str(os.environ.get('GRANT_FUTURE', 'True'))) DATABASE = os.environ.get('DATABASE') - + if not DATABASE: raise ValueError("DATABASE environment variable must be set") - + main(database=DATABASE, dry_run=DRY_RUN_PARAM, grant_usage=GRANT_USAGE, grant_future=GRANT_FUTURE) From 0a0ff9aa2bcaca5105613edc59b7806b2ceda5df Mon Sep 17 00:00:00 2001 From: Maya Mallaby-Kay Date: Tue, 3 Mar 2026 09:25:13 -0700 Subject: [PATCH 3/3] linting --- .../grant_schemas/automate_schema_grants.py | 62 ++++++++++++------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/python/grant_schemas/automate_schema_grants.py b/python/grant_schemas/automate_schema_grants.py index 03b10c7..2644629 100644 --- a/python/grant_schemas/automate_schema_grants.py +++ b/python/grant_schemas/automate_schema_grants.py @@ -1,19 +1,26 @@ """ -This script automates granting SELECT access (and optionally USAGE) on database schemas to specified groups. -For each configured schema, it grants permissions to all associated groups on all tables and views within that schema. +This script automates granting SELECT access (and optionally USAGE) on +database schemas to specified groups. +For each configured schema, it grants permissions to all associated groups +on all tables and views within that schema. -This script must be run with authorized superuser account credential on the affected database. +This script must be run with authorized superuser account credential on the +affected database. 
Configuration: - Set the DATABASE environment variable to specify which database to use -- Edit the SCHEMA_GRANTS_CONFIG list below to map schemas to their authorized groups +- Edit the SCHEMA_GRANTS_CONFIG list below to map schemas to their authorized + groups - Set DRY_RUN=True to preview changes without executing them - Set GRANT_USAGE=True to also grant USAGE permissions on schemas -- Set GRANT_FUTURE=True to grant permissions on future tables/views (default: True) +- Set GRANT_FUTURE=True to grant permissions on future tables/views + (default: True) -IMPORTANT: ALTER DEFAULT PRIVILEGES in Redshift only applies to objects created by specific users. -You must specify the 'table_creators' list for each schema to include all users who might create tables. -Otherwise, default privileges will only apply to tables created by the user running this script. +IMPORTANT: ALTER DEFAULT PRIVILEGES in Redshift only applies to objects +created by specific users. You must specify the 'table_creators' list + for each schema to include all users who might create tables. +Otherwise, default privileges will only apply to tables created + by the user running this script. Note: In Redshift, "ALL TABLES" includes tables, views, and external tables. 
""" @@ -33,10 +40,10 @@ # ======================================== SCHEMA_GRANTS_CONFIG = [ { - 'schema_name': 'example_schema', - 'groups': ['example_group_1', 'example_group_2', 'read_only_users'], - 'table_creators': [], # Optional: usernames who can create tables in this schema - 'notes': 'Example schema - replace with your actual schemas and groups' + "schema_name": "example_schema", + "groups": ["example_group_1", "example_group_2", "read_only_users"], + "table_creators": [], # Optional: usernames who can create tables in this schema + "notes": "Example schema - replace with your actual schemas and groups", }, # Add more schema-to-groups mappings here as needed # { @@ -64,9 +71,9 @@ def get_schema_grants_config(): mapping = {} for config in SCHEMA_GRANTS_CONFIG: - schema = config.get('schema_name') - groups = config.get('groups', []) - table_creators = config.get('table_creators', []) + schema = config.get("schema_name") + groups = config.get("groups", []) + table_creators = config.get("table_creators", []) if schema and groups: mapping[schema] = { @@ -87,12 +94,15 @@ def main(database, dry_run=True, grant_usage=False, grant_future=True): groups = schema_grants[schema_name]["groups"] if grant_usage: - usage_command = f"GRANT USAGE ON SCHEMA {schema} TO GROUP {', GROUP '.join(groups)};" + usage_command = ( + f"GRANT USAGE ON SCHEMA {schema} TO GROUP {', GROUP '.join(groups)};" + ) grant_commands.append(usage_command) # Grant on existing tables and views # Note: In Redshift, "ALL TABLES" includes tables, views, and external tables - select_command = f"GRANT SELECT ON ALL TABLES IN SCHEMA {schema} TO GROUP {', GROUP '.join(groups)};" + select_command = f"""GRANT SELECT ON ALL TABLES IN SCHEMA {schema} + TO GROUP {', GROUP '.join(groups)};""" grant_commands.append(select_command) # Grant on future tables and views (if enabled) @@ -120,7 +130,8 @@ def main(database, dry_run=True, grant_usage=False, grant_future=True): grant_commands.append(future_command) 
LOG.warning( f"No table_creators specified for schema '{schema}'. " - f"Default privileges will only apply to tables created by the user running this script." + f"Default privileges will only apply to tables created" + " by the user running this script." ) query = "\n".join(grant_commands) @@ -140,12 +151,17 @@ def main(database, dry_run=True, grant_usage=False, grant_future=True): if __name__ == "__main__": # Different Platform/cloud environments use slightly different formats for Boolean parameters; # This provides some assurance that "truthy" values are assigned properly. - DRY_RUN_PARAM = strtobool(str(os.environ.get('DRY_RUN', 'True'))) - GRANT_USAGE = strtobool(str(os.environ.get('GRANT_USAGE', 'False'))) - GRANT_FUTURE = strtobool(str(os.environ.get('GRANT_FUTURE', 'True'))) - DATABASE = os.environ.get('DATABASE') + DRY_RUN_PARAM = strtobool(str(os.environ.get("DRY_RUN", "True"))) + GRANT_USAGE = strtobool(str(os.environ.get("GRANT_USAGE", "False"))) + GRANT_FUTURE = strtobool(str(os.environ.get("GRANT_FUTURE", "True"))) + DATABASE = os.environ.get("DATABASE") if not DATABASE: raise ValueError("DATABASE environment variable must be set") - main(database=DATABASE, dry_run=DRY_RUN_PARAM, grant_usage=GRANT_USAGE, grant_future=GRANT_FUTURE) + main( + database=DATABASE, + dry_run=DRY_RUN_PARAM, + grant_usage=GRANT_USAGE, + grant_future=GRANT_FUTURE, + )