Skip to content

Commit

Permalink
Allow multiple groups for user definition
Browse files Browse the repository at this point in the history
  • Loading branch information
thomas-vogels committed Nov 15, 2021
1 parent 76eac1b commit b64eccb
Show file tree
Hide file tree
Showing 9 changed files with 179 additions and 126 deletions.
4 changes: 2 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ RUN python3 -m venv "/opt/local/$PROJ_NAME/venv" && \
python3 -m pip install --upgrade pip==20.3.4 --disable-pip-version-check --no-cache-dir && \
python3 -m pip install --requirement /tmp/requirements-all.txt --disable-pip-version-check --no-cache-dir

# Create an empty .pgpass file to help with create_user and update_user commands.
RUN echo '# Format to set password (used by create_user and update_user): *:5439:*:<user>:<password>' > /home/arthur/.pgpass \
# Create an empty .pgpass file to help with update_user command.
RUN echo '# Format to set password (used by update_user): *:5439:*:<user>:<password>' > /home/arthur/.pgpass \
&& chmod go= /home/arthur/.pgpass

# Note that at runtime we (can or may) mount the local directory here.
Expand Down
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ you can simply use `../arthur-redshift-etl/` to find your way back to this ETL c

Although the Redshift cluster can be administered using the AWS console and `psql`, some
helper scripts will make setting up the cluster consistently much easier.
(See below for `initialize` and `create_user`.)
(See below for `initialize`, `create_groups`, and `create_users`.)

Also, add the AWS IAM role that the database owner may assume within Redshift
to your settings file so that Redshift has the needed permissions to access the
Expand Down Expand Up @@ -165,7 +165,8 @@ Don't forget to run `terminate_emr_cluster.sh` when you're done.
| Sub-command | Goal |
| ---- | ---- |
| `initialize` | Create schemas, groups and users |
| `create_user` | Create (or configure) users that are not mentioned in the configuration file |
| `create_groups` | Create groups that are mentioned in the configuration file |
| `create_users` | Create users that are mentioned in the configuration file |

```shell
# The commands to setup the data warehouse users and groups or any database is by ADMIN (connected to `dev`)
Expand Down
2 changes: 1 addition & 1 deletion etc/arthur_completion.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ _arthur_completion()
create_groups
create_index
create_schemas
create_user
create_users
delete_finished_pipelines
design
explain
Expand Down
51 changes: 27 additions & 24 deletions python/etl/commands.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ def build_full_parser(prog_name):
CreateGroupsCommand,
CreateIndexCommand,
CreateSchemasCommand,
CreateUserCommand,
CreateUsersCommand,
DeleteFinishedPipelinesCommand,
ExplainQueryCommand,
ExtractToS3Command,
Expand Down Expand Up @@ -681,7 +681,7 @@ def __init__(self):
"Make sure that all groups mentioned in the configuration file actually exist."
" (This allows to specify a group (as reader or writer) on a schema when that"
" group does not appear with a user and thus may not have been previously"
" created using a 'create_user' call.)",
" created with 'create_users'.)",
)

def add_arguments(self, parser):
Expand All @@ -692,34 +692,35 @@ def callback(self, args):
etl.data_warehouse.create_groups(dry_run=args.dry_run)


class CreateUserCommand(SubCommand):
class CreateUsersCommand(SubCommand):
def __init__(self):
super().__init__(
"create_user",
"add new user",
"Add new user and set group membership, optionally create a personal schema."
"create_users",
"add users to cluster",
"Add users to cluster and set group membership."
" It is ok to re-initialize a user defined in a settings file."
" Note that you have to set a password for the user in your '~/.pgpass' file"
" before invoking this command. The password must be valid in Redshift,"
" so must contain upper-case and lower-case characters as well as numbers.",
" This will add them to any new groups."
" NOTE we currently do not remove users from groups.",
# Old command name that we want to phase out:
aliases=["create_user"],
)

def add_arguments(self, parser):
add_standard_arguments(parser, ["dry-run"])
parser.add_argument("-g", "--group", help="add user to specified group")
parser.add_argument("-g", "--group", help="DEPRECATED (specify group in configuration file)")
parser.add_argument(
"-a", "--add-user-schema", help="add new schema, writable for the user", action="store_true"
"-a", "--add-user-schema", help="DEPRECATED (use 'update_user' instead)", action="store_true"
)
parser.add_argument("username", help="name for new user")
parser.add_argument("name", help="name of user", nargs="*")

def callback(self, args):
if args.group:
logger.warning("Ignoring specified group, using configuration instead")
if args.add_user_schema:
logger.warning("Ignoring request to add user schema, use 'update_user' instead.")

with etl.db.log_error():
etl.data_warehouse.create_new_user(
args.username,
group=args.group,
add_user_schema=args.add_user_schema,
dry_run=args.dry_run,
)
etl.data_warehouse.create_users(args.name, dry_run=args.dry_run)


class ListUsersCommand(SubCommand):
Expand All @@ -731,7 +732,7 @@ def __init__(self):
)

def add_arguments(self, parser):
parser.add_argument("-t", "--transpose", help="group list by user's groups", action="store_true")
parser.add_argument("-t", "--transpose", help="group list by users' groups", action="store_true")

def callback(self, args):
with etl.db.log_error():
Expand All @@ -742,8 +743,8 @@ class UpdateUserCommand(SubCommand):
def __init__(self):
super().__init__(
"update_user",
"update user's group, password, and path",
"For an existing user, update group membership, password, and search path."
"update user's password, their schema and search path",
"For an existing user, update password, create a schema, and update the search path."
" Note that you have to set a password for the user in your '~/.pgpass' file"
" before invoking this command if you want to update the password. The password must"
" be valid in Redshift, so must contain upper-case and lower-case characters as well"
Expand All @@ -752,17 +753,19 @@ def __init__(self):

def add_arguments(self, parser):
add_standard_arguments(parser, ["dry-run"])
parser.add_argument("-g", "--group", help="add user to specified group")
parser.add_argument("-g", "--group", help="DEPRECATED (specify group in configuration file)")
parser.add_argument(
"-a", "--add-user-schema", help="add new schema, writable for the user", action="store_true"
)
parser.add_argument("name", help="name of user")

def callback(self, args):
if args.group:
logger.warning("Ignoring specified group, using configuration instead")

with etl.db.log_error():
etl.data_warehouse.update_user(
args.username,
group=args.group,
args.name,
add_user_schema=args.add_user_schema,
dry_run=args.dry_run,
)
Expand Down
18 changes: 10 additions & 8 deletions python/etl/config/dw.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

class DataWarehouseUser:
"""
Data warehouse users have always a name and group associated with them.
Data warehouse users have always a name and a list of groups associated with them.
Users may have a schema "belong" to them which they then have write access to. This is useful
for system users, mostly, since end users should treat the data warehouse as read-only.
Expand All @@ -20,13 +20,13 @@ class DataWarehouseUser:
def __init__(self, user_info) -> None:
self.name = user_info["name"]
self.comment = user_info.get("comment")
# TODO(tom): Support an array of group names using "#/$defs/identifier_list".
if "group" in user_info:
# Backward compatibility: support single group "group".
self.group = user_info["group"]
self.groups = [user_info["group"]]
elif "groups" in user_info:
self.groups = user_info["groups"]
else:
# Forward compatibility: for now, just pick the only element from the list.
[self.group] = user_info["groups"]
self.groups = []
self.schema = user_info.get("schema")


Expand Down Expand Up @@ -188,6 +188,7 @@ def __init__(self, settings) -> None:
self._admin_access = dw_settings["admin_access"]
self._etl_access = dw_settings["etl_access"]
self._check_access_to_cluster()

self.users = self.parse_users(dw_settings)
schema_owner_map = {user.schema: user.name for user in self.users if user.schema}

Expand All @@ -205,7 +206,7 @@ def __init__(self, settings) -> None:
# Schemas may grant access to groups that have no bootstrapped users.
# So we "union" groups mentioned for users and groups mentioned for schemas.
self.groups = sorted(
{user.group for user in self.users}.union(
{group for user in self.users for group in user.groups}.union(
{group for schema in self.schemas for group in schema.groups}
)
)
Expand All @@ -220,12 +221,13 @@ def __init__(self, settings) -> None:

@staticmethod
def parse_default_group(dw_settings: dict) -> str:
"""Return default group for users based on a user called "default"."""
"""Return default group based on a user called "default"."""
try:
[user_settings] = [user for user in dw_settings["users"] if user["name"] == "default"]
except ValueError:
raise ETLConfigError("failed to find user 'default'")
return DataWarehouseUser(user_settings).group
# TODO(tom): make sure there's exactly one group.
return DataWarehouseUser(user_settings).groups[0]

def parse_schemas(
self, partial_settings: Iterable[dict], schema_owner_map: dict
Expand Down
7 changes: 1 addition & 6 deletions python/etl/config/settings.schema
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,7 @@
"comment": { "type": "string" },
"description": { "type": "string" },
"group": { "$ref": "#/$defs/identifier" },
"groups": {
"items": { "$ref": "#/$defs/identifier" },
"maxItems": 1,
"minItems": 1,
"type": "array"
},
"groups": { "$ref": "#/$defs/identifier_list" },
"name": { "$ref": "#/$defs/identifier" },
"schema": { "$ref": "#/$defs/identifier" }
},
Expand Down
Loading

0 comments on commit b64eccb

Please sign in to comment.