diff --git a/charmcraft.yaml b/charmcraft.yaml index 583dca224c..5f15bfaebe 100644 --- a/charmcraft.yaml +++ b/charmcraft.yaml @@ -31,9 +31,6 @@ requires: interface: debug-ssh image: interface: github_runner_image_v0 - mongodb: - interface: mongodb_client - limit: 1 planner: interface: github_runner_planner_v0 @@ -156,16 +153,14 @@ config: type: int default: 0 description: >- - The number of non-reactive virtual machine runners spawned by the charm per each - combination of flavor and base. Currently, if the charm is integrated with MongoDB - this config option will be ignored. + The number of virtual machine runners spawned by the charm per each + combination of flavor and image. max-total-virtual-machines: type: int default: 0 description: >- - If the github-runner charm is integrated with MongoDB, the reactive mode will not spawn - new virtual machines if there are max-total-virtual-machines of more virtual machines - managed by the charm. + Maximum number of virtual machines managed by the charm. The charm will not spawn + new runners beyond this limit. Set to 0 (default) for no cap. flavor-label-combinations: type: string default: "" diff --git a/docs/changelog.md b/docs/changelog.md index e8dc13543c..120c953524 100644 --- a/docs/changelog.md +++ b/docs/changelog.md @@ -2,10 +2,14 @@ This changelog documents user-relevant changes to the GitHub runner charm. +## 2026-03-25 + +- Removed `mongodb` relation. MongoDB-based reactive runner spawning is no longer supported. **If you have an active MongoDB relation, remove it with `juju remove-relation` before upgrading.** +- Updated documentation to clarify that reactive MongoDB-based runner spawning is no longer supported. + ## 2026-03-20 - The pressure reconciler is now always used, replacing the legacy reconcile mode. When no planner relation is configured, it uses `base-virtual-machines` as static pressure to maintain the configured minimum runner count. -- HTTP server endpoints (`/runner/check`, `/runner/flush`) now use `RunnerManager` directly instead of the legacy `RunnerScaler`. - Set GitHub API pagination page size to 100 (up from PyGithub default of 30), reducing API calls when listing runners. ## 2026-03-18 diff --git a/docs/how-to/index.rst b/docs/how-to/index.rst index d4fd31cb07..fcf7c6f562 100644 --- a/docs/how-to/index.rst +++ b/docs/how-to/index.rst @@ -40,7 +40,6 @@ basic operations you can complete with the charm. Debug with SSH Integrate with COS Spawn OpenStack runner - Set up reactive spawning Update and refresh ------------------ diff --git a/docs/how-to/reactive.md b/docs/how-to/reactive.md deleted file mode 100644 index 2cbb460040..0000000000 --- a/docs/how-to/reactive.md +++ /dev/null @@ -1,114 +0,0 @@ -# How to set up reactive spawning - -The charm provides an experimental feature to spawn runners reactively, depending on the jobs requested by GitHub. -This feature is only available for runners on OpenStack cloud and is -disabled by default and can be enabled by integrating the charm with a MongoDB charm. - -## Requirements -For the purposes of this how-to-guide, we assume that you have a machine model (named "machine-model") -that can be used to deploy runners on an OpenStack cloud and [MongoDB](https://charmhub.io/mongodb), -and a k8s model (named "k8s-model") for the [webhook router](https://charmhub.io/github-runner-webhook-router). - -## Steps -We are going to showcase the steps required to set up a reactive spawning environment with three runner flavors (large, large-arm, small) and a MongoDB database as a message queue. - -Note, that the specific revisions/channels in the steps are only marked here for reproducibility, you should adapt the revisions/channels to your needs. - - -### GitHub runner applications - - -For this how-to-guide, we decided to have deployed three GitHub Runner charm applications: `large`, `large-arm`, `small` . We need -to deploy those with these names, to comply with the routing table defined below. - -```shell -juju switch machine-model -juju deploy github-runner large --channel latest/stable .... -juju deploy github-runner large-arm --channel latest/stable .... -juju deploy github-runner small --channel latest/stable .... -``` - -Please refer to [How to spawn OpenStack runner](openstack-runner.md). -for more information on how to deploy the runners. - -### MongoDB - -You need to deploy a MongoDB application to use as a message queue. -You can choose to use the machine charm or the k8s charm version, although we recommend using -the machine charm as the k8s version may not be reachable from outside the k8s cluster. - -```shell -juju switch machine-model -juju deploy mongodb --channel 6/edge --revision 188 -juju expose mongodb -juju offer mongodb:database -juju grant consume -``` - -Integrate with the runner charms - -```shell -juju integrate large mongodb -juju integrate large-arm mongodb -juju integrate small mongodb -``` - -### Define a webhook in your organisation or repository where the self-hosted runners are registered - -On your repository or organisation's page on GitHub, you need to go to the settings and create a Webhook -(e.g. `https://github.com/canonical/github-runner-operator/settings/hooks`). Please make sure to select - -- the Webhook URL to be the URL of the webhook router -- the content type `application/json` -- the secret you defined in the webhook router (if you have so, which is recommended for security reasons) -- the individual event "Workflow jobs" (and only this, as all other events will just be rejected by the webhook router) - -### Webhook router - -The webhook router is a k8s charm, therefore you need to deploy it on a k8s model. - -First, define a routing table to decide which labels should be routed to which GitHub Runner charm application: - -```shell -cat < routing_table.yaml -- large: [large, x64] -- large-arm: [large, arm64] -- small: [small] -EOF -``` - -We decide to route all jobs with any label combination in the set `large,x64` to the large application, `large,arm64` to large-arm, -and labels with `small` to small. -This means, depending on which labels your users are setting in the workflow file, a VM of a different runner application will be used to -execute the job. - -Switch to the k8s model and deploy the webhook router charm: - -```shell -juju switch k8s-model -juju deploy github-runner-webhook-router --channel latest/edge --revision 30 --config flavours=@routing_table.yaml --config default-flavour=small --config webhook-secret= -juju consume -juju integrate github-runner-webhook-router mongodb -``` - ->[!IMPORTANT] -> The webhook router needs to be deployed with the name `github-runner-webhook-router`, as the database name is currently hardcoded in the charm. - - -In this example we use "small" as the default runner application, to which all jobs with empty labels (or default labels such as `self-hosted`,`linux`) -are routed to. - - -In order to be reachable from GitHub, you need to make the webhook publicly available, you will need an ingress or the traefik charm to expose the webhook router: - -```shell -juju deploy nginx-ingress-integrator --channel latest/edge --revision 117 --config path-routes='/' --config service-hostname='githubu-runner-webhook-router.my.domain' --config trust=True -juju integrate nginx-ingress-integrator github-runner-webhook-router -``` - -### COS integration -You will probably also need some observability. -The GitHub Runner and MongoDB machine charm provide COS integration through the `cos-agent` endpoint, and the -GitHub Runner Webhook Router charm provides the usual endpoints (`logging`, `metrics-endpoint`, `grafana-dashboard`). Please refer to -[How to integrate with COS](integrate-with-cos.md) and [Canonical Observability Stack (COS) documentation](https://charmhub.io/topics/canonical-observability-stack) -for more information. \ No newline at end of file diff --git a/docs/index.md b/docs/index.md index d62964da36..e228c899e8 100644 --- a/docs/index.md +++ b/docs/index.md @@ -57,7 +57,6 @@ Thinking about using the GitHub runner charm for your next project? [Get in touc 1. [Debug with SSH](how-to/debug-with-ssh.md) 1. [Integrate with COS](how-to/integrate-with-cos.md) 1. [Spawn OpenStack runner](how-to/openstack-runner.md) - 1. [Set up reactive spawning](how-to/reactive.md) 1. [Comply with security requirements](how-to/comply-security.md) 1. [Upgrade](how-to/upgrade.md) 1. [Contribute](how-to/contribute.md) diff --git a/docs/reference/charm-architecture.md b/docs/reference/charm-architecture.md index 29a76aa339..ccf35a48ff 100644 --- a/docs/reference/charm-architecture.md +++ b/docs/reference/charm-architecture.md @@ -29,40 +29,26 @@ Container_Boundary(c1, "GitHub Runner Charm"){ Component(openstackrunnermanager, "OpenstackRunnerManager", "", "") Component(runnermanager, "RunnerManager", "", "") - Component(runnerscaler, "RunnerScaler", "", "") + Component(pressurereconciler, "PressureReconciler", "", "") - Rel(runnerscaler, runnermanager, "uses", "") + Rel(pressurereconciler, runnermanager, "uses", "") Rel(runnermanager, cloudrunnermanager, "uses", "") Rel(runnermanager, githubrunnermanager, "uses", "") Rel(openstackrunnermanager, cloudrunnermanager, "implements", "") } -Container_Boundary(c2, "Reactive Processes"){ - Component(runnerprocess, "github_runner_manager.reactive.runner", "Reactive Process", "") -} - Rel(githubrunnermanager, github, "manages VMs", "") Rel(openstackrunnermanager, osrunnign, "manages VMs", "") -Rel(runnermanager, runnerprocess, "creates/deletes processes", "") - -Rel(runnerprocess, github, "manages VMs", "") -Rel(runnerprocess, osrunnign, "manages VMs", "") - UpdateLayoutConfig($c4ShapeInRow="3", $c4BoundaryInRow="2") ``` -The `RunnerScaler` is the entry point component to reconcile the desired number of runners using the `RunnerManager`. +The `PressureReconciler` is the entry point component to reconcile the desired number of runners using the `RunnerManager`. The `RunnerManager` is the main component of the charm. The `RunnerManager` interacts with the other charm components in the following ways: * `CloudRunnerManager`: To interact with the compute infrastructure to create and manage - self-hosted runners. OpenStack is currently the only available cloud implementation. + self-hosted runners. OpenStack is currently the only available cloud implementation. * `GithubRunnerManager`: To interact with the GitHub API. -In the case of reactive runners, the `RunnerManager` will also create processes that -will be in charge of consuming events that were created from GitHub webhooks, and starting GitHub runners in a -reactive manner. Those events are stored in `mongodb` and are enqueued by -the charm [`github-runner-webhook-router`](https://github.com/canonical/github-runner-webhook-router). - ## Virtual machines To ensure a clean and isolated environment for every runner, self-hosted runners use OpenStack virtual machines. The charm spawns virtual machines, setting resources based on charm configurations. Virtual machines will not be reused between jobs, this is [similar to how GitHub hosts their runners due to security concerns](https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners#self-hosted-runner-security). diff --git a/docs/reference/cos.md b/docs/reference/cos.md index eba0f02552..aaaca2b6a5 100644 --- a/docs/reference/cos.md +++ b/docs/reference/cos.md @@ -60,8 +60,6 @@ Relevant log files are (replace `` with the name of the application an - `/var/log/github-runner-manager/-.log` : It contains logs from the runner manager application. This is particularly useful for resolving issues related to reconciliation, such as problems with spawning VMs or contacting GitHub. - `/var/log/juju/unit--.log` : It contains the Juju debug logs. These are relevant for inspecting issues with the charm operating the runner manager application. -- `/var/log/reactive_runner/.log` : It contains logs of the process of spawning a runner on demand. This is particularly relevant for troubleshooting issues -, such as queue consumption issues or the inability to spawn VMs on OpenStack. - `/var/log/github-runner-metrics.log` : This contains the logs used to calculate metrics. Use Loki to filter through multiple files or units. diff --git a/docs/reference/integrations.md b/docs/reference/integrations.md index fef754ab89..8cd4a9f586 100644 --- a/docs/reference/integrations.md +++ b/docs/reference/integrations.md @@ -16,16 +16,3 @@ juju integrate github-runner tmate-ssh-server ``` -### `mongodb` - -_Interface_: mongodb_client -_Supported charms_: [mongodb](https://charmhub.io/mongodb), [mongodb-k8s](https://charmhub.io/mongodb-k8s) - -The mongodb integration provides the necessary information for the runner manager to access -the mongodb database, which is required for the (experimental) reactive spawning feature. -Integrating the charm with mongodb lets the charm automatically go into reactive mode. - -Example mongodb_client integrate command: -``` -juju integrate github-runner mongodb -``` \ No newline at end of file diff --git a/github-runner-manager/pyproject.toml b/github-runner-manager/pyproject.toml index 7612bab1a2..a5877fdd63 100644 --- a/github-runner-manager/pyproject.toml +++ b/github-runner-manager/pyproject.toml @@ -3,7 +3,7 @@ [project] name = "github-runner-manager" -version = "0.16.0" +version = "0.17.0" authors = [ { name = "Canonical IS DevOps", email = "is-devops-team@canonical.com" }, ] diff --git a/github-runner-manager/src/github_runner_manager/cli.py b/github-runner-manager/src/github_runner_manager/cli.py index c00bd069e4..cb36dca00b 100644 --- a/github-runner-manager/src/github_runner_manager/cli.py +++ b/github-runner-manager/src/github_runner_manager/cli.py @@ -112,7 +112,7 @@ def main( log_level: The log level. Raises: - ClickException: If no non-reactive combinations are configured. + ClickException: If no runner combinations are configured. """ logging.basicConfig( level=log_level, @@ -123,9 +123,9 @@ def main( config = ApplicationConfiguration.from_yaml_file(StringIO(config_file.read())) lock = Lock() - combinations = config.non_reactive_configuration.combinations + combinations = config.runner_configuration.combinations if not combinations: - raise click.ClickException("No non-reactive combinations configured.") + raise click.ClickException("No runner combinations configured.") runner_manager = build_runner_manager(config, combinations[0]) pressure_reconciler = build_pressure_reconciler(config, runner_manager, lock) diff --git a/github-runner-manager/src/github_runner_manager/configuration/__init__.py b/github-runner-manager/src/github_runner_manager/configuration/__init__.py index be8947d8c0..264cdd90d2 100644 --- a/github-runner-manager/src/github_runner_manager/configuration/__init__.py +++ b/github-runner-manager/src/github_runner_manager/configuration/__init__.py @@ -7,11 +7,9 @@ ApplicationConfiguration, Flavor, Image, - NonReactiveCombination, - NonReactiveConfiguration, ProxyConfig, - QueueConfig, - ReactiveConfiguration, + RunnerCombination, + RunnerConfiguration, SSHDebugConnection, SupportServiceConfig, UserInfo, diff --git a/github-runner-manager/src/github_runner_manager/configuration/base.py b/github-runner-manager/src/github_runner_manager/configuration/base.py index 4dbfecfdba..29eb1f69a6 100644 --- a/github-runner-manager/src/github_runner_manager/configuration/base.py +++ b/github-runner-manager/src/github_runner_manager/configuration/base.py @@ -8,7 +8,7 @@ from typing import Optional, TextIO import yaml -from pydantic import AnyHttpUrl, BaseModel, Field, IPvAnyAddress, MongoDsn, root_validator +from pydantic import AnyHttpUrl, BaseModel, Field, IPvAnyAddress, root_validator from github_runner_manager.configuration import github from github_runner_manager.openstack_cloud.configuration import OpenStackConfiguration @@ -23,11 +23,11 @@ # user would be the current user running the application. @dataclass class UserInfo: - """The user to run the reactive process. + """The user running the application. Attributes: - user: The user for running the reactive processes. - group: The user group for running the reactive processes. + user: The username. + group: The user group. """ user: str @@ -47,8 +47,7 @@ class ApplicationConfiguration(BaseModel): extra_labels: Extra labels to add to the runner. github_config: GitHub configuration. service_config: The configuration for supporting services. - non_reactive_configuration: Configuration for non-reactive mode. - reactive_configuration: Configuration for reactive mode. + runner_configuration: Runner flavor/image configuration. openstack_configuration: Configuration for authorization to a OpenStack host. planner_url: Base URL of the planner service. planner_token: Bearer token to authenticate against the planner service. @@ -60,12 +59,11 @@ class ApplicationConfiguration(BaseModel): extra_labels: list[str] github_config: github.GitHubConfiguration | None service_config: "SupportServiceConfig" - non_reactive_configuration: "NonReactiveConfiguration" - reactive_configuration: "ReactiveConfiguration | None" + runner_configuration: "RunnerConfiguration" openstack_configuration: OpenStackConfiguration planner_url: Optional[AnyHttpUrl] = None planner_token: Optional[str] = None - reconcile_interval: int + reconcile_interval: int = Field(ge=1) @staticmethod def from_yaml_file(file: TextIO) -> "ApplicationConfiguration": @@ -197,18 +195,18 @@ class SSHDebugConnection(BaseModel): local_proxy_port: int = 3129 -class NonReactiveConfiguration(BaseModel): - """Configuration for non-reactive mode. +class RunnerConfiguration(BaseModel): + """Runner flavor/image configuration. Attributes: - combinations: Different combinations of flavor and image to spawn in non-reactive mode. + combinations: Different combinations of flavor and image to spawn. """ - combinations: "list[NonReactiveCombination]" + combinations: "list[RunnerCombination]" -class NonReactiveCombination(BaseModel): - """Combination of image and flavor that the application can spawn in non-reactive mode. +class RunnerCombination(BaseModel): + """Combination of image and flavor that the application can spawn. Attributes: image: Information about the image to spawn. @@ -246,36 +244,6 @@ def check_max_ge_base(cls, values: dict) -> dict: return values -class ReactiveConfiguration(BaseModel): - """Configuration for reactive mode. - - Attributes: - queue: Queue to listen for reactive requests to spawn runners. - max_total_virtual_machines: Maximum number of instances to spawn by the application. - This value will be only checked in reactive mode, and will include all the instances - (reactive and non-reactive) spawned by the application. - images: List of valid images to spawn in reactive mode. - flavors: List of valid flavors to spawn in reactive mode. - """ - - queue: "QueueConfig" - max_total_virtual_machines: int - images: "list[Image]" - flavors: "list[Flavor]" - - -class QueueConfig(BaseModel): - """The configuration for the message queue. - - Attributes: - mongodb_uri: The URI of the MongoDB database. - queue_name: The name of the queue. - """ - - mongodb_uri: MongoDsn - queue_name: str - - class Image(BaseModel): """Information for an image with its associated labels. @@ -303,6 +271,5 @@ class Flavor(BaseModel): # For pydantic to work with forward references. ApplicationConfiguration.update_forward_refs() SupportServiceConfig.update_forward_refs() -NonReactiveConfiguration.update_forward_refs() -NonReactiveCombination.update_forward_refs() -ReactiveConfiguration.update_forward_refs() +RunnerConfiguration.update_forward_refs() +RunnerCombination.update_forward_refs() diff --git a/github-runner-manager/src/github_runner_manager/manager/models.py b/github-runner-manager/src/github_runner_manager/manager/models.py index 80cb64ab62..45426e9bae 100644 --- a/github-runner-manager/src/github_runner_manager/manager/models.py +++ b/github-runner-manager/src/github_runner_manager/manager/models.py @@ -27,13 +27,13 @@ class InstanceID: Attributes: name: Name of the instance to use. prefix: Prefix corresponding to the application (charm application unit). - reactive: Identifies if the runner is reactive. suffix: Random suffix for the InstanceID. """ prefix: str suffix: str - reactive: bool | None = None + # Legacy infix (e.g. "n-" or "r-") preserved so existing VMs can be looked up by name. + _legacy_infix: str = field(default="", repr=False) @property def name(self) -> str: @@ -42,13 +42,7 @@ def name(self) -> str: Returns: Name of the instance """ - if self.reactive is True: - runner_type = "r-" - elif self.reactive is False: - runner_type = "n-" - else: - runner_type = "" - return f"{self.prefix}-{runner_type}{self.suffix}" + return f"{self.prefix}-{self._legacy_infix}{self.suffix}" @classmethod def build_from_name(cls, prefix: str, name: str) -> "InstanceID": @@ -69,26 +63,23 @@ def build_from_name(cls, prefix: str, name: str) -> "InstanceID": else: raise ValueError(f"Invalid runner name {name} for prefix {prefix}") - # The separator from prefix and suffix indicates whether the runner is reactive. - # To maintain backwards compatibility, if there is no r- or n- (reactive or - # non-reactive), we assume non-reactive and keep the full suffix. - reactive = None + # Backward compatibility: older VMs were created with an r- (reactive) or n- + # (non-reactive) infix. We preserve it so .name reconstructs the original server + # name, which is needed for OpenStack lookups and deletions. + legacy_infix = "" separator = suffix[:2] - if separator == "r-": - reactive = True - suffix = suffix[2:] - elif separator == "n-": - reactive = False + if separator in ("r-", "n-"): + legacy_infix = separator suffix = suffix[2:] return cls( prefix=prefix, - reactive=reactive, suffix=suffix, + _legacy_infix=legacy_infix, ) @classmethod - def build(cls, prefix: str, reactive: bool = False) -> "InstanceID": + def build(cls, prefix: str) -> "InstanceID": r"""Generate an InstanceID for a runner. It should be valid for all the CloudProviders and GitHub. @@ -103,7 +94,6 @@ def build(cls, prefix: str, reactive: bool = False) -> "InstanceID": Args: prefix: Prefix for the InstanceID. - reactive: If the instance ID to generate is a reactive runner. Returns: Instance ID of the runner. @@ -112,7 +102,7 @@ def build(cls, prefix: str, reactive: bool = False) -> "InstanceID": InstanceIDInvalidError: If the instance name is not valid (too long). """ suffix = secrets.token_hex(INSTANCE_SUFFIX_LENGTH // 2) - instance_id = cls(prefix=prefix, reactive=reactive, suffix=suffix) + instance_id = cls(prefix=prefix, suffix=suffix) # By default, for OpenStack with MySQL, the limit is 64 characters. if len(instance_id.name) > 64: raise InstanceIDInvalidError( diff --git a/github-runner-manager/src/github_runner_manager/manager/pressure_reconciler.py b/github-runner-manager/src/github_runner_manager/manager/pressure_reconciler.py index 9cedc0a423..21ccaa37d4 100644 --- a/github-runner-manager/src/github_runner_manager/manager/pressure_reconciler.py +++ b/github-runner-manager/src/github_runner_manager/manager/pressure_reconciler.py @@ -20,7 +20,7 @@ from typing import Optional from github_runner_manager.configuration import ApplicationConfiguration -from github_runner_manager.configuration.base import NonReactiveCombination, UserInfo +from github_runner_manager.configuration.base import RunnerCombination, UserInfo from github_runner_manager.errors import IssueMetricEventError, MissingServerConfigError from github_runner_manager.manager.runner_manager import ( RunnerInstance, @@ -420,7 +420,7 @@ def build_pressure_reconciler( Returns: A fully constructed PressureReconciler. """ - first = config.non_reactive_configuration.combinations[0] + first = config.runner_configuration.combinations[0] planner_client: PlannerClient | None = None has_url = bool(config.planner_url) has_token = bool(config.planner_token) @@ -447,7 +447,7 @@ def build_pressure_reconciler( def build_runner_manager( - config: ApplicationConfiguration, combination: NonReactiveCombination + config: ApplicationConfiguration, combination: RunnerCombination ) -> RunnerManager: """Build a RunnerManager from application config and a flavor/image combination. diff --git a/github-runner-manager/src/github_runner_manager/manager/runner_manager.py b/github-runner-manager/src/github_runner_manager/manager/runner_manager.py index cc35f09ba9..3565f6331c 100644 --- a/github-runner-manager/src/github_runner_manager/manager/runner_manager.py +++ b/github-runner-manager/src/github_runner_manager/manager/runner_manager.py @@ -1,8 +1,6 @@ # Copyright 2026 Canonical Ltd. # See LICENSE file for licensing details. -# RunnerInfo is duplicated in runner_scaler (legacy), will be removed in follow-up PR. -# pylint: disable=duplicate-code """Module for managing the GitHub self-hosted runners hosted on cloud instances.""" import copy @@ -38,10 +36,9 @@ # the time waiting before each health check against the platform provider. RUNNER_CREATION_WAITING_TIMES = (60, 60, 120, 240, 480) -# For the reconcile loop, specially for reactive runner (as it is outside of this loop), -# we do not want to delete runners that are offline and not busy in the platform and -# that are not very old in the cloud, as they could be just starting. The creation time will -# be equal to all the possible wait times in creation plus an extra amount. +# Do not delete runners that are offline and not busy in the platform and not very old in +# the cloud, as they could still be starting. The creation time equals all possible wait +# times in creation plus an extra buffer. RUNNER_MAXIMUM_CREATION_TIME = CREATE_SERVER_TIMEOUT + sum(RUNNER_CREATION_WAITING_TIMES) + 120 IssuedMetricEventsStats = dict[Type[metric_events.Event], int] @@ -170,15 +167,12 @@ def __init__( self._platform: PlatformProvider = platform_provider self._labels = labels - def create_runners( - self, num: int, metadata: RunnerMetadata, reactive: bool = False - ) -> tuple[InstanceID, ...]: + def create_runners(self, num: int, metadata: RunnerMetadata) -> tuple[InstanceID, ...]: """Create runners. Args: num: Number of runners to create. metadata: Metadata information for the runner. - reactive: If the runner is reactive. Returns: List of instance ID of the runners. @@ -197,7 +191,6 @@ def create_runners( # assign for example the id of the runner if it was not provided. metadata=copy.copy(metadata), labels=labels, - reactive=reactive, ) for _ in range(num) ] @@ -209,9 +202,7 @@ def _spawn_runners( ) -> tuple[InstanceID, ...]: """Spawn runners in parallel using multiprocessing. - Multiprocessing is only used if there are more than one runner to spawn. Otherwise, - the runner is created in the current process, which is required for reactive, - where we don't assume to spawn another process inside the reactive process. + Multiprocessing is only used if there are more than one runner to spawn. The length of the create_runner_args is number _create_runner invocation, and therefore the number of runner spawned. @@ -599,14 +590,12 @@ class _CreateRunnerArgs: platform_provider: To manage self-hosted runner on the Platform side. metadata: Metadata for the runner to create. labels: List of labels to add to the runners. - reactive: If the runner is reactive. """ cloud_runner_manager: CloudRunnerManager platform_provider: PlatformProvider metadata: RunnerMetadata labels: list[str] - reactive: bool @staticmethod def _create_runner(args: _CreateRunnerArgs) -> InstanceID: @@ -623,7 +612,7 @@ def _create_runner(args: _CreateRunnerArgs) -> InstanceID: Raises: RunnerError: On error creating OpenStack runner. """ - instance_id = InstanceID.build(args.cloud_runner_manager.name_prefix, args.reactive) + instance_id = InstanceID.build(args.cloud_runner_manager.name_prefix) runner_context, runner_info = args.platform_provider.get_runner_context( instance_id=instance_id, metadata=args.metadata, labels=args.labels ) diff --git a/github-runner-manager/src/github_runner_manager/manager/runner_scaler.py b/github-runner-manager/src/github_runner_manager/manager/runner_scaler.py deleted file mode 100644 index d542b271be..0000000000 --- a/github-runner-manager/src/github_runner_manager/manager/runner_scaler.py +++ /dev/null @@ -1,462 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Module for scaling the runners amount.""" - -import logging -import time -from dataclasses import dataclass - -import github_runner_manager.reactive.runner_manager as reactive_runner_manager -from github_runner_manager.configuration import ApplicationConfiguration, UserInfo -from github_runner_manager.constants import GITHUB_SELF_HOSTED_ARCH_LABELS -from github_runner_manager.errors import ( - CloudError, - IssueMetricEventError, - MissingServerConfigError, - ReconcileError, -) -from github_runner_manager.manager.runner_manager import ( - FlushMode, - IssuedMetricEventsStats, - RunnerInstance, - RunnerManager, - RunnerMetadata, -) -from github_runner_manager.manager.vm_manager import HealthState -from github_runner_manager.metrics import events as metric_events -from github_runner_manager.metrics.reconcile import ( - BUSY_RUNNERS_COUNT, - EXPECTED_RUNNERS_COUNT, - IDLE_RUNNERS_COUNT, - RECONCILE_DURATION_SECONDS, -) -from github_runner_manager.openstack_cloud.models import OpenStackServerConfig -from github_runner_manager.openstack_cloud.openstack_runner_manager import ( - OpenStackRunnerManager, - OpenStackRunnerManagerConfig, -) -from github_runner_manager.platform.factory import platform_factory -from github_runner_manager.platform.platform_provider import Platform, PlatformRunnerState -from github_runner_manager.reactive.types_ import ReactiveProcessConfig - -logger = logging.getLogger(__name__) - - -@dataclass -class RunnerInfo: - """Information on the runners. - - Attributes: - online: The number of runner in online state. - busy: The number of the runner in busy state. - offline: The number of runner in offline state. - unknown: The number of runner in unknown state. - runners: The names of the online runners. - busy_runners: The names of the busy runners. - """ - - online: int - busy: int - offline: int - unknown: int - runners: tuple[str, ...] - busy_runners: tuple[str, ...] - - -@dataclass -class _ReconcileResult: - """The result of the reconcile. - - Attributes: - runner_diff: The number of runners created/removed. - metric_stats: The metric stats. - """ - - runner_diff: int - metric_stats: IssuedMetricEventsStats - - -@dataclass -class _ReconcileMetricData: - """Data used to issue the reconciliation metric. - - Attributes: - start_timestamp: The start timestamp of the reconciliation. - end_timestamp: The end timestamp of the reconciliation. - metric_stats: The metric stats issued by the runner manager. - runner_list: The list of runners in the cloud. - flavor: The name of the flavor in the reconciliation event. - expected_runner_quantity: The expected number of runners. - May be None if reactive mode is enabled. - """ - - start_timestamp: float - end_timestamp: float - metric_stats: IssuedMetricEventsStats - runner_list: tuple[RunnerInstance, ...] - flavor: str - expected_runner_quantity: int - - -class RunnerScaler: - """Manage the reconcile of runners.""" - - # Disable too many locals due to this function is collecting and processing configurations. - @classmethod - def build( # pylint: disable-msg=too-many-locals - cls, - application_configuration: ApplicationConfiguration, - user: UserInfo, - python_path: str | None = None, - ) -> "RunnerScaler": - """Create a RunnerScaler from application and OpenStack configuration. - - Args: - application_configuration: Main configuration for the application. - user: The user to run reactive process. - python_path: The PYTHONPATH to access the github-runner-manager library. - - Returns: - A new RunnerScaler. - """ - labels = list(application_configuration.extra_labels) - server_config = None - base_quantity = 0 - if combinations := application_configuration.non_reactive_configuration.combinations: - combination = combinations[0] - labels += combination.image.labels - labels += combination.flavor.labels - server_config = OpenStackServerConfig( - image=combination.image.name, - # Pending to add support for more flavor label combinations - flavor=combination.flavor.name, - network=application_configuration.openstack_configuration.network, - ) - base_quantity = combination.base_virtual_machines - - openstack_runner_manager_config = OpenStackRunnerManagerConfig( - allow_external_contributor=application_configuration.allow_external_contributor, - prefix=application_configuration.openstack_configuration.vm_prefix, - credentials=application_configuration.openstack_configuration.credentials, - server_config=server_config, - service_config=application_configuration.service_config, - ) - platform_provider = platform_factory( - vm_prefix=application_configuration.openstack_configuration.vm_prefix, - github_config=application_configuration.github_config, - ) - - runner_manager = RunnerManager( - manager_name=application_configuration.name, - platform_provider=platform_provider, - cloud_runner_manager=OpenStackRunnerManager( - config=openstack_runner_manager_config, - user=user, - ), - labels=labels, - ) - - max_quantity = 0 - reactive_runner_config = None - if reactive_config := application_configuration.reactive_configuration: - # The charm is not able to determine which architecture the runner is running on, - # so we add all architectures to the supported labels. - supported_labels = set(labels) | GITHUB_SELF_HOSTED_ARCH_LABELS - reactive_runner_config = ReactiveProcessConfig( - queue=reactive_config.queue, - manager_name=application_configuration.name, - github_configuration=application_configuration.github_config, - cloud_runner_manager=openstack_runner_manager_config, - supported_labels=supported_labels, - labels=labels, - ) - max_quantity = reactive_config.max_total_virtual_machines - return cls( - runner_manager=runner_manager, - reactive_process_config=reactive_runner_config, - user=user, - base_quantity=base_quantity, - max_quantity=max_quantity, - platform_name=Platform.GITHUB, - python_path=python_path, - ) - - # The `user` argument will be removed once the charm no longer uses the github-runner-manager - # as a library. The `user` is currently an argument as github-runner-manager as a library needs - # it to be set to a hardcoded value, while as an application the value would be the current - # user. - # Disable the too many arguments for now as `user` will be removed later on. - def __init__( # pylint: disable=too-many-arguments, too-many-positional-arguments - self, - runner_manager: RunnerManager, - reactive_process_config: ReactiveProcessConfig | None, - user: UserInfo, - base_quantity: int, - max_quantity: int, - python_path: str | None = None, - platform_name: Platform = Platform.GITHUB, - ): - """Construct the object. - - Args: - runner_manager: The RunnerManager to perform runner reconcile. - reactive_process_config: Reactive runner configuration. - user: The user to run the reactive process. - base_quantity: The number of intended non-reactive runners. - max_quantity: The number of maximum runners for reactive. - platform_name: The name of the platform used for spawning runners. - python_path: The PYTHONPATH to access the github-runner-manager library. - """ - self._manager = runner_manager - self._reactive_config = reactive_process_config - self._user = user - self._base_quantity = base_quantity - self._max_quantity = max_quantity - self._platform_name = platform_name - self._python_path = python_path - - EXPECTED_RUNNERS_COUNT.labels(self._manager.manager_name).set(self._base_quantity) - - def get_runner_info(self) -> RunnerInfo: - """Get information on the runners. - - Returns: - The information on the runners. - """ - runner_list = self._manager.get_runners() - online = 0 - busy = 0 - offline = 0 - unknown = 0 - online_runners = [] - busy_runners = [] - for runner in runner_list: - match runner.platform_state: - case PlatformRunnerState.BUSY: - online += 1 - online_runners.append(runner.name) - busy += 1 - busy_runners.append(runner.name) - case PlatformRunnerState.IDLE: - online += 1 - online_runners.append(runner.name) - case PlatformRunnerState.OFFLINE: - offline += 1 - case _: - unknown += 1 - return RunnerInfo( - online=online, - busy=busy, - offline=offline, - unknown=unknown, - runners=tuple(online_runners), - busy_runners=tuple(busy_runners), - ) - - def flush(self, flush_mode: FlushMode = FlushMode.FLUSH_IDLE) -> int: - """Flush the runners. - - Args: - flush_mode: Determines the types of runner to be flushed. - - Returns: - Number of runners flushed. - """ - metric_stats = self._manager.cleanup() - if self._reactive_config is not None: - reactive_runner_manager.flush_reactive_processes() - delete_metric_stats = self._manager.flush_runners(flush_mode=flush_mode) - events = set(delete_metric_stats.keys()) | set(metric_stats.keys()) - metric_stats = { - event_name: delete_metric_stats.get(event_name, 0) + metric_stats.get(event_name, 0) - for event_name in events - } - return metric_stats.get(metric_events.RunnerStop, 0) - - def reconcile(self) -> int: - """Reconcile the quantity of runners. - - Returns: - The Change in number of runners or reactive processes. - - Raises: - ReconcileError: If an expected error occurred during the reconciliation. - """ - logger.info( - "Start reconcile. base_quantity %s. max_quantity: %s.", - self._base_quantity, - self._max_quantity, - ) - - metric_stats = {} - start_timestamp = time.time() - - expected_runner_quantity = self._base_quantity - - try: - if self._reactive_config is not None: - logger.info("Reactive configuration detected, spawning runners in reactive mode.") - reconcile_result = reactive_runner_manager.reconcile( - expected_quantity=self._max_quantity, - runner_manager=self._manager, - reactive_process_config=self._reactive_config, - user=self._user, - python_path=self._python_path, - ) - reconcile_diff = reconcile_result.processes_diff - metric_stats = reconcile_result.metric_stats - else: - reconcile_result = self._reconcile_non_reactive(self._base_quantity) - reconcile_diff = reconcile_result.runner_diff - metric_stats = reconcile_result.metric_stats - except CloudError as exc: - logger.error("Failed to reconcile runners.") - raise ReconcileError("Failed to reconcile runners.") from exc - finally: - runner_list = self._manager.get_runners() - self._log_runners(runner_list) - end_timestamp = time.time() - reconcile_metric_data = _ReconcileMetricData( - start_timestamp=start_timestamp, - end_timestamp=end_timestamp, - metric_stats=metric_stats, - runner_list=runner_list, - flavor=self._manager.manager_name, - expected_runner_quantity=expected_runner_quantity, - ) - RECONCILE_DURATION_SECONDS.labels(self._manager.manager_name).observe( - end_timestamp - start_timestamp - ) - _issue_reconciliation_metric(reconcile_metric_data, self._manager.manager_name) - - logger.info("Finished reconciliation.") - - return reconcile_diff - - def _reconcile_non_reactive(self, expected_quantity: int) -> _ReconcileResult: - """Reconcile the quantity of runners in non-reactive mode. - - Args: - expected_quantity: The number of intended runners. - - Returns: - The reconcile result. - """ - delete_metric_stats = None - metric_stats = self._manager.cleanup() - runners = self._manager.get_runners() - logger.info("Reconcile runners from %s to %s", len(runners), expected_quantity) - runner_diff = expected_quantity - len(runners) - if runner_diff > 0: - try: - self._manager.create_runners( - num=runner_diff, metadata=RunnerMetadata(platform_name=self._platform_name) - ) - except MissingServerConfigError: - logging.exception( - "Unable to spawn runner due to missing server configuration, " - "such as, image." - ) - elif runner_diff < 0: - delete_metric_stats = self._manager.delete_runners(-runner_diff) - else: - logger.info("No changes to the number of runners.") - # Merge the two metric stats. - if delete_metric_stats is not None: - metric_stats = { - event_name: delete_metric_stats.get(event_name, 0) - + metric_stats.get(event_name, 0) - for event_name in set(delete_metric_stats) | set(metric_stats) - } - return _ReconcileResult(runner_diff=runner_diff, metric_stats=metric_stats) - - @staticmethod - def _log_runners(runner_list: tuple[RunnerInstance, ...]) -> None: - """Log information about the runners found. - - Args: - runner_list: The list of runners. - """ - for runner in runner_list: - logger.info( - "Runner %s: state=%s, health=%s", - runner.name, - runner.platform_state, - runner.health, - ) - busy_runners = [ - runner for runner in runner_list if runner.platform_state == PlatformRunnerState.BUSY - ] - idle_runners = [ - runner for runner in runner_list if runner.platform_state == PlatformRunnerState.IDLE - ] - offline_healthy_runners = [ - runner - for runner in runner_list - if runner.platform_state == PlatformRunnerState.OFFLINE - and runner.health == HealthState.HEALTHY - ] - unhealthy_states = {HealthState.UNHEALTHY, HealthState.UNKNOWN} - unhealthy_runners = [runner for runner in runner_list if runner.health in unhealthy_states] - logger.info("Found %s busy runners: %s", len(busy_runners), busy_runners) - logger.info("Found %s idle runners: %s", len(idle_runners), idle_runners) - logger.info( - "Found %s offline runners that are healthy: %s", - len(offline_healthy_runners), - offline_healthy_runners, - ) - logger.info("Found %s unhealthy runners: %s", len(unhealthy_runners), unhealthy_runners) - - -def _issue_reconciliation_metric( - reconcile_metric_data: _ReconcileMetricData, manager_name: str -) -> None: - """Issue the reconciliation metric. - - Args: - reconcile_metric_data: The data used to issue the reconciliation metric. - manager_name: The name of the manager. - """ - idle_runners = { - runner.name - for runner in reconcile_metric_data.runner_list - if runner.platform_state == PlatformRunnerState.IDLE - } - - offline_healthy_runners = { - runner.name - for runner in reconcile_metric_data.runner_list - if runner.platform_state == PlatformRunnerState.OFFLINE - and runner.health == HealthState.HEALTHY - } - available_runners = idle_runners | offline_healthy_runners - active_runners = { - runner.name - for runner in reconcile_metric_data.runner_list - if runner.platform_state == PlatformRunnerState.BUSY - } - logger.info("Current available runners (idle + healthy offline): %s", available_runners) - logger.info("Current active runners: %s", active_runners) - - BUSY_RUNNERS_COUNT.labels(manager_name).set(len(active_runners)) - IDLE_RUNNERS_COUNT.labels(manager_name).set(len(idle_runners)) - - try: - metric_events.issue_event( - metric_events.Reconciliation( - timestamp=time.time(), - flavor=reconcile_metric_data.flavor, - crashed_runners=reconcile_metric_data.metric_stats.get( - metric_events.RunnerStart, 0 - ) - - reconcile_metric_data.metric_stats.get(metric_events.RunnerStop, 0), - idle_runners=len(available_runners), - active_runners=len(active_runners), - expected_runners=reconcile_metric_data.expected_runner_quantity, - duration=reconcile_metric_data.end_timestamp - - reconcile_metric_data.start_timestamp, - ) - ) - except IssueMetricEventError: - logger.exception("Failed to issue Reconciliation metric") diff --git a/github-runner-manager/src/github_runner_manager/metrics/events.py b/github-runner-manager/src/github_runner_manager/metrics/events.py index 48b295b616..26976bf974 100644 --- a/github-runner-manager/src/github_runner_manager/metrics/events.py +++ b/github-runner-manager/src/github_runner_manager/metrics/events.py @@ -131,8 +131,7 @@ class Reconciliation(Event): crashed_runners: The number of crashed runners. idle_runners: The number of idle runners. active_runners: The number of active runners. - expected_runners: The expected number of runners. This is optional as it is not suitable - for reactive runners. + expected_runners: The expected number of runners. duration: The duration of the reconciliation in seconds. """ @@ -140,7 +139,7 @@ class Reconciliation(Event): crashed_runners: int idle_runners: int active_runners: int - expected_runners: int | None + expected_runners: int duration: NonNegativeFloat diff --git a/github-runner-manager/src/github_runner_manager/reactive/__init__.py b/github-runner-manager/src/github_runner_manager/reactive/__init__.py deleted file mode 100644 index 512c0261c6..0000000000 --- a/github-runner-manager/src/github_runner_manager/reactive/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Package for code implementing reactive scheduling.""" diff --git a/github-runner-manager/src/github_runner_manager/reactive/consumer.py b/github-runner-manager/src/github_runner_manager/reactive/consumer.py deleted file mode 100644 index cd74b9d544..0000000000 --- a/github-runner-manager/src/github_runner_manager/reactive/consumer.py +++ /dev/null @@ -1,363 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Module responsible for consuming jobs from the message queue.""" - -import contextlib -import logging -import signal -import sys -from contextlib import closing -from time import sleep -from types import FrameType -from typing import Generator, cast -from urllib.parse import urlparse - -from kombu import Connection, Message -from kombu.exceptions import KombuError -from kombu.simple import SimpleQueue -from pydantic import BaseModel, HttpUrl, ValidationError, validator - -from github_runner_manager.manager.models import RunnerMetadata -from github_runner_manager.manager.runner_manager import RunnerManager -from github_runner_manager.platform.platform_provider import JobNotFoundError, PlatformProvider -from github_runner_manager.reactive.types_ import QueueConfig - -logger = logging.getLogger(__name__) - -Labels = set[str] - -PROCESS_COUNT_HEADER_NAME = "X-Process-Count" -WAIT_TIME_IN_SEC = 60 -RETRY_LIMIT = 5 -# Exponential backoff configuration for message retries -BACKOFF_BASE_SECONDS = 60 -BACKOFF_MAX_SECONDS = 1800 -# This control message is for testing. The reactive process will stop consuming messages -# when the message is sent. This message does not come from the router. -END_PROCESSING_PAYLOAD = "__END__" - - -class JobDetails(BaseModel): - """A class to translate the payload. - - Attributes: - labels: The labels of the job. - url: The URL of the job to check its status. - """ - - labels: Labels - url: HttpUrl - - @validator("url") - @classmethod - def check_job_url_path_is_not_empty(cls, v: HttpUrl) -> HttpUrl: - """Check that the job_url path is not empty. - - Args: - v: The job_url to check. - - Returns: - The job_url if it is valid. - - Raises: - ValueError: If the job_url path is empty. - """ - if not v.path: - raise ValueError("path must be provided") - return v - - -class JobError(Exception): - """Raised when a job error occurs.""" - - -class QueueError(Exception): - """Raised when an error when communicating with the queue occurs.""" - - -def get_queue_size(queue_config: QueueConfig) -> int: - """Get the size of the message queue. - - Args: - queue_config: The configuration for the message queue. - - Returns: - The size of the queue. - - Raises: - QueueError: If an error when communicating with the queue occurs. - """ - try: - with Connection(queue_config.mongodb_uri) as conn: - with closing(SimpleQueue(conn, queue_config.queue_name)) as simple_queue: - return simple_queue.qsize() - except KombuError as exc: - raise QueueError("Error when communicating with the queue") from exc - - -def _calculate_backoff_time(retry_count: int) -> int: - """Calculate exponential backoff time for retries. - - Args: - retry_count: The current retry count (starting from 1). - - Returns: - The backoff time in seconds, capped at BACKOFF_MAX_SECONDS. - """ - backoff_time = BACKOFF_BASE_SECONDS * (2 ** (retry_count - 1)) - return min(backoff_time, BACKOFF_MAX_SECONDS) - - -# Ignore `consume` too complex as it is pending re-design. -def consume( # noqa: C901 - queue_config: QueueConfig, - runner_manager: RunnerManager, - platform_provider: PlatformProvider, - supported_labels: Labels, -) -> None: - """Consume a job from the message queue. - - Log the job details and acknowledge the message. - If the job details are invalid, reject the message and raise an error. - - Args: - queue_config: The configuration for the message queue. - runner_manager: The runner manager used to create the runner. - platform_provider: Platform provider. - supported_labels: The supported labels for the runner. If the job has unsupported labels, - the message is requeued. - - Raises: - QueueError: If an error when communicating with the queue occurs. - """ - try: - with ( - Connection(queue_config.mongodb_uri) as conn, - closing(SimpleQueue(conn, queue_config.queue_name)) as simple_queue, - signal_handler(signal.SIGTERM), - ): - # Get messages until we can spawn a runner. - while True: - msg = simple_queue.get(block=True) - # Payload to stop the processing - if msg.payload == END_PROCESSING_PAYLOAD: - msg.ack() - break - - msg.headers[PROCESS_COUNT_HEADER_NAME] = ( - msg.headers.get(PROCESS_COUNT_HEADER_NAME, 0) + 1 - ) - msg_process_count = msg.headers[PROCESS_COUNT_HEADER_NAME] - - job_details = _parse_job_details(msg) - logger.info("Received reactive job: %s", job_details) - - if msg_process_count > RETRY_LIMIT: - logger.warning( - "Retry limit reach for job %s with labels: %s", - job_details.url, - job_details.labels, - ) - msg.reject(requeue=False) - continue - - if msg_process_count > 1: - backoff_time = _calculate_backoff_time(msg_process_count) - logger.info( - "Pause job %s with retry count %s for %s seconds (exponential backoff)", - job_details.url, - msg_process_count, - backoff_time, - ) - sleep(backoff_time) - - if not _validate_labels( - labels=job_details.labels, supported_labels=supported_labels - ): - logger.error( - "Found unsupported job labels in %s. " - "Will not spawn a runner and reject the message.", - job_details.labels, - ) - # We currently do not expect this to happen, but we should handle it. - # We do not want to requeue the message as it will be rejected again. - # This may change in the future when messages for multiple - # flavours are sent to the same queue. - msg.reject(requeue=False) - continue - try: - metadata = _build_runner_metadata(job_details.url) - except ValueError: - msg.reject(requeue=False) - break - try: - if platform_provider.check_job_been_picked_up( - metadata=metadata, job_url=job_details.url - ): - logger.info("reactive job: %s already picked up.", job_details) - msg.ack() - continue - except JobNotFoundError: - logger.warning( - "Unable to find the job %s. Not retrying this job.", job_details.url - ) - msg.reject(requeue=False) - _spawn_runner( - runner_manager=runner_manager, - job_url=job_details.url, - msg=msg, - platform_provider=platform_provider, - metadata=metadata, - ) - break - except KombuError as exc: - raise QueueError("Error when communicating with the queue") from exc - - -def _build_runner_metadata(job_url: str) -> RunnerMetadata: - """Build runner metadata from the job url. - - Args: - job_url: The job URL. - - Returns: - RunnerMetadata for GitHub. - - Raises: - ValueError: If the URL is not a GitHub URL. - """ - parsed_url = urlparse(job_url) - # We only support github.com URLs now - if "github.com" in parsed_url.netloc: - return RunnerMetadata() - - logger.error("Invalid URL for a job. Only GitHub URLs are supported. url: %s", job_url) - raise ValueError(f"Invalid job url {job_url}. Only GitHub URLs are supported.") - - -def _parse_job_details(msg: Message) -> JobDetails: - """Parse JobDetails from a message.""" - try: - job_details = cast(JobDetails, JobDetails.parse_raw(msg.payload)) - except ValidationError as exc: - logger.error("Found invalid job details, will reject the message.") - msg.reject(requeue=False) - raise JobError(f"Invalid job details: {msg.payload}") from exc - logger.info( - "Received job with labels %s and job_url %s", - job_details.labels, - job_details.url, - ) - return job_details - - -def _validate_labels(labels: Labels, supported_labels: Labels) -> bool: - """Validate the labels of the job. - - Args: - labels: The labels of the job. - supported_labels: The supported labels for the runner. - - Returns: - True if the labels are valid, False otherwise. - """ - return {label.lower() for label in labels} <= {label.lower() for label in supported_labels} - - -def _spawn_runner( - runner_manager: RunnerManager, - job_url: HttpUrl, - msg: Message, - platform_provider: PlatformProvider, - metadata: RunnerMetadata, -) -> None: - """Spawn a runner. - - A runner is only spawned if the job has not yet been picked up by a runner. - After spawning a runner, it is checked if the job has been picked up. - - If the job has been picked up, the message is acknowledged. - If the job has not been picked up after 5 minutes, the message is rejected and requeued. - - Args: - runner_manager: The runner manager to use. - job_url: The URL of the job. - msg: The message to acknowledge or reject. - platform_provider: Platform provider. - metadata: RunnerMetadata for the runner to spawn.. - """ - logger.info("Spawning new reactive runner for job %s", job_url) - instance_ids = runner_manager.create_runners(1, metadata=metadata, reactive=True) - if not instance_ids: - logger.error("Failed to spawn a runner for job %s. Will reject the message.", job_url) - msg.reject(requeue=True) - return - logger.info("Reactive runner spawned %s", instance_ids) - - for attempt in range(5): - sleep(WAIT_TIME_IN_SEC) - logger.info("Checking if job picked up for reactive runner %s", instance_ids) - try: - if platform_provider.check_job_been_picked_up(metadata=metadata, job_url=job_url): - logger.info("Job picked %s. reactive runner ok %s", job_url, instance_ids) - msg.ack() - break - except JobNotFoundError: - logger.warning("Job not found after spawning runner. Retry (%s)/5", attempt) - else: - logger.info( - "Job %s not picked by reactive runner %s. Probably picked up by another job", - job_url, - instance_ids, - ) - msg.reject(requeue=True) - - -@contextlib.contextmanager -def signal_handler(signal_code: signal.Signals) -> Generator[None, None, None]: - """Set a signal handler and after the context, restore the default handler. - - The signal handler exits the process. - - Args: - signal_code: The signal code to handle. - """ - _set_signal_handler(signal_code) - try: - yield - finally: - _restore_signal_handler(signal_code) - - -def _set_signal_handler(signal_code: signal.Signals) -> None: - """Set a signal handler which exits the process. - - Args: - signal_code: The signal code to handle. - """ - - def sigterm_handler(signal_code: int, _: FrameType | None) -> None: - """Handle a signal. - - Call sys.exit with the signal code. Kombu should automatically - requeue unacknowledged messages. - - Args: - signal_code: The signal code to handle. - """ - print( - f"Signal '{signal.strsignal(signal_code)}' received. Will terminate.", file=sys.stderr - ) - sys.exit(signal_code) - - signal.signal(signal_code, sigterm_handler) - - -def _restore_signal_handler(signal_code: signal.Signals) -> None: - """Restore the default signal handler. - - Args: - signal_code: The signal code to restore. - """ - signal.signal(signal_code, signal.SIG_DFL) diff --git a/github-runner-manager/src/github_runner_manager/reactive/process_manager.py b/github-runner-manager/src/github_runner_manager/reactive/process_manager.py deleted file mode 100644 index 821d7f3e3b..0000000000 --- a/github-runner-manager/src/github_runner_manager/reactive/process_manager.py +++ /dev/null @@ -1,191 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Module for managing processes which spawn runners reactively.""" - -import logging -import os -import shutil -import signal - -# All commands run by subprocess are secure. -import subprocess # nosec -from pathlib import Path - -from github_runner_manager import constants -from github_runner_manager.configuration import UserInfo -from github_runner_manager.reactive.types_ import ReactiveProcessConfig -from github_runner_manager.utilities import secure_run_subprocess - -logger = logging.getLogger(__name__) - -REACTIVE_RUNNER_LOG_DIR = Path("/var/log/reactive_runner") - -PYTHON_BIN = "/usr/bin/python3" -REACTIVE_RUNNER_SCRIPT_MODULE = "github_runner_manager.reactive.runner" -REACTIVE_RUNNER_CMD_LINE_PREFIX = f"{PYTHON_BIN} -m {REACTIVE_RUNNER_SCRIPT_MODULE}" -PID_CMD_COLUMN_WIDTH = len(REACTIVE_RUNNER_CMD_LINE_PREFIX) -PIDS_COMMAND_LINE = [ - "ps", - "axo", - f"cmd:{PID_CMD_COLUMN_WIDTH},pid", - "--no-headers", - "--sort=-start_time", -] -UBUNTU_USER = "ubuntu" -RUNNER_CONFIG_ENV_VAR = "RUNNER_CONFIG" - - -class ReactiveRunnerError(Exception): - """Raised when a reactive runner error occurs.""" - - -def reconcile( - quantity: int, - reactive_process_config: ReactiveProcessConfig, - user: UserInfo, - python_path: str | None = None, -) -> int: - """Reconcile the number of reactive runner processes. - - Args: - quantity: The number of processes to spawn. - reactive_process_config: The reactive runner configuration. - user: The user to run the reactive process. - python_path: The PYTHONPATH to access the github-runner-manager library. - - Raises a ReactiveRunnerError if the runner fails to spawn. - - Returns: - The number of reactive runner processes spawned/killed. - """ - pids = _get_pids() - current_quantity = len(pids) - logger.info( - "Reactive runner processes: current quantity %s, expected quantity %s", - current_quantity, - quantity, - ) - delta = quantity - current_quantity - if delta > 0: - logger.info("Will spawn %d new reactive runner process(es)", delta) - _setup_logging_for_processes(user.user, user.group) - for _ in range(delta): - _spawn_runner(reactive_process_config, python_path) - elif delta < 0: - logger.info("Will kill %d process(es).", -delta) - for pid in pids[:-delta]: - logger.info("Killing reactive runner process with pid %s", pid) - try: - os.kill(pid, signal.SIGTERM) - except ProcessLookupError: - # There can be a race condition that the process has already terminated. - # We just ignore and log the fact. - logger.info( - "Failed to kill process with pid %s. Process might have terminated it self.", - pid, - ) - else: - logger.info("No changes to number of reactive runner processes needed.") - - return delta - - -def kill_reactive_processes() -> None: - """Kill all reactive processes.""" - pids = _get_pids() - if pids: - for pid in pids: - try: - logger.info("Killing reactive runner process with pid %s", pid) - os.kill(pid, signal.SIGTERM) - except ProcessLookupError: - logger.info( - "Failed to kill process with pid %s. Process might have terminated it self.", - pid, - ) - else: - logger.info("No reactive processes to flush") - - -def _get_pids() -> list[int]: - """Get the PIDs of the reactive runners processes. - - Returns: - The PIDs of the reactive runner processes sorted by start time in descending order. - - Raises: - ReactiveRunnerError: If the command to get the PIDs fails - """ - result = secure_run_subprocess(cmd=PIDS_COMMAND_LINE) - if result.returncode != 0: - raise ReactiveRunnerError("Failed to get list of processes") - - # stdout will look like - # - # ps axo cmd:57,pid --no-headers --sort=-start_time 2302635 - # -bash 2302498 - # /bin/sh -c /usr/bin/python3 -m github_runner_manager.reac 1757306 - # /usr/bin/python3 -m github_runner_manager.reactive.runner 1757308 - - # we filter for the command line of the reactive runner processes and extract the PID - - return [ - int(line.rstrip().rsplit(maxsplit=1)[-1]) - for line in result.stdout.decode().split("\n") - if line.startswith(REACTIVE_RUNNER_CMD_LINE_PREFIX) - ] - - -def _setup_logging_for_processes(user: str, group: str) -> None: - """Set up the log dir. - - Args: - user: The user for logging. - group: The group owning the logs. - """ - REACTIVE_RUNNER_LOG_DIR.mkdir(exist_ok=True) - shutil.chown( - REACTIVE_RUNNER_LOG_DIR, - user=user, - group=group, - ) - - -def _spawn_runner(reactive_process_config: ReactiveProcessConfig, python_path: str | None) -> None: - """Spawn a runner. - - Args: - reactive_process_config: The runner configuration to pass to the spawned runner process. - python_path: The PYTHONPATH to access the github-runner-manager library. - """ - env = { - RUNNER_CONFIG_ENV_VAR: reactive_process_config.json(), - } - if python_path is not None: - env["PYTHONPATH"] = str(python_path) - # We do not want to wait for the process to finish, so we do not use with statement. - # We trust the command. - command = " ".join( - [ - PYTHON_BIN, - "-m", - REACTIVE_RUNNER_SCRIPT_MODULE, - ">>", - # $$ will be replaced by the PID of the process, so we can track the error log easily. - f"{REACTIVE_RUNNER_LOG_DIR}/$$.log", - "2>&1", - ] - ) - logger.debug("Spawning a new reactive runner process with command: %s", command) - process = subprocess.Popen( # pylint: disable=consider-using-with # nosec - command, - shell=True, - env=env, - stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, - user=constants.RUNNER_MANAGER_USER, - group=constants.RUNNER_MANAGER_GROUP, - ) - - logger.info("Spawned a new reactive runner process with pid %s", process.pid) diff --git a/github-runner-manager/src/github_runner_manager/reactive/runner.py b/github-runner-manager/src/github_runner_manager/reactive/runner.py deleted file mode 100644 index ee60680da2..0000000000 --- a/github-runner-manager/src/github_runner_manager/reactive/runner.py +++ /dev/null @@ -1,74 +0,0 @@ -#!/usr/bin/env python3 -# Copyright 2026 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Script to spawn a reactive runner process.""" - -import getpass -import grp -import logging -import os -import sys - -from github_runner_manager.configuration import UserInfo -from github_runner_manager.manager.runner_manager import RunnerManager -from github_runner_manager.openstack_cloud.openstack_runner_manager import OpenStackRunnerManager -from github_runner_manager.platform.factory import platform_factory -from github_runner_manager.reactive.consumer import consume -from github_runner_manager.reactive.process_manager import RUNNER_CONFIG_ENV_VAR -from github_runner_manager.reactive.types_ import ReactiveProcessConfig - - -def setup_root_logging() -> None: - """Set up logging for the reactive runner process.""" - # setup root logger to log in a file which will be picked up by grafana agent and sent to Loki - logging.basicConfig( - stream=sys.stdout, - level=logging.INFO, - format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", - ) - - -def main() -> None: - """Spawn a process that consumes a message from the queue to create a runner. - - Raises: - ValueError: If the required environment variables are not set - """ - runner_config_str = os.environ.get(RUNNER_CONFIG_ENV_VAR) - - if not runner_config_str: - raise ValueError( - f"Missing {RUNNER_CONFIG_ENV_VAR} environment variable. " - "Please set it to the message queue URI." - ) - - runner_config = ReactiveProcessConfig.parse_raw(runner_config_str) - - setup_root_logging() - queue_config = runner_config.queue - - user = UserInfo(getpass.getuser(), grp.getgrgid(os.getgid()).gr_name) - openstack_runner_manager = OpenStackRunnerManager( - config=runner_config.cloud_runner_manager, user=user - ) - platform_provider = platform_factory( - vm_prefix=runner_config.cloud_runner_manager.prefix, - github_config=runner_config.github_configuration, - ) - runner_manager = RunnerManager( - manager_name=runner_config.manager_name, - platform_provider=platform_provider, - cloud_runner_manager=openstack_runner_manager, - labels=runner_config.labels, - ) - consume( - queue_config=queue_config, - runner_manager=runner_manager, - platform_provider=platform_provider, - supported_labels=runner_config.supported_labels, - ) - - -if __name__ == "__main__": - main() diff --git a/github-runner-manager/src/github_runner_manager/reactive/runner_manager.py b/github-runner-manager/src/github_runner_manager/reactive/runner_manager.py deleted file mode 100644 index 2adbe20972..0000000000 --- a/github-runner-manager/src/github_runner_manager/reactive/runner_manager.py +++ /dev/null @@ -1,136 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Module for reconciling amount of runner and reactive runner processes.""" - -import logging -from dataclasses import dataclass - -from github_runner_manager.configuration import UserInfo -from github_runner_manager.manager.runner_manager import ( - FlushMode, - IssuedMetricEventsStats, - RunnerManager, -) -from github_runner_manager.platform.github_provider import PlatformRunnerState -from github_runner_manager.reactive import process_manager -from github_runner_manager.reactive.consumer import get_queue_size -from github_runner_manager.reactive.types_ import ReactiveProcessConfig - -logger = logging.getLogger(__name__) - - -@dataclass -class ReconcileResult: - """The result of the reconciliation. - - Attributes: - processes_diff: The number of reactive processes created/removed. - metric_stats: The stats of the issued metric events - """ - - processes_diff: int - metric_stats: IssuedMetricEventsStats - - -def reconcile( - expected_quantity: int, - runner_manager: RunnerManager, - reactive_process_config: ReactiveProcessConfig, - user: UserInfo, - python_path: str | None = None, -) -> ReconcileResult: - """Reconcile runners reactively. - - The reconciliation attempts to make the following equation true: - quantity_of_current_runners + amount_of_reactive_processes_consuming_jobs - == expected_quantity - - A few examples: - - 1. If there are 5 runners and 5 reactive processes and the quantity is 10, - no action is taken. - 2. If there are 5 runners and 5 reactive processes and the quantity is 15, - 5 reactive processes are created. - 3. If there are 5 runners and 5 reactive processes and quantity is 7, - 3 reactive processes are killed. - 4. If there are 5 runners and 5 reactive processes and quantity is 5, - all reactive processes are killed. - 5. If there are 5 runners and 5 reactive processes and quantity is 4, - 1 runner is killed and all reactive processes are killed. - - - So if the quantity is equal to the sum of the current runners and reactive processes, - no action is taken, - - If the quantity is greater than the sum of the current - runners and reactive processes, additional reactive processes are created. - - If the quantity is greater than or equal to the quantity of the current runners, - but less than the sum of the current runners and reactive processes, - additional reactive processes will be killed. - - If the quantity is less than the sum of the current runners, - additional runners are killed and all reactive processes are killed. - - In addition to this behaviour, reconciliation also checks the queue at the start and - removes all idle runners if the queue is empty, to ensure that - no idle runners are left behind if there are no new jobs. - - Args: - expected_quantity: Number of intended amount of runners + reactive processes. - runner_manager: The runner manager to interact with current running runners. - reactive_process_config: The reactive runner config. - user: The user to run the reactive process. - python_path: The PYTHONPATH to access the github-runner-manager library. - - Returns: - The number of reactive processes created. If negative, its absolute value is equal - to the number of processes killed. - """ - cleanup_metric_stats = runner_manager.cleanup() - flush_metric_stats = {} - delete_metric_stats = {} - - if get_queue_size(reactive_process_config.queue) == 0: - logger.info("Reactive reconcile. Flushing on empty queue") - flush_metric_stats = runner_manager.flush_runners(FlushMode.FLUSH_IDLE) - - # Only count runners which are online on GitHub to prevent machines to be just in - # construction to be counted and then killed immediately by the process manager. - all_runners = runner_manager.get_runners() - runners = [ - runner - for runner in all_runners - if runner.platform_state in (PlatformRunnerState.IDLE, PlatformRunnerState.BUSY) - ] - runner_diff = expected_quantity - len(runners) - - if runner_diff >= 0: - process_quantity = runner_diff - else: - delete_metric_stats = runner_manager.delete_runners(-runner_diff) - process_quantity = 0 - - metric_stats = { - event_name: delete_metric_stats.get(event_name, 0) - + cleanup_metric_stats.get(event_name, 0) - + flush_metric_stats.get(event_name, 0) - for event_name in set(delete_metric_stats) - | set(cleanup_metric_stats) - | set(flush_metric_stats) - } - - processes_created = process_manager.reconcile( - quantity=process_quantity, - reactive_process_config=reactive_process_config, - user=user, - python_path=python_path, - ) - - return ReconcileResult(processes_diff=processes_created, metric_stats=metric_stats) - - -def flush_reactive_processes() -> None: - """Flush all the reactive processes.""" - process_manager.kill_reactive_processes() diff --git a/github-runner-manager/src/github_runner_manager/reactive/types_.py b/github-runner-manager/src/github_runner_manager/reactive/types_.py deleted file mode 100644 index e9f7b9b0f8..0000000000 --- a/github-runner-manager/src/github_runner_manager/reactive/types_.py +++ /dev/null @@ -1,32 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# See LICENSE file for licensing details. - -"""Module containing reactive scheduling related types.""" - -from pydantic import BaseModel - -from github_runner_manager.configuration.base import QueueConfig -from github_runner_manager.configuration.github import GitHubConfiguration -from github_runner_manager.openstack_cloud.openstack_runner_manager import ( - OpenStackRunnerManagerConfig, -) - - -class ReactiveProcessConfig(BaseModel): - """The configuration for the reactive runner to spawn. - - Attributes: - queue: The queue configuration. - manager_name: Name of the manager. - github_configuration: Configuration for GitHub. - cloud_runner_manager: The OpenStack runner manager configuration. - supported_labels: The supported labels for the runner. - labels: Labels to use for the runners. - """ - - queue: QueueConfig - manager_name: str - github_configuration: GitHubConfiguration | None - cloud_runner_manager: OpenStackRunnerManagerConfig - supported_labels: set[str] - labels: list[str] diff --git a/github-runner-manager/src/github_runner_manager/reconcile_service.py b/github-runner-manager/src/github_runner_manager/reconcile_service.py deleted file mode 100644 index 42cac2f43f..0000000000 --- a/github-runner-manager/src/github_runner_manager/reconcile_service.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# See LICENSE file for licensing details. - -"""The reconcile service for managing the self-hosted runner.""" - -import getpass -import grp -import logging -import os -import uuid -from pathlib import Path -from threading import Lock -from time import sleep - -from github_runner_manager.configuration import ApplicationConfiguration -from github_runner_manager.configuration.base import UserInfo -from github_runner_manager.manager.runner_scaler import RunnerScaler - -logger = logging.getLogger(__name__) - -RECONCILE_ID_FILE = Path("~").expanduser() / "reconcile.id" -RECONCILE_SERVICE_START_MSG = "Starting the reconcile service..." -RECONCILE_START_MSG = "Start reconciliation" -RECONCILE_END_MSG = "End reconciliation" - - -def get_runner_scaler( - app_config: ApplicationConfiguration, python_path: str | None = None -) -> RunnerScaler: - """Get runner scaler. - - Args: - app_config: The configuration of github-runner-manager. - python_path: The PYTHONPATH to access the github-runner-manager library. - - Returns: - The RunnerScaler object. - """ - user = UserInfo(getpass.getuser(), grp.getgrgid(os.getgid()).gr_name) - return RunnerScaler.build(app_config, user, python_path) - - -def start_reconcile_service( - app_config: ApplicationConfiguration, python_path: str | None, lock: Lock -) -> None: - """Start the reconcile server. - - Args: - app_config: The configuration of the application. - python_path: The PYTHONPATH to access the github-runner-manager library. - lock: The lock representing modification access to the managed set of runners. - """ - logger.info(RECONCILE_SERVICE_START_MSG) - - # This is used for in test to distinguish which reconcile run the unit is at. - reconcile_id = uuid.uuid4() - RECONCILE_ID_FILE.write_text(str(reconcile_id), encoding="utf-8") - - while True: - with lock: - logger.info(RECONCILE_START_MSG) - logger.info("Reconcile ID: %s", reconcile_id) - runner_scaler = get_runner_scaler(app_config, python_path=python_path) - delta = runner_scaler.reconcile() - logger.info("Change in number of runner after reconcile: %s", delta) - logger.info(RECONCILE_END_MSG) - reconcile_id = uuid.uuid4() - RECONCILE_ID_FILE.write_text(str(reconcile_id), encoding="utf-8") - - sleep(app_config.reconcile_interval * 60) diff --git a/github-runner-manager/tests/integration/factories.py b/github-runner-manager/tests/integration/factories.py index 2a114103e0..88402f749c 100644 --- a/github-runner-manager/tests/integration/factories.py +++ b/github-runner-manager/tests/integration/factories.py @@ -157,7 +157,7 @@ def create_default_config( planner_url: Planner service URL. Omitted from config when not provided. planner_token: Planner service token. Omitted from config when not provided. reconcile_interval: Minutes between delete-loop reconciliation ticks. - base_virtual_machines: Floor for non-reactive desired runners. + base_virtual_machines: Floor for desired runners. Returns: Configuration dictionary for the application. @@ -233,7 +233,7 @@ def create_default_config( "repo_policy_compliance": None, "custom_pre_job_script": None, }, - "non_reactive_configuration": { + "runner_configuration": { "combinations": [ { "image": { @@ -246,7 +246,6 @@ def create_default_config( } ] }, - "reactive_configuration": None, "openstack_configuration": { "vm_prefix": test_config.vm_prefix, "network": openstack_config.network, diff --git a/github-runner-manager/tests/unit/factories/runner_instance_factory.py b/github-runner-manager/tests/unit/factories/runner_instance_factory.py index dc8f8d9031..eceadb66e2 100644 --- a/github-runner-manager/tests/unit/factories/runner_instance_factory.py +++ b/github-runner-manager/tests/unit/factories/runner_instance_factory.py @@ -32,7 +32,6 @@ class Meta: model = InstanceID prefix = factory.Faker("word") - reactive = factory.Iterator([True, False, None]) suffix = factory.LazyAttribute(lambda _: secrets.token_hex(6)) diff --git a/github-runner-manager/tests/unit/manager/test_models.py b/github-runner-manager/tests/unit/manager/test_models.py index 0a6c4289b4..705cdb5b1f 100644 --- a/github-runner-manager/tests/unit/manager/test_models.py +++ b/github-runner-manager/tests/unit/manager/test_models.py @@ -12,8 +12,8 @@ def test_new_instance_id(): """ arrange: Having an Application prefix. act: Create a new InstanceId. - assert: The new instance fields are correct, that is, same prefix and the - name starts with the prefix and reactive is False. + assert: The new instance fields are correct: same prefix, name starts with prefix, + and no legacy infix is present. """ prefix = "theprefix" @@ -21,25 +21,18 @@ def test_new_instance_id(): assert instance_id.name == str(instance_id) assert instance_id.prefix == prefix - assert not instance_id.reactive assert instance_id.name.startswith(prefix) + assert "-n-" not in instance_id.name -@pytest.mark.parametrize( - "reactive", - [ - pytest.param(True, id="reactive job name"), - pytest.param(False, id="non reactive job name"), - ], -) -def test_build_instance_id_from_name(reactive): +def test_build_instance_id_from_name(): """ arrange: Create a new InstanceID. act: With the name of the previous instance ID and the prefix, create a new one. assert: Both instances should be equal. """ prefix = "theprefix" - instance_id = InstanceID.build(prefix, reactive) + instance_id = InstanceID.build(prefix) name = instance_id.name new_instance_id = InstanceID.build_from_name(prefix, name) @@ -47,7 +40,6 @@ def test_build_instance_id_from_name(reactive): assert new_instance_id == instance_id assert new_instance_id.name == instance_id.name assert new_instance_id.suffix == instance_id.suffix - assert new_instance_id.reactive == reactive def test_build_instance_id_from_name_fails_with_wrong_prefix(): @@ -76,17 +68,49 @@ def test_build_instance_id_fails_when_very_long_name(): _ = InstanceID.build(prefix) -def test_backward_compatible_names(): +def test_backward_compatible_names_without_type_prefix(): """ - arrange: A prefix and a unit name without reactive information. + arrange: A prefix and a name without r-/n- type prefix (old format). act: Create the instance ID from the prefix and name. - assert: New name from the instance is the same as the original name. Also prefix and suffix. + assert: Suffix is parsed correctly and the original name is preserved. """ prefix = "unit-0" name = "unit-0-96950f351751" instance_id = InstanceID.build_from_name(prefix, name) + assert instance_id.prefix == prefix + assert instance_id.suffix == "96950f351751" + assert instance_id.name == "unit-0-96950f351751" + + +def test_backward_compatible_names_with_reactive_prefix(): + """ + arrange: A prefix and a name with legacy r- (reactive) prefix. + act: Create the instance ID from the prefix and name. + assert: The r- prefix is preserved in .name so the VM can be looked up. + """ + prefix = "unit-0" + name = "unit-0-r-96950f351751" + + instance_id = InstanceID.build_from_name(prefix, name) + + assert instance_id.prefix == prefix + assert instance_id.suffix == "96950f351751" + assert instance_id.name == name + + +def test_backward_compatible_names_with_non_reactive_prefix(): + """ + arrange: A prefix and a name with legacy n- (non-reactive) prefix. + act: Create the instance ID from the prefix and name. + assert: The n- prefix is preserved in .name so the VM can be looked up. + """ + prefix = "unit-0" + name = "unit-0-n-96950f351751" + + instance_id = InstanceID.build_from_name(prefix, name) + assert instance_id.prefix == prefix assert instance_id.suffix == "96950f351751" assert instance_id.name == name diff --git a/github-runner-manager/tests/unit/manager/test_pressure_reconciler.py b/github-runner-manager/tests/unit/manager/test_pressure_reconciler.py index e186d39e7c..4ead51b420 100644 --- a/github-runner-manager/tests/unit/manager/test_pressure_reconciler.py +++ b/github-runner-manager/tests/unit/manager/test_pressure_reconciler.py @@ -560,7 +560,7 @@ def test_build_pressure_reconciler_no_planner_config(): combination = MagicMock() combination.base_virtual_machines = 2 combination.max_total_virtual_machines = 10 - mock_config.non_reactive_configuration.combinations = [combination] + mock_config.runner_configuration.combinations = [combination] reconciler = build_pressure_reconciler(mock_config, MagicMock(), Lock()) @@ -594,7 +594,7 @@ def test_build_pressure_reconciler_partial_planner_config_raises( combination = MagicMock() combination.base_virtual_machines = 2 combination.max_total_virtual_machines = 10 - mock_config.non_reactive_configuration.combinations = [combination] + mock_config.runner_configuration.combinations = [combination] with pytest.raises(ValueError, match="[Pp]artial"): build_pressure_reconciler(mock_config, MagicMock(), Lock()) diff --git a/github-runner-manager/tests/unit/manager/test_runner_manager.py b/github-runner-manager/tests/unit/manager/test_runner_manager.py index e138a9959b..b85f3357a6 100644 --- a/github-runner-manager/tests/unit/manager/test_runner_manager.py +++ b/github-runner-manager/tests/unit/manager/test_runner_manager.py @@ -225,7 +225,7 @@ def test_runner_manager_create_runners() -> None: labels=["label1", "label2"], ) - (instance_id,) = runner_manager.create_runners(1, RunnerMetadata(), True) + (instance_id,) = runner_manager.create_runners(1, RunnerMetadata()) assert instance_id cloud_runner_manager.create_runner.assert_called_once() diff --git a/github-runner-manager/tests/unit/metrics/test_runner.py b/github-runner-manager/tests/unit/metrics/test_runner.py index 11d0e3787a..5e6844a3f4 100644 --- a/github-runner-manager/tests/unit/metrics/test_runner.py +++ b/github-runner-manager/tests/unit/metrics/test_runner.py @@ -72,16 +72,12 @@ def test_pull_runner_metrics_errors(caplog: pytest.LogCaptureFixture): get_ssh_connection_side_effects = [] # Setup for instance not exists test_instances.append( - ( - not_exists_instance := InstanceID( - prefix="instance-not-exists", reactive=False, suffix="1" - ) - ) + (not_exists_instance := InstanceID(prefix="instance-not-exists", suffix="1")) ) get_instance_side_effects.append(None) # Setup for instance fail ssh connection test_instances.append( - (fail_ssh_conn_instance := InstanceID(prefix="fail-ssh-conn", reactive=False, suffix="2")) + (fail_ssh_conn_instance := InstanceID(prefix="fail-ssh-conn", suffix="2")) ) fail_ssh_conn_instance_mock = MagicMock() fail_ssh_conn_instance_mock.instance_id = fail_ssh_conn_instance @@ -89,11 +85,7 @@ def test_pull_runner_metrics_errors(caplog: pytest.LogCaptureFixture): get_ssh_connection_side_effects.append(SSHError()) # Setup for instance fail pull file test_instances.append( - ( - fail_pull_file_instance := InstanceID( - prefix="fail-pull-file", reactive=False, suffix="3" - ) - ) + (fail_pull_file_instance := InstanceID(prefix="fail-pull-file", suffix="3")) ) fail_pull_file_instance_mock = MagicMock() fail_pull_file_instance_mock.instance_id = fail_pull_file_instance diff --git a/github-runner-manager/tests/unit/openstack_cloud/test_openstack_cloud.py b/github-runner-manager/tests/unit/openstack_cloud/test_openstack_cloud.py index 0052d2f7ce..553c501fe8 100644 --- a/github-runner-manager/tests/unit/openstack_cloud/test_openstack_cloud.py +++ b/github-runner-manager/tests/unit/openstack_cloud/test_openstack_cloud.py @@ -334,7 +334,7 @@ def test__delete_keypair_fail( assert: None is returned and the failure is logged. """ mock_openstack_conn.delete_keypair = MagicMock(return_value=False) - test_key_instance_id = InstanceID(prefix="test-key-delete", reactive=False, suffix="fail") + test_key_instance_id = InstanceID(prefix="test-key-delete", suffix="fail") assert ( openstack_cloud._delete_keypair( @@ -358,7 +358,7 @@ def test__delete_keypair_error( mock_openstack_conn.delete_keypair = MagicMock( side_effect=[openstack.exceptions.ResourceTimeout()] ) - test_key_instance_id = InstanceID(prefix="test-key-delete", reactive=False, suffix="fail") + test_key_instance_id = InstanceID(prefix="test-key-delete", suffix="fail") assert ( openstack_cloud._delete_keypair( @@ -379,9 +379,9 @@ def test_delete_instances_partial_server_delete_failure( act: when delete_instances method is called. assert: successfully deleted instance IDs are returned and failed instances are logged. """ - successful_delete_id = InstanceID(prefix="success", reactive=False, suffix="") - already_deleted_id = InstanceID(prefix="already_deleted", reactive=False, suffix="") - timeout_id = InstanceID(prefix="timeout error", reactive=False, suffix="") + successful_delete_id = InstanceID(prefix="success", suffix="") + already_deleted_id = InstanceID(prefix="already_deleted", suffix="") + timeout_id = InstanceID(prefix="timeout error", suffix="") mock_cloud = FakeOpenstackCloud( initial_servers=[successful_delete_id, timeout_id], server_to_errors={timeout_id: openstack.exceptions.ResourceTimeout()}, @@ -412,8 +412,8 @@ def test_delete_instances( assert: deleted instance IDs are returned. """ mock_openstack_conn.delete_server = MagicMock(side_effect=[True, False]) - successful_delete_id = InstanceID(prefix="success", reactive=False, suffix="") - already_deleted_id = InstanceID(prefix="already_deleted", reactive=False, suffix="") + successful_delete_id = InstanceID(prefix="success", suffix="") + already_deleted_id = InstanceID(prefix="already_deleted", suffix="") deleted_instance_ids = openstack_cloud.delete_instances( instance_ids=[successful_delete_id, already_deleted_id] diff --git a/github-runner-manager/tests/unit/openstack_cloud/test_openstack_runner_manager.py b/github-runner-manager/tests/unit/openstack_cloud/test_openstack_runner_manager.py index 3d6c9d423d..9f341b35b3 100644 --- a/github-runner-manager/tests/unit/openstack_cloud/test_openstack_runner_manager.py +++ b/github-runner-manager/tests/unit/openstack_cloud/test_openstack_runner_manager.py @@ -206,7 +206,7 @@ def test_delete_vms(runner_manager: OpenStackRunnerManager): act: when delete_vms method is called. assert: the mocked service call is made and the deleted instance IDs are returned. """ - test_instance_ids = [InstanceID(prefix="test-prefix", reactive=None, suffix="test-suffix")] + test_instance_ids = [InstanceID(prefix="test-prefix", suffix="test-suffix")] mock_cloud = MagicMock() mock_cloud.delete_instances = MagicMock(return_value=test_instance_ids) runner_manager._openstack_cloud = mock_cloud diff --git a/github-runner-manager/tests/unit/reactive/__init__.py b/github-runner-manager/tests/unit/reactive/__init__.py deleted file mode 100644 index c6a4ad808e..0000000000 --- a/github-runner-manager/tests/unit/reactive/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# See LICENSE file for licensing details. diff --git a/github-runner-manager/tests/unit/reactive/test_consumer.py b/github-runner-manager/tests/unit/reactive/test_consumer.py deleted file mode 100644 index 7cbaea8539..0000000000 --- a/github-runner-manager/tests/unit/reactive/test_consumer.py +++ /dev/null @@ -1,512 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# See LICENSE file for licensing details. - -import secrets -from contextlib import closing -from random import randint -from unittest import mock -from unittest.mock import ANY, MagicMock - -import pytest -from kombu import Connection, Message -from kombu.exceptions import KombuError -from pydantic import HttpUrl - -from github_runner_manager.manager.models import RunnerMetadata -from github_runner_manager.platform.github_provider import GitHubRunnerPlatform -from github_runner_manager.platform.platform_provider import PlatformProvider -from github_runner_manager.reactive import consumer -from github_runner_manager.reactive.consumer import ( - PROCESS_COUNT_HEADER_NAME, - RETRY_LIMIT, - WAIT_TIME_IN_SEC, - JobError, - Labels, - get_queue_size, -) -from github_runner_manager.reactive.types_ import QueueConfig - -IN_MEMORY_URI = "memory://" -FAKE_JOB_ID = "8200803099" -FAKE_JOB_URL = f"https://api.github.com/repos/fakeuser/gh-runner-test/actions/runs/{FAKE_JOB_ID}" - - -@pytest.fixture(name="queue_config") -def queue_config_fixture() -> QueueConfig: - """Return a QueueConfig object.""" - queue_name = secrets.token_hex(16) - - # we use construct to avoid pydantic validation as IN_MEMORY_URI is not a valid URL - return QueueConfig.construct(mongodb_uri=IN_MEMORY_URI, queue_name=queue_name) - - -@pytest.fixture(name="mock_sleep", autouse=True) -def mock_sleep_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock: - """Mock the sleep function.""" - monkeypatch.setattr(consumer, "sleep", mock_sleep := MagicMock()) - return mock_sleep - - -@pytest.mark.parametrize( - "labels,supported_labels", - [ - pytest.param({"label1", "label2"}, {"label1", "label2"}, id="label==supported_labels"), - pytest.param(set(), {"label1", "label2"}, id="empty labels"), - pytest.param({"label1"}, {"label1", "label3"}, id="labels subset of supported_labels"), - pytest.param({"LaBeL1", "label2"}, {"label1", "laBeL2"}, id="case insensitive labels"), - ], -) -def test_consume(labels: Labels, supported_labels: Labels, queue_config: QueueConfig, mock_sleep): - """ - arrange: A job with valid labels placed in the message queue which has not yet been picked up. - act: Call consume. - assert: A runner is created, the message is removed from the queue, sleep is called once. - """ - job_details = consumer.JobDetails( - labels=labels, - url=FAKE_JOB_URL, - ) - _put_in_queue(job_details.json(), queue_config.queue_name) - - runner_manager_mock = MagicMock(spec=consumer.RunnerManager) - github_platform_mock = MagicMock(spec=GitHubRunnerPlatform) - github_platform_mock.check_job_been_picked_up.side_effect = [False, True] - - consumer.consume( - queue_config=queue_config, - runner_manager=runner_manager_mock, - platform_provider=github_platform_mock, - supported_labels=supported_labels, - ) - - runner_manager_mock.create_runners.assert_called_once_with(1, metadata=ANY, reactive=True) - - _assert_queue_is_empty(queue_config.queue_name) - - mock_sleep.assert_called_once_with(WAIT_TIME_IN_SEC) - - -def test_consume_after_in_progress(queue_config: QueueConfig): - """ - arrange: Two jobs, the first one in progress and the second one queued. - act: Call consume. - assert: The first one is acked and the second one is run. That is, the queue - is empty at the end. - """ - labels = {"label", "label"} - job_details_in_progress = consumer.JobDetails( - labels=labels, - url=FAKE_JOB_URL, - ) - - other_job_url = FAKE_JOB_URL + "1" - job_details_queued = consumer.JobDetails( - labels=labels, - url=other_job_url, - ) - _put_in_queue(job_details_in_progress.json(), queue_config.queue_name) - _put_in_queue(job_details_queued.json(), queue_config.queue_name) - - runner_manager_mock = MagicMock(spec=consumer.RunnerManager) - platform_provider_mock = MagicMock(spec=PlatformProvider) - - job_picked_up_for_queued_iter = iter([False, True]) - - def _check_job_been_picked_up(metadata: RunnerMetadata, job_url: HttpUrl): - """Check if a job has been picked up.""" - # For the in progress job, return in progress - if job_url == FAKE_JOB_URL: - return True - # For the queued job, first return it as queued and then as in progress - return next(job_picked_up_for_queued_iter) - - platform_provider_mock.check_job_been_picked_up.side_effect = _check_job_been_picked_up - - consumer.consume( - queue_config=queue_config, - runner_manager=runner_manager_mock, - platform_provider=platform_provider_mock, - supported_labels=labels, - ) - - runner_manager_mock.create_runners.assert_called_once_with(1, metadata=ANY, reactive=True) - - _assert_queue_is_empty(queue_config.queue_name) - - -def test_consume_reject_if_job_gets_not_picked_up(queue_config: QueueConfig): - """ - arrange: A job placed in the message queue which will not get picked up. - act: Call consume. - assert: The message is requeued. - """ - labels = {secrets.token_hex(16), secrets.token_hex(16)} - job_details = consumer.JobDetails( - labels=labels, - url=FAKE_JOB_URL, - ) - _put_in_queue(job_details.json(), queue_config.queue_name) - - runner_manager_mock = MagicMock(spec=consumer.RunnerManager) - github_platform_mock = MagicMock(spec=GitHubRunnerPlatform) - github_platform_mock.check_job_been_picked_up.return_value = False - - consumer.consume( - queue_config=queue_config, - runner_manager=runner_manager_mock, - platform_provider=github_platform_mock, - supported_labels=labels, - ) - - _assert_msg_has_been_requeued( - queue_config.queue_name, job_details.json(), headers={PROCESS_COUNT_HEADER_NAME: 1} - ) - - -def test_consume_reject_if_spawning_failed(queue_config: QueueConfig): - """ - arrange: A job placed in the message queue and spawning a runner fails. - act: Call consume. - assert: The message is requeued. - """ - labels = {secrets.token_hex(16), secrets.token_hex(16)} - job_details = consumer.JobDetails( - labels=labels, - url=FAKE_JOB_URL, - ) - _put_in_queue(job_details.json(), queue_config.queue_name) - - runner_manager_mock = MagicMock(spec=consumer.RunnerManager) - runner_manager_mock.create_runners.return_value = tuple() - - github_platform_mock = MagicMock(spec=GitHubRunnerPlatform) - github_platform_mock.check_job_been_picked_up.side_effect = [False] - - consumer.consume( - queue_config=queue_config, - runner_manager=runner_manager_mock, - platform_provider=github_platform_mock, - supported_labels=labels, - ) - - _assert_msg_has_been_requeued( - queue_config.queue_name, job_details.json(), headers={PROCESS_COUNT_HEADER_NAME: 1} - ) - - -def test_consume_raises_queue_error(monkeypatch: pytest.MonkeyPatch, queue_config: QueueConfig): - """ - arrange: A mocked SimpleQueue that raises a KombuError. - act: Call consume. - assert: A QueueError is raised. - """ - monkeypatch.setattr(consumer, "SimpleQueue", MagicMock(side_effect=KombuError)) - with pytest.raises(consumer.QueueError) as exc_info: - consumer.consume( - queue_config=queue_config, - runner_manager=MagicMock(spec=GitHubRunnerPlatform), - platform_provider=MagicMock(spec=GitHubRunnerPlatform), - supported_labels={"label1", "label2"}, - ) - assert "Error when communicating with the queue" in str(exc_info.value) - - -@pytest.mark.parametrize( - "size", - [ - pytest.param(0, id="empty queue"), - pytest.param(1, id="queue with 1 item"), - pytest.param(randint(2, 10), id="queue with multiple items"), - ], -) -def test_get_queue_size(size: int, queue_config: QueueConfig): - """ - arrange: A queue with a given size. - act: Call get_queue_size. - assert: The size of the queue is returned. - """ - for _ in range(size): - _put_in_queue("test", queue_config.queue_name) - assert get_queue_size(queue_config) == size - - -def test_get_queue_size_raises_queue_error( - monkeypatch: pytest.MonkeyPatch, queue_config: QueueConfig -): - """ - arrange: A queue with a given size and a mocked SimpleQueue that raises a KombuError. - act: Call get_queue_size. - assert: A QueueError is raised. - """ - monkeypatch.setattr(consumer, "SimpleQueue", MagicMock(side_effect=KombuError)) - with pytest.raises(consumer.QueueError) as exc_info: - get_queue_size(queue_config) - assert "Error when communicating with the queue" in str(exc_info.value) - - -@pytest.mark.parametrize( - "job_str", - [ - pytest.param( - '{"labels": ["label1", "label2"], "status": "completed"}', id="job url missing" - ), - pytest.param( - '{"status": "completed", "url": "https://example.com/path"}', id="labels missing" - ), - pytest.param( - '{"labels": ["label1", "label2"], "status": "completed", ' - '"url": "https://example.com"}', - id="job url without path", - ), - pytest.param("no json at all", id="invalid json"), - ], -) -def test_job_details_validation_error(job_str: str, queue_config: QueueConfig): - """ - arrange: A job placed in the message queue with invalid details. - act: Call consume - assert: A JobError is raised and the message is not requeued. - """ - queue_name = queue_config.queue_name - _put_in_queue(job_str, queue_name) - - runner_manager_mock = MagicMock(spec=consumer.RunnerManager) - github_platform_mock = MagicMock(spec=GitHubRunnerPlatform) - github_platform_mock.check_job_been_picked_up.return_value = True - - with pytest.raises(JobError) as exc_info: - consumer.consume( - queue_config=queue_config, - runner_manager=runner_manager_mock, - platform_provider=github_platform_mock, - supported_labels={"label1", "label2"}, - ) - assert "Invalid job details" in str(exc_info.value) - - _assert_queue_is_empty(queue_config.queue_name) - - -@pytest.mark.parametrize( - "labels,supported_labels", - [ - pytest.param({"label1", "unsupported"}, {"label1"}, id="additional unsupported label"), - pytest.param({"label1"}, set(), id="empty supported labels"), - pytest.param({"label1", "label2"}, {"label1", "label3"}, id="overlapping labels"), - pytest.param({"label1", "label2"}, {"label3", "label4"}, id="no overlap"), - pytest.param({"LaBeL1", "label2"}, {"label1", "laBeL3"}, id="case insensitive labels"), - ], -) -def test_consume_reject_if_labels_not_supported( - labels: Labels, supported_labels: Labels, queue_config: QueueConfig -): - """ - arrange: A job placed in the message queue with unsupported labels. - act: Call consume. - assert: No runner is spawned and the message is removed from the queue. - """ - job_details = consumer.JobDetails( - labels=labels, - url=FAKE_JOB_URL, - ) - _put_in_queue(job_details.json(), queue_config.queue_name) - _put_in_queue(consumer.END_PROCESSING_PAYLOAD, queue_config.queue_name) - - runner_manager_mock = MagicMock(spec=consumer.RunnerManager) - github_platform_mock = MagicMock(spec=GitHubRunnerPlatform) - github_platform_mock.check_job_been_picked_up.side_effect = [False, True] - - consumer.consume( - queue_config=queue_config, - runner_manager=runner_manager_mock, - platform_provider=github_platform_mock, - supported_labels=supported_labels, - ) - - runner_manager_mock.create_runners.assert_not_called() - _assert_queue_is_empty(queue_config.queue_name) - - -def test_consume_retried_job_success(queue_config: QueueConfig, mock_sleep: MagicMock): - """ - arrange: A job placed in the message queue which is processed before. - act: Call consume. - assert: A runner is spawned, the message is removed from the queue, and sleep is called two - times with exponential backoff for the retry and normal wait time for spawn check. - """ - labels = {secrets.token_hex(16), secrets.token_hex(16)} - job_details = consumer.JobDetails( - labels=labels, - url=FAKE_JOB_URL, - ) - _put_in_queue( - job_details.json(), queue_config.queue_name, headers={PROCESS_COUNT_HEADER_NAME: 1} - ) - - runner_manager_mock = MagicMock(spec=consumer.RunnerManager) - platform_mock = MagicMock(spec=PlatformProvider) - platform_mock.check_job_been_picked_up.side_effect = [False, True] - - consumer.consume( - queue_config=queue_config, - runner_manager=runner_manager_mock, - platform_provider=platform_mock, - supported_labels=labels, - ) - - runner_manager_mock.create_runners.assert_called_once_with(1, metadata=ANY, reactive=True) - - _assert_queue_is_empty(queue_config.queue_name) - - # First sleep is exponential backoff for retry (count=2: 120s), - # second is from _spawn_runner (60s) - mock_sleep.assert_has_calls([mock.call(120), mock.call(WAIT_TIME_IN_SEC)]) - - -def test_consume_retried_job_failure(queue_config: QueueConfig, mock_sleep: MagicMock): - """ - arrange: A job placed in the message queue which is processed before. Mock runner spawn fail. - act: Call consume. - assert: The message requeued. Sleep called once with exponential backoff. - """ - labels = {secrets.token_hex(16), secrets.token_hex(16)} - job_details = consumer.JobDetails( - labels=labels, - url=FAKE_JOB_URL, - ) - _put_in_queue( - job_details.json(), queue_config.queue_name, headers={PROCESS_COUNT_HEADER_NAME: 1} - ) - - runner_manager_mock = MagicMock(spec=consumer.RunnerManager) - runner_manager_mock.create_runners.return_value = tuple() - - platform_mock = MagicMock(spec=GitHubRunnerPlatform) - platform_mock.check_job_been_picked_up.side_effect = [False] - - consumer.consume( - queue_config=queue_config, - runner_manager=runner_manager_mock, - platform_provider=platform_mock, - supported_labels=labels, - ) - - _assert_msg_has_been_requeued( - queue_config.queue_name, job_details.json(), headers={PROCESS_COUNT_HEADER_NAME: 2} - ) - - # Sleep with exponential backoff for retry count 2: 120 seconds - mock_sleep.assert_called_once_with(120) - - -def test_consume_retried_job_failure_past_limit(queue_config: QueueConfig, mock_sleep: MagicMock): - """ - arrange: A job placed in the message queue which is at the retry limit. - act: Call consume. - assert: Message not requeue, and not processed. - """ - labels = {secrets.token_hex(16), secrets.token_hex(16)} - job_details = consumer.JobDetails( - labels=labels, - url=FAKE_JOB_URL, - ) - _put_in_queue( - job_details.json(), - queue_config.queue_name, - headers={PROCESS_COUNT_HEADER_NAME: RETRY_LIMIT}, - ) - _put_in_queue(consumer.END_PROCESSING_PAYLOAD, queue_config.queue_name) - - runner_manager_mock = MagicMock(spec=consumer.RunnerManager) - platform_mock = MagicMock(spec=GitHubRunnerPlatform) - - consumer.consume( - queue_config=queue_config, - runner_manager=runner_manager_mock, - platform_provider=platform_mock, - supported_labels=labels, - ) - - runner_manager_mock.create_runners.assert_not_called() - platform_mock.check_job_been_picked_up.assert_not_called() - _assert_queue_is_empty(queue_config.queue_name) - - -def _put_in_queue(msg: str, queue_name: str, headers: dict[str, str | int] | None = None) -> None: - """Put a job in the message queue. - - Args: - msg: The job details. - queue_name: The name of the queue - headers: The message headers. Not set if None. - """ - with Connection(IN_MEMORY_URI) as conn: - with closing(conn.SimpleQueue(queue_name)) as simple_queue: - simple_queue.put(msg, headers=headers, retry=True) - - -def _consume_from_queue(queue_name: str) -> Message: - """Consume a job from the message queue. - - Args: - queue_name: The name of the queue - - Returns: - The message consumed from the queue. - """ - with Connection(IN_MEMORY_URI) as conn: - with closing(conn.SimpleQueue(queue_name)) as simple_queue: - return simple_queue.get(block=False) - - -def _assert_queue_is_empty(queue_name: str) -> None: - """Assert that the queue is empty. - - Args: - queue_name: The name of the queue. - """ - with Connection(IN_MEMORY_URI) as conn: - with closing(conn.SimpleQueue(queue_name)) as simple_queue: - assert simple_queue.qsize() == 0 - - -def _assert_msg_has_been_requeued( - queue_name: str, payload: str, headers: dict[str, str | int] | None -) -> None: - """Assert that the message is requeued. - - This will consume message from the queue and assert that the payload is the same as the given. - - Args: - queue_name: The name of the queue. - payload: The payload of the message to assert. - headers: The headers to assert for if present. - """ - with Connection(IN_MEMORY_URI) as conn: - with closing(conn.SimpleQueue(queue_name)) as simple_queue: - msg = simple_queue.get(block=False) - assert msg.payload == payload - if headers is not None: - assert msg.headers == headers - - -@pytest.mark.parametrize( - "retry_count,expected_backoff", - [ - pytest.param(1, 60, id="first retry - 60 seconds"), - pytest.param(2, 120, id="second retry - 120 seconds"), - pytest.param(3, 240, id="third retry - 240 seconds"), - pytest.param(4, 480, id="fourth retry - 480 seconds"), - pytest.param(5, 960, id="fifth retry - 960 seconds"), - pytest.param(6, 1800, id="sixth retry - capped at max 1800 seconds"), - pytest.param(10, 1800, id="high retry count - capped at max 1800 seconds"), - ], -) -def test_calculate_backoff_time(retry_count: int, expected_backoff: int): - """ - arrange: Given a retry count. - act: Call _calculate_backoff_time. - assert: The correct exponential backoff time is returned, capped at the maximum. - """ - from github_runner_manager.reactive.consumer import _calculate_backoff_time - - assert _calculate_backoff_time(retry_count) == expected_backoff diff --git a/github-runner-manager/tests/unit/reactive/test_process_manager.py b/github-runner-manager/tests/unit/reactive/test_process_manager.py deleted file mode 100644 index 1c6f719b96..0000000000 --- a/github-runner-manager/tests/unit/reactive/test_process_manager.py +++ /dev/null @@ -1,213 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# See LICENSE file for licensing details. -import os -import secrets -import subprocess -from pathlib import Path -from subprocess import CompletedProcess -from unittest.mock import MagicMock - -import pytest - -from github_runner_manager.configuration import UserInfo -from github_runner_manager.reactive.process_manager import ( - PIDS_COMMAND_LINE, - PYTHON_BIN, - REACTIVE_RUNNER_SCRIPT_MODULE, - ReactiveRunnerError, - kill_reactive_processes, - reconcile, -) -from github_runner_manager.reactive.types_ import QueueConfig, ReactiveProcessConfig -from github_runner_manager.utilities import secure_run_subprocess - -EXAMPLE_MQ_URI = "http://example.com" - - -@pytest.fixture(name="log_dir", autouse=True) -def log_dir_path_fixture(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path: - """Return the path to the log file.""" - log_file_path = tmp_path / "logs" - monkeypatch.setattr( - "github_runner_manager.reactive.process_manager.REACTIVE_RUNNER_LOG_DIR", log_file_path - ) - monkeypatch.setattr("shutil.chown", lambda *args, **kwargs: None) - return log_file_path - - -@pytest.fixture(name="secure_run_subprocess_mock") -def secure_run_subprocess_mock_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock: - """Mock the ps command.""" - secure_run_subprocess_mock = MagicMock(spec=secure_run_subprocess) - monkeypatch.setattr( - "github_runner_manager.reactive.process_manager.secure_run_subprocess", - secure_run_subprocess_mock, - ) - return secure_run_subprocess_mock - - -@pytest.fixture(name="os_kill_mock", autouse=True) -def os_kill_mock_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock: - """Mock the os.kill function.""" - os_kill_mock = MagicMock(spec=os.kill) - monkeypatch.setattr("os.kill", os_kill_mock) - return os_kill_mock - - -@pytest.fixture(name="subprocess_popen_mock") -def subprocess_popen_mock_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock: - """Mock the subprocess.Popen function.""" - popen_result = MagicMock(spec=subprocess.Popen, pid=1234, returncode=0) - subprocess_popen_mock = MagicMock( - spec=subprocess.Popen, - return_value=popen_result, - ) - monkeypatch.setattr("subprocess.Popen", subprocess_popen_mock) - return subprocess_popen_mock - - -@pytest.fixture(name="reactive_process_config") -def reactive_process_config_fixture() -> ReactiveProcessConfig: - """Return a ReactiveProcessConfig object.""" - queue_name = secrets.token_hex(16) - - # we use construct to avoid pydantic validation as IN_MEMORY_URI is not a valid URL - queue_config = QueueConfig.construct(mongodb_uri=EXAMPLE_MQ_URI, queue_name=queue_name) - return ReactiveProcessConfig.construct(queue=queue_config) - - -def test_reconcile_spawns_runners( - secure_run_subprocess_mock: MagicMock, - subprocess_popen_mock: MagicMock, - log_dir: Path, - reactive_process_config: ReactiveProcessConfig, - user_info: UserInfo, -): - """ - arrange: Mock that two reactive runner processes are active. - act: Call reconcile with a quantity of 5. - assert: Three runners are spawned. Log file is setup. - """ - _arrange_reactive_processes(secure_run_subprocess_mock, count=2) - - delta = reconcile(5, reactive_process_config=reactive_process_config, user=user_info) - - assert delta == 3 - assert subprocess_popen_mock.call_count == 3 - assert log_dir.exists() - - -def test_reconcile_does_not_spawn_runners( - secure_run_subprocess_mock: MagicMock, - subprocess_popen_mock: MagicMock, - reactive_process_config: ReactiveProcessConfig, - user_info: UserInfo, -): - """ - arrange: Mock that two reactive runner processes are active. - act: Call reconcile with a quantity of 2. - assert: No runners are spawned. - """ - _arrange_reactive_processes(secure_run_subprocess_mock, count=2) - - delta = reconcile(2, reactive_process_config=reactive_process_config, user=user_info) - - assert delta == 0 - assert subprocess_popen_mock.call_count == 0 - - -def test_reconcile_kills_processes_for_too_many_processes( - secure_run_subprocess_mock: MagicMock, - subprocess_popen_mock: MagicMock, - os_kill_mock: MagicMock, - reactive_process_config: ReactiveProcessConfig, - user_info: UserInfo, -): - """ - arrange: Mock that 3 reactive runner processes are active. - act: Call reconcile with a quantity of 1. - assert: 2 processes are killed. - """ - _arrange_reactive_processes(secure_run_subprocess_mock, count=3) - delta = reconcile(1, reactive_process_config=reactive_process_config, user=user_info) - - assert delta == -2 - assert subprocess_popen_mock.call_count == 0 - assert os_kill_mock.call_count == 2 - - -def test_reconcile_ignore_process_not_found_on_kill( - secure_run_subprocess_mock: MagicMock, - subprocess_popen_mock: MagicMock, - os_kill_mock: MagicMock, - reactive_process_config: ReactiveProcessConfig, - user_info: UserInfo, -): - """ - arrange: Mock 3 reactive processes and os.kill to fail once with a ProcessLookupError. - act: Call reconcile with a quantity of 1. - assert: The returned delta is still -2. - """ - _arrange_reactive_processes(secure_run_subprocess_mock, count=3) - os_kill_mock.side_effect = [None, ProcessLookupError] - delta = reconcile(1, reactive_process_config=reactive_process_config, user=user_info) - - assert delta == -2 - assert subprocess_popen_mock.call_count == 0 - assert os_kill_mock.call_count == 2 - - -def test_reconcile_raises_reactive_runner_error_on_ps_failure( - secure_run_subprocess_mock: MagicMock, - reactive_process_config: ReactiveProcessConfig, - user_info: UserInfo, -): - """ - arrange: Mock that the ps command fails. - act: Call reconcile with a quantity of 1. - assert: A ReactiveRunnerError is raised. - """ - secure_run_subprocess_mock.return_value = CompletedProcess( - args=PIDS_COMMAND_LINE, - returncode=1, - stdout=b"", - stderr=b"error", - ) - - with pytest.raises(ReactiveRunnerError) as err: - reconcile(1, reactive_process_config=reactive_process_config, user=user_info) - - assert "Failed to get list of processes" in str(err.value) - - -def _arrange_reactive_processes(secure_run_subprocess_mock: MagicMock, count: int): - """Mock reactive runner processes are active. - - Args: - secure_run_subprocess_mock: The mock to use for the ps command. - count: The number of processes. - """ - process_cmds_before = "\n".join( - [f"{PYTHON_BIN} -m {REACTIVE_RUNNER_SCRIPT_MODULE}\t{i}" for i in range(count)] - ) - - secure_run_subprocess_mock.return_value = CompletedProcess( - args=PIDS_COMMAND_LINE, - returncode=0, - stdout=f"CMD\n{process_cmds_before}".encode("utf-8"), - stderr=b"", - ) - - -def test_reactive_flush( - os_kill_mock: MagicMock, - secure_run_subprocess_mock: MagicMock, -): - """ - arrange: Mock 3 reactive processes. - act: Run flush for reactive. - assert: Find 3 os.kill calls. - """ - _arrange_reactive_processes(secure_run_subprocess_mock, count=3) - kill_reactive_processes() - assert os_kill_mock.call_count == 3 diff --git a/github-runner-manager/tests/unit/reactive/test_runner_manager.py b/github-runner-manager/tests/unit/reactive/test_runner_manager.py deleted file mode 100644 index c47d68dbaf..0000000000 --- a/github-runner-manager/tests/unit/reactive/test_runner_manager.py +++ /dev/null @@ -1,477 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# See LICENSE file for licensing details. -import logging -from dataclasses import fields -from random import randint -from unittest.mock import MagicMock - -import pytest - -import github_runner_manager.reactive.process_manager -from github_runner_manager.configuration import UserInfo -from github_runner_manager.manager.runner_manager import ( - FlushMode, - IssuedMetricEventsStats, - RunnerInstance, - RunnerManager, -) -from github_runner_manager.metrics.events import RunnerStart, RunnerStop -from github_runner_manager.platform.platform_provider import PlatformRunnerState -from github_runner_manager.reactive.runner_manager import reconcile -from github_runner_manager.reactive.types_ import QueueConfig, ReactiveProcessConfig - -logger = logging.getLogger(__name__) - -TEST_METRIC_EVENTS = {RunnerStart: 1, RunnerStop: 2} -TEST_DELETE_RUNNER_METRIC_EVENTS = {RunnerStart: 1, RunnerStop: 1} - - -@pytest.fixture(name="runner_manager") -def runner_manager_fixture() -> MagicMock: - """Return a mock of the RunnerManager.""" - mock = MagicMock(spec=RunnerManager) - mock.cleanup.return_value = TEST_METRIC_EVENTS - mock.delete_runners.return_value = TEST_DELETE_RUNNER_METRIC_EVENTS - return mock - - -@pytest.fixture(name="reactive_process_manager") -def reactive_process_manager_fixture(monkeypatch: pytest.MonkeyPatch) -> MagicMock: - """Return a mock of the process manager.""" - reactive_process_manager = MagicMock(spec=github_runner_manager.reactive.process_manager) - monkeypatch.setattr( - "github_runner_manager.reactive.runner_manager.process_manager", - reactive_process_manager, - ) - reactive_process_manager.reconcile.side_effect = lambda *args, **kwargs: kwargs["quantity"] - return reactive_process_manager - - -@pytest.fixture(name="reactive_process_config") -def reactive_process_config_fixture(): - """Return a mock of the ReactiveProcessConfig.""" - reactive_process_config = MagicMock(spec=ReactiveProcessConfig) - reactive_process_config.queue = MagicMock(spec=QueueConfig) - return reactive_process_config - - -@pytest.mark.parametrize( - "runner_quantity, desired_quantity, expected_process_quantity", - [ - pytest.param(5, 5, 0, id="zero processes to spawn"), - pytest.param(5, 10, 5, id="5 processes to spawn"), - pytest.param(5, 7, 2, id="2 processes to spawn"), - pytest.param(0, 5, 5, id="no runners running"), - pytest.param(0, 0, 0, id="zero quantity"), - ], -) -def test_reconcile_positive_runner_diff( - runner_quantity: int, - desired_quantity: int, - expected_process_quantity: int, - runner_manager: MagicMock, - reactive_process_manager: MagicMock, - reactive_process_config: MagicMock, - user_info: UserInfo, - monkeypatch: pytest.MonkeyPatch, -): - """ - arrange: Mock the difference of amount of runners and desired quantity to be positive. - act: Call reconcile. - assert: The cleanup method of runner manager is called and the reconcile method of - process manager is called with the expected quantity. - """ - runners = [ - MagicMock(spec=[field.name for field in fields(RunnerInstance)]) - for _ in range(runner_quantity) - ] - for runner in runners: - runner.platform_state = PlatformRunnerState.IDLE - runner_manager.get_runners.return_value = tuple(runners) - _set_queue_non_empty(monkeypatch) - - reconcile(desired_quantity, runner_manager, reactive_process_config, user_info) - - runner_manager.cleanup.assert_called_once() - reactive_process_manager.reconcile.assert_called_once_with( - quantity=expected_process_quantity, - reactive_process_config=reactive_process_config, - user=user_info, - python_path=None, - ) - - -@pytest.mark.parametrize( - "runner_quantity, desired_quantity, expected_number_of_runners_to_delete", - [ - pytest.param(6, 5, 1, id="one additional runner"), - pytest.param(8, 5, 3, id="multiple additional runners"), - pytest.param(10, 0, 10, id="zero desired quantity"), - ], -) -def test_reconcile_negative_runner_diff( - runner_quantity: int, - desired_quantity: int, - expected_number_of_runners_to_delete: int, - runner_manager: MagicMock, - reactive_process_manager: MagicMock, - reactive_process_config: MagicMock, - user_info: UserInfo, - monkeypatch: pytest.MonkeyPatch, -): - """ - arrange: Mock the difference of amount of runners and desired quantity to be negative. - act: Call reconcile. - assert: The additional amount of runners are deleted and the reconcile method of the - process manager is called with zero quantity. - """ - runners = [ - MagicMock(spec=[field.name for field in fields(RunnerInstance)]) - for _ in range(runner_quantity) - ] - for runner in runners: - runner.platform_state = PlatformRunnerState.IDLE - - runner_manager.get_runners.return_value = tuple(runners) - _set_queue_non_empty(monkeypatch) - - reconcile(desired_quantity, runner_manager, reactive_process_config, user_info) - - runner_manager.cleanup.assert_called_once() - runner_manager.delete_runners.assert_called_once_with(expected_number_of_runners_to_delete) - reactive_process_manager.reconcile.assert_called_once_with( - quantity=0, - reactive_process_config=reactive_process_config, - user=user_info, - python_path=None, - ) - - -def test_reconcile_flushes_idle_runners_when_queue_is_empty( - runner_manager: MagicMock, - reactive_process_manager: MagicMock, - reactive_process_config: MagicMock, - user_info: UserInfo, - monkeypatch: pytest.MonkeyPatch, -): - """ - arrange: Mock the dependencies and set the queue size to 0. - act: Call reconcile with random quantity. - assert: The flush_runners method of runner manager is called with FLUSH_IDLE mode. - """ - quantity = randint(0, 10) - _set_queue_empty(monkeypatch) - - reconcile(quantity, runner_manager, reactive_process_config, user_info) - - runner_manager.flush_runners.assert_called_once_with(FlushMode.FLUSH_IDLE) - - -@pytest.mark.usefixtures("reactive_process_manager") -@pytest.mark.parametrize( - "runner_quantity, desired_quantity, cleanup_metric_stats, delete_metric_stats, " - "expected_metrics", - [ - pytest.param( - 1, - 5, - (default_cleanup_stats := {RunnerStart: 1, RunnerStop: 3}), - (default_delete_stats := {RunnerStart: 3, RunnerStop: 4}), - {RunnerStart: 1, RunnerStop: 3}, - id="positive runner diff returns cleanup stats", - ), - pytest.param( - 1, - 5, - default_cleanup_stats, - dict(), - default_cleanup_stats, - id="positive runner diff with empty delete stats", - ), - pytest.param( - 1, - 5, - dict(), - default_delete_stats, - dict(), - id="positive runner diff with empty cleanup stats", - ), - pytest.param(1, 5, dict(), dict(), dict(), id="positive runner diff with empty stats"), - pytest.param( - 0, - 0, - default_cleanup_stats, - default_delete_stats, - default_cleanup_stats, - id="zero runner diff returns cleanup stats", - ), - pytest.param( - 0, - 0, - default_cleanup_stats, - dict(), - default_cleanup_stats, - id="zero runner diff with empty delete stats", - ), - pytest.param( - 0, - 0, - dict(), - default_delete_stats, - dict(), - id="zero runner diff with empty cleanup stats", - ), - pytest.param(0, 0, dict(), dict(), dict(), id="zero runner diff with empty stats"), - pytest.param( - 5, - 1, - {RunnerStart: 1, RunnerStop: 3}, - {RunnerStart: 3, RunnerStop: 4}, - {RunnerStart: 4, RunnerStop: 7}, - id="negative runner diff returns merged stats", - ), - pytest.param( - 5, - 1, - default_cleanup_stats, - dict(), - default_cleanup_stats, - id="negative runner diff with empty delete stats", - ), - pytest.param( - 5, - 1, - dict(), - default_delete_stats, - default_delete_stats, - id="negative runner diff with empty cleanup stats", - ), - pytest.param(5, 1, dict(), dict(), dict(), id="negative runner diff with empty stats"), - pytest.param( - 5, - 1, - {RunnerStart: 3}, - {RunnerStart: 3, RunnerStop: 4}, - {RunnerStart: 6, RunnerStop: 4}, - id="cleanup stats without RunnerStop", - ), - pytest.param( - 5, - 1, - {RunnerStop: 3}, - {RunnerStart: 3, RunnerStop: 4}, - {RunnerStart: 3, RunnerStop: 7}, - id="cleanup stats without RunnerStart", - ), - pytest.param( - 5, - 1, - {RunnerStart: 3}, - {RunnerStop: 4}, - {RunnerStart: 3, RunnerStop: 4}, - id="delete stats without RunnerStart", - ), - pytest.param( - 5, - 1, - {RunnerStart: 3, RunnerStop: 4}, - {RunnerStart: 4}, - {RunnerStart: 7, RunnerStop: 4}, - id="delete stats without RunnerStop", - ), - ], -) -def test_reconcile_returns_issued_metrics( - runner_quantity: int, - desired_quantity: int, - cleanup_metric_stats: IssuedMetricEventsStats, - delete_metric_stats: IssuedMetricEventsStats, - expected_metrics: IssuedMetricEventsStats, - runner_manager: MagicMock, - reactive_process_config: MagicMock, - user_info: UserInfo, - monkeypatch: pytest.MonkeyPatch, -): - """ - arrange: Mock different stats and runner diff combinations and an empty queue. - act: Call reconcile. - assert: The returned metrics are as expected. - """ - runners = [ - MagicMock(spec=[field.name for field in fields(RunnerInstance)]) - for _ in range(runner_quantity) - ] - for runner in runners: - runner.platform_state = PlatformRunnerState.IDLE - runner_manager.get_runners.return_value = tuple(runners) - runner_manager.cleanup.return_value = cleanup_metric_stats - runner_manager.delete_runners.return_value = delete_metric_stats - - _set_queue_non_empty(monkeypatch) - - result = reconcile(desired_quantity, runner_manager, reactive_process_config, user_info) - - assert result.metric_stats == expected_metrics - - -@pytest.mark.usefixtures("reactive_process_manager") -@pytest.mark.parametrize( - "runner_quantity, desired_quantity, cleanup_metric_stats, delete_metric_stats, " - "flush_metric_stats, expected_metrics", - [ - pytest.param( - 1, - 5, - (default_cleanup_stats := {RunnerStart: 1, RunnerStop: 3}), - (default_delete_stats := {RunnerStart: 3, RunnerStop: 4}), - (default_flush_stats := {RunnerStart: 5, RunnerStop: 6}), - {RunnerStart: 6, RunnerStop: 9}, - id="positive runner diff returns cleanup + flush stats", - ), - pytest.param( - 1, - 5, - default_cleanup_stats, - default_delete_stats, - dict(), - default_cleanup_stats, - id="positive runner diff with empty flush stats", - ), - pytest.param( - 1, - 5, - dict(), - default_delete_stats, - default_flush_stats, - default_flush_stats, - id="positive runner diff with empty cleanup stats", - ), - pytest.param( - 1, 5, dict(), dict(), dict(), dict(), id="positive runner diff with empty stats" - ), - pytest.param( - 0, - 0, - default_cleanup_stats, - default_delete_stats, - default_flush_stats, - {RunnerStart: 6, RunnerStop: 9}, - id="zero runner diff returns cleanup + flush stats", - ), - pytest.param( - 0, - 0, - default_cleanup_stats, - default_delete_stats, - dict(), - default_cleanup_stats, - id="zero runner diff with empty flush stats", - ), - pytest.param( - 0, - 0, - dict(), - default_delete_stats, - default_flush_stats, - default_flush_stats, - id="zero runner diff with empty cleanup stats", - ), - pytest.param(0, 0, dict(), dict(), dict(), dict(), id="zero runner diff with empty stats"), - pytest.param( - 5, - 1, - {RunnerStart: 1, RunnerStop: 3}, - {RunnerStart: 3, RunnerStop: 4}, - {RunnerStart: 5, RunnerStop: 6}, - {RunnerStart: 9, RunnerStop: 13}, - id="negative runner diff returns merged stats", - ), - pytest.param( - 5, - 1, - {RunnerStart: 1, RunnerStop: 3}, - {RunnerStart: 3, RunnerStop: 4}, - dict(), - {RunnerStart: 4, RunnerStop: 7}, - id="negative runner diff with empty flush stats", - ), - pytest.param( - 5, - 1, - dict(), - {RunnerStart: 1, RunnerStop: 3}, - {RunnerStart: 3, RunnerStop: 4}, - {RunnerStart: 4, RunnerStop: 7}, - id="negative runner diff with empty cleanup stats", - ), - pytest.param( - 5, - 1, - {RunnerStart: 1, RunnerStop: 3}, - dict(), - {RunnerStart: 3, RunnerStop: 4}, - {RunnerStart: 4, RunnerStop: 7}, - id="negative runner diff with empty delete stats", - ), - pytest.param( - 5, 1, dict(), dict(), dict(), dict(), id="negative runner diff with empty stats" - ), - ], -) -def test_reconcile_empty_queue_returns_issued_metrics( - runner_quantity: int, - desired_quantity: int, - cleanup_metric_stats: IssuedMetricEventsStats, - delete_metric_stats: IssuedMetricEventsStats, - flush_metric_stats: IssuedMetricEventsStats, - expected_metrics: IssuedMetricEventsStats, - runner_manager: MagicMock, - reactive_process_config: MagicMock, - user_info: UserInfo, - monkeypatch: pytest.MonkeyPatch, -): - """ - arrange: Mock different stats and runner diff combinations and an empty queue. - act: Call reconcile. - assert: The returned metrics are as expected. - """ - runners = [ - MagicMock(spec=[field.name for field in fields(RunnerInstance)]) - for _ in range(runner_quantity) - ] - for runner in runners: - runner.platform_state = PlatformRunnerState.IDLE - runner_manager.get_runners.return_value = tuple(runners) - runner_manager.cleanup.return_value = cleanup_metric_stats - runner_manager.delete_runners.return_value = delete_metric_stats - runner_manager.flush_runners.return_value = flush_metric_stats - - _set_queue_empty(monkeypatch) - - result = reconcile(desired_quantity, runner_manager, reactive_process_config, user_info) - - assert result.metric_stats == expected_metrics - - -def _set_queue_non_empty(monkeypatch: pytest.MonkeyPatch) -> None: - """Set the queue size to a random value between 1 and 10. - - Args: - monkeypatch: The pytest monkeypatch fixture used to patch the get_queue_size function. - """ - monkeypatch.setattr( - "github_runner_manager.reactive.runner_manager.get_queue_size", - lambda _: randint(1, 10), - ) - - -def _set_queue_empty(monkeypatch: pytest.MonkeyPatch) -> None: - """Set the queue size to zero. - - Args: - monkeypatch: The pytest monkeypatch fixture used to patch the get_queue_size function. - """ - monkeypatch.setattr( - "github_runner_manager.reactive.runner_manager.get_queue_size", - lambda _: 0, - ) diff --git a/github-runner-manager/tests/unit/test_config.py b/github-runner-manager/tests/unit/test_config.py index 3a22d1357f..cb91fb4fe0 100644 --- a/github-runner-manager/tests/unit/test_config.py +++ b/github-runner-manager/tests/unit/test_config.py @@ -9,7 +9,6 @@ import pytest import yaml -from pydantic import MongoDsn from src.github_runner_manager.configuration import ( ApplicationConfiguration, @@ -17,11 +16,9 @@ GitHubConfiguration, GitHubOrg, Image, - NonReactiveCombination, - NonReactiveConfiguration, ProxyConfig, - QueueConfig, - ReactiveConfiguration, + RunnerCombination, + RunnerConfiguration, SSHDebugConnection, SupportServiceConfig, ) @@ -40,7 +37,7 @@ group: group org: canonical token: githubtoken -non_reactive_configuration: +runner_configuration: combinations: - base_virtual_machines: 1 max_total_virtual_machines: 2 @@ -53,20 +50,6 @@ - arm64 - noble name: image_id -reactive_configuration: - flavors: - - labels: - - flavorlabel - name: flavor - images: - - labels: - - arm64 - - noble - name: image_id - max_total_virtual_machines: 2 - queue: - mongodb_uri: mongodb://user:password@localhost:27017 - queue_name: app_name service_config: dockerhub_mirror: https://docker.example.com proxy_config: @@ -131,9 +114,9 @@ def app_config_fixture() -> ApplicationConfiguration: ) ], ), - non_reactive_configuration=NonReactiveConfiguration( + runner_configuration=RunnerConfiguration( combinations=[ - NonReactiveCombination( + RunnerCombination( image=Image( name="image_id", labels=["arm64", "noble"], @@ -147,25 +130,6 @@ def app_config_fixture() -> ApplicationConfiguration: ) ] ), - reactive_configuration=ReactiveConfiguration( - queue=QueueConfig( - mongodb_uri=MongoDsn( - "mongodb://user:password@localhost:27017", - scheme="mongodb", - user="user", - password="password", - host="localhost", - host_type="int_domain", - port="27017", - ), - queue_name="app_name", - ), - max_total_virtual_machines=2, - images=[ - Image(name="image_id", labels=["arm64", "noble"]), - ], - flavors=[Flavor(name="flavor", labels=["flavorlabel"])], - ), openstack_configuration=OpenStackConfiguration( vm_prefix="test_unit", network="test_network", @@ -208,16 +172,16 @@ def test_load_configuration_from_yaml(app_config: ApplicationConfiguration): assert loaded_app_config == app_config -def test_non_reactive_combination_rejects_max_below_base(): +def test_runner_combination_rejects_max_below_base(): """ - arrange: A NonReactiveCombination where max_total_virtual_machines < base_virtual_machines. + arrange: A RunnerCombination where max_total_virtual_machines < base_virtual_machines. act: Construct the model. assert: A ValidationError is raised. """ with pytest.raises( ValueError, match="max_total_virtual_machines.*must be.*base_virtual_machines" ): - NonReactiveCombination( + RunnerCombination( image=Image(name="img", labels=[]), flavor=Flavor(name="flv", labels=[]), base_virtual_machines=5, @@ -225,13 +189,13 @@ def test_non_reactive_combination_rejects_max_below_base(): ) -def test_non_reactive_combination_allows_zero_max(): +def test_runner_combination_allows_zero_max(): """ - arrange: A NonReactiveCombination where max_total_virtual_machines is 0 (no cap). + arrange: A RunnerCombination where max_total_virtual_machines is 0 (no cap). act: Construct the model. assert: No error is raised. """ - combo = NonReactiveCombination( + combo = RunnerCombination( image=Image(name="img", labels=[]), flavor=Flavor(name="flv", labels=[]), base_virtual_machines=5, diff --git a/github-runner-manager/tests/unit/test_runner_scaler.py b/github-runner-manager/tests/unit/test_runner_scaler.py deleted file mode 100644 index 6abd5df990..0000000000 --- a/github-runner-manager/tests/unit/test_runner_scaler.py +++ /dev/null @@ -1,564 +0,0 @@ -# Copyright 2026 Canonical Ltd. -# See LICENSE file for licensing details. - - -import logging -from datetime import timedelta -from typing import Iterable -from unittest.mock import MagicMock - -import pytest -from pydantic.networks import IPv4Address - -from github_runner_manager.configuration import ( - ApplicationConfiguration, - Flavor, - Image, - NonReactiveCombination, - NonReactiveConfiguration, - ProxyConfig, - QueueConfig, - ReactiveConfiguration, - SSHDebugConnection, - SupportServiceConfig, - UserInfo, -) -from github_runner_manager.configuration.github import ( - GitHubConfiguration, - GitHubOrg, - GitHubPath, - GitHubRepo, -) -from github_runner_manager.manager import runner_manager as runner_manager_module -from github_runner_manager.manager.models import InstanceID -from github_runner_manager.manager.runner_manager import ( - IssuedMetricEventsStats, - RunnerInstance, - RunnerManager, -) -from github_runner_manager.manager.runner_scaler import FlushMode, RunnerInfo, RunnerScaler -from github_runner_manager.manager.vm_manager import VMState -from github_runner_manager.metrics.events import RunnerStart, RunnerStop -from github_runner_manager.openstack_cloud.configuration import ( - OpenStackConfiguration, - OpenStackCredentials, -) -from github_runner_manager.openstack_cloud.models import OpenStackServerConfig -from github_runner_manager.openstack_cloud.openstack_runner_manager import ( - OpenStackRunnerManagerConfig, -) -from github_runner_manager.platform.github_provider import PlatformRunnerState -from github_runner_manager.reactive.types_ import ReactiveProcessConfig -from tests.unit.factories.runner_instance_factory import RunnerInstanceFactory - -logger = logging.getLogger(__name__) - - -def mock_runner_manager_spawn_runners( - create_runner_args: Iterable[RunnerManager._CreateRunnerArgs], -) -> tuple[InstanceID, ...]: - """Mock _spawn_runners method of RunnerManager. - - The _spawn_runners method uses multi-process, which copies the object, e.g., the mocks. - There is easy way to sync the state of the mocks object across processes. Replacing the - _spawn_runner to remove the multi-process.pool is an easier approach. - - Args: - create_runner_args: The arguments for the create_runner method. - - Returns: - The instance ids of the runner spawned. - """ - return tuple(RunnerManager._create_runner(arg) for arg in create_runner_args) - - -@pytest.fixture(scope="function", name="github_path") -def github_path_fixture() -> GitHubPath: - return GitHubRepo(owner="mock_owner", repo="mock_repo") - - -@pytest.fixture(scope="function", name="issue_events_mock") -def issue_events_mock_fixture(monkeypatch: pytest.MonkeyPatch): - issue_events_mock = MagicMock() - monkeypatch.setattr( - "github_runner_manager.manager.runner_scaler.metric_events.issue_event", issue_events_mock - ) - return issue_events_mock - - -@pytest.fixture(scope="function", name="mock_process_manager_subprocess_run") -def mock_process_manager_subprocess_run_fixture(monkeypatch: pytest.MonkeyPatch): - mock_subprocess_run = MagicMock() - monkeypatch.setattr( - "github_runner_manager.reactive.process_manager.secure_run_subprocess", mock_subprocess_run - ) - return mock_subprocess_run - - -@pytest.fixture(scope="function", name="runner_manager") -def runner_manager_fixture( - monkeypatch, mock_runner_managers, github_path: GitHubPath, issue_events_mock -) -> RunnerManager: - mock_cloud, mock_github = mock_runner_managers - monkeypatch.setattr( - "github_runner_manager.manager.runner_manager.RunnerManager._spawn_runners", - mock_runner_manager_spawn_runners, - ) - # Patch out the metrics, as metrics has their own tests. - monkeypatch.setattr( - "github_runner_manager.manager.runner_manager.github_metrics.job", MagicMock() - ) - monkeypatch.setattr( - "github_runner_manager.manager.runner_manager.runner_metrics.issue_events", MagicMock() - ) - - runner_manager = RunnerManager( - manager_name="mock_runners", - platform_provider=mock_github, - cloud_runner_manager=mock_cloud, - labels=["label1", "label2", "arm64", "noble", "flavorlabel"], - ) - return runner_manager - - -@pytest.fixture(scope="function", name="application_configuration") -def application_configuration_fixture() -> ApplicationConfiguration: - """Returns a fixture with a fully populated ApplicationConfiguration.""" - return ApplicationConfiguration( - name="app_name", - extra_labels=["label1", "label2"], - github_config=GitHubConfiguration( - token="githubtoken", path=GitHubOrg(org="canonical", group="group") - ), - service_config=SupportServiceConfig( - proxy_config=ProxyConfig( - http="http://httpproxy.example.com:3128", - https="http://httpsproxy.example.com:3128", - no_proxy="127.0.0.1", - ), - use_aproxy=False, - dockerhub_mirror="https://docker.example.com", - ssh_debug_connections=[ - SSHDebugConnection( - host=IPv4Address("10.10.10.10"), - port=3000, - rsa_fingerprint="SHA256:rsa", - ed25519_fingerprint="SHA256:ed25519", - ) - ], - ), - non_reactive_configuration=NonReactiveConfiguration( - combinations=[ - NonReactiveCombination( - image=Image( - name="image_id", - labels=["arm64", "noble"], - ), - flavor=Flavor( - name="flavor", - labels=["flavorlabel"], - ), - base_virtual_machines=1, - ) - ] - ), - reactive_configuration=ReactiveConfiguration( - queue=QueueConfig( - mongodb_uri="mongodb://user:password@localhost:27017", - queue_name="app_name", - ), - max_total_virtual_machines=2, - images=[ - Image(name="image_id", labels=["arm64", "noble"]), - ], - flavors=[Flavor(name="flavor", labels=["flavorlabel"])], - ), - openstack_configuration=OpenStackConfiguration( - vm_prefix="unit_name", - network="network", - credentials=OpenStackCredentials( - auth_url="auth_url", - project_name="project_name", - username="username", - password="password", - user_domain_name="user_domain_name", - project_domain_name="project_domain_name", - region_name="region", - ), - ), - planner_token="planner-testing-token", - planner_url="http://planner.example.com", - reconcile_interval=10, - ) - - -@pytest.fixture(scope="function", name="runner_scaler_reactive") -def runner_scaler_reactive_fixture( - application_configuration: ApplicationConfiguration, - runner_manager: RunnerManager, - user_info: UserInfo, -) -> RunnerScaler: - runner_scaler = RunnerScaler.build(application_configuration, user_info) - runner_scaler._manager = runner_manager - return runner_scaler - - -@pytest.fixture(scope="function", name="runner_scaler_one_runner") -def runner_scaler_one_runner_fixture( - runner_manager: RunnerManager, user_info: UserInfo -) -> RunnerScaler: - runner_scaler = RunnerScaler( - runner_manager=runner_manager, - reactive_process_config=None, - user=user_info, - base_quantity=1, - max_quantity=0, - ) - runner_scaler.reconcile() - assert_runner_info(runner_scaler, online=1) - return runner_scaler - - -def set_one_runner_state( - runner_scaler: RunnerScaler, - platform_state: PlatformRunnerState | None = None, - cloud_state: VMState | None = None, - health: bool | None = None, - old_runner: bool = False, -): - """Set the runner state for a RunnerScaler with one runner. - - Args: - runner_scaler: The RunnerScaler instance to modify. - platform_state: The github state to set the runner. - cloud_state: The cloud state to set the runner. - health: Whether the runner is healthy. - old_runner: A runner that has had enough time to get created. - """ - logger.info( - "set_one_runner_state: platform_state %s, cloud_state %s, health %s", - platform_state, - cloud_state, - health, - ) - runner_dict = runner_scaler._manager._platform.state.runners - assert len(runner_dict) == 1, "Test arrange failed: One runner should be present" - instance_id = list(runner_dict.keys())[0] - if old_runner: - runner_dict[instance_id].created_at -= timedelta( - seconds=runner_manager_module.RUNNER_MAXIMUM_CREATION_TIME + 1 - ) - if platform_state is not None: - runner_dict[instance_id].platform_state = platform_state - if cloud_state is not None: - runner_dict[instance_id].cloud_state = cloud_state - if health is not None: - runner_dict[instance_id].health = health - logger.info("current runner_dict: %s.", runner_dict) - - -def assert_runner_info( - runner_scaler: RunnerScaler, online: int = 0, busy: int = 0, offline: int = 0, unknown: int = 0 -) -> None: - """Assert runner info contains a certain amount of runners. - - Args: - runner_scaler: The RunnerScaler to get information from. - online: The number of online runners to assert for. - busy: The number of buys runners to assert for. - offline: The number of offline runners to assert for. - unknown: The number of unknown runners to assert for. - """ - info = runner_scaler.get_runner_info() - assert info.offline == offline - assert info.online == online - assert info.busy == busy - assert info.unknown == unknown - assert isinstance(info.runners, tuple) - assert len(info.runners) == online - assert isinstance(info.busy_runners, tuple) - assert len(info.busy_runners) == busy - - -def test_build_runner_scaler( - monkeypatch: pytest.MonkeyPatch, - application_configuration: ApplicationConfiguration, - user_info: UserInfo, -): - """ - arrange: Given ApplicationConfiguration and OpenStackConfiguration. - act: Call RunnerScaler.build - assert: The RunnerScaler was created with the expected configuration. - """ - runner_scaler = RunnerScaler.build(application_configuration, user_info) - assert runner_scaler - # A few comprobations on key data - # Pending to refactor, too invasive. - assert runner_scaler._manager.manager_name == "app_name" - assert runner_scaler._manager._labels == ["label1", "label2", "arm64", "noble", "flavorlabel"] - assert runner_scaler._manager._cloud._config == OpenStackRunnerManagerConfig( - allow_external_contributor=False, - prefix="unit_name", - credentials=OpenStackCredentials( - auth_url="auth_url", - project_name="project_name", - username="username", - password="password", - user_domain_name="user_domain_name", - project_domain_name="project_domain_name", - region_name="region", - ), - server_config=OpenStackServerConfig(image="image_id", flavor="flavor", network="network"), - service_config=SupportServiceConfig( - proxy_config=ProxyConfig( - http="http://httpproxy.example.com:3128", - https="http://httpsproxy.example.com:3128", - no_proxy="127.0.0.1", - ), - use_aproxy=False, - dockerhub_mirror="https://docker.example.com", - ssh_debug_connections=[ - SSHDebugConnection( - host=IPv4Address("10.10.10.10"), - port=3000, - rsa_fingerprint="SHA256:rsa", - ed25519_fingerprint="SHA256:ed25519", - ) - ], - ), - ) - reactive_process_config = runner_scaler._reactive_config - assert reactive_process_config - assert reactive_process_config == ReactiveProcessConfig( - queue=QueueConfig( - mongodb_uri="mongodb://user:password@localhost:27017", - queue_name="app_name", - ), - manager_name="app_name", - github_configuration=GitHubConfiguration( - token="githubtoken", path=GitHubOrg(org="canonical", group="group") - ), - cloud_runner_manager=OpenStackRunnerManagerConfig( - allow_external_contributor=False, - prefix="unit_name", - credentials=OpenStackCredentials( - auth_url="auth_url", - project_name="project_name", - username="username", - password="password", - user_domain_name="user_domain_name", - project_domain_name="project_domain_name", - region_name="region", - ), - server_config=OpenStackServerConfig( - image="image_id", flavor="flavor", network="network" - ), - service_config=SupportServiceConfig( - proxy_config=ProxyConfig( - http="http://httpproxy.example.com:3128", - https="http://httpsproxy.example.com:3128", - no_proxy="127.0.0.1", - ), - use_aproxy=False, - dockerhub_mirror="https://docker.example.com", - ssh_debug_connections=[ - SSHDebugConnection( - host=IPv4Address("10.10.10.10"), - port=3000, - rsa_fingerprint="SHA256:rsa", - ed25519_fingerprint="SHA256:ed25519", - ) - ], - ), - ), - github_token="githubtoken", - supported_labels={"label1", "arm64", "flavorlabel", "label2", "x64", "noble"}, - labels=["label1", "label2", "arm64", "noble", "flavorlabel"], - ) - - -@pytest.mark.parametrize( - "runners, expected_runner_info", - [ - pytest.param( - [], - RunnerInfo(online=0, busy=0, offline=0, unknown=0, runners=(), busy_runners=()), - id="No runners", - ), - pytest.param( - [busy_runner := RunnerInstanceFactory(platform_state=PlatformRunnerState.BUSY)], - RunnerInfo( - online=1, - busy=1, - offline=0, - unknown=0, - runners=(busy_runner.name,), - busy_runners=(busy_runner.name,), - ), - id="One busy runner", - ), - pytest.param( - [idle_runner := RunnerInstanceFactory(platform_state=PlatformRunnerState.IDLE)], - RunnerInfo( - online=1, - busy=0, - offline=0, - unknown=0, - runners=(idle_runner.name,), - busy_runners=(), - ), - id="One idle runner", - ), - pytest.param( - [offline_runner := RunnerInstanceFactory(platform_state=PlatformRunnerState.OFFLINE)], - RunnerInfo( - online=0, - busy=0, - offline=1, - unknown=0, - runners=(), - busy_runners=(), - ), - id="One offline runner", - ), - pytest.param( - [unknown_runner := RunnerInstanceFactory(platform_state=None)], - RunnerInfo( - online=0, - busy=0, - offline=0, - unknown=1, - runners=(), - busy_runners=(), - ), - id="One unknown runner", - ), - pytest.param( - [busy_runner, idle_runner, offline_runner, unknown_runner], - RunnerInfo( - online=2, - busy=1, - offline=1, - unknown=1, - runners=(busy_runner.name, idle_runner.name), - busy_runners=(busy_runner.name,), - ), - id="One runner of each type", - ), - ], -) -def test_runner_scaler_get_runner_info( - runners: list[RunnerInstance], expected_runner_info: RunnerInfo -): - """ - arrange: given a mock runner manager. - act: when RunnerScaler.get_runner_info is called. - assert the expected runner info is extracted. - """ - runner_manager = MagicMock() - runner_manager.get_runners.return_value = runners - runner_scaler = RunnerScaler( - runner_manager=runner_manager, - reactive_process_config=None, - user=MagicMock(), - base_quantity=0, - max_quantity=0, - ) - - assert runner_scaler.get_runner_info() == expected_runner_info - - -@pytest.mark.parametrize( - "cleanup_metrics, flush_metrics, expected_flushed", - [ - pytest.param({}, {}, 0, id="No changes"), - pytest.param({RunnerStart: 1}, {}, 0, id="No runner stop metrics"), - pytest.param({RunnerStop: 1}, {}, 1, id="Runner stop metric from cleanup"), - pytest.param({}, {RunnerStop: 1}, 1, id="Runner stop metric from flush"), - pytest.param( - {RunnerStop: 1}, - {RunnerStop: 1}, - 2, - id="Runner stop metrics from cleanup and flush", - ), - ], -) -def test_runner_scaler_flush_extract_metrics( - cleanup_metrics: IssuedMetricEventsStats, - flush_metrics: IssuedMetricEventsStats, - expected_flushed: int, -): - """ - arrange: given a mocked runner manager with that returns the given metrics. - act: when RunnerScaler.flush is called. - assert: the expected number of flushed runners from metrics is returned. - """ - runner_manager = MagicMock() - runner_manager.cleanup.return_value = cleanup_metrics - runner_manager.flush_runners.return_value = flush_metrics - - runner_scaler = RunnerScaler( - runner_manager=runner_manager, - reactive_process_config=None, - user=MagicMock(), - base_quantity=0, - max_quantity=0, - ) - - assert runner_scaler.flush() == expected_flushed - - -@pytest.mark.parametrize( - "flush_mode, expected_flush_mode", - [ - pytest.param(FlushMode.FLUSH_IDLE, FlushMode.FLUSH_IDLE, id="flush_idle"), - pytest.param(FlushMode.FLUSH_BUSY, FlushMode.FLUSH_BUSY, id="flush_busy"), - ], -) -def test_runner_scaler_flush_mode(flush_mode: FlushMode, expected_flush_mode: FlushMode): - """ - arrange: given a mocked runner manager. - act: when RunnerScaler.flush is called with the given flush mode. - assert: flush_runners is called with expected mode. - """ - runner_manager = MagicMock() - - RunnerScaler( - runner_manager=runner_manager, - reactive_process_config=None, - user=MagicMock(), - base_quantity=0, - max_quantity=0, - ).flush(flush_mode=flush_mode) - - runner_manager.flush_runners.assert_called_with(flush_mode=expected_flush_mode) - - -@pytest.mark.parametrize( - "runners, quantity, expected_diff", - [ - pytest.param([], 0, 0, id="no difference"), - pytest.param([], 1, 1, id="scale up one runner"), - pytest.param([RunnerInstanceFactory()], 0, -1, id="scale down one runner"), - ], -) -def test_runner_scaler__reconcile_non_reactive( - runners: list[RunnerInstance], quantity: int, expected_diff: int -): - """ - arrange: given a mocked runner manager. - act: when RunnerScaler._reconcile_non_reactive is called. - assert: expected runner diff is returned. - """ - runner_manager = MagicMock() - runner_manager.get_runners.return_value = runners - - result = RunnerScaler( - runner_manager=runner_manager, - reactive_process_config=None, - user=MagicMock(), - base_quantity=0, - max_quantity=0, - )._reconcile_non_reactive(expected_quantity=quantity) - - assert result.runner_diff == expected_diff diff --git a/src/charm.py b/src/charm.py index 427884ccef..8d9f7bf7b0 100755 --- a/src/charm.py +++ b/src/charm.py @@ -22,7 +22,6 @@ from typing import Any, Callable, Sequence, TypeVar import ops -from charms.data_platform_libs.v0.data_interfaces import DatabaseRequires from charms.grafana_agent.v0.cos_agent import COSAgentProvider from charms.operator_libs_linux.v1 import systemd from github_runner_manager import constants @@ -50,7 +49,6 @@ DEBUG_SSH_INTEGRATION_NAME, IMAGE_INTEGRATION_NAME, LABELS_CONFIG_NAME, - MONGO_DB_INTEGRATION_NAME, PATH_CONFIG_NAME, PLANNER_INTEGRATION_NAME, TOKEN_CONFIG_NAME, @@ -65,7 +63,6 @@ ImageIntegrationMissingError, ImageNotFoundError, LogrotateSetupError, - MissingMongoDBError, RunnerManagerApplicationError, RunnerManagerApplicationInstallError, RunnerManagerServiceError, @@ -73,10 +70,6 @@ ) from manager_client import GitHubRunnerManagerClient -# This is currently hardcoded and may be moved to a config option in the future. -REACTIVE_MQ_DB_NAME = "github-runner-webhook-router" - - ACTIVE_STATUS_RECONCILIATION_FAILED_MSG = "Last reconciliation failed." FAILED_TO_RECONCILE_RUNNERS_MSG = "Failed to reconcile runners" FAILED_RECONCILE_ACTION_ERR_MSG = ( @@ -126,9 +119,6 @@ def func_with_catch_errors(self: "GithubRunnerCharm", event: EventT) -> None: except TokenError as err: logger.exception("Issue with GitHub token") self.unit.status = BlockedStatus(str(err)) - except MissingMongoDBError as err: - logger.exception("Missing integration data") - self.unit.status = WaitingStatus(str(err)) except ImageIntegrationMissingError: logger.exception("Missing image integration.") self.unit.status = BlockedStatus("Please provide image integration.") @@ -246,14 +236,6 @@ def __init__(self, *args: Any, **kwargs: Any) -> None: self.framework.observe(self.on.check_runners_action, self._on_check_runners_action) self.framework.observe(self.on.flush_runners_action, self._on_flush_runners_action) self.framework.observe(self.on.update_status, self._on_update_status) - self.database = DatabaseRequires( - self, relation_name="mongodb", database_name=REACTIVE_MQ_DB_NAME - ) - self.framework.observe(self.database.on.database_created, self._on_database_created) - self.framework.observe(self.database.on.endpoints_changed, self._on_endpoints_changed) - self.framework.observe( - self.on[MONGO_DB_INTEGRATION_NAME].relation_broken, self._on_mongodb_relation_broken - ) self._manager_client = GitHubRunnerManagerClient( host=manager_service.GITHUB_RUNNER_MANAGER_ADDRESS, @@ -270,7 +252,7 @@ def _setup_state(self) -> CharmState: The charm state. """ try: - return CharmState.from_charm(charm=self, database=self.database) + return CharmState.from_charm(charm=self) except CharmConfigInvalidError as exc: raise ConfigurationError(exc.msg) from exc @@ -540,24 +522,6 @@ def _on_planner_relation_broken(self, _: ops.RelationBrokenEvent) -> None: self._reconcile(state) self.unit.status = ActiveStatus() - @catch_charm_errors - def _on_database_created(self, _: ops.RelationEvent) -> None: - """Handle the MongoDB database created event.""" - state = self._setup_state() - self._reconcile(state) - - @catch_charm_errors - def _on_endpoints_changed(self, _: ops.RelationEvent) -> None: - """Handle the MongoDB endpoints changed event.""" - state = self._setup_state() - self._reconcile(state) - - @catch_charm_errors - def _on_mongodb_relation_broken(self, _: ops.RelationDepartedEvent) -> None: - """Handle the MongoDB relation broken event.""" - state = self._setup_state() - self._reconcile(state) - def _check_image_ready(self) -> None: """Check if image is ready raises error if not. diff --git a/src/charm_state.py b/src/charm_state.py index b8d96133c3..f0e55faed4 100644 --- a/src/charm_state.py +++ b/src/charm_state.py @@ -13,20 +13,12 @@ from urllib.parse import urlsplit import yaml -from charms.data_platform_libs.v0.data_interfaces import DatabaseRequires from github_runner_manager.configuration import ProxyConfig, SSHDebugConnection from github_runner_manager.configuration.github import GitHubPath, parse_github_path from ops import CharmBase from ops.model import SecretNotFoundError -from pydantic import ( - BaseModel, - MongoDsn, - ValidationError, - create_model_from_typeddict, - validator, -) - -from errors import MissingMongoDBError +from pydantic import BaseModel, ValidationError, create_model_from_typeddict, validator + from models import AnyHttpsUrl, FlavorLabel, OpenStackCloudsYAML from utilities import get_env_var @@ -51,7 +43,6 @@ PATH_CONFIG_NAME = "path" RECONCILE_INTERVAL_CONFIG_NAME = "reconcile-interval" RUNNER_HTTP_PROXY_CONFIG_NAME = "runner-http-proxy" -SENSITIVE_PLACEHOLDER = "*****" TEST_MODE_CONFIG_NAME = "test-mode" # bandit thinks this is a hardcoded password. TOKEN_CONFIG_NAME = "token" # nosec @@ -67,7 +58,6 @@ COS_AGENT_INTEGRATION_NAME = "cos-agent" DEBUG_SSH_INTEGRATION_NAME = "debug-ssh" IMAGE_INTEGRATION_NAME = "image" -MONGO_DB_INTEGRATION_NAME = "mongodb" PLANNER_INTEGRATION_NAME = "planner" # Keys and defaults for planner relation app data bag @@ -323,35 +313,6 @@ def _parse_openstack_clouds_config(cls, charm: CharmBase) -> OpenStackCloudsYAML return openstack_clouds_yaml - @validator("reconcile_interval") - @classmethod - def check_reconcile_interval(cls, reconcile_interval: int) -> int: - """Validate the general charm configuration. - - Args: - reconcile_interval: The value of reconcile_interval passed to class instantiation. - - Raises: - ValueError: if an invalid reconcile_interval value of less than 2 has been passed. - - Returns: - The validated reconcile_interval value. - """ - # The reconcile_interval should be at least 2. - # Due to possible race condition with the message acknowledgement with job status checking - # in reactive process. - if reconcile_interval < 2: - logger.error( - "The %s configuration must be greater than or equal to 1", - RECONCILE_INTERVAL_CONFIG_NAME, - ) - raise ValueError( - f"The {RECONCILE_INTERVAL_CONFIG_NAME} configuration needs to be greater or equal" - " to 1" - ) - - return reconcile_interval - @staticmethod def _parse_list(input_: str | list[str] | None) -> list[str]: """Split a comma-separated list of strings into a list of strings. @@ -497,6 +458,10 @@ def from_charm(cls, charm: CharmBase) -> "CharmConfig": raise CharmConfigInvalidError( f"The {RECONCILE_INTERVAL_CONFIG_NAME} config must be int" ) from err + if reconcile_interval < 1: + raise CharmConfigInvalidError( + f"The {RECONCILE_INTERVAL_CONFIG_NAME} config must be greater than or equal to 1" + ) dockerhub_mirror = cast(str, charm.config.get(DOCKERHUB_MIRROR_CONFIG_NAME, "")) or None openstack_clouds_yaml = cls._parse_openstack_clouds_config(charm) @@ -585,8 +550,7 @@ class OpenstackRunnerConfig(BaseModel): Attributes: base_virtual_machines: Number of virtual machine-based runners to spawn. - max_total_virtual_machines: Maximum possible machine number to spawn for the unit in - for reactive processes. + max_total_virtual_machines: Maximum possible machine number to spawn for the unit. flavor_label_combinations: list of FlavorLabel. openstack_network: Network on openstack to use for virtual machines. openstack_image: Openstack image to use for virtual machines. @@ -770,49 +734,6 @@ def _build_planner_config_from_charm(charm: CharmBase) -> PlannerConfig | None: return None -class ReactiveConfig(BaseModel): - """Represents the configuration for reactive scheduling. - - Attributes: - mq_uri: The URI of the MQ to use to spawn runners reactively. - """ - - mq_uri: MongoDsn - - @classmethod - def from_database(cls, database: DatabaseRequires) -> "ReactiveConfig | None": - """Initialize the ReactiveConfig from charm config and integration data. - - Args: - database: The database to fetch integration data from. - - Returns: - The connection information for the reactive MQ or None if not available. - - Raises: - MissingMongoDBError: If the information on howto access MongoDB - is missing in the integration data. - """ - integration_existing = bool(database.relations) - - if not integration_existing: - return None - - uri_field = "uris" # the field is called uris though it's a single uri - relation_data = list(database.fetch_relation_data(fields=[uri_field]).values()) - - # There can be only one database integrated at a time - # with the same interface name. See: metadata.yaml - data = relation_data[0] - - if uri_field in data: - return ReactiveConfig(mq_uri=data[uri_field]) - - raise MissingMongoDBError( - f"Missing {uri_field} for {MONGO_DB_INTEGRATION_NAME} integration" - ) - - # Charm State is a list of all the configurations and states of the charm and # has therefore a lot of attributes. @dataclasses.dataclass(frozen=True) @@ -824,7 +745,6 @@ class CharmState: # pylint: disable=too-many-instance-attributes is_metrics_logging_available: Whether the charm is able to issue metrics. proxy_config: Proxy-related configuration. runner_proxy_config: Proxy-related configuration for the runner. - reactive_config: The charm configuration related to reactive spawning mode. runner_config: The charm configuration related to runner VM configuration. ssh_debug_connections: SSH debug connections configuration information. planner_config: Planner endpoint and token from relation data. @@ -835,7 +755,6 @@ class CharmState: # pylint: disable=too-many-instance-attributes runner_proxy_config: ProxyConfig charm_config: CharmConfig runner_config: OpenstackRunnerConfig - reactive_config: ReactiveConfig | None ssh_debug_connections: list[SSHDebugConnection] planner_config: PlannerConfig | None @@ -851,8 +770,6 @@ def _store_state(cls, state: "CharmState") -> None: state_dict["proxy_config"] = json.loads(state_dict["proxy_config"].json()) state_dict["runner_proxy_config"] = json.loads(state_dict["runner_proxy_config"].json()) state_dict["charm_config"] = json.loads(state_dict["charm_config"].json()) - if state.reactive_config: - state_dict["reactive_config"] = json.loads(state_dict["reactive_config"].json()) state_dict["runner_config"] = json.loads(state_dict["runner_config"].json()) state_dict["ssh_debug_connections"] = [ debug_info.json() for debug_info in state_dict["ssh_debug_connections"] @@ -860,42 +777,14 @@ def _store_state(cls, state: "CharmState") -> None: json_data = json.dumps(state_dict, ensure_ascii=False) CHARM_STATE_PATH.write_text(json_data, encoding="utf-8") - @classmethod - def _log_prev_state(cls, prev_state_dict: dict) -> None: - """Log the previous state of the charm. - - Replace sensitive information before logging. - - Args: - prev_state_dict: The previous state of the charm as a dict. - """ - if logger.isEnabledFor(logging.DEBUG): - prev_state_for_logging = prev_state_dict.copy() - charm_config = prev_state_for_logging.get("charm_config") - if charm_config and "token" in charm_config: - charm_config = charm_config.copy() - charm_config["token"] = SENSITIVE_PLACEHOLDER # nosec - prev_state_for_logging["charm_config"] = charm_config - - reactive_config = prev_state_for_logging.get("reactive_config") - if reactive_config and "mq_uri" in reactive_config: - reactive_config = reactive_config.copy() - reactive_config["mq_uri"] = "*****" - prev_state_for_logging["reactive_config"] = reactive_config - - logger.debug("Previous charm state: %s", prev_state_for_logging) - # Ignore the flake8 function too complex (C901). The function does not have much logic, the # lint is likely triggered with the multiple try-excepts, which are needed. @classmethod - def from_charm( # noqa: C901 - cls, charm: CharmBase, database: DatabaseRequires - ) -> "CharmState": + def from_charm(cls, charm: CharmBase) -> "CharmState": # noqa: C901 """Initialize the state from charm. Args: charm: The charm instance. - database: The database instance. Raises: CharmConfigInvalidError: If an invalid configuration was set. @@ -938,7 +827,6 @@ def from_charm( # noqa: C901 raise CharmConfigInvalidError("Invalid SSH Debug info") from exc planner_config = _build_planner_config_from_charm(charm) - reactive_config = ReactiveConfig.from_database(database) state = cls( is_metrics_logging_available=bool(charm.model.relations[COS_AGENT_INTEGRATION_NAME]), @@ -946,7 +834,6 @@ def from_charm( # noqa: C901 runner_proxy_config=runner_proxy_config, charm_config=charm_config, runner_config=runner_config, - reactive_config=reactive_config, ssh_debug_connections=ssh_debug_connections, planner_config=planner_config, ) diff --git a/src/errors.py b/src/errors.py index 1c1f270880..1707e318ce 100644 --- a/src/errors.py +++ b/src/errors.py @@ -18,10 +18,6 @@ class ConfigurationError(Exception): """Error for juju configuration.""" -class MissingMongoDBError(Exception): - """Error for missing integration data.""" - - class SubprocessError(Exception): """Error for Subprocess calls. diff --git a/src/factories.py b/src/factories.py index 85f13b832f..0fdcd99867 100644 --- a/src/factories.py +++ b/src/factories.py @@ -5,20 +5,15 @@ import logging -from github_runner_manager import constants from github_runner_manager.configuration import ( ApplicationConfiguration, Flavor, Image, - NonReactiveCombination, - NonReactiveConfiguration, - QueueConfig, - ReactiveConfiguration, + RunnerCombination, + RunnerConfiguration, SupportServiceConfig, - UserInfo, ) from github_runner_manager.configuration.github import GitHubConfiguration -from github_runner_manager.manager.runner_scaler import RunnerScaler from github_runner_manager.openstack_cloud.configuration import ( OpenStackConfiguration, OpenStackCredentials, @@ -29,26 +24,6 @@ logger = logging.getLogger(__name__) -def create_runner_scaler(state: CharmState, app_name: str, unit_name: str) -> RunnerScaler: - """Get runner scaler instance for scaling runners. - - Args: - state: The CharmState. - app_name: Name of the application. - unit_name: Unit name for the prefix for instances. - - Returns: - An instance of RunnerScaler. - """ - application_configuration = create_application_configuration(state, app_name, unit_name) - user = UserInfo(constants.RUNNER_MANAGER_USER, constants.RUNNER_MANAGER_GROUP) - - return RunnerScaler.build( - application_configuration=application_configuration, - user=user, - ) - - def create_application_configuration( state: CharmState, app_name: str, unit_name: str ) -> ApplicationConfiguration: @@ -83,8 +58,7 @@ def create_application_configuration( aproxy_redirect_ports=state.charm_config.aproxy_redirect_ports, custom_pre_job_script=state.charm_config.custom_pre_job_script, ) - non_reactive_configuration = _get_non_reactive_configuration(state) - reactive_configuration = _get_reactive_configuration(state, app_name) + runner_configuration = _get_runner_configuration(state) openstack_configuration = create_openstack_configuration(state, unit_name) return ApplicationConfiguration( allow_external_contributor=state.charm_config.allow_external_contributor, @@ -92,8 +66,7 @@ def create_application_configuration( extra_labels=extra_labels, github_config=github_configuration, service_config=service_config, - non_reactive_configuration=non_reactive_configuration, - reactive_configuration=reactive_configuration, + runner_configuration=runner_configuration, openstack_configuration=openstack_configuration, planner_url=state.planner_config.endpoint if state.planner_config else None, planner_token=state.planner_config.token if state.planner_config else None, @@ -101,8 +74,8 @@ def create_application_configuration( ) -def _get_non_reactive_configuration(state: CharmState) -> NonReactiveConfiguration: - """Get NonReactiveConfiguration from CharmState. +def _get_runner_configuration(state: CharmState) -> RunnerConfiguration: + """Get RunnerConfiguration from CharmState. Currently only one image and one flavor is supported. """ @@ -125,51 +98,14 @@ def _get_non_reactive_configuration(state: CharmState) -> NonReactiveConfigurati ), ) combinations = [ - NonReactiveCombination( + RunnerCombination( image=image, flavor=flavor, base_virtual_machines=state.runner_config.base_virtual_machines, max_total_virtual_machines=state.runner_config.max_total_virtual_machines, ) ] - return NonReactiveConfiguration(combinations=combinations) - - -def _get_reactive_configuration(state: CharmState, app_name: str) -> ReactiveConfiguration: - """Get ReactiveConfiguration from CharmState and app_name. - - Currently only one image and one flavor is supported. - """ - if not state.reactive_config: - return None - reactive_config = state.reactive_config - openstack_image = state.runner_config.openstack_image - image_labels = [] - images = [] - flavors = [] - if openstack_image and openstack_image.id: - if openstack_image.tags: - image_labels = openstack_image.tags - image = Image( - name=openstack_image.id, - labels=image_labels, - ) - flavor = Flavor( - name=state.runner_config.flavor_label_combinations[0].flavor, - labels=( - [] - if not state.runner_config.flavor_label_combinations[0].label - else [state.runner_config.flavor_label_combinations[0].label] - ), - ) - images = [image] - flavors = [flavor] - return ReactiveConfiguration( - queue=QueueConfig(mongodb_uri=reactive_config.mq_uri, queue_name=app_name), - max_total_virtual_machines=state.runner_config.max_total_virtual_machines, - images=images, - flavors=flavors, - ) + return RunnerConfiguration(combinations=combinations) def create_openstack_configuration(state: CharmState, unit_name: str) -> OpenStackConfiguration: diff --git a/src/grafana_dashboards/metrics.json b/src/grafana_dashboards/metrics.json index 7b9bcaa67e..973bd269b1 100644 --- a/src/grafana_dashboards/metrics.json +++ b/src/grafana_dashboards/metrics.json @@ -140,7 +140,7 @@ "type": "loki", "uid": "${lokids}" }, - "description": "Shows the number of available (and max expected) runners per application. Note: Available equals idle after reconciliation and is not a real-time metric as it is only updated after each reconciliation interval for each unit.\n\nNote that there is no maximum expected number of reactive runners.", + "description": "Shows the number of available (and max expected) runners per application. Note: Available equals idle after reconciliation and is not a real-time metric as it is only updated after each reconciliation interval for each unit.", "fieldConfig": { "defaults": { "color": { @@ -651,7 +651,7 @@ "type": "loki", "uid": "${lokids}" }, - "description": "This panel totals the number of active/idle/total expected runners reported by charm units after reconciliation. Note that this is not a real-time metric and should only be used to identify trends or to investigate problems. A given point in time shows the aggregation of the last reported values from the reconciliation. This means, for example, that if the graph shows 10 idle runners at a particular time, it doesn't mean that there are 10 idle runners at that time, just that this is the most recently reported value. Also note that runners who are reported as idle at the time of reconciliation may become active immediately afterwards. And active runners may become offline/inactive.\n\nThere is no expected number of runners for runners in reactive mode, which means that the total number of idle + active runners could be greater than expected if reactive applications are not filtered out in this view.", + "description": "This panel totals the number of active/idle/total expected runners reported by charm units after reconciliation. Note that this is not a real-time metric and should only be used to identify trends or to investigate problems. A given point in time shows the aggregation of the last reported values from the reconciliation. This means, for example, that if the graph shows 10 idle runners at a particular time, it doesn't mean that there are 10 idle runners at that time, just that this is the most recently reported value. Also note that runners who are reported as idle at the time of reconciliation may become active immediately afterwards. And active runners may become offline/inactive.", "fieldConfig": { "defaults": { "color": { diff --git a/src/logrotate.py b/src/logrotate.py index bc7cc53c1d..cfb73d9044 100644 --- a/src/logrotate.py +++ b/src/logrotate.py @@ -8,7 +8,6 @@ from charms.operator_libs_linux.v1 import systemd from github_runner_manager.metrics.events import get_metrics_log_path -from github_runner_manager.reactive.process_manager import REACTIVE_RUNNER_LOG_DIR from pydantic import BaseModel from errors import LogrotateSetupError @@ -68,16 +67,6 @@ class LogrotateConfig(BaseModel): ) -REACTIVE_LOGROTATE_CONFIG = LogrotateConfig( - name="reactive-runner", - log_path_glob_pattern=f"{REACTIVE_RUNNER_LOG_DIR}/*.log", - rotate=0, - create=False, - notifempty=False, - frequency=LogrotateFrequency.DAILY, - copytruncate=False, -) - GITHUB_RUNNER_MANAGER_CONFIG = LogrotateConfig( name="github-runner-manager", log_path_glob_pattern=f"{GITHUB_RUNNER_MANAGER_SERVICE_LOG_DIR}/*.log", @@ -122,7 +111,6 @@ def _enable() -> None: def _configure() -> None: """Configure logrotate.""" - _write_config(REACTIVE_LOGROTATE_CONFIG) _write_config(METRICS_LOGROTATE_CONFIG) _write_config(GITHUB_RUNNER_MANAGER_CONFIG) diff --git a/terraform/product/README.md b/terraform/product/README.md index b1b654f034..9dd7242081 100644 --- a/terraform/product/README.md +++ b/terraform/product/README.md @@ -66,6 +66,5 @@ the bundle deployment onto any Kubernetes environment managed by [Juju][Juju]. | [all_app_names](#output_all_app_names) | Names of the all the deployed apps, github-runner plus github-runner-image-builder. | | [all_runner_names](#output_all_runner_names) | Names of the all the deployed github-runner applications. | | [github_runner_image_builder_app_name](#output_github_runner_image_builder_app_name) | Name of the the deployed github-runner-image-builder application. | -| [reactive_runner_names](#output_reactive_runner_names) | Names of the all the deployed github-runner applications that are reactive. | diff --git a/terraform/product/outputs.tf b/terraform/product/outputs.tf index 93dcb8c0ab..deac1bfa34 100644 --- a/terraform/product/outputs.tf +++ b/terraform/product/outputs.tf @@ -6,11 +6,6 @@ output "github_runner_image_builder_app_name" { value = module.github_runner_image_builder.app_name } -output "reactive_runner_names" { - description = "Names of the all the deployed github-runner applications that are reactive." - value = [for github_runner in var.github_runners : github_runner.app_name if lookup(github_runner.config, "max-total-virtual-machines", 0) > 0] -} - output "all_runner_names" { description = "Names of the all the deployed github-runner applications." value = [for github_runner in values(module.github_runner) : github_runner.app_name] diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 0ca9785521..3dca261d80 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -176,9 +176,6 @@ def complete_charm_state_fixture(): tags=["arm64", "noble"], ), ), - reactive_config=charm_state.ReactiveConfig( - mq_uri="mongodb://user:password@localhost:27017", - ), ssh_debug_connections=[ charm_state.SSHDebugConnection( host="10.10.10.10", diff --git a/tests/unit/factories.py b/tests/unit/factories.py index 6d2fbe4aa8..c5b965aeb3 100644 --- a/tests/unit/factories.py +++ b/tests/unit/factories.py @@ -27,7 +27,6 @@ GROUP_CONFIG_NAME, LABELS_CONFIG_NAME, MAX_TOTAL_VIRTUAL_MACHINES_CONFIG_NAME, - MONGO_DB_INTEGRATION_NAME, OPENSTACK_CLOUDS_YAML_CONFIG_NAME, OPENSTACK_FLAVOR_CONFIG_NAME, OPENSTACK_NETWORK_CONFIG_NAME, @@ -111,7 +110,6 @@ class Meta: { COS_AGENT_INTEGRATION_NAME: [], DEBUG_SSH_INTEGRATION_NAME: [], - MONGO_DB_INTEGRATION_NAME: [], PLANNER_INTEGRATION_NAME: [], } ) diff --git a/tests/unit/test_charm.py b/tests/unit/test_charm.py index c0f18a5d0c..625c1bc9ed 100644 --- a/tests/unit/test_charm.py +++ b/tests/unit/test_charm.py @@ -16,7 +16,7 @@ import yaml from github_runner_manager import constants from github_runner_manager.platform.platform_provider import TokenError -from ops.model import BlockedStatus, StatusBase, WaitingStatus +from ops.model import BlockedStatus, StatusBase from ops.testing import Harness from charm import ( @@ -30,7 +30,6 @@ FLAVOR_LABEL_COMBINATIONS_CONFIG_NAME, IMAGE_INTEGRATION_NAME, LABELS_CONFIG_NAME, - MONGO_DB_INTEGRATION_NAME, OPENSTACK_CLOUDS_YAML_CONFIG_NAME, OPENSTACK_FLAVOR_CONFIG_NAME, PATH_CONFIG_NAME, @@ -52,7 +51,6 @@ ImageIntegrationMissingError, ImageNotFoundError, LogrotateSetupError, - MissingMongoDBError, RunnerError, SubprocessError, ) @@ -363,22 +361,6 @@ def test_on_install_failure( assert "mock stderr" in str(exc.value) -def test_charm_goes_into_waiting_state_on_missing_integration_data( - monkeypatch: pytest.MonkeyPatch, harness: Harness -): - """ - arrange: Mock charm._setup_state to raise an MissingIntegrationDataError. - act: Fire config changed event. - assert: Charm is in blocked state. - """ - setup_state_mock = MagicMock(side_effect=MissingMongoDBError("mock error")) - monkeypatch.setattr(GithubRunnerCharm, "_setup_state", setup_state_mock) - harness.update_config({PATH_CONFIG_NAME: "mockorg/repo", TOKEN_CONFIG_NAME: "mocktoken"}) - harness.charm.on.config_changed.emit() - assert isinstance(harness.charm.unit.status, WaitingStatus) - assert "mock error" in harness.charm.unit.status.message - - def test_check_runners_action_with_errors(): mock_event = MagicMock() @@ -685,34 +667,6 @@ def test_attempting_disable_legacy_service_for_upgrade( ) -@pytest.mark.parametrize( - "hook", - [ - pytest.param("database_created", id="Database Created"), - pytest.param("endpoints_changed", id="Endpoints Changed"), - pytest.param("mongodb_relation_broken", id="MongoDB Relation Departed"), - ], -) -def test_database_integration_events_setup_service( - hook: str, monkeypatch: pytest.MonkeyPatch, harness: Harness -): - """ - arrange: Mock charm._setup_service. - act: Fire mongodb relation events. - assert: _setup_service has been called. - """ - setup_service_mock = MagicMock() - relation_mock = MagicMock() - relation_mock.name = "mongodb" - relation_mock.id = 0 - monkeypatch.setattr("charm.GithubRunnerCharm._setup_service", setup_service_mock) - if hook.startswith(MONGO_DB_INTEGRATION_NAME): - getattr(harness.charm.on, hook).emit(relation=relation_mock) - else: - getattr(harness.charm.database.on, hook).emit(relation=relation_mock) - setup_service_mock.assert_called_once() - - def test_planner_relation_changed_writes_flavor(monkeypatch: pytest.MonkeyPatch): """ arrange: Set up charm with mocked _setup_state and _setup_service. diff --git a/tests/unit/test_charm_state.py b/tests/unit/test_charm_state.py index cc53dfe716..5beee9a0a6 100644 --- a/tests/unit/test_charm_state.py +++ b/tests/unit/test_charm_state.py @@ -1,13 +1,11 @@ # Copyright 2026 Canonical Ltd. # See LICENSE file for licensing details. import json -import logging import secrets from unittest.mock import MagicMock import pytest import yaml -from charms.data_platform_libs.v0.data_interfaces import DatabaseRequires from github_runner_manager.configuration.github import GitHubOrg, GitHubRepo from pydantic import BaseModel from pydantic.error_wrappers import ValidationError @@ -51,7 +49,6 @@ SSHDebugConnection, _build_planner_config_from_charm, ) -from errors import MissingMongoDBError from tests.unit.factories import MockGithubRunnerCharmFactory @@ -282,33 +279,6 @@ def test_parse_openstack_clouds_config_valid(valid_yaml_config: str): assert "clouds" in result -@pytest.mark.parametrize("reconcile_interval", [(0), (1)]) -def test_check_reconcile_interval_invalid(reconcile_interval: int): - """ - arrange: Provide an invalid reconcile interval value. - act: Call check_reconcile_interval method with the provided value. - assert: Verify that the method raises ValueError with the correct message. - """ - with pytest.raises(ValueError) as exc_info: - CharmConfig.check_reconcile_interval(reconcile_interval) - assert ( - str(exc_info.value) - == "The reconcile-interval configuration needs to be greater or equal to 1" - ) - - -@pytest.mark.parametrize("reconcile_interval", [(2), (5), (10)]) -def test_check_reconcile_interval_valid(reconcile_interval: int): - """ - arrange: Provide a valid reconcile interval value. - act: Call check_reconcile_interval method with the provided value. - assert: Verify that the method returns the same value. - """ - result = CharmConfig.check_reconcile_interval(reconcile_interval) - - assert result == reconcile_interval - - def test_charm_config_from_charm_invalid_reconcile_interval(): """ arrange: Create a mock CharmBase instance with an invalid reconcile interval. @@ -323,6 +293,19 @@ def test_charm_config_from_charm_invalid_reconcile_interval(): assert str(exc_info.value) == "The reconcile-interval config must be int" +def test_charm_config_from_charm_reconcile_interval_too_low(): + """ + arrange: Create a mock CharmBase instance with a reconcile interval below 1. + act: Call from_charm. + assert: CharmConfigInvalidError is raised. + """ + mock_charm = MockGithubRunnerCharmFactory() + mock_charm.config[RECONCILE_INTERVAL_CONFIG_NAME] = "0" + + with pytest.raises(CharmConfigInvalidError, match="greater than or equal to 1"): + CharmConfig.from_charm(mock_charm) + + def test_charm_config_from_charm_invalid_labels(): """ arrange: Create a mock CharmBase instance with an invalid reconcile interval. @@ -573,81 +556,6 @@ def test_ssh_debug_connection_from_charm(use_runner_http_proxy: bool): assert connections[0].use_runner_http_proxy == use_runner_http_proxy -def test_reactive_config_from_charm(): - """ - arrange: Mock CharmBase instance with relation data and config option set. - act: Call ReactiveConfig.from_charm method. - assert: Verify that the method returns the expected object. - """ - mongodb_uri = "mongodb://user:password@localhost:27017" - mock_charm = MockGithubRunnerCharmFactory() - relation_mock = MagicMock() - app_mock = MagicMock() - relation_mock.app = app_mock - mock_charm.app = app_mock - relation_mock.data = { - app_mock: { - "uris": mongodb_uri, - }, - mock_charm.model.unit: {}, - } - mock_charm.model.relations[charm_state.MONGO_DB_INTEGRATION_NAME] = [relation_mock] - database = DatabaseRequires( - mock_charm, relation_name=charm_state.MONGO_DB_INTEGRATION_NAME, database_name="test" - ) - - connection_info = charm_state.ReactiveConfig.from_database(database) - - assert isinstance(connection_info, charm_state.ReactiveConfig) - assert connection_info.mq_uri == mongodb_uri - - -def test_reactive_config_from_database_returns_none(): - """ - arrange: Mock CharmBase instance without relation data. - act: Call ReactiveConfig.from_database method. - assert: None is returned. - """ - mock_charm = MockGithubRunnerCharmFactory() - mock_charm.model.relations[charm_state.MONGO_DB_INTEGRATION_NAME] = [] - - database = DatabaseRequires( - mock_charm, relation_name=charm_state.MONGO_DB_INTEGRATION_NAME, database_name="test" - ) - - connection_info = charm_state.ReactiveConfig.from_database(database) - - assert connection_info is None - - -def test_reactive_config_from_database_integration_data_missing(): - """ - arrange: Mock CharmBase instance with relation but without data and with config option set. - act: Call ReactiveConfig.from_charm method. - assert: IntegrationDataMissingError is raised. - """ - mock_charm = MockGithubRunnerCharmFactory() - relation_mock = MagicMock() - app_mock = MagicMock() - relation_mock.app = app_mock - relation_mock.data = { - app_mock: {}, - mock_charm.model.unit: {}, - } - mock_charm.model.relations[charm_state.MONGO_DB_INTEGRATION_NAME] = [relation_mock] - - database = DatabaseRequires( - mock_charm, relation_name=charm_state.MONGO_DB_INTEGRATION_NAME, database_name="test" - ) - - with pytest.raises(MissingMongoDBError) as exc: - charm_state.ReactiveConfig.from_database(database) - - assert f"Missing uris for {charm_state.MONGO_DB_INTEGRATION_NAME} integration" in str( - exc.value - ) - - @pytest.fixture def mock_charm_state_path(): """Fixture to mock CHARM_STATE_PATH.""" @@ -662,7 +570,6 @@ def mock_charm_state_data(): "is_metrics_logging_available": True, "proxy_config": {"http": "http://example.com", "https": "https://example.com"}, "charm_config": {"token": secrets.token_hex(16)}, - "reactive_config": {"uri": "mongodb://user:password@localhost:27017"}, "runner_config": { "virtual_machines": 2, }, @@ -695,7 +602,6 @@ def test_charm_state_from_charm_invalid_cases( assert: Ensure CharmConfigInvalidError is raised with the appropriate message. """ mock_charm = MockGithubRunnerCharmFactory() - mock_database = MagicMock(spec=DatabaseRequires) monkeypatch.setattr("charm_state.build_proxy_config_from_charm", MagicMock()) mock_charm_config = MagicMock() mock_charm_config.openstack_clouds_yaml = None @@ -707,7 +613,7 @@ def test_charm_state_from_charm_invalid_cases( monkeypatch.setattr(module, target, MagicMock(side_effect=exc)) with pytest.raises(CharmConfigInvalidError): - CharmState.from_charm(mock_charm, mock_database) + CharmState.from_charm(mock_charm) def test_charm_state_from_charm(monkeypatch: pytest.MonkeyPatch): @@ -717,17 +623,15 @@ def test_charm_state_from_charm(monkeypatch: pytest.MonkeyPatch): assert: Ensure no errors are raised. """ mock_charm = MockGithubRunnerCharmFactory() - mock_database = MagicMock(spec=DatabaseRequires) monkeypatch.setattr("charm_state.build_proxy_config_from_charm", MagicMock()) monkeypatch.setattr(CharmConfig, "from_charm", MagicMock()) monkeypatch.setattr(OpenstackRunnerConfig, "from_charm", MagicMock()) - monkeypatch.setattr(charm_state, "ReactiveConfig", MagicMock()) monkeypatch.setattr("charm_state._build_ssh_debug_connection_from_charm", MagicMock()) monkeypatch.setattr(json, "loads", MagicMock()) monkeypatch.setattr(json, "dumps", MagicMock()) monkeypatch.setattr(charm_state, "CHARM_STATE_PATH", MagicMock()) - assert CharmState.from_charm(mock_charm, mock_database) + assert CharmState.from_charm(mock_charm) def test_planner_config_from_charm_extracts_endpoint_token(): @@ -810,16 +714,14 @@ def test_parse_virtual_machine_numbers( """ mock_charm = MockGithubRunnerCharmFactory() monkeypatch.setattr(OpenstackImage, "from_charm", MagicMock()) - monkeypatch.setattr(charm_state, "ReactiveConfig", MagicMock()) monkeypatch.setattr(json, "loads", MagicMock()) monkeypatch.setattr(json, "dumps", MagicMock()) monkeypatch.setattr(charm_state, "CHARM_STATE_PATH", MagicMock()) - mock_database = MagicMock(spec=DatabaseRequires) mock_charm.config[VIRTUAL_MACHINES_CONFIG_NAME] = virtual_machines mock_charm.config[BASE_VIRTUAL_MACHINES_CONFIG_NAME] = base_virtual_machines mock_charm.config[MAX_TOTAL_VIRTUAL_MACHINES_CONFIG_NAME] = max_total_virtual_machines - state = CharmState.from_charm(mock_charm, mock_database) + state = CharmState.from_charm(mock_charm) assert state.runner_config.base_virtual_machines == expected_base assert state.runner_config.max_total_virtual_machines == expected_max @@ -868,17 +770,15 @@ def test_error_parse_virtual_machine_numbers( """ mock_charm = MockGithubRunnerCharmFactory() monkeypatch.setattr(OpenstackImage, "from_charm", MagicMock()) - monkeypatch.setattr(charm_state, "ReactiveConfig", MagicMock()) monkeypatch.setattr(json, "loads", MagicMock()) monkeypatch.setattr(json, "dumps", MagicMock()) monkeypatch.setattr(charm_state, "CHARM_STATE_PATH", MagicMock()) - mock_database = MagicMock(spec=DatabaseRequires) mock_charm.config[VIRTUAL_MACHINES_CONFIG_NAME] = virtual_machines mock_charm.config[BASE_VIRTUAL_MACHINES_CONFIG_NAME] = base_virtual_machines mock_charm.config[MAX_TOTAL_VIRTUAL_MACHINES_CONFIG_NAME] = max_total_virtual_machines with pytest.raises(CharmConfigInvalidError) as exc_info: - _ = CharmState.from_charm(mock_charm, mock_database) + _ = CharmState.from_charm(mock_charm) assert expected_error_message in str(exc_info.value) @@ -919,7 +819,6 @@ def test_parse_flavor_config( """ mock_charm = MockGithubRunnerCharmFactory() monkeypatch.setattr(OpenstackImage, "from_charm", MagicMock()) - monkeypatch.setattr(charm_state, "ReactiveConfig", MagicMock()) monkeypatch.setattr(json, "loads", MagicMock()) monkeypatch.setattr(json, "dumps", MagicMock()) monkeypatch.setattr(charm_state, "CHARM_STATE_PATH", MagicMock()) @@ -927,8 +826,7 @@ def test_parse_flavor_config( mock_charm.config[FLAVOR_LABEL_COMBINATIONS_CONFIG_NAME] = flavor_label_combinations mock_charm.config[LABELS_CONFIG_NAME] = labels - mock_database = MagicMock(spec=DatabaseRequires) - state = CharmState.from_charm(mock_charm, mock_database) + state = CharmState.from_charm(mock_charm) assert state.charm_config.labels == expected_labels assert state.runner_config.flavor_label_combinations == expected_flavor_label_combinations @@ -967,7 +865,6 @@ def test_errror_flavor_config( """ mock_charm = MockGithubRunnerCharmFactory() monkeypatch.setattr(OpenstackImage, "from_charm", MagicMock()) - monkeypatch.setattr(charm_state, "ReactiveConfig", MagicMock()) monkeypatch.setattr(json, "loads", MagicMock()) monkeypatch.setattr(json, "dumps", MagicMock()) monkeypatch.setattr(charm_state, "CHARM_STATE_PATH", MagicMock()) @@ -975,27 +872,11 @@ def test_errror_flavor_config( mock_charm.config[FLAVOR_LABEL_COMBINATIONS_CONFIG_NAME] = flavor_label_combinations mock_charm.config[LABELS_CONFIG_NAME] = labels - mock_database = MagicMock(spec=DatabaseRequires) with pytest.raises(CharmConfigInvalidError) as exc_info: - _ = CharmState.from_charm(mock_charm, mock_database) + _ = CharmState.from_charm(mock_charm) assert expected_error_message in str(exc_info.value) -def test_charm_state__log_prev_state_redacts_sensitive_information( - mock_charm_state_data: dict, caplog: pytest.LogCaptureFixture -): - """ - arrange: Arrange charm state data with a token and set log level to DEBUG. - act: Call the __log_prev_state method on the class. - assert: Verify that the method redacts the sensitive information in the log message. - """ - caplog.set_level(logging.DEBUG) - CharmState._log_prev_state(mock_charm_state_data) - - assert mock_charm_state_data["charm_config"]["token"] not in caplog.text - assert charm_state.SENSITIVE_PLACEHOLDER in caplog.text - - @pytest.mark.parametrize( "juju_http, juju_https, juju_no_proxy, runner_http, use_aproxy, " "aproxy_exclude_addresses, aproxy_redirect_ports, expected_proxy, " @@ -1118,10 +999,8 @@ def test_proxy_config( mock_charm.config[APROXY_EXCLUDE_ADDRESSES_CONFIG_NAME] = aproxy_exclude_addresses mock_charm.config[APROXY_REDIRECT_PORTS_CONFIG_NAME] = aproxy_redirect_ports mock_charm.model.relations[IMAGE_INTEGRATION_NAME] = [] - mock_database = MagicMock(spec=DatabaseRequires) - mock_database.relations = [] - charm_state = CharmState.from_charm(mock_charm, mock_database) + charm_state = CharmState.from_charm(mock_charm) assert charm_state.charm_config.use_aproxy == use_aproxy assert ", ".join(charm_state.charm_config.aproxy_exclude_addresses) == aproxy_exclude_addresses @@ -1158,8 +1037,6 @@ def test_invalid_aproxy_config_in_charm_state( mock_charm.config[APROXY_EXCLUDE_ADDRESSES_CONFIG_NAME] = aproxy_exclude_addresses mock_charm.config[APROXY_REDIRECT_PORTS_CONFIG_NAME] = aproxy_redirect_ports mock_charm.model.relations[IMAGE_INTEGRATION_NAME] = [] - mock_database = MagicMock(spec=DatabaseRequires) - mock_database.relations = [] with pytest.raises(CharmConfigInvalidError): - CharmState.from_charm(mock_charm, mock_database) + CharmState.from_charm(mock_charm) diff --git a/tests/unit/test_factories.py b/tests/unit/test_factories.py index e6029e2955..a164e5a081 100644 --- a/tests/unit/test_factories.py +++ b/tests/unit/test_factories.py @@ -6,11 +6,9 @@ ApplicationConfiguration, Flavor, Image, - NonReactiveCombination, - NonReactiveConfiguration, ProxyConfig, - QueueConfig, - ReactiveConfiguration, + RunnerCombination, + RunnerConfiguration, SSHDebugConnection, SupportServiceConfig, ) @@ -19,7 +17,6 @@ OpenStackConfiguration, OpenStackCredentials, ) -from pydantic import MongoDsn from pydantic.networks import IPv4Address import charm_state @@ -71,9 +68,9 @@ def test_create_application_configuration( ) ], ), - non_reactive_configuration=NonReactiveConfiguration( + runner_configuration=RunnerConfiguration( combinations=[ - NonReactiveCombination( + RunnerCombination( image=Image(name="image_id", labels=["arm64", "noble"]), flavor=Flavor(name="flavor", labels=["flavorlabel"]), base_virtual_machines=1, @@ -81,23 +78,6 @@ def test_create_application_configuration( ) ] ), - reactive_configuration=ReactiveConfiguration( - queue=QueueConfig( - mongodb_uri=MongoDsn( - "mongodb://user:password@localhost:27017", - scheme="mongodb", - user="user", - password="password", - host="localhost", - host_type="int_domain", - port="27017", - ), - queue_name="app_name", - ), - max_total_virtual_machines=2, - images=[Image(name="image_id", labels=["arm64", "noble"])], - flavors=[Flavor(name="flavor", labels=["flavorlabel"])], - ), openstack_configuration=OpenStackConfiguration( vm_prefix="unit_name", network="network", diff --git a/tests/unit/test_logrotate.py b/tests/unit/test_logrotate.py index 892aba6513..4230ff0c4a 100644 --- a/tests/unit/test_logrotate.py +++ b/tests/unit/test_logrotate.py @@ -72,7 +72,7 @@ def test_setup_writes_logrotate_config(logrotate_dir: Path): logrotate.setup() assert logrotate.LOGROTATE_CONFIG_DIR.is_dir() assert (logrotate_dir / logrotate.METRICS_LOGROTATE_CONFIG.name).exists() - assert (logrotate_dir / logrotate.REACTIVE_LOGROTATE_CONFIG.name).exists() + assert (logrotate_dir / logrotate.GITHUB_RUNNER_MANAGER_CONFIG.name).exists() @pytest.mark.parametrize("create", [True, False]) diff --git a/tests/unit/test_manager_service.py b/tests/unit/test_manager_service.py index 4f62ba4c20..8795ad8c4e 100644 --- a/tests/unit/test_manager_service.py +++ b/tests/unit/test_manager_service.py @@ -109,8 +109,7 @@ def test_setup_started( # Check some configuration options assert "openstack_configuration:" in config_content assert "manager_proxy_command: ssh -W %h:%p example.com" in config_content - assert "non_reactive_configuration:" in config_content - assert "mongodb_uri: mongodb://user:password@localhost:27017" in config_content + assert "runner_configuration:" in config_content mock_systemd.service_enable.assert_called_once() mock_systemd.service_start.assert_not_called()