diff --git a/docs/docs/.pages b/docs/docs/.pages new file mode 100644 index 0000000..20d8e0c --- /dev/null +++ b/docs/docs/.pages @@ -0,0 +1,4 @@ +nav: + - 'index.md' + - 'features' + - '...' diff --git a/docs/docs/arch/index.md b/docs/docs/arch/index.md deleted file mode 100644 index e69de29..0000000 diff --git a/docs/docs/architecture/index.md b/docs/docs/architecture/index.md new file mode 100644 index 0000000..3f5b4c8 --- /dev/null +++ b/docs/docs/architecture/index.md @@ -0,0 +1,5 @@ +# Architecture + +pgDog is written in async Rust, using the Tokio runtime. This allows the pooler to take advantage of multiple +CPU cores, when available. [Plugins](../features/plugins/index.md) are written as shared libraries +and are loaded into the executable at runtime. diff --git a/docs/docs/architecture/performance.md b/docs/docs/architecture/performance.md new file mode 100644 index 0000000..3a299b3 --- /dev/null +++ b/docs/docs/architecture/performance.md @@ -0,0 +1 @@ +# Performance & benchmarks diff --git a/docs/docs/config/index.md b/docs/docs/config/index.md deleted file mode 100644 index e69de29..0000000 diff --git a/docs/docs/configuration/index.md b/docs/docs/configuration/index.md new file mode 100644 index 0000000..a025a48 --- /dev/null +++ b/docs/docs/configuration/index.md @@ -0,0 +1 @@ +# Configuration diff --git a/docs/docs/features/.pages b/docs/docs/features/.pages new file mode 100644 index 0000000..9badc9a --- /dev/null +++ b/docs/docs/features/.pages @@ -0,0 +1,4 @@ +nav: + - 'index.md' + - 'load-balancer.md' + - '...' diff --git a/docs/docs/features/healthchecks.md b/docs/docs/features/healthchecks.md new file mode 100644 index 0000000..f59e238 --- /dev/null +++ b/docs/docs/features/healthchecks.md @@ -0,0 +1,57 @@ +# Healthchecks + +Databases proxied by pgDog are regularly checked with healthchecks. A healthcheck is a simple query, e.g. +`SELECT 1`, which ensures the database is reachable and able to answer requests. 
+ +If a database fails a healthcheck, it's placed in a list of banned hosts. Banned databases are removed +from the load balancer and will not serve transactions. This allows pgDog to reduce errors clients see +when a database fails, for example due to hardware issues. + +
+ Healthchecks

Replica failure

+
+ +## Configuration + +Healthchecks are enabled by default and are used for all databases. The healthcheck interval is configurable +at the global and database levels. + +The default healthcheck interval is **30 seconds**. + +```toml +[global] +healthcheck_interval = 30_000 # ms + +[[databases]] +name = "prod" +healthcheck_interval = 60_000 # ms +``` + +### Timeouts + +By default, pgDog gives the database **5 seconds** to answer a healthcheck. If it doesn't receive a reply, +the database will be banned from serving traffic for a configurable amount of time. Both the healthcheck timeout +and the ban time are configurable. + +```toml +[global] +healthcheck_timeout = 5_000 # 5 seconds +ban_time = 60_000 # 1 minute +``` + +### Ban expiration + +By default, a ban has an expiration. Once the ban expires, the replica is unbanned and placed back into +rotation. This is done to maintain a healthy level of traffic across all databases and to allow for intermittent +issues, like network connectivity, to resolve themselves without manual intervention. + +### Failsafe + +If all databases in a cluster are banned due to a healthcheck failure, pgDog assumes that healthchecks +are returning incorrect information and unbans all databases in the cluster. This protects against false positives +and ensures the cluster continues to serve traffic. + +## Learn more + +- [Load balancer](load-balancer.md) diff --git a/docs/docs/features/index.md b/docs/docs/features/index.md index 9af2c8e..b4c32d6 100644 --- a/docs/docs/features/index.md +++ b/docs/docs/features/index.md @@ -1,18 +1,16 @@ -# pgDog features +# Features -pgDog contains multiple foundational and unique features which make it a great choice for modern PostgreSQL deployments. +pgDog contains multiple foundational and unique features which make it a great choice +for modern PostgreSQL deployments. -## Load balancing +Most features are configurable and can be toggled and tuned. 
Experimental features are marked +as such, and users are advised to test them before deploying to production. Most foundational features like +load balancing, healthchecks, and query routing have been battle-tested and work well in production. -pgDog acts as an application level load balancer (OSI Level 7) for PostgreSQL. It routes transcations -from clients to different Postgres databases, allowing a cluster of replicas to share the load. - -### Healthchecks - -pgDog issues regular health checks to all databases and maintains a list of healthy databases. Transactions -are only routed to healthy hosts, while databases that experience errors are removed from the rotation automatically. - -#### Automatic repair -If a previously unhealthy host is repaired, pgDog will automatically detect this change and place the healthy -database back in rotation. +## Summary +| Feature | Description | State | +|---------|-------------|-------| +| [Transaction mode](transaction-mode.md) | Multiplex transactions and servers, allowing for high reuse of PostgreSQL server connections. | ✔️ Good | +| [Load balancer](load-balancer.md) | Splits query traffic evenly across multiple databases. | 🔨 Work in progress | +| [Healthchecks](healthchecks.md) | Periodically checks databases to ensure they can serve queries. | ✔️ Stable | diff --git a/docs/docs/features/load-balancer.md b/docs/docs/features/load-balancer.md new file mode 100644 index 0000000..78e7e6b --- /dev/null +++ b/docs/docs/features/load-balancer.md @@ -0,0 +1,69 @@ +# Load balancer + +pgDog operates at the application layer (OSI Level 7) and is capable of load balancing queries across +multiple PostgreSQL databases. + +
+ Load balancer +
+ +## Strategies + +The load balancer is configurable and can route queries +using one of several strategies: + +* Random (default) +* Least active connections +* Round robin + + +### Random + +Queries are sent to a database chosen using a random number generator, modulo the number of replicas in the pool. +This strategy is the simplest and is often effective at splitting traffic evenly across the cluster. It's unbiased +and assumes nothing about available resources or query performance. + +This strategy is used by **default**. + +### Least active connections + +!!! note + This feature is still under development. + +pgDog keeps track of how many active connections each database has and can route queries to databases +which are least busy executing requests. This allows pgDog to "bin pack" the cluster based on how seemingly active +(or inactive) the databases are. + +This strategy is useful when all databases have identical resources and all queries have roughly the same +cost and runtime. + +### Round robin + +!!! note + This feature is still under development. + +This strategy is often used in HTTP load balancers like nginx to route requests to hosts in the +same order they appear in the configuration. Each database receives exactly one query before the next +one is used. + +This strategy makes the same assumptions as [least active connections](#least-active-connections), except it makes no attempt to bin pack +the cluster with workload and distributes queries evenly. + +## Configuration + +The load balancer is enabled automatically when a database cluster contains more than +one database. 
For example: + +```toml +[[databases]] +name = "prod" +host = "10.0.0.1" + +[[databases]] +name = "prod" +host = "10.0.0.2" +``` + +## Learn more + +- [Healthchecks](healthchecks.md) diff --git a/docs/docs/features/plugins/c.md b/docs/docs/features/plugins/c.md new file mode 100644 index 0000000..bea9377 --- /dev/null +++ b/docs/docs/features/plugins/c.md @@ -0,0 +1 @@ +# Plugins in C diff --git a/docs/docs/features/plugins/index.md b/docs/docs/features/plugins/index.md new file mode 100644 index 0000000..b6b89fd --- /dev/null +++ b/docs/docs/features/plugins/index.md @@ -0,0 +1,62 @@ +# Plugins + +One of the features that makes pgDog particularly powerful is its plugin system. Users of pgDog can write plugins +in any language and inject them inside the query router to direct query traffic, to rewrite queries, or to block +them entirely and return a custom result. + +## API + +pgDog plugins are shared libraries loaded at application startup. They can be written in any programming language, as long +as that language can be compiled to a shared library, and can expose a predefined set of C ABI-compatible functions. + +### Functions + +#### `pgdog_init` + +This function is executed once when pgDog loads the plugin, at application startup. It allows the plugin to initialize any +kind of internal plugin state. Execution of this function is synchronized, so it's safe to execute any thread-unsafe +functions or initialize synchronization primitives, like mutexes. + + +This function has the following signature: + +=== "C/C++" + ```c + void pgdog_init(); + ``` +=== "Rust" + ```rust + pub extern "C" fn pgdog_init() {} + ``` + + +#### `pgdog_route_query` + +This function is called every time the query router sees a new query and needs to figure out +where this query should be sent. The query text and parameters will be provided and the router +expects the plugin to parse the query and provide a route. 
+ +This function has the following signature: + +=== "C/C++" + ```c + Route pgdog_route_query(Query query); + ``` +=== "Rust" + ```rust + use pgdog_plugin::bindings; + + pub extern "C" fn pgdog_route_query(query: bindings::Query) -> Route { + Route::unknown() + } + ``` + +## Examples + +Example plugins written in Rust and C are +available on [GitHub](https://github.com/levkk/pgdog/tree/main/examples). + +## Learn more + +- [Plugins in Rust](rust.md) +- [Plugins in C](c.md) diff --git a/docs/docs/features/plugins/rust.md b/docs/docs/features/plugins/rust.md new file mode 100644 index 0000000..d46218c --- /dev/null +++ b/docs/docs/features/plugins/rust.md @@ -0,0 +1 @@ +# Plugins in Rust diff --git a/docs/docs/features/transaction-mode.md b/docs/docs/features/transaction-mode.md new file mode 100644 index 0000000..8239f27 --- /dev/null +++ b/docs/docs/features/transaction-mode.md @@ -0,0 +1,35 @@ +# Transaction mode + +In transaction mode, pgDog is able to multiplex client transactions with several PostgreSQL backend servers. This +allows the pooler to serve thousands of clients using only dozens of actual server connections. This feature is essential for at-scale PostgreSQL deployments since Postgres is not able to maintain +more than a few thousand concurrently open connections. + +## Enable transaction mode + +Transaction mode is **enabled** by default. This is controllable via configuration, at the global +and database levels. + +```toml +[general] +pooler_mode = "transaction" + +[[databases]] +name = "prod" +pooler_mode = "transaction" +``` + +## Session state + +!!! note + This feature is a work in progress. + +Since clients in transaction mode reuse PostgreSQL server connections, it's possible for session-level variables and state to leak between clients. pgDog keeps track of connection state modifications and can automatically clean up server connections after a transaction. 
While this helps prevent session variable leakage between clients, it does have a small performance overhead. + +To avoid this, clients using pgDog in transaction mode should avoid the usage of `SET` statements and use `SET LOCAL` inside an explicit transaction instead: + +```postgresql +BEGIN; +SET LOCAL statement_timeout = '30s'; +SELECT * FROM my_table; +COMMIT; +``` diff --git a/docs/docs/images/Untitled(1).png:Zone.Identifier b/docs/docs/images/Untitled(1).png:Zone.Identifier new file mode 100644 index 0000000..053d112 --- /dev/null +++ b/docs/docs/images/Untitled(1).png:Zone.Identifier @@ -0,0 +1,3 @@ +[ZoneTransfer] +ZoneId=3 +HostUrl=about:internet diff --git a/docs/docs/images/Untitled(2).png:Zone.Identifier b/docs/docs/images/Untitled(2).png:Zone.Identifier new file mode 100644 index 0000000..053d112 --- /dev/null +++ b/docs/docs/images/Untitled(2).png:Zone.Identifier @@ -0,0 +1,3 @@ +[ZoneTransfer] +ZoneId=3 +HostUrl=about:internet diff --git a/docs/docs/images/Untitled.png:Zone.Identifier b/docs/docs/images/Untitled.png:Zone.Identifier new file mode 100644 index 0000000..053d112 --- /dev/null +++ b/docs/docs/images/Untitled.png:Zone.Identifier @@ -0,0 +1,3 @@ +[ZoneTransfer] +ZoneId=3 +HostUrl=about:internet diff --git a/docs/docs/images/healtchecks.png b/docs/docs/images/healtchecks.png new file mode 100644 index 0000000..d1d1c37 Binary files /dev/null and b/docs/docs/images/healtchecks.png differ diff --git a/docs/docs/images/replicas.png b/docs/docs/images/replicas.png new file mode 100644 index 0000000..0086ad5 Binary files /dev/null and b/docs/docs/images/replicas.png differ diff --git a/docs/docs/index.md b/docs/docs/index.md index 3f8acc4..1e5b02a 100644 --- a/docs/docs/index.md +++ b/docs/docs/index.md @@ -93,6 +93,6 @@ Starting the pooler can be done by executing the binary or with Cargo: ## Next steps * [Features](features/index.md) -* [Architecture](arch/index.md) -* [Configuration](config/index.md) +* [Architecture](architecture/index.md) 
+* [Configuration](configuration/index.md) diff --git a/pgdog/src/config/mod.rs b/pgdog/src/config/mod.rs index de410dc..8989f4c 100644 --- a/pgdog/src/config/mod.rs +++ b/pgdog/src/config/mod.rs @@ -153,12 +153,15 @@ pub struct Database { /// Database name visible to the clients. pub name: String, /// Database role, e.g. primary. + #[serde(default)] pub role: Role, /// Database host or IP address, e.g. 127.0.0.1. pub host: String, /// Database port, e.g. 5432. + #[serde(default = "Database::port")] pub port: u16, /// PostgreSQL database name, e.g. "postgres". + #[serde(default = "Database::database_name")] pub database_name: String, /// Use this user to connect to the database, overriding the userlist. pub user: Option, @@ -174,13 +177,21 @@ impl Database { fn max_connections() -> usize { usize::MAX } + + fn port() -> u16 { + 5432 + } + + fn database_name() -> String { + "postgres".into() + } } #[derive(Serialize, Deserialize, Debug, Clone, Default, PartialEq)] #[serde(rename_all = "snake_case")] pub enum Role { - #[default] Primary, + #[default] Replica, }