From 8d9b8d321b4c4e7290fdacee652a19c528ca8945 Mon Sep 17 00:00:00 2001 From: Chris Arderne Date: Sat, 13 Jul 2024 12:14:39 +0100 Subject: [PATCH] improve all docs and readme --- README.md | 134 ++++++++++++++++++++++++++++++++++++++------- py/upid/core.py | 18 +++++- upid_pg/src/lib.rs | 13 +++-- upid_rs/src/b32.rs | 4 +- upid_rs/src/lib.rs | 87 ++++++++++++++++++++--------- 5 files changed, 200 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index 926e3ff..795cb0a 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ SELECT id FROM users; -- user_2accvpp5guht4dts56je5a SELECT id FROM users WHERE id = 'user_2accvpp5guht4dts56je5a'; ``` -Plays nice with your server code too, no extra work needed: +Plays nice with your server code, no extra work needed: ```python with psycopg.connect("postgresql://...") as conn: res = conn.execute("SELECT id FROM users").fetchone() @@ -79,7 +79,7 @@ Key changes relative to ULID: ``` ### Collision -Relative to ULID, the time precision is reduced from 48 to 40 bits (keeping the most significant bits, so oveflow still won't occur until 10889 AD), and the randomness reduced from 80 to 64 bits. +Relative to ULID, the time precision is reduced from 48 to 40 bits (keeping the most significant bits, so overflow still won't occur until 10889 AD), and the randomness reduced from 80 to 64 bits. The timestamp precision at 40 bits is around 250 milliseconds. In order to have a 50% probability of collision with 64 bits of randomness, you would need to generate around **4 billion items per 250 millisecond window**. 
@@ -104,8 +104,41 @@ from upid import upid upid("user") ``` +Or more explicitly: +```python +from upid import UPID +UPID.from_prefix("user") +``` + +Or specifying your own timestamp or datetime: +```python +import time, datetime +UPID.from_prefix_and_milliseconds("user", int(time.time() * 1000)) +UPID.from_prefix_and_datetime("user", datetime.datetime.now()) +``` + +From and to a string: +```python +u = UPID.from_str("user_2accvpp5guht4dts56je5a") +u.to_str() # user_2a... +``` + +Get stuff out: +```python +u.prefix # user +u.datetime # 2024-07-07 ... +``` + +Convert to other formats: +```python +int(u) # 2079795568564925668398930358940603766 +u.hex # 01908dd6a3669b912738191ea3d61576 +u.to_uuid() # UUID('01908dd6-a366-9b91-2738-191ea3d61576') +``` + #### Development -Code and tests are in the [py/](./py/) directory. Using [Rye](https://rye.astral.sh/) for development (installation instructions at the link). +Code and tests are in the [py/](./py/) directory. +Using [Rye](https://rye.astral.sh/) for development (installation instructions at the link). ```bash # can be run from the repo root @@ -118,6 +151,8 @@ If you just want to have a look around, pip should also work: pip install -e . ``` +Please open a PR if you spot a bug or improvement! + ## Rust implementation The current Rust implementation is based on [dylanhart/ulid-rs](https://github.com/dylanhart/ulid-rs), but using the same lookup base32 lookup method as the Python implementation. @@ -132,6 +167,31 @@ use upid::Upid; Upid::new("user"); ``` +Or specifying your own timestamp or datetime: +```rust +use std::time::SystemTime; +Upid::from_prefix_and_milliseconds("user", 1720366572288); +Upid::from_prefix_and_datetime("user", SystemTime::now()); +``` + +From and to a string: +```rust +let u = Upid::from_string("user_2accvpp5guht4dts56je5a"); +u.to_string(); +``` + +Get stuff out: +```rust +u.prefix(); // user +u.datetime(); // 2024-07-07 ... +u.milliseconds(); // 17203... 
+``` + +Convert to other formats: +```rust +u.to_bytes(); +``` + #### Development Code and tests are in the [upid_rs/](./upid_rs/) directory. @@ -140,48 +200,80 @@ cd upid_rs cargo check # or fmt/clippy/build/test/run ``` +Please open a PR if you spot a bug or improvement! + ## Postgres extension There is also a Postgres extension built on the Rust implementation, using [pgrx](https://github.com/pgcentralfoundation/pgrx) and based on the very similar extension [pksunkara/pgx_ulid](https://github.com/pksunkara/pgx_ulid). #### Installation -You can try out the Docker image [carderne/postgres-upid:16](https://hub.docker.com/r/carderne/postgres-upid): +The easiest would be to try out the Docker image [carderne/postgres-upid:16](https://hub.docker.com/r/carderne/postgres-upid), currently built for arm64 and amd64 but only for Postgres 16: ```bash docker run -e POSTGRES_HOST_AUTH_METHOD=trust -p 5432:5432 carderne/postgres-upid:16 ``` -If you want to install it into another Postgres, you'll install pgrx and follow its [installation instructions](https://github.com/pgcentralfoundation/pgrx/blob/develop/cargo-pgrx/README.md). -Something like this: -```bash -cargo install --locked cargo-pgrx -pgrx init -cd upid_pg -pgrx install -``` +You can also grab a Linux `.deb` from the [Releases](https://github.com/carderne/upid/releases) page. This is built for Postgres 16 and amd64 only. -Installable binaries will come soon. +More architectures and versions will follow once it is out of alpha. #### Usage ```sql -CREATE EXTENSION ulid; - +CREATE EXTENSION upid_pg; CREATE TABLE users ( id upid NOT NULL DEFAULT gen_upid('user') PRIMARY KEY, name text NOT NULL ); + INSERT INTO users (name) VALUES('Bob'); + SELECT * FROM users; +-- id | name +-- -----------------------------+------ +-- user_2accvpp5guht4dts56je5a | Bob ``` -#### Development -Code and tests are in the [upid_pg/](./upid_pg/) directory. 
+You can get the raw `bytea` data, or the prefix or timestamp: +```sql +SELECT upid_to_bytea(id) FROM users; +-- \x019... +SELECT upid_to_prefix(id) FROM users; +-- 'user' + +SELECT upid_to_timestamp(id) FROM users; +-- 2024-07-07 ... +``` + +You can convert a `UPID` to a regular Postgres `UUID`: +```sql +SELECT upid_to_uuid(gen_upid('user')); +``` + +Or the reverse (although the prefix and timestamp will no longer make sense): +```sql +SELECT upid_from_uuid(gen_random_uuid()); +``` + +#### Development +If you want to install it into another Postgres, you'll need to install pgrx and follow its [installation instructions](https://github.com/pgcentralfoundation/pgrx/blob/develop/cargo-pgrx/README.md). +Something like this: ```bash cd upid_pg +cargo install --locked cargo-pgrx +cargo pgrx init +cargo pgrx install +``` + +Some `cargo` commands work as normal: +```bash cargo check # or fmt/clippy +``` + +But building, testing and running must be done via pgrx. +This will compile it into a Postgres installation, and allow an interactive session and tests there. -# must test/run/install with pgrx -# this will compile it into a Postgres installation -# and run the tests there, or drop you into a psql prompt -cargo pgrx test # or run/install +```bash +cargo pgrx test pg16 +# or run +# or install ``` diff --git a/py/upid/core.py b/py/upid/core.py index dd83b6d..7791fe3 100644 --- a/py/upid/core.py +++ b/py/upid/core.py @@ -21,7 +21,8 @@ class UPID: """ The `UPID` contains a 20-bit prefix, 40-bit timestamp and 68 bits of randomness. - The prefix should only contain lower-case latin alphabet characters. + The prefix should only contain lower-case latin alphabet characters and be at most + four characters long. It is usually created using the `upid(prefix: str)` helper function: @@ -78,10 +79,19 @@ def from_prefix_and_milliseconds(cls: type[Self], prefix: str, milliseconds: int @classmethod def from_str(cls: type[Self], string: str) -> Self: + """ + Convert the provided `str` to a `UPID`. 
+ + Raises a `ValueError` if the string is invalid: + - too long + - too short + - contains characters not in the `ENCODE` base32 alphabet + """ return cls(b32.decode(string)) @property def prefix(self) -> str: + """Return just the prefix as a `str`.""" prefix, _ = b32.encode_prefix(self.b[b32.END_RANDO_BIN :]) return prefix @@ -99,14 +109,18 @@ def datetime(self) -> dt.datetime: def hex(self) -> str: return self.b.hex() + def to_str(self) -> str: + return b32.encode(self.b) + def to_uuid(self) -> uuid.UUID: + """Convert to a standard Python UUID.""" return uuid.UUID(bytes=self.b) def __repr__(self) -> str: return f"UPID({self!s})" def __str__(self) -> str: - return b32.encode(self.b) + return self.to_str() def __int__(self) -> int: return int.from_bytes(self.b, "big") diff --git a/upid_pg/src/lib.rs b/upid_pg/src/lib.rs index 7278c83..f976c2a 100644 --- a/upid_pg/src/lib.rs +++ b/upid_pg/src/lib.rs @@ -1,10 +1,10 @@ //! # upid_pg //! //! `upid_pg` is a thin wrapper for [upid](https://crates.io/crates/upid) -//! providing the UPID datatype and generator as a Postgres extension -//! -//! The code below is based largely on the following: -//! https://github.com/pksunkara/pgx_ulid +//! providing the UPID datatype and generator as a Postgres extension. 
+ +// The code below is based largely on the following: +// https://github.com/pksunkara/pgx_ulid use core::ffi::CStr; use inner_upid::Upid as InnerUpid; @@ -105,6 +105,11 @@ fn upid_to_bytea(input: upid) -> Vec { bytes.to_vec() } +#[pg_extern(immutable, parallel_safe)] +fn upid_to_prefix(input: upid) -> String { + InnerUpid(input.0).prefix() +} + #[pg_extern(immutable, parallel_safe)] fn upid_to_timestamp(input: upid) -> Timestamp { let inner_seconds = (InnerUpid(input.0).milliseconds() as f64) / 1000.0; diff --git a/upid_rs/src/b32.rs b/upid_rs/src/b32.rs index ac819bd..0f85579 100644 --- a/upid_rs/src/b32.rs +++ b/upid_rs/src/b32.rs @@ -14,13 +14,13 @@ const RANDO_CHAR_LEN: usize = 13; const VERSION_CHAR_LEN: usize = 1; /// Length of a string-encoded Upid -pub const CHAR_LEN: usize = 26; +const CHAR_LEN: usize = 26; /// 32-character alphabet modified from Crockford's /// Numbers first for sensible sorting, but full lower-case /// latin alphabet so any sensible prefix can be used /// Effectively a mapping from 8 bit byte -> 5 bit int -> base32 character -const ENCODE: &[u8; 32] = b"234567abcdefghijklmnopqrstuvwxyz"; +pub const ENCODE: &[u8; 32] = b"234567abcdefghijklmnopqrstuvwxyz"; /// Speedy O(1) inverse lookup /// base32 char -> ascii byte int -> base32 alphabet index diff --git a/upid_rs/src/lib.rs b/upid_rs/src/lib.rs index fc4fdee..c2fcdbf 100644 --- a/upid_rs/src/lib.rs +++ b/upid_rs/src/lib.rs @@ -1,14 +1,41 @@ //! # upid //! -//! `upid` is the Rust implementation UPID, an alternative to UUID and ULID -//! that includes a useful prefix. +//! Rust implementation of UPID, an alternative to UUID and ULID +//! that includes a useful four-character prefix. //! -//! The code below is derived from the following: -//! https://github.com/dylanhart/ulid-rs +//! It is still stored as a `u128` binary, is still sortable by date, +//! and has 64 bits of randomness. It uses a modified form of +//! Crockford's base32 alphabet that uses lower-case and prioritises +//! 
letters so that any four-letter alpha prefix can be specified. +//! +//! ## Quickstart +//! +//! ```rust +//! use upid::Upid; +//! let upid = Upid::new("user"); +//! +//! let text = upid.to_string(); +//! +//! let same = Upid::from_string(&text); +//! assert_eq!(upid, same.unwrap()); +//! ``` +//! +//! If an invalid prefix is specified, it will be handled as follows: +//! - invalid letters (not in the [`ENCODE`] alphabet) replaced by 'z' +//! - too short will be right-padded with 'z' +//! - too long will be clipped to four characters +//! ```rust +//! use upid::Upid; +//! let upid = Upid::new("00"); +//! assert_eq!(upid.prefix(), "zzzz"); +//! ``` + +// The code below is derived from the following: +// https://github.com/dylanhart/ulid-rs mod b32; -pub use crate::b32::DecodeError; +pub use crate::b32::{DecodeError, ENCODE}; use std::fmt; use std::str::FromStr; @@ -22,6 +49,12 @@ fn now() -> std::time::SystemTime { std::time::SystemTime::now() } +/// A Upid is a unique 128-bit identifier that is sortable and has a useful prefix. +/// +/// It is encoded as a 26 character string using a custom base32 alphabet based +/// on Crockford's, but with lower-case and prioritising letters over numerals. +/// In the binary, the first 40 bits are a unix timestamp with 256ms precision, +/// the next 64 are random bits, and the last 24 are the prefix and version identifier. #[derive(Debug, PartialOrd, Ord, PartialEq, Eq, Hash, Clone, Copy)] pub struct Upid(pub u128); @@ -41,7 +74,7 @@ impl Upid { /// Creates a Upid with the provided prefix and current time (UTC) /// - /// The prefix should only contain lower-case latin alphabet characters. + /// The prefix should contain four lower-case latin alphabet characters. 
/// # Example /// ```rust /// use upid::Upid; @@ -116,23 +149,6 @@ impl Upid { Upid(res) } - /// Gets the datetime of when this Upid was created accurate to around 300ms - /// - /// # Example - /// ```rust - /// use std::time::{SystemTime, Duration}; - /// use upid::Upid; - /// - /// let dt = SystemTime::now(); - /// let upid = Upid::from_prefix_and_datetime("user", dt); - /// - /// assert!(dt + Duration::from_millis(300) >= upid.datetime()); - /// ``` - pub fn datetime(&self) -> SystemTime { - let stamp = self.milliseconds(); - SystemTime::UNIX_EPOCH + Duration::from_millis(stamp) - } - /// Creates a Upid from a Base32 encoded string /// /// # Example @@ -151,6 +167,23 @@ impl Upid { } } + /// Gets the datetime of when this Upid was created accurate to around 256ms + /// + /// # Example + /// ```rust + /// use std::time::{SystemTime, Duration}; + /// use upid::Upid; + /// + /// let dt = SystemTime::now(); + /// let upid = Upid::from_prefix_and_datetime("user", dt); + /// + /// assert!(dt + Duration::from_millis(257) >= upid.datetime()); + /// ``` + pub fn datetime(&self) -> SystemTime { + let stamp = self.milliseconds(); + SystemTime::UNIX_EPOCH + Duration::from_millis(stamp) + } + /// Gets the prefix of this upid /// /// # Example @@ -177,7 +210,7 @@ impl Upid { /// let ms: u128 = 1720568902000; /// let upid = Upid::from_prefix_and_milliseconds("user", ms); /// - /// assert!(ms - u128::from(upid.milliseconds()) < 256); + /// assert!(ms - u128::from(upid.milliseconds()) < 257); /// ``` pub const fn milliseconds(&self) -> u64 { ((self.0 >> 88) << 8) as u64 @@ -268,7 +301,7 @@ impl fmt::Display for Upid { mod tests { use super::*; - const EPS: u128 = 256; + const EPS: u128 = 257; #[test] fn can_into_thing() { @@ -298,7 +331,7 @@ mod tests { fn test_order() { let dt = SystemTime::now(); let upid1 = Upid::from_prefix_and_datetime("user", dt); - let upid2 = Upid::from_prefix_and_datetime("user", dt + Duration::from_millis(300)); + let upid2 = 
Upid::from_prefix_and_datetime("user", dt + Duration::from_millis(EPS as u64)); assert!(upid1 < upid2); } @@ -321,7 +354,7 @@ mod tests { let upid = Upid::from_prefix_and_datetime("user", dt); assert!(upid.datetime() <= dt); - assert!(upid.datetime() + Duration::from_millis(300) >= dt); + assert!(upid.datetime() + Duration::from_millis(EPS as u64) >= dt); } #[test]