From f9e5b335305b1cab0f4c7675679b3d7d17c7efd0 Mon Sep 17 00:00:00 2001 From: parabala Date: Tue, 22 Oct 2024 14:57:16 +0800 Subject: [PATCH 1/2] =?UTF-8?q?=E5=A2=9E=E5=8A=A0hash=E6=96=B9=E6=B3=95bkd?= =?UTF-8?q?rsubh?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sharding/src/hash/bkdrsub.rs | 22 ++++++++++++++++------ sharding/src/hash/mod.rs | 7 ++++--- tests/src/bkdrsub.rs | 17 ++++++++++++++++- tests/src/hash_test.rs | 18 ++++++++++++++++-- 4 files changed, 52 insertions(+), 12 deletions(-) diff --git a/sharding/src/hash/bkdrsub.rs b/sharding/src/hash/bkdrsub.rs index 46dedbd7d..0e932d865 100644 --- a/sharding/src/hash/bkdrsub.rs +++ b/sharding/src/hash/bkdrsub.rs @@ -1,14 +1,24 @@ ///
用于支持key中部分字符串做hashkey,且hash算法类似bkdr的hash算法;
-/// key格式:abc#123_456,hashkey则是‘#’之后、‘_’之前的内容;
-/// 格式注意:'#'需要存在,否则hashkey为空;'_'可能不存在,如果'_'不存在,则'#'之后的全部是hashkey
+/// hashkey是‘#’之后、$delimiter之前的内容; +/// 例如key:abc#123_456 +/// 如果$delimiter是'_',则hashkey是123; +/// 如果$delimiter是'^',则hashkey是123_456; +/// 格式注意:'#'需要存在,否则hashkey为空;$delimiter可能不存在,如果$delimiter不存在,则'#'之后的全部是hashkey #[derive(Clone, Default, Debug)] -pub struct Bkdrsub; +pub struct BkdrsubDelimiter { + delimiter: u8, +} +impl BkdrsubDelimiter { + pub fn from(delimiter: u8) -> Self { + Self { delimiter } + } +} -impl super::Hash for Bkdrsub { +impl super::Hash for BkdrsubDelimiter { fn hash(&self, key: &S) -> i64 { const SEED: i32 = 131; // 31 131 1313 13131 131313 etc.. const START_CHAR_VAL: u8 = '#' as u8; - const END_CHAR_VAL: u8 = '_' as u8; + let end_char_val: u8 = self.delimiter; let mut hash = 0_i32; let mut found_start_char = false; @@ -17,7 +27,7 @@ impl super::Hash for Bkdrsub { let c = key.at(i); if found_start_char { // hashkey 计算 - if c != END_CHAR_VAL { + if c != end_char_val { hash = hash.wrapping_mul(SEED).wrapping_add(c as i32); continue; } diff --git a/sharding/src/hash/mod.rs b/sharding/src/hash/mod.rs index 3dc101178..d2c8da036 100644 --- a/sharding/src/hash/mod.rs +++ b/sharding/src/hash/mod.rs @@ -25,7 +25,7 @@ pub use rawsuffix::RawSuffix; pub mod crc; -use self::{bkdrsub::Bkdrsub, crc64::Crc64, fnv1::Fnv1F32, fnv1::Fnv1aF64}; +use self::{bkdrsub::BkdrsubDelimiter, crc64::Crc64, fnv1::Fnv1F32, fnv1::Fnv1aF64}; use enum_dispatch::enum_dispatch; // 占位hash,主要用于兼容服务框架,供mq等业务使用 @@ -69,7 +69,7 @@ pub enum Hasher { Padding(Padding), Raw(Raw), // redis raw, long型字符串直接用数字作为hash Bkdr(Bkdr), - Bkdrsub(Bkdrsub), + BkdrsubDelimiter(BkdrsubDelimiter), BkdrAbsCrc32(BkdrAbsCrc32), // 混合三种hash:先bkdr,再abs,最后进行crc32计算 Crc32(Crc32), Crc32Short(Crc32Short), // mc short crc32 @@ -120,7 +120,8 @@ impl Hasher { return match alg_parts[0] { HASH_PADDING => Self::Padding(Default::default()), "bkdr" => Self::Bkdr(Default::default()), - "bkdrsub" => Self::Bkdrsub(Default::default()), + "bkdrsub" => Self::BkdrsubDelimiter(BkdrsubDelimiter::from('_' as u8)), + "bkdrsubh" => Self::BkdrsubDelimiter(BkdrsubDelimiter::from('^' as u8)), "bkdrabscrc32" => Self::BkdrAbsCrc32(Default::default()), "raw" => Self::Raw(Raw::from(Default::default())), "crc32" => Self::Crc32(Default::default()), diff --git a/tests/src/bkdrsub.rs b/tests/src/bkdrsub.rs index 732aa4aec..aa28a6443 100644 --- a/tests/src/bkdrsub.rs +++ b/tests/src/bkdrsub.rs @@ -20,9 +20,24 @@ fn bkdrsub_one() { let dist = Distribute::from("modrange-8640", &servers); let dist_idx = dist.index(hash1); - println!("key:{}, hash:{}, idx:{}", key1, hash1, dist_idx); + println!("bkdrsub key:{}, hash:{}, idx:{}", key1, hash1, dist_idx); + assert_eq!(dist_idx, 905) } +#[test] +fn bkdrsubh_one() { + let hasher = Hasher::from("bkdrsubh"); + + let key1 = "otdn#1042015:carSubBrand^e4ab74c125e9e95edad691ffe9820118"; + let hash1 = hasher.hash(&key1.as_bytes()); + let shards = 1080; + let servers = vec!["padding".to_string(); shards]; + let dist = Distribute::from("modrange-8640", &servers); + let dist_idx = dist.index(hash1); + + println!("bkdrsubh key:{}, hash:{}, idx:{}", key1, hash1, dist_idx); + assert_eq!(dist_idx, 905) +} // TODO 临时批量文件的hash、dist校验测试,按需打开 #[test] fn bkdrsub_dist() { diff --git a/tests/src/hash_test.rs b/tests/src/hash_test.rs index 0d3141aed..b0e487437 100644 --- a/tests/src/hash_test.rs +++ b/tests/src/hash_test.rs @@ -70,12 +70,26 @@ mod hash_test { let key1 = "abc#12345678901234567"; let hash1 = hasher.hash(&key1.as_bytes()); - println!("key:{}, hash:{}", key1, hash1); + println!("bkdrsub key:{}, hash:{}", key1, hash1); assert_eq!(hash1, 1108486745); let key2 = "abc#12345678901234567_123456"; let hash2 = hasher.hash(&key2.as_bytes()); - println!("key:{}, hash:{}", key2, hash2); + println!("bkdrsub key:{}, hash:{}", key2, hash2); + assert_eq!(hash2, 1108486745); + } + #[test] + fn bkdrsubh() { + let hasher = Hasher::from("bkdrsubh"); + + let key1 = "abc#12345678901234567"; + let hash1 = hasher.hash(&key1.as_bytes()); + println!("bkdrsubh key:{}, hash:{}", key1, hash1); + assert_eq!(hash1, 1108486745); + + let key2 = "abc#12345678901234567^123456"; + let hash2 = hasher.hash(&key2.as_bytes()); + println!("bkdrsubh key:{}, hash:{}", key2, hash2); assert_eq!(hash2, 1108486745); } From c0952a59206705bbb2536fc28bc381c4bf1f6668 Mon Sep 17 00:00:00 2001 From: parabala Date: Tue, 22 Oct 2024 17:39:15 +0800 Subject: [PATCH 2/2] =?UTF-8?q?=E6=A0=B9=E6=8D=AE=E8=AF=84=E5=AE=A1?= =?UTF-8?q?=E6=84=8F=E8=A7=81=EF=BC=8C=E4=B8=BA=E5=A2=9E=E5=8A=A0=E5=8C=BA?= =?UTF-8?q?=E5=88=86=E5=BA=A6=EF=BC=8C=E4=BF=AE=E6=94=B9bkdrsubh=E4=B8=BAb?= =?UTF-8?q?kdrsubhat?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- sharding/src/hash/mod.rs | 2 +- tests/src/bkdrsub.rs | 6 +++--- tests/src/hash_test.rs | 8 ++++---- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sharding/src/hash/mod.rs b/sharding/src/hash/mod.rs index d2c8da036..fb2e2dd6e 100644 --- a/sharding/src/hash/mod.rs +++ b/sharding/src/hash/mod.rs @@ -121,7 +121,7 @@ impl Hasher { HASH_PADDING => Self::Padding(Default::default()), "bkdr" => Self::Bkdr(Default::default()), "bkdrsub" => Self::BkdrsubDelimiter(BkdrsubDelimiter::from('_' as u8)), - "bkdrsubh" => Self::BkdrsubDelimiter(BkdrsubDelimiter::from('^' as u8)), + "bkdrsubhat" => Self::BkdrsubDelimiter(BkdrsubDelimiter::from('^' as u8)), "bkdrabscrc32" => Self::BkdrAbsCrc32(Default::default()), "raw" => Self::Raw(Raw::from(Default::default())), "crc32" => Self::Crc32(Default::default()), diff --git a/tests/src/bkdrsub.rs b/tests/src/bkdrsub.rs index aa28a6443..4e15b508d 100644 --- a/tests/src/bkdrsub.rs +++ b/tests/src/bkdrsub.rs @@ -24,8 +24,8 @@ fn bkdrsub_one() { assert_eq!(dist_idx, 905) } #[test] -fn bkdrsubh_one() { - let hasher = Hasher::from("bkdrsubh"); +fn bkdrsubhat_one() { + let hasher = Hasher::from("bkdrsubhat"); let key1 = "otdn#1042015:carSubBrand^e4ab74c125e9e95edad691ffe9820118"; let hash1 = hasher.hash(&key1.as_bytes()); @@ -35,7 +35,7 @@ fn bkdrsubh_one() { let dist = Distribute::from("modrange-8640", &servers); let dist_idx = dist.index(hash1); - println!("bkdrsubh key:{}, hash:{}, idx:{}", key1, hash1, dist_idx); + println!("bkdrsubhat key:{}, hash:{}, idx:{}", key1, hash1, dist_idx); assert_eq!(dist_idx, 905) } // TODO 临时批量文件的hash、dist校验测试,按需打开 diff --git a/tests/src/hash_test.rs b/tests/src/hash_test.rs index b0e487437..18fd11262 100644 --- a/tests/src/hash_test.rs +++ b/tests/src/hash_test.rs @@ -79,17 +79,17 @@ mod hash_test { assert_eq!(hash2, 1108486745); } #[test] - fn bkdrsubh() { - let hasher = Hasher::from("bkdrsubh"); + fn bkdrsubhat() { + let hasher = Hasher::from("bkdrsubhat"); let key1 = "abc#12345678901234567"; let hash1 = hasher.hash(&key1.as_bytes()); - println!("bkdrsubh key:{}, hash:{}", key1, hash1); + println!("bkdrsubhat key:{}, hash:{}", key1, hash1); assert_eq!(hash1, 1108486745); let key2 = "abc#12345678901234567^123456"; let hash2 = hasher.hash(&key2.as_bytes()); - println!("bkdrsubh key:{}, hash:{}", key2, hash2); + println!("bkdrsubhat key:{}, hash:{}", key2, hash2); assert_eq!(hash2, 1108486745); }