From 745c60b0bcda62fca5bc901d3fdcf6e7513e48cc Mon Sep 17 00:00:00 2001 From: Peter Haag Date: Sat, 29 Jun 2024 14:18:49 +0200 Subject: [PATCH] Change hash to metrohash for de-duplication --- src/nfpcapd/metrohash.c | 225 ++++++++++++++++++++++++++++++++++++++++ src/nfpcapd/pcaproc.c | 2 +- 2 files changed, 226 insertions(+), 1 deletion(-) create mode 100644 src/nfpcapd/metrohash.c diff --git a/src/nfpcapd/metrohash.c b/src/nfpcapd/metrohash.c new file mode 100644 index 00000000..7ccc7595 --- /dev/null +++ b/src/nfpcapd/metrohash.c @@ -0,0 +1,225 @@ +// metrohash64.cpp +// +// The MIT License (MIT) +// +// Copyright (c) 2015 J. Andrew Rogers +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +#include + +// remember the last SlotSize packets with len and hash +// for duplicate check +#define SlotSize 8 +static struct { + uint32_t len; + uint64_t hash; +} lastPacketStat[SlotSize] = {0}; +static uint32_t packetSlot = 0; + +/* rotate right idiom recognized by compiler*/ +inline static uint64_t rotate_right(uint64_t v, unsigned k) { return (v >> k) | (v << (64 - k)); } + +// unaligned reads, fast and safe on Nehalem and later microarchitectures +inline static uint64_t read_u64(const void *const ptr) { return *(uint64_t *)ptr; } + +inline static uint64_t read_u32(const void *const ptr) { return *(uint32_t *)ptr; } + +inline static uint64_t read_u16(const void *const ptr) { return *(uint16_t *)ptr; } + +inline static uint64_t read_u8(const void *const ptr) { return *(uint8_t *)ptr; } + +static uint64_t metrohash64_1(const uint8_t *key, uint64_t len, uint32_t seed) { + static const uint64_t k0 = 0xC83A91E1; + static const uint64_t k1 = 0x8648DBDB; + static const uint64_t k2 = 0x7BDEC03B; + static const uint64_t k3 = 0x2F5870A5; + + const uint8_t *ptr = key; + const uint8_t *const end = ptr + len; + + uint64_t hash = ((((uint64_t)seed) + k2) * k0) + len; + + if (len >= 32) { + uint64_t v[4]; + v[0] = hash; + v[1] = hash; + v[2] = hash; + v[3] = hash; + + do { + v[0] += read_u64(ptr) * k0; + ptr += 8; + v[0] = rotate_right(v[0], 29) + v[2]; + v[1] += read_u64(ptr) * k1; + ptr += 8; + v[1] = rotate_right(v[1], 29) + v[3]; + v[2] += read_u64(ptr) * k2; + ptr += 8; + v[2] = rotate_right(v[2], 29) + v[0]; + v[3] += read_u64(ptr) * k3; + ptr += 8; + v[3] = rotate_right(v[3], 29) + v[1]; + } while (ptr <= (end - 32)); + + v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 33) * k1; + v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 33) * k0; + v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 33) * k1; + v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0; + hash += v[0] ^ v[1]; + } + + if ((end - ptr) >= 16) { + uint64_t v0 = hash + (read_u64(ptr) * k0); + ptr += 8; + v0 = rotate_right(v0, 33) * k1; + uint64_t v1 = hash + (read_u64(ptr) * k1); + ptr += 8; + v1 = rotate_right(v1, 33) * k2; + v0 ^= rotate_right(v0 * k0, 35) + v1; + v1 ^= rotate_right(v1 * k3, 35) + v0; + hash += v1; + } + + if ((end - ptr) >= 8) { + hash += read_u64(ptr) * k3; + ptr += 8; + hash ^= rotate_right(hash, 33) * k1; + } + + if ((end - ptr) >= 4) { + hash += read_u32(ptr) * k3; + ptr += 4; + hash ^= rotate_right(hash, 15) * k1; + } + + if ((end - ptr) >= 2) { + hash += read_u16(ptr) * k3; + ptr += 2; + hash ^= rotate_right(hash, 13) * k1; + } + + if ((end - ptr) >= 1) { + hash += read_u8(ptr) * k3; + hash ^= rotate_right(hash, 25) * k1; + } + + hash ^= rotate_right(hash, 33); + hash *= k0; + hash ^= rotate_right(hash, 33); + + return hash; +} + +static uint64_t __attribute__((__unused__)) metrohash64_2(const uint8_t *key, uint64_t len, uint32_t seed) { + static const uint64_t k0 = 0xD6D018F5; + static const uint64_t k1 = 0xA2AA033B; + static const uint64_t k2 = 0x62992FC1; + static const uint64_t k3 = 0x30BC5B29; + + const uint8_t *ptr = key; + const uint8_t *const end = ptr + len; + + uint64_t hash = ((((uint64_t)seed) + k2) * k0) + len; + + if (len >= 32) { + uint64_t v[4]; + v[0] = hash; + v[1] = hash; + v[2] = hash; + v[3] = hash; + + do { + v[0] += read_u64(ptr) * k0; + ptr += 8; + v[0] = rotate_right(v[0], 29) + v[2]; + v[1] += read_u64(ptr) * k1; + ptr += 8; + v[1] = rotate_right(v[1], 29) + v[3]; + v[2] += read_u64(ptr) * k2; + ptr += 8; + v[2] = rotate_right(v[2], 29) + v[0]; + v[3] += read_u64(ptr) * k3; + ptr += 8; + v[3] = rotate_right(v[3], 29) + v[1]; + } while (ptr <= (end - 32)); + + v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 30) * k1; + v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 30) * k0; + v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 30) * k1; + v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 30) * k0; + hash += v[0] ^ v[1]; + } + + if ((end - ptr) >= 16) { + uint64_t v0 = hash + (read_u64(ptr) * k2); + ptr += 8; + v0 = rotate_right(v0, 29) * k3; + uint64_t v1 = hash + (read_u64(ptr) * k2); + ptr += 8; + v1 = rotate_right(v1, 29) * k3; + v0 ^= rotate_right(v0 * k0, 34) + v1; + v1 ^= rotate_right(v1 * k3, 34) + v0; + hash += v1; + } + + if ((end - ptr) >= 8) { + hash += read_u64(ptr) * k3; + ptr += 8; + hash ^= rotate_right(hash, 36) * k1; + } + + if ((end - ptr) >= 4) { + hash += read_u32(ptr) * k3; + ptr += 4; + hash ^= rotate_right(hash, 15) * k1; + } + + if ((end - ptr) >= 2) { + hash += read_u16(ptr) * k3; + ptr += 2; + hash ^= rotate_right(hash, 15) * k1; + } + + if ((end - ptr) >= 1) { + hash += read_u8(ptr) * k3; + hash ^= rotate_right(hash, 23) * k1; + } + + hash ^= rotate_right(hash, 28); + hash *= k0; + hash ^= rotate_right(hash, 29); + + return hash; +} + +static int is_duplicate(const uint8_t *data_ptr, const uint32_t len) { + uint64_t hash = metrohash64_1(data_ptr, len, 0); + + for (int i = 0; i < SlotSize; i++) { + if (lastPacketStat[i].len == len && lastPacketStat[i].hash == hash) return 1; + } + + // not found - add to next slot round robin + lastPacketStat[packetSlot].len = len; + lastPacketStat[packetSlot].hash = hash; + packetSlot = (packetSlot + 1) & (SlotSize - 1); + return 0; +} // End of is_duplicate \ No newline at end of file diff --git a/src/nfpcapd/pcaproc.c b/src/nfpcapd/pcaproc.c index 68254e76..f5a8d70f 100644 --- a/src/nfpcapd/pcaproc.c +++ b/src/nfpcapd/pcaproc.c @@ -109,7 +109,7 @@ static inline void ProcessICMPFlow(packetParam_t *packetParam, struct FlowNode * static inline void ProcessOtherFlow(packetParam_t *packetParam, struct FlowNode *NewNode, void *payload, size_t payloadSize); -#include "murmurhash.c" +#include "metrohash.c" pcapfile_t *OpenNewPcapFile(pcap_t *p, char *filename, pcapfile_t *pcapfile) { if (!pcapfile) {