From 43437e67550070b8a2990a642e3d6dcb8b0635d0 Mon Sep 17 00:00:00 2001 From: Shizuo Fujita Date: Fri, 10 Jan 2025 12:30:23 +0900 Subject: [PATCH] output: use Hash.new {|hash, key| ... } for default value (#4764) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit **Which issue(s) this PR fixes**: Fixes # **What this PR does / why we need it**: This removes the evaluation of `meta_and_data[meta] ||= []` on every iteration, which improves performance slightly. The results of actually reading a 10 GB file are as follows. * Before: 67.50262675 sec * After: 63.862033033 sec Here is the micro-benchmark code. ```ruby require 'bundler/inline' gemfile do source 'https://rubygems.org' gem 'benchmark-ips' end Benchmark.ips do |x| h1 = {} h2 = Hash.new { |h, k| h[k] = [] } key = :foo x.report("1") { h1[key] ||= [] h1[key][0] = 1 } x.report("2") { h2[key][0] = 1 } x.compare! end ``` ``` ruby 3.4.1 (2024-12-25 revision 48d4efcb85) +PRISM [x86_64-linux] Warming up -------------------------------------- 1 1.776M i/100ms 2 2.239M i/100ms Calculating ------------------------------------- 1 18.426M (± 1.5%) i/s (54.27 ns/i) - 92.366M in 5.013927s 2 23.545M (± 0.8%) i/s (42.47 ns/i) - 118.653M in 5.039795s Comparison: 2: 23544712.1 i/s 1: 18426072.0 i/s - 1.28x slower ``` **Docs Changes**: **Release Note**: Signed-off-by: Shizuo Fujita --- lib/fluent/plugin/output.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/fluent/plugin/output.rb b/lib/fluent/plugin/output.rb index c600e35287..d2107330ba 100644 --- a/lib/fluent/plugin/output.rb +++ b/lib/fluent/plugin/output.rb @@ -1036,17 +1036,17 @@ def generate_format_proc # iteration of event stream, and it should be done just once even if total event stream size # is bigger than chunk_limit_size because of performance. 
def handle_stream_with_custom_format(tag, es, enqueue: false) - meta_and_data = {} + meta_and_data = Hash.new { |h, k| h[k] = [] } records = 0 es.each(unpacker: Fluent::MessagePackFactory.thread_local_msgpack_unpacker) do |time, record| meta = metadata(tag, time, record) - meta_and_data[meta] ||= [] res = format(tag, time, record) if res meta_and_data[meta] << res records += 1 end end + meta_and_data.default_proc = nil write_guard do @buffer.write(meta_and_data, enqueue: enqueue) end @@ -1057,14 +1057,14 @@ def handle_stream_with_custom_format(tag, es, enqueue: false) def handle_stream_with_standard_format(tag, es, enqueue: false) format_proc = generate_format_proc - meta_and_data = {} + meta_and_data = Hash.new { |h, k| h[k] = MultiEventStream.new } records = 0 es.each(unpacker: Fluent::MessagePackFactory.thread_local_msgpack_unpacker) do |time, record| meta = metadata(tag, time, record) - meta_and_data[meta] ||= MultiEventStream.new meta_and_data[meta].add(time, record) records += 1 end + meta_and_data.default_proc = nil write_guard do @buffer.write(meta_and_data, format: format_proc, enqueue: enqueue) end