Skip to content

Commit

Permalink
chore: refactor text sanitization in hex data parsing (#2117)
Browse files Browse the repository at this point in the history
  • Loading branch information
rabbitz authored Aug 2, 2024
1 parent 8922012 commit ac9ebcc
Showing 1 changed file with 16 additions and 20 deletions.
36 changes: 16 additions & 20 deletions app/utils/ckb_utils.rb
Original file line number Diff line number Diff line change
Expand Up @@ -614,27 +614,17 @@ def self.hexes_to_bins_sql(hex_strings)
end

# Extracts the name and description of a spore cluster from hex-encoded cell data.
#
# The first two characters of +hex_data+ are dropped (presumably a "0x" prefix —
# confirm against callers). In the remaining hex string, characters 8..15 and
# 16..23 hold the byte offsets of the name and description fields. Each field is
# read after skipping its first 8 hex characters (presumably a 4-byte length
# header — TODO confirm against the cluster data schema).
#
# @param hex_data [String] hex-encoded cluster data
# @return [Hash] +{ name:, description: }+ with sanitized strings, or both values
#   +nil+ when the payload cannot be parsed
def self.parse_spore_cluster_data(hex_data)
  data = hex_data.slice(2..-1)

  # Offsets are stored in bytes; doubling converts them to hex-character positions.
  # NOTE(review): the "l" directive decodes a native-endian signed 32-bit integer.
  name_offset = [data.slice(8, 8)].pack("H*").unpack1("l") * 2
  description_offset = [data.slice(16, 8)].pack("H*").unpack1("l") * 2

  name = [data.slice(name_offset + 8..description_offset - 1)].pack("H*")
  description = [data.slice(description_offset + 8..-1)].pack("H*")

  # pack("H*") yields a binary string, so this limit is measured in bytes.
  # Truncation happens before sanitization so a multi-byte character split at
  # the cut is discarded by sanitize_string instead of leaking invalid bytes.
  name = "#{name[0, 97]}..." if name.length > 100

  { name: sanitize_string(name), description: sanitize_string(description) }
rescue StandardError => e
  # Best-effort parsing: malformed data is reported and yields an empty result.
  puts "Error parsing spore cluster data: #{e.message}"
  { name: nil, description: nil }
end

def self.parse_spore_cell_data(hex_data)
Expand Down Expand Up @@ -772,4 +762,10 @@ def self.parse_unique_cell(hex_data)
symbol = [data.slice!(0, symbol_len * 2)].pack("H*")
{ decimal:, name: name.presence, symbol: symbol.presence }
end

# Returns a clean UTF-8 copy of +str+ that is safe to store or display.
#
# The bytes of +str+ are reinterpreted as UTF-8, invalid byte sequences are
# dropped, and control characters plus U+2028 (line separator), U+2029
# (paragraph separator) and U+200B (zero-width space) are removed.
#
# The input is duplicated first, so the caller's string is never mutated and
# frozen strings are accepted.
#
# @param str [String] raw text, in any encoding
# @return [String] a new, valid UTF-8 string
def self.sanitize_string(str)
  utf8 = str.dup.force_encoding(Encoding::UTF_8)
  # scrub("") deletes invalid byte sequences, so the gsub below always sees
  # a valid string and cannot raise on a broken encoding.
  utf8.scrub("").gsub(/[[:cntrl:]\u2028\u2029\u200B]/, "")
end
end

0 comments on commit ac9ebcc

Please sign in to comment.