forked from evilmartians/fias
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_index.rb
63 lines (48 loc) · 1.53 KB
/
generate_index.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Can be run from a cloned repo:
# DATABASE_URL=postgres://localhost/fias bundle exec ruby generate_index.rb
require 'ruby-progressbar'
require 'sequel'
require 'active_support/core_ext/object/blank'
require 'fias'
# Builds a progress bar that expects +count+ steps.
#
# @param count [Integer] passed to ProgressBar.create as the bar's total
# @return whatever ProgressBar.create returns
def create_bar(count)
  # Layout string is handed to ProgressBar.create unchanged.
  layout = '%a |%B| [%E] (%c/%C) %p%%'

  ProgressBar.create(format: layout, total: count)
end
# Database handle used by the whole script. ENV.fetch raises a KeyError naming
# DATABASE_URL when the variable is not set, instead of passing nil on to
# Sequel.connect.
DB = Sequel.connect(ENV.fetch('DATABASE_URL'))
# Load the pg_array extension; array values are built with Sequel.pg_array
# further down in this script.
DB.extension :pg_array

# Table that receives the search columns, and the dataset over that table.
ADDRESS_OBJECTS_TABLE_NAME = :address_objects
ADDRESS_OBJECTS = DB[ADDRESS_OBJECTS_TABLE_NAME]
# Adds the search-related columns to the address objects table and indexes
# the tokens column.
#
# Columns added: +tokens+ (text[]), +ancestry+ (integer[]) and +forms+
# (text[]). Afterwards a GIN index named +idx_tokens+ is created on +tokens+.
# No existence checks are made here for the columns or the index.
def alter_table
  puts 'Adding tokens field...'

  DB.alter_table(ADDRESS_OBJECTS_TABLE_NAME) do
    add_column :tokens, 'text[]'
    add_column :ancestry, 'integer[]'
    add_column :forms, 'text[]'
  end

  # The table name comes from the same constant as the ALTER TABLE above, so
  # the index always targets the table that just received the columns.
  DB.run %(CREATE INDEX idx_tokens on "#{ADDRESS_OBJECTS_TABLE_NAME}" USING GIN ("tokens");)
end
# Returns the ids of the ancestors of an address object, without the object's
# own id.
#
# Joins the address objects dataset to +address_object_hierarchies+ on
# +ancestor_id+, keeps the rows whose +descendant_id+ equals +id+, and orders
# them by the hierarchy's +generations+ column.
#
# @param id [Integer] id of the address object whose ancestors are wanted
# @return [Array] ancestor ids in +generations+ order, with +id+ removed
def ancestry_for(id)
  hierarchy = :address_object_hierarchies

  # Columns are qualified explicitly rather than with :table__column symbols:
  # Sequel 5 no longer splits such symbols by default, so they would be read
  # as literal column names. Sequel.qualify behaves the same either way.
  # The earlier select(:id) was dropped because select_map(:id) replaces the
  # selection anyway.
  ADDRESS_OBJECTS
    .join(hierarchy, ancestor_id: :id)
    .where(Sequel.qualify(hierarchy, :descendant_id) => id)
    .order(Sequel.qualify(hierarchy, :generations))
    .select_map(:id) - [id]
end
# Fills the tokens, forms and ancestry columns for every address object.
#
# For each row (id and name only) the progress bar is advanced, tokens and
# forms are derived from the name via Fias::Name::Synonyms, the ancestry is
# looked up with ancestry_for, and all three are written back to that row as
# PostgreSQL arrays.
def tokenize
  puts 'Generating tokens for search...'

  records = ADDRESS_OBJECTS
  progress = create_bar(records.count)

  records.select(:id, :name).each do |record|
    progress.increment

    record_id = record[:id]
    title = record[:name]

    search_tokens = Fias::Name::Synonyms.tokens(title)
    name_forms = Fias::Name::Synonyms.forms(title)
    ancestor_ids = ancestry_for(record_id)

    # Column values wrapped as typed PG arrays (text[] / integer[]).
    changes = {
      tokens: Sequel.pg_array(search_tokens, :text),
      forms: Sequel.pg_array(name_forms, :text),
      ancestry: Sequel.pg_array(ancestor_ids, :integer)
    }

    ADDRESS_OBJECTS.where(id: record_id).update(changes)
  end
end
# Script entry point: add the columns and the index first, then populate the
# new columns row by row.
alter_table
tokenize