diff --git a/ext/fiparse/src/fiparse.rb b/ext/fiparse/src/fiparse.rb index 6f7af3d7..2455bf41 100644 --- a/ext/fiparse/src/fiparse.rb +++ b/ext/fiparse/src/fiparse.rb @@ -177,30 +177,29 @@ def parse_fachinfo_docx(path, iksnr, lang='de') writer.format = :documed writer.extract(Hpricot(doc.to_html(true)), :fi) end - def parse_fachinfo_html(src, format = :documed, title='', styles = nil) + def parse_fachinfo_html(src, title, styles, image_folder) lang = (src =~ /\/de\// ? 'de' : 'fr') if File.exist?(src) src = File.read src end writer = FachinfoHpricot.new - # swissmedicinfo - writer.format = format + writer.format = :swissmedicinfo writer.title = title writer.lang = lang + writer.image_folder = image_folder writer.extract(Hpricot(src), :fi, title, styles) end - def parse_patinfo_html(src, format=:documed, title='', styles = nil, image_folder = nil) + def parse_patinfo_html(src, title, styles, image_folder) lang = (src =~ /\/de\// ? 'de' : 'fr') if File.exist?(src) src = File.read src end writer = PatinfoHpricot.new - writer.format = format + writer.format = :swissmedicinfo writer.title = title writer.lang = lang writer.image_folder = image_folder writer.extract(Hpricot(src), :pi, title, styles) - # swissmedicinfo end module_function :storage= module_function :parse_fachinfo_docx diff --git a/ext/fiparse/src/textinfo_hpricot.rb b/ext/fiparse/src/textinfo_hpricot.rb index de561b77..fe19fb12 100644 --- a/ext/fiparse/src/textinfo_hpricot.rb +++ b/ext/fiparse/src/textinfo_hpricot.rb @@ -295,19 +295,18 @@ def handle_image(ptr, child) if src =~ /^data:image\/(jp[e]?g|gif|png|x-[ew]mf);base64($|,)/ ptr.target.style = child[:style] ext = $1 - folder = @image_folder || ((@title || @name).to_s[0,100]) - name_base = File.basename(folder.to_s.gsub(/®/, '').gsub(/[^A-z0-9]/, '_')).strip - file_name = File.join(name_base + '_files', "#{@image_index.to_s}.#{ext}") + file_name = File.join(@image_folder || @title, "#{@image_index.to_s}.#{ext}") lang = (@lang || 'de') end + dir = File.join('/', 'resources', 'images') else file_name = File.basename(child[:src]. gsub(' ',''). gsub(/\?px=[0-9]*$/, '').strip) lang = (file_name[0].upcase == 'F' ? 'fr' : 'de') unless file_name.empty? + type = (self.is_a?(ODDB::FiParse::FachinfoHpricot) ? 'fi' : 'pi') + dir = File.join('/', 'resources', 'images', type, lang) end - type = (self.is_a?(ODDB::FiParse::FachinfoHpricot) ? 'fi' : 'pi') - dir = File.join('/', 'resources', 'images', type, lang) ptr.target.src = File.join(dir, file_name) end def insert_image(ptr, child) diff --git a/ext/fiparse/test/test_fachinfo_hpricot.rb b/ext/fiparse/test/test_fachinfo_hpricot.rb index 634edee9..5754ff7a 100755 --- a/ext/fiparse/test/test_fachinfo_hpricot.rb +++ b/ext/fiparse/test/test_fachinfo_hpricot.rb @@ -76,6 +76,7 @@ def test_more_line_breaks_in_table ) writer = FachinfoHpricot.new + writer.image_folder = "fiImageFolder_#{__LINE__}" code, chapter = writer.chapter(Hpricot(html).at("table")) @lookandfeel = FlexMock.new 'lookandfeel' @lookandfeel.should_receive(:section_style).and_return { 'section_style' } @@ -719,11 +720,18 @@ def setup return if defined?(@@path) and defined?(@@fachinfo) and @@fachinfo @@path = File.expand_path('data/html/de/fi_62580_novartis_seebris.de.html', File.dirname(__FILE__)) @@writer = FachinfoHpricot.new + @@writer.image_folder = "Seebri_Breezhaler" open(@@path) { |fh| @@fachinfo = @@writer.extract(Hpricot(fh), :fi, MedicInfoName) } end + def test_parse_fachinfo_html_with_image_dir + res = FiParse::parse_fachinfo_html(@@path, titles='dummy', styles = nil, 'fiImageFolder') + # File.open('tst_fi.yaml', 'w+' ) { |out| YAML.dump(res, out, line_width: -1 )} + assert(res.to_yaml.index('/resources/images/fiImageFolder/3.png'), 'Must have image nr 3 in fiImageFolder') + end + def test_name2 assert_equal(MedicInfoName, @@fachinfo.name.to_s) end @@ -753,12 +761,12 @@ def test_firmenlogo assert(@@fachinfo.galenic_form.to_s.index('Firmenlogo')) assert(@@fachinfo.effects.to_s.index('(image)'), 'Wirkungen muss Bild enthalten') assert(@@fachinfo.galenic_form.to_s.index('(image)'), 'galenic_form must have an image') - assert(@@fachinfo.to_yaml.index('/resources/images/fi/de/_Seebri_Breezhaler_files/5.png'), 'Must have image nr 5') - assert(@@fachinfo.to_yaml.index('/resources/images/fi/de/_Seebri_Breezhaler_files/4.png'), 'Must have image nr 4') - assert(@@fachinfo.to_yaml.index('/resources/images/fi/de/_Seebri_Breezhaler_files/3.png'), 'Must have image nr 3') + assert(@@fachinfo.to_yaml.index('/resources/images/Seebri_Breezhaler/5.png'), 'Must have image nr 5') + assert(@@fachinfo.to_yaml.index('/resources/images/Seebri_Breezhaler/4.png'), 'Must have image nr 4') + assert(@@fachinfo.to_yaml.index('/resources/images/Seebri_Breezhaler/3.png'), 'Must have image nr 3') assert(@@fachinfo.galenic_form.to_s.index('(image)'), 'Zusamensetzung muss Bild enthalten') - assert(@@fachinfo.to_yaml.index('/resources/images/fi/de/_Seebri_Breezhaler_files/1.x-wmf'), 'Must have image nr 1') + assert(@@fachinfo.to_yaml.index('/resources/images/Seebri_Breezhaler/1.x-wmf'), 'Must have image nr 1') end def test_iksnrs @@ -808,6 +816,7 @@ def setup return if defined?(@@path) and defined?(@@fachinfo) and @@fachinfo @@path = File.expand_path('data/html/de/fi_62184_cipralex_de.html', File.dirname(__FILE__)) @@writer = FachinfoHpricot.new + @@writer.image_folder = "fiImageFolder_#{__LINE__}" open(@@path) { |fh| @@fachinfo = @@writer.extract(Hpricot(fh), :fi, MedicInfoName, Styles_Cipralex) @@ -892,6 +901,7 @@ def setup return if defined?(@@path) and defined?(@@fachinfo) and @@fachinfo @@path = File.expand_path(HtmlName, File.dirname(__FILE__)) @@writer = FachinfoHpricot.new + @@writer.image_folder = "fiImageFolder_#{__LINE__}" open(@@path) { |fh| @@fachinfo = @@writer.extract(Hpricot(fh), :fi, MedicInfoName, Styles_Isentres) @@ -990,6 +1000,7 @@ def setup return if defined?(@@path) and defined?(@@fachinfo) and @@fachinfo @@path = File.expand_path(HtmlName, File.dirname(__FILE__)) @@writer = FachinfoHpricot.new + @@writer.image_folder = "fiImageFolder_#{__LINE__}" open(@@path) { |fh| @@fachinfo = @@writer.extract(Hpricot(fh), :fi, MedicInfoName, Styles_Clexane) @@ -1075,6 +1086,7 @@ def setup return if defined?(@@path) and defined?(@@fachinfo) and @@fachinfo @@path = File.expand_path(HtmlName, File.dirname(__FILE__)) @@writer = FachinfoHpricot.new + @@writer.image_folder = "fiImageFolder_#{__LINE__}" open(@@path) { |fh| @@fachinfo = @@writer.extract(Hpricot(fh), :fi, MedicInfoName, StylesPonstan) @@ -1114,6 +1126,7 @@ def setup return if defined?(@@path) and defined?(@@fachinfo) and @@fachinfo @@path = File.expand_path(HtmlName, File.dirname(__FILE__)) @@writer = FachinfoHpricot.new + @@writer.image_folder = "fiImageFolder_#{__LINE__}" open(@@path) { |fh| @@fachinfo = @@writer.extract(Hpricot(fh), :fi, MedicInfoName, Styles_Baraclude) @@ -1172,6 +1185,7 @@ def setup return if defined?(@@path) and defined?(@@fachinfo) and @@fachinfo @@path = File.expand_path(HtmlName, File.dirname(__FILE__)) @@writer = ODDB::FiParse::FachinfoHpricot.new + @@writer.image_folder = "fiImageFolder_#{__LINE__}" open(@@path) { |fh| @@fachinfo = @@writer.extract(Hpricot(fh), :fi, MedicInfoName, StylesCoAprovel) } diff --git a/src/plugin/text_info.rb b/src/plugin/text_info.rb index 6aba2a1e..0fb708c6 100644 --- a/src/plugin/text_info.rb +++ b/src/plugin/text_info.rb @@ -57,7 +57,6 @@ def initialize app, opts={:newest => true} @news_log = File.join ODDB.config.log_dir, 'textinfos.txt' @problematic_fi_pi = File.join ODDB.config.log_dir, 'problematic_fi_pi.lst' @title = '' # target fi/pi name - @format = :swissmedicinfo @target = :both @search_term = [] # FI/PI names @@ -371,6 +370,7 @@ def update_patinfo_lang(meta_info, pis) return end end + # return unless @options[:reparse] && @options[:newest] if pis.size != 1 || !pis.values.first LogFile.debug "We expect pis.size to be 1 and valid, but it is #{pis}" @@ -601,7 +601,7 @@ def report res << "\nNo need to add anything to #{Override_file}" else res << "\n#{Override_file}: The #{@missing_override.size} missing overrides are\n" - res << @missing_override.join("\n") + res << @missing_override.collect{ | key, value | "#{key} #{value}"}.join("\n") end File.open(Override_file, 'w+' ) { |out| YAML.dump(@specify_barcode_to_text_info.merge(@missing_override), out, line_width: -1 )} res @@ -824,7 +824,11 @@ def TextInfoPlugin.find_iksnr_in_string(string, iksnr) end def download_swissmedicinfo_xml(file = nil) - return IO.read(file) if file + if file + content = IO.read(file) + LogFile.debug("Read #{content.size} bytes from #{file}") + return content + end setup_default_agent url = "http://download.swissmedicinfo.ch/Accept.aspx?ReturnUrl=%2f" dir = File.join(ODDB.config.data_dir, 'xml') @@ -906,26 +910,22 @@ def match(node_set, iksnr) @notfound << " IKSNR-not found #{iksnr.inspect} : #{type} - #{lang.to_s}." return name end - def extract_image(html_file, name, type, lang, iksnrs) - LogFile.debug "Extracting image to #{name}" + def extract_images(html_file, type, lang, iksnrs, image_folder) if html_file && File.exist?(html_file) - resource_dir = (File.join(ODDB::IMAGE_DIR, type.to_s, lang.to_s)) - FileUtils.mkdir_p(resource_dir) html = File.open(html_file, 'r:utf-8').read if html =~ / textinfo_fi } ) elsif type == :pi # TODO: Do we really catch all the cases when packages have different PIs? if is_same_html && !@options[:reparse] && reg && text_info && text_info.descriptions.keys.index(meta_info.lang) - LogFile.debug "parse_textinfo #{__LINE__} at #{nr_uptodate}: #{type} #{html_name} is_same_html #{html_name}" + LogFile.debug "at #{nr_uptodate}: #{type} #{html_name} is_same_html #{html_name}" @up_to_date_pis += 1 return end - textinfo_pi = @parser.parse_patinfo_html(html_name, @format, meta_info.title, styles, image_folder) + textinfo_pi = @parser.parse_patinfo_html(html_name, meta_info.title, styles, image_subfolder) update_patinfo_lang(meta_info, { meta_info.lang => textinfo_pi } ) if textinfo_pi.respond_to?(:name) textinfo_pi_name = textinfo_pi.name @@ -1305,19 +1307,8 @@ def parse_textinfo(meta_info) end # Extract image to path generated from XML title, # This should be the "correct" path - extract_image(html_name, image_folder, meta_info.type, meta_info.lang, meta_info.authNrs) - # However, ODBA is always buggy, sometimes it just doesn't like saving objects #231 - # There's case which the Html pointed the image to a wrong path, and we cannot update - # the HTML because ODBA's problem, so here we extract image to path generated from the wrong H1 title, - if !textinfo_pi_name.nil? - begin - extract_image(html_name, textinfo_pi_name.to_s[0,100], meta_info.type, meta_info.lang, meta_info.authNrs) - rescue => error - LogFile.debug "#236 #{error}" - # Sometimes it gets file name too long error #236 - end - end - LogFile.debug "parse_textinfo #{__LINE__} at #{nr_uptodate}: #{type} textinfo #{textinfo.to_s.split("\n")[0..2]}" if self.respond_to?(:textinfo) + extract_images(html_name, meta_info.type, meta_info.lang, meta_info.authNrs, File.join(image_base, image_subfolder)) + LogFile.debug "at #{nr_uptodate}: #{type} textinfo #{textinfo.to_s.split("\n")[0..2]}" if self.respond_to?(:textinfo) if reg reg.odba_store textinfo = nil diff --git a/src/state/admin/patinfo_pdf.rb b/src/state/admin/patinfo_pdf.rb index 2c18ce24..7f5bf8da 100644 --- a/src/state/admin/patinfo_pdf.rb +++ b/src/state/admin/patinfo_pdf.rb @@ -64,7 +64,7 @@ def get_patinfo_input(input) newstate end def parse_patinfo(src) - HTML_PARSER.parse_patinfo_html(src) + HTML_PARSER.parse_patinfo_html(src, :documed, '', nil, 'admin') rescue StandardError => e msg = ' (' << e.message << ')' err = create_error(:e_html_not_parsed, :html_upload, msg) diff --git a/test/test_plugin/text_info.rb b/test/test_plugin/text_info.rb index cda6bc20..c503dd8d 100755 --- a/test/test_plugin/text_info.rb +++ b/test/test_plugin/text_info.rb @@ -173,6 +173,7 @@ def setup path_check = File.expand_path(File.join(File.dirname(__FILE__), '../../etc', 'barcode_minitest.yml')) assert_equal(ODDB::TextInfoPlugin::Override_file, path_check) FileUtils.rm_f(path_check, :verbose => true) + FileUtils.rm_f(File.expand_path('../data/'), :verbose => true) pointer = flexmock 'pointer' @aips_download = File.expand_path('../data/xml/Aips_test.xml', File.dirname(__FILE__)) latest_from = File.expand_path('../data/xlsx/Packungen-latest.xlsx', File.dirname(__FILE__)) diff --git a/test/test_plugin/text_info_swissmedicinfo.rb b/test/test_plugin/text_info_swissmedicinfo.rb index c6cf57cc..5be23ef5 100755 --- a/test/test_plugin/text_info_swissmedicinfo.rb +++ b/test/test_plugin/text_info_swissmedicinfo.rb @@ -459,8 +459,7 @@ def test_import_patinfo_tramal_43788 @opts[:target] = :pi @plugin = TextInfoPlugin.new(@app, @opts) agent = @plugin.init_agent - # @app.create_registration('43788') - patinfo = setup_texinfo_mock(:patinfo) + patinfo = Patinfo.new @parser.should_receive(:parse_fachinfo_html).never @parser.should_receive(:parse_patinfo_html).and_return(patinfo).at_least.once @parser.should_receive(:parse_textinfo).never @@ -489,7 +488,7 @@ def test_import_patinfo_tramal_43788 assert(@plugin.import_swissmedicinfo(@opts), 'must be able to run import_swissmedicinfo') end assert(File.exist?(@plugin.problematic_fi_pi), "Datei #{ @plugin.problematic_fi_pi} must exist") - path = File.join(File.dirname(__FILE__), '../../doc/resources/images/pi/de/43788Tramal_Tropfen__L_sung_zum_Einnehmen_files/1.png') + path = File.join(File.dirname(__FILE__), '../../doc/resources/images/pi/de/43788_Tramal__Tr/1.png') assert(File.exist?(path), "Created image file #{path} must exist") @app.registration('15219').packages.size @app.registration('15219').packages.values.find_all { |x| x.patinfo} @@ -497,9 +496,7 @@ def test_import_patinfo_tramal_43788 end def test_import_fachinfo_tramal_43788 - fachinfo = setup_texinfo_mock(:fachinfo) - @parser.should_receive(:parse_patinfo_html).never - @parser.should_receive(:parse_fachinfo_html).at_least.once.and_return { fachinfo } + fachinfo = Fachinfo.new info = { :iksnr => '43788', :title => 'Tramal, Tropfen' } info = flexmock('info 43788') info.should_receive(:iksnr).and_return('43788') @@ -510,6 +507,7 @@ def test_import_fachinfo_tramal_43788 @app.registration('43788').company = Aut_43788 setup_refdata_mock + @parser.should_receive(:parse_fachinfo_html) replace_constant('ODDB::RefdataPlugin::REFDATA_SERVER', @server) do @opts[:target] = :fi assert(@plugin.import_swissmedicinfo(@opts), 'must be able to run import_swissmedicinfo')