diff --git a/Dockerfile b/Dockerfile index 1f3acd1..06be086 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,6 +6,9 @@ RUN apt-get update && \ apt-get install -y --no-install-recommends \ build-essential \ curl \ + apt-transport-https \ + ca-certificates \ + gnupg \ default-libmysqlclient-dev \ libpq-dev \ git \ @@ -14,20 +17,15 @@ RUN apt-get update && \ lsof \ imagemagick \ cron && \ + curl -sSL https://dl.google.com/linux/linux_signing_key.pub | apt-key add - && \ + echo "deb [arch=amd64] https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list && \ + apt-get update && \ + apt-get install -y google-chrome-stable --no-install-recommends && \ apt-get clean && \ rm -rf /var/lib/apt/lists/* \ /tmp/* \ /var/tmp/* -# Create a symlink to what will be the phantomjs exec path -RUN ln -s /phantomjs-2.1.1-linux-x86_64/bin/phantomjs /bin/phantomjs - -# Set up phantomjs, making sure to check the known good sha256sum -RUN curl -sLo phantomjs.tar.bz2 https://bitbucket.org/ariya/phantomjs/downloads/phantomjs-2.1.1-linux-x86_64.tar.bz2 && \ - bash -l -c '[ "`sha256sum phantomjs.tar.bz2 | cut -f1 -d" "`" = "86dd9a4bf4aee45f1a84c9f61cf1947c1d6dce9b9e8d2a907105da7852460d2f" ]' && \ - tar -jxvf phantomjs.tar.bz2 > /dev/null && \ - rm phantomjs.tar.bz2 - # Datasources should be a persistent volume VOLUME /datasources diff --git a/Gemfile b/Gemfile index 56ab40d..6910c83 100644 --- a/Gemfile +++ b/Gemfile @@ -41,8 +41,7 @@ gem 'enumerize' gem 'mini_magick' gem 'delayed_job_active_record' gem 'nokogiri', '>= 1.8.2' -gem 'capybara' -gem 'poltergeist' +gem 'capybara-selenium' gem 'git' gem 'thin' gem 'carrierwave' @@ -57,7 +56,7 @@ gem 'dotenv' gem 'rack-cors' gem 'rest-client' gem 'rack-protection', '~> 1.5.5' - +gem 'chromedriver-helper' # Or Padrino Edge # gem 'padrino', :github => 'padrino/padrino-framework' diff --git a/Gemfile.lock b/Gemfile.lock index 92eb7a8..2f4f921 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -14,14 +14,22 @@ GEM 
minitest (~> 5.1) thread_safe (~> 0.3, >= 0.3.4) tzinfo (~> 1.1) + addressable (2.5.2) + public_suffix (>= 2.0.2, < 4.0) + archive-zip (0.11.0) + io-like (~> 0.3.0) arel (6.0.4) builder (3.2.3) - capybara (2.5.0) - mime-types (>= 1.16) - nokogiri (>= 1.3.3) - rack (>= 1.0.0) - rack-test (>= 0.5.4) - xpath (~> 2.0) + capybara (3.1.0) + addressable + mini_mime (>= 0.1.3) + nokogiri (~> 1.8) + rack (>= 1.6.0) + rack-test (>= 0.6.3) + xpath (~> 3.0) + capybara-selenium (0.0.6) + capybara + selenium-webdriver carrierwave (0.10.0) activemodel (>= 3.2.0) activesupport (>= 3.2.0) @@ -29,7 +37,9 @@ GEM mime-types (>= 1.16) childprocess (0.7.1) ffi (~> 1.0, >= 1.0.11) - cliver (0.3.2) + chromedriver-helper (1.2.0) + archive-zip (~> 0.10) + nokogiri (~> 1.8) coderay (1.1.1) daemons (1.2.3) database_cleaner (1.5.1) @@ -176,6 +186,7 @@ GEM multi_xml (>= 0.5.2) i18n (0.8.6) inflecto (0.0.2) + io-like (0.3.0) ipaddress (0.8.2) json (2.1.0) mail (2.6.6) @@ -183,6 +194,7 @@ GEM method_source (0.8.2) mime-types (2.99.3) mini_magick (4.3.6) + mini_mime (1.0.0) mini_portile2 (2.3.0) minitest (5.10.3) moneta (0.8.1) @@ -229,14 +241,11 @@ GEM padrino-core (= 0.14.1.1) padrino-support (0.14.1.1) pg (0.18.4) - poltergeist (1.17.0) - capybara (~> 2.1) - cliver (~> 0.3.1) - websocket-driver (>= 0.2.0) pry (0.10.4) coderay (~> 1.1.0) method_source (~> 0.8.1) slop (~> 3.4) + public_suffix (3.0.2) rack (1.6.10) rack-cors (0.4.1) rack-parser (0.6.1) @@ -302,20 +311,18 @@ GEM unf_ext (0.0.7.4) watir (6.7.3) selenium-webdriver (~> 3.4, >= 3.4.1) - websocket-driver (0.7.0) - websocket-extensions (>= 0.1.0) - websocket-extensions (0.1.3) xml-simple (1.1.5) - xpath (2.1.0) - nokogiri (~> 1.3) + xpath (3.0.0) + nokogiri (~> 1.8) PLATFORMS ruby DEPENDENCIES activerecord - capybara + capybara-selenium carrierwave + chromedriver-helper database_cleaner delayed_job_active_record dotenv @@ -330,7 +337,6 @@ DEPENDENCIES nokogiri (>= 1.8.2) padrino (>= 0.12.5) pg - poltergeist pry rack-cors rack-parser @@ 
-348,4 +354,4 @@ DEPENDENCIES watir BUNDLED WITH - 1.15.4 + 1.16.1 diff --git a/README.md b/README.md index 727737a..6cbf3c3 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ A RESTful API for retrieving the required fields for and filling out the contact Phantom DC has three major functions: * Looking up form fields provided by all members of congress -* Using [PhantomJS](http://phantomjs.org/) to proxy fill-in a congress member's form such that they need not navigate directly to the congress member's web page +* Using [Chrome Headless](https://chromium.googlesource.com/chromium/src/+/lkgr/headless/README.md) to proxy fill-in a congress member's form so that users need not navigate directly to the congress member's web page * It can return any captcha images and forward the user submitted solution to the `.gov` website This project relies on: @@ -115,10 +115,13 @@ $ cd /vagrant; #### Requirements -On a Debian based system (we're testing against **Ubuntu**) download and install the latest [phantomjs](http://phantomjs.org/) and then run the below `apt-get` command. +On a Debian-based system (we're testing against **Ubuntu**), run the commands below; they add Google's apt repository and install the latest [Chrome](https://www.google.com/chrome/) together with the other system packages. 
```bash -$ apt-get install imagemagick libmysql++-dev libpq-dev git libqt4-dev xvfb +$ wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | sudo apt-key add - +$ sudo sh -c 'echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' +$ sudo apt-get update +$ sudo apt-get install google-chrome-stable imagemagick libmysql++-dev libpq-dev git libqt4-dev xvfb ``` [Install ruby with rvm](http://rvm.io), then diff --git a/app/helpers/form_fill_helper.rb b/app/helpers/form_fill_helper.rb index 0d5adb4..425677b 100644 --- a/app/helpers/form_fill_helper.rb +++ b/app/helpers/form_fill_helper.rb @@ -1,5 +1,5 @@ module FormFillHelper - def save_screenshot_and_store_poltergeist + def save_screenshot screenshot_location = random_screenshot_location @session.save_screenshot(screenshot_location, full: true) url = store_screenshot_from_location screenshot_location @@ -8,7 +8,7 @@ def save_screenshot_and_store_poltergeist url end - def save_captcha_and_store_poltergeist x, y, width, height + def save_captcha x, y, width, height screenshot_location = random_captcha_location @session.save_screenshot(screenshot_location, full: true) crop_screenshot_from_coords screenshot_location, x, y, width, height diff --git a/config/boot.rb b/config/boot.rb index 8cbe0d6..5871ce1 100644 --- a/config/boot.rb +++ b/config/boot.rb @@ -20,20 +20,29 @@ require file end -require 'capybara/poltergeist' +require 'capybara' +require 'selenium/webdriver' Capybara.run_server = false Capybara.default_max_wait_time = 5 -Capybara.register_driver :poltergeist do |app| - options = { - js_errors: false, - phantomjs_options: ['--ssl-protocol=TLSv1'], - url_blacklist: ENV.fetch('URL_BLACKLIST'){ '' }.split(',') - } +# Switch to this driver if you want to watch Capybara in action. +# Can help with debugging. 
+Capybara.register_driver :chrome do |app| + Capybara::Selenium::Driver.new(app, browser: :chrome) +end + +# Headless Chrome driver (set as Capybara.javascript_driver below). TODO: figure out how to re-implement the url_blacklist option that the Poltergeist driver accepted +Capybara.register_driver :headless_chrome do |app| + capabilities = Selenium::WebDriver::Remote::Capabilities.chrome( + chromeOptions: { args: %w(headless no-sandbox disable-gpu window-size=1200,1400) } + ) - Capybara::Poltergeist::Driver.new(app, options) + Capybara::Selenium::Driver.new app, + browser: :chrome, + desired_capabilities: capabilities end +Capybara.javascript_driver = :headless_chrome SmartyStreets.configure do |c| c.auth_id = SMARTY_STREETS_ID diff --git a/docker-compose.yml.example b/docker-compose.yml.example index 4f42835..bd010c1 100644 --- a/docker-compose.yml.example +++ b/docker-compose.yml.example @@ -29,7 +29,6 @@ services: command: sh -c 'QUEUES=notifications bundle exec rake jobs:work' - # Uncomment the following lines if you'd like the members of congress to be # updated from the YAML files every 15 minutes. Otherwise you'll have to # manually update them via `rake phantom-dc:update_git` within the container. diff --git a/lib/form_filler/capybara.rb b/lib/form_filler/capybara.rb index 69001f1..fed7fef 100644 --- a/lib/form_filler/capybara.rb +++ b/lib/form_filler/capybara.rb @@ -14,9 +14,10 @@ def initialize(rep, fields, session: nil) end def fill_out(starting_action = nil, &block) - @session ||= Capybara::Session.new(:poltergeist) - @session.driver.options[:js_errors] = false - @session.driver.options[:phantomjs_options] = ['--ssl-protocol=TLSv1'] + @session ||= Capybara::Session.new(:headless_chrome) + # TODO: do we need to turn off js_errors & TLS versions > 1? + #@session.driver.options[:js_errors] = false + #@session.driver.options[:phantomjs_options] = ['--tls1'] form_fill_log("begin") begin @@ -42,14 +43,14 @@ def fill_out(starting_action = nil, &block) form_fill_log("done: #{success ? 
'passing' : 'failing'} success criteria") success_hash = {success: success} - success_hash[:screenshot] = save_screenshot_and_store_poltergeist if !success + success_hash[:screenshot] = save_screenshot if !success success_hash rescue Exception => e form_fill_log("done: unsuccessful fill (#{e.class})") Raven.extra_context(backtrace: e.backtrace) message = {success: false, message: e.message, exception: e} - message[:screenshot] = save_screenshot_and_store_poltergeist + message[:screenshot] = save_screenshot raise e ensure @session.driver.quit unless rep.persist_session? @@ -63,7 +64,7 @@ def url_for(action) location ||= @session.driver.evaluate_script( 'document.querySelector("' + action.captcha_selector.gsub('"', '\"') + '").getBoundingClientRect();' ) - save_captcha_and_store_poltergeist( + save_captcha( location["left"], location["top"], location["width"], location["height"] ) end diff --git a/lib/form_filler/capybara_action.rb b/lib/form_filler/capybara_action.rb index 3eb404a..c54b33a 100644 --- a/lib/form_filler/capybara_action.rb +++ b/lib/form_filler/capybara_action.rb @@ -60,27 +60,30 @@ def javascript end def uncheck + scroll_to(selector) @session.find(selector).set(false) end def check + scroll_to(selector) @session.find(selector).set(true) end def click_on + scroll_to(selector) @session.find(selector).click end def choose - if options.nil? - @session.find(selector).set(true) - else - @session.find(element_name(@fields[value], selector)).set(true) - end + element = options.nil? ? 
selector : element_name(@fields[value], selector) + scroll_to(element) + @session.find(element).set(true) end def find wait_val = DEFAULT_FIND_WAIT_TIME + scroll_to(selector) + if options options_hash = YAML.load options wait_val = options_hash['wait'] || wait_val @@ -112,6 +115,19 @@ def select end + # Builds a CSS attribute selector for elements whose value attribute equals +value+. def element_name(value, selector = 'option') - selector + '[value="' + value.gsub('"', '\"') + '"]' + # Escape backslashes and single quotes so the value cannot end the single-quoted CSS string. + escaped = value.gsub(/[\\']/) { |char| "\\#{char}" } + %(#{selector}[value='#{escaped}']) + end + + # Best-effort: scrolls the first element matching +element+ (visible or not) into view. + # A missing element is ignored here so the caller's own find can report it. + def scroll_to(element) + node = @session.find(element, visible: false, match: :first) + # The node is passed as a script argument, so no jQuery or selector quoting is needed. + @session.execute_script('arguments[0].scrollIntoView(true)', node) + rescue Capybara::ElementNotFound + nil end end diff --git a/setup_dev.sh b/setup_dev.sh index bbd09fc..b7fb953 100755 --- a/setup_dev.sh +++ b/setup_dev.sh @@ -55,10 +55,12 @@ RACK_ENV=test bundle exec rake ar:create ar:schema:load > /dev/null echo "Loading congress members..." bundle exec rake phantom-dc:clone_git[/home/vagrant] > /dev/null -echo "Setting up PhantomJS..." +echo "Setting up Chrome..." cd /home/vagrant -curl -Lo phantomjs.tar.bz2 https://bitbucket.org/ariya/phantomjs/downloads/phantomjs-1.9.8-linux-x86_64.tar.bz2 -tar -jxvf phantomjs.tar.bz2 > /dev/null +wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | sudo apt-key add - +sudo sh -c 'echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' +sudo apt-get update +sudo apt-get install -y google-chrome-stable EOF ln -s /home/vagrant/phantomjs-1.9.8-linux-x86_64/bin/phantomjs /usr/bin/phantomjs diff --git a/spec/lib/form_filler_capybara.rb b/spec/lib/form_filler_capybara.rb index 9ba24a6..bafa131 100644 --- a/spec/lib/form_filler_capybara.rb +++ b/spec/lib/form_filler_capybara.rb @@ -12,7 +12,7 @@ end describe "with saved session and action" do - let(:session){ Capybara::Session.new(:poltergeist) } + let(:session){ Capybara::Session.new(:headless_chrome) } let(:congress_member){ create :congress_member } let(:fields) { MOCK_VALUES }