Skip to content

Commit

Permalink
Replace PhantomJS with Chrome Headless
Browse files Browse the repository at this point in the history
  • Loading branch information
k-stewart committed May 21, 2018
1 parent 84204e9 commit 7ebb2d5
Show file tree
Hide file tree
Showing 11 changed files with 87 additions and 61 deletions.
16 changes: 7 additions & 9 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ RUN apt-get update && \
apt-get install -y --no-install-recommends \
build-essential \
curl \
apt-transport-https \
ca-certificates \
gnupg \
default-libmysqlclient-dev \
libpq-dev \
git \
Expand All @@ -14,20 +17,15 @@ RUN apt-get update && \
lsof \
imagemagick \
cron && \
curl -sSL https://dl.google.com/linux/linux_signing_key.pub | apt-key add - && \
echo "deb [arch=amd64] https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list && \
apt-get update && \
apt-get install -y google-chrome-stable --no-install-recommends && \
apt-get clean && \
rm -rf /var/lib/apt/lists/* \
/tmp/* \
/var/tmp/*

# Create a symlink to what will be the phantomjs exec path
RUN ln -s /phantomjs-2.1.1-linux-x86_64/bin/phantomjs /bin/phantomjs

# Set up phantomjs, making sure to check the known good sha256sum
RUN curl -sLo phantomjs.tar.bz2 https://bitbucket.org/ariya/phantomjs/downloads/phantomjs-2.1.1-linux-x86_64.tar.bz2 && \
bash -l -c '[ "`sha256sum phantomjs.tar.bz2 | cut -f1 -d" "`" = "86dd9a4bf4aee45f1a84c9f61cf1947c1d6dce9b9e8d2a907105da7852460d2f" ]' && \
tar -jxvf phantomjs.tar.bz2 > /dev/null && \
rm phantomjs.tar.bz2

# Datasources should be a persistent volume
VOLUME /datasources

Expand Down
5 changes: 2 additions & 3 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,7 @@ gem 'enumerize'
gem 'mini_magick'
gem 'delayed_job_active_record'
gem 'nokogiri', '>= 1.8.2'
gem 'capybara'
gem 'poltergeist'
gem 'capybara-selenium'
gem 'git'
gem 'thin'
gem 'carrierwave'
Expand All @@ -57,7 +56,7 @@ gem 'dotenv'
gem 'rack-cors'
gem 'rest-client'
gem 'rack-protection', '~> 1.5.5'

gem 'chromedriver-helper'

# Or Padrino Edge
# gem 'padrino', :github => 'padrino/padrino-framework'
Expand Down
44 changes: 25 additions & 19 deletions Gemfile.lock
Original file line number Diff line number Diff line change
Expand Up @@ -14,22 +14,32 @@ GEM
minitest (~> 5.1)
thread_safe (~> 0.3, >= 0.3.4)
tzinfo (~> 1.1)
addressable (2.5.2)
public_suffix (>= 2.0.2, < 4.0)
archive-zip (0.11.0)
io-like (~> 0.3.0)
arel (6.0.4)
builder (3.2.3)
capybara (2.5.0)
mime-types (>= 1.16)
nokogiri (>= 1.3.3)
rack (>= 1.0.0)
rack-test (>= 0.5.4)
xpath (~> 2.0)
capybara (3.1.0)
addressable
mini_mime (>= 0.1.3)
nokogiri (~> 1.8)
rack (>= 1.6.0)
rack-test (>= 0.6.3)
xpath (~> 3.0)
capybara-selenium (0.0.6)
capybara
selenium-webdriver
carrierwave (0.10.0)
activemodel (>= 3.2.0)
activesupport (>= 3.2.0)
json (>= 1.7)
mime-types (>= 1.16)
childprocess (0.7.1)
ffi (~> 1.0, >= 1.0.11)
cliver (0.3.2)
chromedriver-helper (1.2.0)
archive-zip (~> 0.10)
nokogiri (~> 1.8)
coderay (1.1.1)
daemons (1.2.3)
database_cleaner (1.5.1)
Expand Down Expand Up @@ -176,13 +186,15 @@ GEM
multi_xml (>= 0.5.2)
i18n (0.8.6)
inflecto (0.0.2)
io-like (0.3.0)
ipaddress (0.8.2)
json (2.1.0)
mail (2.6.6)
mime-types (>= 1.16, < 4)
method_source (0.8.2)
mime-types (2.99.3)
mini_magick (4.3.6)
mini_mime (1.0.0)
mini_portile2 (2.3.0)
minitest (5.10.3)
moneta (0.8.1)
Expand Down Expand Up @@ -229,14 +241,11 @@ GEM
padrino-core (= 0.14.1.1)
padrino-support (0.14.1.1)
pg (0.18.4)
poltergeist (1.17.0)
capybara (~> 2.1)
cliver (~> 0.3.1)
websocket-driver (>= 0.2.0)
pry (0.10.4)
coderay (~> 1.1.0)
method_source (~> 0.8.1)
slop (~> 3.4)
public_suffix (3.0.2)
rack (1.6.10)
rack-cors (0.4.1)
rack-parser (0.6.1)
Expand Down Expand Up @@ -302,20 +311,18 @@ GEM
unf_ext (0.0.7.4)
watir (6.7.3)
selenium-webdriver (~> 3.4, >= 3.4.1)
websocket-driver (0.7.0)
websocket-extensions (>= 0.1.0)
websocket-extensions (0.1.3)
xml-simple (1.1.5)
xpath (2.1.0)
nokogiri (~> 1.3)
xpath (3.0.0)
nokogiri (~> 1.8)

PLATFORMS
ruby

DEPENDENCIES
activerecord
capybara
capybara-selenium
carrierwave
chromedriver-helper
database_cleaner
delayed_job_active_record
dotenv
Expand All @@ -330,7 +337,6 @@ DEPENDENCIES
nokogiri (>= 1.8.2)
padrino (>= 0.12.5)
pg
poltergeist
pry
rack-cors
rack-parser
Expand All @@ -348,4 +354,4 @@ DEPENDENCIES
watir

BUNDLED WITH
1.15.4
1.16.1
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ A RESTful API for retrieving the required fields for and filling out the contact
Phantom DC has three major functions:

* Looking up form fields provided by all members of congress
* Using [PhantomJS](http://phantomjs.org/) to proxy fill-in a congress member's form such that they need not navigate directly to the congress member's web page
* Using [Chrome Headless](https://chromium.googlesource.com/chromium/src/+/lkgr/headless/README.md) to fill in a congress member's form by proxy, so that users need not navigate directly to the congress member's web page
* It can return any captcha images and forward the user submitted solution to the `.gov` website

This project relies on:
Expand Down Expand Up @@ -115,10 +115,13 @@ $ cd /vagrant;

#### Requirements

On a Debian based system (we're testing against **Ubuntu**) download and install the latest [phantomjs](http://phantomjs.org/) and then run the below `apt-get` command.
On a Debian-based system (we're testing against **Ubuntu**), run the commands below to install the latest [Chrome](https://www.google.com/chrome/) along with the other required system packages.

```bash
$ apt-get install imagemagick libmysql++-dev libpq-dev git libqt4-dev xvfb
$ wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | sudo apt-key add -
$ sudo sh -c 'echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list'
$ sudo apt-get update
$ apt-get install google-chrome-stable imagemagick libmysql++-dev libpq-dev git libqt4-dev xvfb
```

[Install ruby with rvm](http://rvm.io), then
Expand Down
4 changes: 2 additions & 2 deletions app/helpers/form_fill_helper.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
module FormFillHelper
def save_screenshot_and_store_poltergeist
def save_screenshot
screenshot_location = random_screenshot_location
@session.save_screenshot(screenshot_location, full: true)
url = store_screenshot_from_location screenshot_location
Expand All @@ -8,7 +8,7 @@ def save_screenshot_and_store_poltergeist
url
end

def save_captcha_and_store_poltergeist x, y, width, height
def save_captcha x, y, width, height
screenshot_location = random_captcha_location
@session.save_screenshot(screenshot_location, full: true)
crop_screenshot_from_coords screenshot_location, x, y, width, height
Expand Down
25 changes: 17 additions & 8 deletions config/boot.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,29 @@
require file
end

require 'capybara/poltergeist'
require 'capybara'
require 'selenium/webdriver'
Capybara.run_server = false
Capybara.default_max_wait_time = 5

Capybara.register_driver :poltergeist do |app|
options = {
js_errors: false,
phantomjs_options: ['--ssl-protocol=TLSv1'],
url_blacklist: ENV.fetch('URL_BLACKLIST'){ '' }.split(',')
}
# Switch to this driver if you want to watch Capybara in action.
# Can help with debugging.
Capybara.register_driver :chrome do |app|
Capybara::Selenium::Driver.new(app, browser: :chrome)
end

# TODO: figure out how to re-implement the url_blacklist
Capybara.register_driver :headless_chrome do |app|
capabilities = Selenium::WebDriver::Remote::Capabilities.chrome(
chromeOptions: { args: %w(headless no-sandbox disable-gpu window-size=1200,1400) }
)

Capybara::Poltergeist::Driver.new(app, options)
Capybara::Selenium::Driver.new app,
browser: :chrome,
desired_capabilities: capabilities
end

Capybara.javascript_driver = :headless_chrome

SmartyStreets.configure do |c|
c.auth_id = SMARTY_STREETS_ID
Expand Down
1 change: 0 additions & 1 deletion docker-compose.yml.example
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ services:

command: sh -c 'QUEUES=notifications bundle exec rake jobs:work'


# Uncomment the following lines if you'd like the members of congress to be
# updated from the YAML files every 15 minutes. Otherwise you'll have to
# manually update them via `rake phantom-dc:update_git` within the container.
Expand Down
13 changes: 7 additions & 6 deletions lib/form_filler/capybara.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,10 @@ def initialize(rep, fields, session: nil)
end

def fill_out(starting_action = nil, &block)
@session ||= Capybara::Session.new(:poltergeist)
@session.driver.options[:js_errors] = false
@session.driver.options[:phantomjs_options] = ['--ssl-protocol=TLSv1']
@session ||= Capybara::Session.new(:headless_chrome)
# TODO: do we need to turn off js_errors & TLS versions > 1?
#@session.driver.options[:js_errors] = false
#@session.driver.options[:phantomjs_options] = ['--tls1']
form_fill_log("begin")

begin
Expand All @@ -42,14 +43,14 @@ def fill_out(starting_action = nil, &block)
form_fill_log("done: #{success ? 'passing' : 'failing'} success criteria")

success_hash = {success: success}
success_hash[:screenshot] = save_screenshot_and_store_poltergeist if !success
success_hash[:screenshot] = save_screenshot if !success
success_hash
rescue Exception => e
form_fill_log("done: unsuccessful fill (#{e.class})")
Raven.extra_context(backtrace: e.backtrace)

message = {success: false, message: e.message, exception: e}
message[:screenshot] = save_screenshot_and_store_poltergeist
message[:screenshot] = save_screenshot
raise e
ensure
@session.driver.quit unless rep.persist_session?
Expand All @@ -63,7 +64,7 @@ def url_for(action)
location ||= @session.driver.evaluate_script(
'document.querySelector("' + action.captcha_selector.gsub('"', '\"') + '").getBoundingClientRect();'
)
save_captcha_and_store_poltergeist(
save_captcha(
location["left"], location["top"], location["width"], location["height"]
)
end
Expand Down
21 changes: 15 additions & 6 deletions lib/form_filler/capybara_action.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,27 +60,30 @@ def javascript
end

# Scrolls the element matched by `selector` into view, then sets it to
# false (i.e. clears the checkbox).
#
# Returns whatever the element's `set` call returns.
def uncheck
  scroll_to(selector)
  checkbox = @session.find(selector)
  checkbox.set(false)
end

# Scrolls the element matched by `selector` into view, then sets it to
# true (i.e. ticks the checkbox).
#
# Returns whatever the element's `set` call returns.
def check
  scroll_to(selector)
  checkbox = @session.find(selector)
  checkbox.set(true)
end

# Scrolls the element matched by `selector` into view and clicks it.
#
# Returns whatever the element's `click` call returns.
def click_on
  scroll_to(selector)
  target = @session.find(selector)
  target.click
end

# Selects a radio-style element by setting it to true.
#
# When `options` is nil the element is located by `selector` alone;
# otherwise the selector is narrowed to the entry whose `value` attribute
# equals the user-supplied field (`@fields[value]`) via `element_name`.
#
# The element is scrolled into view first and then set exactly once; the
# previous body also found and set it a second time *before* scrolling,
# which was redundant and defeated the purpose of the scroll.
#
# Returns whatever the element's `set` call returns.
def choose
  target = options.nil? ? selector : element_name(@fields[value], selector)
  scroll_to(target)
  @session.find(target).set(true)
end

def find
wait_val = DEFAULT_FIND_WAIT_TIME
scroll_to(selector)

if options
options_hash = YAML.load options
wait_val = options_hash['wait'] || wait_val
Expand Down Expand Up @@ -112,6 +115,12 @@ def select
end

# Builds a CSS attribute selector matching elements whose `value`
# attribute equals the given string.
#
# @param value [String] the attribute value to match
# @param selector [String] the base selector to narrow (defaults to 'option')
# @return [String] e.g. "option[value='CA']"
#
# The value is wrapped in single quotes, so single quotes and backslashes
# must be escaped or the resulting selector is malformed. Double quotes are
# still escaped as before, which is harmless inside a single-quoted CSS
# string and keeps existing output unchanged for such values.
def element_name(value, selector = 'option')
  escaped = value.gsub(/[\\'"]/) { |char| "\\#{char}" }
  "#{selector}[value='#{escaped}']"
end

# Scrolls the element matched by the given selector into the viewport.
#
# @param element [String] selector understood by `@session.find`
#
# The node is located with `visible: false` so that elements currently
# outside the viewport or hidden can still be scrolled to. As before, the
# lookup raises if the element cannot be found (Capybara's `find` raises
# rather than returning a falsy value, so the old `return unless` guard
# never took effect).
#
# The located node is handed to the script as `arguments[0]` instead of
# interpolating the selector into a jQuery call: that avoids broken
# JavaScript when the selector contains double quotes (e.g.
# `input[name="zip"]`) and removes the dependency on the target page
# having jQuery loaded.
def scroll_to(element)
  node = @session.find(element, visible: false)
  @session.execute_script('arguments[0].scrollIntoView(true)', node)
end
end
8 changes: 5 additions & 3 deletions setup_dev.sh
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,12 @@ RACK_ENV=test bundle exec rake ar:create ar:schema:load > /dev/null
echo "Loading congress members..."
bundle exec rake phantom-dc:clone_git[/home/vagrant] > /dev/null
echo "Setting up PhantomJS..."
echo "Setting up Chrome..."
cd /home/vagrant
curl -Lo phantomjs.tar.bz2 https://bitbucket.org/ariya/phantomjs/downloads/phantomjs-1.9.8-linux-x86_64.tar.bz2
tar -jxvf phantomjs.tar.bz2 > /dev/null
wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | sudo apt-key add -
sudo sh -c 'echo "deb http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list'
sudo apt-get update
sudo apt-get install google-chrome-stable
EOF

ln -s /home/vagrant/phantomjs-1.9.8-linux-x86_64/bin/phantomjs /usr/bin/phantomjs
Expand Down
2 changes: 1 addition & 1 deletion spec/lib/form_filler_capybara.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
end

describe "with saved session and action" do
let(:session){ Capybara::Session.new(:poltergeist) }
let(:session){ Capybara::Session.new(:headless_chrome) }
let(:congress_member){ create :congress_member }
let(:fields) { MOCK_VALUES }

Expand Down

0 comments on commit 7ebb2d5

Please sign in to comment.