Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(client): Implement RawRequestBuilder for raw HTTP requests #257

Merged
merged 9 commits into from
Aug 31, 2024
352 changes: 264 additions & 88 deletions src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,264 @@
self.issue(Cmd::Persist).await?;
Ok(())
}

/// Create a new raw request builder.
jonhoo marked this conversation as resolved.
Show resolved Hide resolved
///
/// This method allows to build a direct HTTP request to a remote site without routing
/// through the WebDriver host. It preserves the cookies and user agent from the current
/// WebDriver session, enabling you to maintain the session context while making external
/// requests.
///
/// This can be useful for operations where direct access is needed or when
/// interacting with third-party services that require the same session cookies.
pub fn raw_request(
&self,
) -> RawRequestBuilder<

Check warning on line 179 in src/client.rs

View workflow job for this annotation

GitHub Actions / stable / clippy

[clippy] reported by reviewdog 🐶 warning: very complex type used. Consider factoring parts into `type` definitions --> src/client.rs:179:10 | 179 | ) -> RawRequestBuilder< | __________^ 180 | | '_, 181 | | fn(http::request::Builder) -> hyper::Request<BoxBody<hyper::body::Bytes, Infallible>>, 182 | | > { | |_____^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#type_complexity = note: `#[warn(clippy::type_complexity)]` on by default Raw Output: src/client.rs:179:10:w:warning: very complex type used. Consider factoring parts into `type` definitions --> src/client.rs:179:10 | 179 | ) -> RawRequestBuilder< | __________^ 180 | | '_, 181 | | fn(http::request::Builder) -> hyper::Request<BoxBody<hyper::body::Bytes, Infallible>>, 182 | | > { | |_____^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#type_complexity = note: `#[warn(clippy::type_complexity)]` on by default __END__

Check warning on line 179 in src/client.rs

View workflow job for this annotation

GitHub Actions / beta / clippy

[clippy] reported by reviewdog 🐶 warning: very complex type used. Consider factoring parts into `type` definitions --> src/client.rs:179:10 | 179 | ) -> RawRequestBuilder< | __________^ 180 | | '_, 181 | | fn(http::request::Builder) -> hyper::Request<BoxBody<hyper::body::Bytes, Infallible>>, 182 | | > { | |_____^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#type_complexity = note: `#[warn(clippy::type_complexity)]` on by default Raw Output: src/client.rs:179:10:w:warning: very complex type used. Consider factoring parts into `type` definitions --> src/client.rs:179:10 | 179 | ) -> RawRequestBuilder< | __________^ 180 | | '_, 181 | | fn(http::request::Builder) -> hyper::Request<BoxBody<hyper::body::Bytes, Infallible>>, 182 | | > { | |_____^ | = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#type_complexity = note: `#[warn(clippy::type_complexity)]` on by default __END__
'_,
fn(http::request::Builder) -> hyper::Request<BoxBody<hyper::body::Bytes, Infallible>>,
> {
RawRequestBuilder::new(self)
}
}

/// A builder for constructing raw HTTP requests with optional cookies.
///
/// ```no_run
/// # use fantoccini::{ClientBuilder, Locator};
/// # #[tokio::main]
/// # async fn main() -> Result<(), fantoccini::error::CmdError> {
/// # #[cfg(all(feature = "native-tls", not(feature = "rustls-tls")))]
/// # let client = ClientBuilder::native().connect("http://localhost:4444").await.expect("failed to connect to WebDriver");
/// # #[cfg(feature = "rustls-tls")]
/// # let client = ClientBuilder::rustls().expect("rustls initialization").connect("http://localhost:4444").await.expect("failed to connect to WebDriver");
/// # #[cfg(all(not(feature = "native-tls"), not(feature = "rustls-tls")))]
/// # let client: fantoccini::Client = unreachable!("no tls provider available");
/// // go back to the frontpage
/// client.goto("https://www.wikipedia.org/").await?;
/// // find the source for the Wikipedia globe
/// let img = client.find(Locator::Css("img.central-featured-logo")).await?;
/// let src = img.attr("src").await?.expect("image should have a src");
/// // now build a raw HTTP client request
/// // we could just use client.raw_client_for() here,
/// // but let's use the builder to show how it works:
/// let mut builder = client.raw_request();
/// builder.method(hyper::Method::GET).url(&src);
/// // we don't need cookies for this request
/// builder.skip_cookie_navigation();
/// let raw = builder.send().await?;
///
/// // we then read out the image bytes
/// use futures_util::TryStreamExt;
/// use http_body_util::BodyExt;
/// let pixels = raw
/// .into_body()
/// .collect()
/// .await
/// .map_err(fantoccini::error::CmdError::from)?
/// .to_bytes();
/// // and voilla, we now have the bytes for the Wikipedia logo!
/// assert!(pixels.len() > 0);
/// println!("Wikipedia logo is {}b", pixels.len());
/// # client.close().await
/// # }
/// ```
#[derive(Clone, Debug)]
pub struct RawRequestBuilder<'a, F> {
client: &'a Client,
method: Method,
url: String,
cookie_url: Option<String>,
request_modifier: F,
}

fn empty_body(
req: http::request::Builder,
) -> hyper::Request<BoxBody<hyper::body::Bytes, Infallible>> {
req.body(BoxBody::new(http_body_util::Empty::new()))
.unwrap()
}

impl<'a>
RawRequestBuilder<
'a,
fn(http::request::Builder) -> hyper::Request<BoxBody<hyper::body::Bytes, Infallible>>,
>
{
/// Create a new raw request builder.
pub fn new(client: &'a Client) -> Self {
RawRequestBuilder {
client,
method: Method::GET,
url: String::new(),
cookie_url: Some("/please_give_me_your_cookies".to_string()),
request_modifier: empty_body,
}
}
}

impl<'a, F> RawRequestBuilder<'a, F> {
/// Set the HTTP method for the request.
pub fn method(&mut self, method: Method) -> &mut Self {
self.method = method;
self
}

/// Set the URL for the request.
pub fn url(&mut self, url: &str) -> &mut Self {
self.url = url.to_string();
self
}

/// Set the URL for retrieving cookies.
jonhoo marked this conversation as resolved.
Show resolved Hide resolved
///
/// The WebDriver specification requires that cookies can only be retrieved or set for the
/// current domain of the active WebDriver session. This method sets a `cookie_url` which
/// the WebDriver client will navigate to in order to retrieve the cookies needed for
/// the raw HTTP request.
///
/// This approach is necessary due to the WebDriver limitation discussed in
/// [w3c/webdriver#1238](https://github.com/w3c/webdriver/issues/1238),
/// which prevents setting cookies for a domain that the WebDriver is not currently on.
///
/// By setting this URL, you can ensure that the appropriate cookies are included in the
/// raw HTTP request. This can be particularly useful for scenarios where you need to
/// reuse cookies from a previous session to avoid redundant login operations or share
/// WebDriver sessions across different threads with distinct cookies.
///
/// - [Issue #148](https://github.com/jonhoo/fantoccini/issues/148)
pub fn cookie_url(&mut self, url: &str) -> &mut Self {
self.cookie_url = Some(url.to_string());
self
}

/// Opt out of the cookie navigation process.
///
/// This allows to skip the navigation to a cookie URL, if you don't want to retrieve cookies.
pub fn skip_cookie_navigation(&mut self) -> &mut Self {
self.cookie_url = None;
self
}
}

impl<'a, F> RawRequestBuilder<'a, F> {
/// Set a function to modify the request.
pub fn map_request<F2>(self, f: F2) -> RawRequestBuilder<'a, F2>
where
F2: FnOnce(
http::request::Builder,
) -> hyper::Request<BoxBody<hyper::body::Bytes, Infallible>>,
{
RawRequestBuilder {
client: self.client,
method: self.method,
url: self.url,
cookie_url: self.cookie_url,
request_modifier: f,
}
}
}

impl<'a, F> RawRequestBuilder<'a, F>
where
F: FnOnce(http::request::Builder) -> hyper::Request<BoxBody<hyper::body::Bytes, Infallible>>,
{
/// Send the constructed request.
pub async fn send(self) -> Result<hyper::Response<hyper::body::Incoming>, error::CmdError> {
// We need to do some trickiness here. GetCookies will only give us the cookies for the
// *current* domain, whereas we want the cookies for `url`'s domain. So, we navigate to the
// URL in question, fetch its cookies, and then navigate back. *Except* that we can't do
// that either (what if `url` is some huge file?). So we *actually* navigate to some weird
// url that's unlikely to exist on the target domain, and which won't resolve into the
// actual content, but will still give the same cookies.
//
// The fact that cookies can have /path and security constraints makes this even more of a
// pain. /path in particular is tricky, because you could have a URL like:
//
// example.com/download/some_identifier/ignored_filename_just_for_show
//
// Imagine if a cookie is set with path=/download/some_identifier. How do we get that
// cookie without triggering a request for the (large) file? I don't know. Hence: TODO.
//
let cookies = if let Some(cookie_url) = self.cookie_url {
let current_url = self.client.current_url_().await?;
let cookie_url = current_url.join(&cookie_url)?;
self.client.goto(cookie_url.as_str()).await?;

let cookies = self.client.issue(WebDriverCommand::GetCookies).await?;
self.client.back().await?;

if !cookies.is_array() {
return Err(error::CmdError::NotW3C(cookies));
}

// now add all the cookies
let mut all_ok = true;
let mut jar = Vec::new();
for cookie in cookies.as_array().unwrap() {
if !cookie.is_object() {
all_ok = false;
break;
}

// https://w3c.github.io/webdriver/webdriver-spec.html#cookies
let cookie = cookie.as_object().unwrap();
if !cookie.contains_key("name") || !cookie.contains_key("value") {
all_ok = false;
break;
}

if !cookie["name"].is_string() || !cookie["value"].is_string() {
all_ok = false;
break;
}

// Note that since we're sending these cookies, all that matters is the mapping
// from name to value. The other fields only matter when deciding whether to
// include a cookie or not, and the driver has already decided that for us
// (GetCookies is for a particular URL).
jar.push(
cookie::Cookie::new(
cookie["name"].as_str().unwrap().to_owned(),
cookie["value"].as_str().unwrap().to_owned(),
)
.encoded()
.to_string(),
);
}

if !all_ok {
return Err(error::CmdError::NotW3C(cookies));
}

Some(jar.join("; "))
} else {
None
};

let mut req = hyper::Request::builder();
req = req
.method(self.method)
.uri(http::Uri::try_from(self.url.as_str()).unwrap());

if let Some(cookies) = cookies {
req = req.header(hyper::header::COOKIE, cookies);
}

let ua = self.client.get_ua().await?;
if let Some(ua) = ua {
req = req.header(hyper::header::USER_AGENT, ua);
}

let req = (self.request_modifier)(req);

let (tx, rx) = oneshot::channel();
self.client.issue(Cmd::Raw { req, rsp: tx }).await?;
match rx.await {
Ok(Ok(r)) => Ok(r),
Ok(Err(e)) => Err(e.into()),
Err(e) => unreachable!("Session ended prematurely: {:?}", e),
}
}
}

// NOTE: new impl block to keep related methods together.
Expand Down Expand Up @@ -903,11 +1161,9 @@
method: Method,
url: &str,
) -> Result<hyper::Response<hyper::body::Incoming>, error::CmdError> {
self.with_raw_client_for(method, url, |req| {
req.body(BoxBody::new(http_body_util::Empty::new()))
.unwrap()
})
.await
let mut builder = self.raw_request();
builder.method(method).url(url);
builder.send().await
}

/// Build and issue an HTTP request to the given `url` with all the same cookies as the current
Expand All @@ -926,89 +1182,9 @@
http::request::Builder,
) -> hyper::Request<BoxBody<hyper::body::Bytes, Infallible>>,
{
let url = url.to_owned();
// We need to do some trickiness here. GetCookies will only give us the cookies for the
// *current* domain, whereas we want the cookies for `url`'s domain. So, we navigate to the
// URL in question, fetch its cookies, and then navigate back. *Except* that we can't do
// that either (what if `url` is some huge file?). So we *actually* navigate to some weird
// url that's unlikely to exist on the target domain, and which won't resolve into the
// actual content, but will still give the same cookies.
//
// The fact that cookies can have /path and security constraints makes this even more of a
// pain. /path in particular is tricky, because you could have a URL like:
//
// example.com/download/some_identifier/ignored_filename_just_for_show
//
// Imagine if a cookie is set with path=/download/some_identifier. How do we get that
// cookie without triggering a request for the (large) file? I don't know. Hence: TODO.
let old_url = self.current_url_().await?;
let url = old_url.clone().join(&url)?;
let cookie_url = url.clone().join("/please_give_me_your_cookies")?;
self.goto(cookie_url.as_str()).await?;

// TODO: go back before we return if this call errors:
let cookies = self.issue(WebDriverCommand::GetCookies).await?;
if !cookies.is_array() {
return Err(error::CmdError::NotW3C(cookies));
}
self.back().await?;
let ua = self.get_ua().await?;

// now add all the cookies
let mut all_ok = true;
let mut jar = Vec::new();
for cookie in cookies.as_array().unwrap() {
if !cookie.is_object() {
all_ok = false;
break;
}

// https://w3c.github.io/webdriver/webdriver-spec.html#cookies
let cookie = cookie.as_object().unwrap();
if !cookie.contains_key("name") || !cookie.contains_key("value") {
all_ok = false;
break;
}

if !cookie["name"].is_string() || !cookie["value"].is_string() {
all_ok = false;
break;
}

// Note that since we're sending these cookies, all that matters is the mapping
// from name to value. The other fields only matter when deciding whether to
// include a cookie or not, and the driver has already decided that for us
// (GetCookies is for a particular URL).
jar.push(
cookie::Cookie::new(
cookie["name"].as_str().unwrap().to_owned(),
cookie["value"].as_str().unwrap().to_owned(),
)
.encoded()
.to_string(),
);
}

if !all_ok {
return Err(error::CmdError::NotW3C(cookies));
}

let mut req = hyper::Request::builder();
req = req
.method(method)
.uri(http::Uri::try_from(url.as_str()).unwrap());
req = req.header(hyper::header::COOKIE, jar.join("; "));
if let Some(s) = ua {
req = req.header(hyper::header::USER_AGENT, s);
}
let req = before(req);
let (tx, rx) = oneshot::channel();
self.issue(Cmd::Raw { req, rsp: tx }).await?;
match rx.await {
Ok(Ok(r)) => Ok(r),
Ok(Err(e)) => Err(e.into()),
Err(e) => unreachable!("Session ended prematurely: {:?}", e),
}
let mut builder = self.raw_request();
builder.method(method).url(url).clone().map_request(before);
builder.send().await
}
}

Expand Down
Loading