Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master'
Browse files Browse the repository at this point in the history
  • Loading branch information
liamwhite committed Jun 21, 2024
2 parents 2066143 + ad2b4b0 commit 884f467
Show file tree
Hide file tree
Showing 17 changed files with 152 additions and 156 deletions.
3 changes: 1 addition & 2 deletions config/config.exs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ config :canary,

# Configures the endpoint
config :philomena, PhilomenaWeb.Endpoint,
adapter: Bandit.PhoenixAdapter,
url: [host: "localhost"],
secret_key_base: "xZYTon09JNRrj8snd7KL31wya4x71jmo5aaSSRmw1dGjWLRmEwWMTccwxgsGFGjM",
render_errors: [view: PhilomenaWeb.ErrorView, accepts: ~w(html json)],
Expand All @@ -46,8 +47,6 @@ config :phoenix, :template_engines,
slime: PhoenixSlime.Engine,
slimleex: PhoenixSlime.LiveViewEngine

config :tesla, adapter: Tesla.Adapter.Mint

# Configures Elixir's Logger
config :logger, :console,
format: "$time $metadata[$level] $message\n",
Expand Down
12 changes: 3 additions & 9 deletions config/runtime.exs
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,7 @@ config :philomena, :s3_primary_options,
host: System.fetch_env!("S3_HOST"),
port: System.fetch_env!("S3_PORT"),
access_key_id: System.fetch_env!("AWS_ACCESS_KEY_ID"),
secret_access_key: System.fetch_env!("AWS_SECRET_ACCESS_KEY"),
http_opts: [timeout: 180_000, recv_timeout: 180_000]
secret_access_key: System.fetch_env!("AWS_SECRET_ACCESS_KEY")

config :philomena, :s3_primary_bucket, System.fetch_env!("S3_BUCKET")

Expand All @@ -85,20 +84,15 @@ config :philomena, :s3_secondary_options,
host: System.get_env("ALT_S3_HOST"),
port: System.get_env("ALT_S3_PORT"),
access_key_id: System.get_env("ALT_AWS_ACCESS_KEY_ID"),
secret_access_key: System.get_env("ALT_AWS_SECRET_ACCESS_KEY"),
http_opts: [timeout: 180_000, recv_timeout: 180_000]
secret_access_key: System.get_env("ALT_AWS_SECRET_ACCESS_KEY")

config :philomena, :s3_secondary_bucket, System.get_env("ALT_S3_BUCKET")

# Don't bail on OpenSearch's self-signed certificate
config :elastix,
httpoison_options: [ssl: [verify: :verify_none]]

config :ex_aws, :hackney_opts,
timeout: 180_000,
recv_timeout: 180_000,
use_default_pool: false,
pool: false
config :ex_aws, http_client: PhilomenaMedia.Req

config :ex_aws, :retries,
max_attempts: 20,
Expand Down
5 changes: 1 addition & 4 deletions lib/philomena/application.ex
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,7 @@ defmodule Philomena.Application do
PhilomenaWeb.AdvertUpdater,
PhilomenaWeb.UserFingerprintUpdater,
PhilomenaWeb.UserIpUpdater,
PhilomenaWeb.Endpoint,

# Connection drainer for SIGTERM
{Plug.Cowboy.Drainer, refs: [PhilomenaWeb.Endpoint.HTTP]}
PhilomenaWeb.Endpoint
]

# See https://hexdocs.pm/elixir/Supervisor.html
Expand Down
2 changes: 1 addition & 1 deletion lib/philomena/artist_links/automatic_verifier.ex
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ defmodule Philomena.ArtistLinks.AutomaticVerifier do
end
end

defp contains_verification_code?({:ok, %Tesla.Env{body: body, status: 200}}, code) do
defp contains_verification_code?({:ok, %{body: body, status: 200}}, code) do
String.contains?(body, code)
end

Expand Down
2 changes: 1 addition & 1 deletion lib/philomena/channels/picarto_channel.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ defmodule Philomena.Channels.PicartoChannel do
@api_online
|> PhilomenaProxy.Http.get()
|> case do
{:ok, %Tesla.Env{body: body, status: 200}} ->
{:ok, %{body: body, status: 200}} ->
body
|> Jason.decode!()
|> Map.new(&{&1["name"], fetch(&1, now)})
Expand Down
2 changes: 1 addition & 1 deletion lib/philomena/channels/piczel_channel.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ defmodule Philomena.Channels.PiczelChannel do
@api_online
|> PhilomenaProxy.Http.get()
|> case do
{:ok, %Tesla.Env{body: body, status: 200}} ->
{:ok, %{body: body, status: 200}} ->
body
|> Jason.decode!()
|> Map.new(&{&1["slug"], fetch(&1, now)})
Expand Down
31 changes: 31 additions & 0 deletions lib/philomena_media/req.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
defmodule PhilomenaMedia.Req do
  @moduledoc """
  `ExAws.Request.HttpClient` implementation that performs requests with `m:Req`.

  Options passed to `m:Req` can be set with the following config:

      config :philomena, :req_opts,
        receive_timeout: 30_000

  When `:req_opts` is not configured, the value shown above is used.
  """

  @behaviour ExAws.Request.HttpClient

  # Used only when `:req_opts` is absent from the application environment;
  # a configured value replaces this list rather than being merged into it.
  @default_opts [receive_timeout: 30_000]

  @doc """
  Performs an HTTP request and returns the result as a plain tagged map.

  Options are layered in increasing priority: the per-request basics
  (`method`, `url`, `body`, `headers`, `decode_body: false`), then the
  configured `:req_opts`, then the `http_opts` given by the caller.

  Returns `{:ok, %{status_code: status, headers: headers, body: body}}` on
  success and `{:error, %{reason: reason}}` on failure.
  """
  @impl true
  def request(method, url, body \\ "", headers \\ [], http_opts \\ []) do
    configured_opts = Application.get_env(:philomena, :req_opts, @default_opts)

    request_opts =
      [method: method, url: url, body: body, headers: headers, decode_body: false]
      |> Keyword.merge(configured_opts)
      |> Keyword.merge(http_opts)

    case Req.request(request_opts) do
      {:ok, %{status: status, headers: resp_headers, body: resp_body}} ->
        {:ok, %{status_code: status, headers: resp_headers, body: resp_body}}

      {:error, reason} ->
        {:error, %{reason: reason}}
    end
  end
end
118 changes: 79 additions & 39 deletions lib/philomena_proxy/http.ex
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,29 @@ defmodule PhilomenaProxy.Http do

@type url :: String.t()
@type header_list :: [{String.t(), String.t()}]
@type body :: binary()
@type body :: iodata()
@type result :: {:ok, Req.Response.t()} | {:error, Exception.t()}

@type client_options :: keyword()
@user_agent "Mozilla/5.0 (X11; Philomena; Linux x86_64; rv:126.0) Gecko/20100101 Firefox/126.0"
@max_body 125_000_000

@max_body_key :resp_body_size

@doc ~S"""
Perform a HTTP GET request.
## Example
iex> PhilomenaProxy.Http.get("http://example.com", [{"authorization", "Bearer #{token}"}])
{:ok, %Tesla.Env{...}}
{:ok, %{status: 200, body: ...}}
iex> PhilomenaProxy.Http.get("http://nonexistent.example.com")
{:error, %Mint.TransportError{reason: :nxdomain}}
{:error, %Req.TransportError{reason: :nxdomain}}
"""
@spec get(url(), header_list(), client_options()) :: Tesla.Env.result()
def get(url, headers \\ [], options \\ []) do
Tesla.get(client(headers), url, opts: [adapter: adapter_opts(options)])
@spec get(url(), header_list()) :: result()
def get(url, headers \\ []) do
request(:get, url, [], headers)
end

@doc ~S"""
Expand All @@ -44,15 +48,15 @@ defmodule PhilomenaProxy.Http do
## Example
iex> PhilomenaProxy.Http.head("http://example.com", [{"authorization", "Bearer #{token}"}])
{:ok, %Tesla.Env{...}}
{:ok, %{status: 200, body: ...}}
iex> PhilomenaProxy.Http.head("http://nonexistent.example.com")
{:error, %Mint.TransportError{reason: :nxdomain}}
{:error, %Req.TransportError{reason: :nxdomain}}
"""
@spec head(url(), header_list(), client_options()) :: Tesla.Env.result()
def head(url, headers \\ [], options \\ []) do
Tesla.head(client(headers), url, opts: [adapter: adapter_opts(options)])
@spec head(url(), header_list()) :: result()
def head(url, headers \\ []) do
request(:head, url, [], headers)
end

@doc ~S"""
Expand All @@ -61,27 +65,67 @@ defmodule PhilomenaProxy.Http do
## Example
iex> PhilomenaProxy.Http.post("http://example.com", "", [{"authorization", "Bearer #{token}"}])
{:ok, %Tesla.Env{...}}
{:ok, %{status: 200, body: ...}}
iex> PhilomenaProxy.Http.post("http://nonexistent.example.com", "")
{:error, %Mint.TransportError{reason: :nxdomain}}
{:error, %Req.TransportError{reason: :nxdomain}}
"""
@spec post(url(), body(), header_list(), client_options()) :: Tesla.Env.result()
def post(url, body, headers \\ [], options \\ []) do
Tesla.post(client(headers), url, body, opts: [adapter: adapter_opts(options)])
@spec post(url(), body(), header_list()) :: result()
def post(url, body, headers \\ []) do
request(:post, url, body, headers)
end

defp adapter_opts(opts) do
opts = Keyword.merge(opts, max_body: 125_000_000, inet6: true)

case Application.get_env(:philomena, :proxy_host) do
nil ->
opts
# Builds and executes a single request for the public get/head/post helpers.
#
# The body is not decoded, at most one redirect is followed, and the response
# is streamed through `stream_response_callback/2`, whose running byte counter
# is initialised to 0 in the request's private storage.
@spec request(atom(), String.t(), iodata(), header_list()) :: result()
defp request(method, url, body, headers) do
  request_opts = [
    method: method,
    url: url,
    body: body,
    headers: [{:user_agent, @user_agent} | headers],
    max_redirects: 1,
    connect_options: connect_options(url),
    inet6: true,
    into: &stream_response_callback/2,
    decode_body: false
  ]

  prepared_request =
    request_opts
    |> Req.new()
    |> Req.Request.put_private(@max_body_key, 0)

  Req.request(prepared_request)
end

url ->
Keyword.merge(opts, proxy: proxy_opts(URI.parse(url)))
end
# Builds the `connect_options` for a request to `url`: TLS transport options
# for https URLs, followed by proxy settings when `:proxy_host` is configured.
defp connect_options(url) do
  tls_settings =
    case URI.parse(url) do
      %{scheme: "https"} ->
        # The default certificate signature algorithms do not include SHA-1, but many
        # root certificates are still signed with it, and the signature on a root is
        # not relevant for security. Relax the check by appending `sha: :rsa` to the
        # defaults, even though this creates a less secure client.
        # https://github.com/erlang/otp/issues/8601
        hostname_check = [match_fun: :public_key.pkix_verify_hostname_match_fun(:https)]
        cert_signature_algs = :ssl.signature_algs(:default, :"tlsv1.3") ++ [sha: :rsa]

        [
          transport_opts: [
            customize_hostname_check: hostname_check,
            signature_algs_cert: cert_signature_algs
          ]
        ]

      _ ->
        # Do not pass any options for non-HTTPS schemes. Finch will raise badarg if the
        # above options are passed.
        []
    end

  proxy_settings =
    case Application.get_env(:philomena, :proxy_host) do
      nil ->
        []

      # The configured value is the proxy's own URL, not the request URL.
      proxy_url ->
        [proxy: proxy_opts(URI.parse(proxy_url))]
    end

  tls_settings ++ proxy_settings
end

defp proxy_opts(%{host: host, port: port, scheme: "https"}),
Expand All @@ -90,18 +134,14 @@ defmodule PhilomenaProxy.Http do
defp proxy_opts(%{host: host, port: port, scheme: "http"}),
do: {:http, host, port, [transport_opts: [inet6: true]]}

defp client(headers) do
Tesla.client(
[
{Tesla.Middleware.FollowRedirects, max_redirects: 1},
{Tesla.Middleware.Headers,
[
{"User-Agent",
"Mozilla/5.0 (X11; Philomena; Linux x86_64; rv:86.0) Gecko/20100101 Firefox/86.0"}
| headers
]}
],
Tesla.Adapter.Mint
)
# Streaming collector installed as the `into:` option of outgoing requests.
#
# Each received chunk is appended to the response body, and a running byte
# count is kept in the request's private storage under `@max_body_key`.
# Streaming continues while the count is below `@max_body`; otherwise it is
# halted and a `RuntimeError` ("body too big") takes the place of the response.
defp stream_response_callback({:data, data}, {req, resp}) do
  req = update_in(req.private[@max_body_key], &(&1 + byte_size(data)))
  resp = update_in(resp.body, &<<&1::binary, data::binary>>)

  # Read the counter through the same attribute that is used to write it, so
  # the key name lives in exactly one place.
  if req.private[@max_body_key] < @max_body do
    {:cont, {req, resp}}
  else
    {:halt, {req, RuntimeError.exception("body too big")}}
  end
end
end
58 changes: 3 additions & 55 deletions lib/philomena_proxy/scrapers/deviantart.ex
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do
@image_regex ~r|data-rh="true" rel="preload" href="([^"]*)" as="image"|
@source_regex ~r|rel="canonical" href="([^"]*)"|
@artist_regex ~r|https://www.deviantart.com/([^/]*)/art|
@serial_regex ~r|https://www.deviantart.com/(?:.*?)-(\d+)\z|
@cdnint_regex ~r|(https://images-wixmp-[0-9a-f]+.wixmp.com)(?:/intermediary)?/f/([^/]*)/([^/?]*)|
@png_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.png/v1/fill/[0-9a-z_,]+/[0-9a-z_\-]+)(\.png)(.*)|
@jpg_regex ~r|(https://[0-9a-z\-\.]+(?:/intermediary)?/f/[0-9a-f\-]+/[0-9a-z\-]+\.jpg/v1/fill/w_[0-9]+,h_[0-9]+,q_)([0-9]+)(,[a-z]+\/[a-z0-6_\-]+\.jpe?g.*)|
Expand All @@ -31,14 +30,13 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do
@spec scrape(URI.t(), Scrapers.url()) :: Scrapers.scrape_result()
def scrape(_uri, url) do
url
|> follow_redirect(2)
|> PhilomenaProxy.Http.get()
|> extract_data!()
|> try_intermediary_hires!()
|> try_new_hires!()
|> try_old_hires!()
end

defp extract_data!({:ok, %Tesla.Env{body: body, status: 200}}) do
defp extract_data!({:ok, %{body: body, status: 200}}) do
[image] = Regex.run(@image_regex, body, capture: :all_but_first)
[source] = Regex.run(@source_regex, body, capture: :all_but_first)
[artist] = Regex.run(@artist_regex, source, capture: :all_but_first)
Expand All @@ -60,7 +58,7 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do
with [domain, object_uuid, object_name] <-
Regex.run(@cdnint_regex, image.url, capture: :all_but_first),
built_url <- "#{domain}/intermediary/f/#{object_uuid}/#{object_name}",
{:ok, %Tesla.Env{status: 200}} <- PhilomenaProxy.Http.head(built_url) do
{:ok, %{status: 200}} <- PhilomenaProxy.Http.head(built_url) do
# This is the high resolution URL.
%{
data
Expand Down Expand Up @@ -107,54 +105,4 @@ defmodule PhilomenaProxy.Scrapers.Deviantart do
data
end
end

defp try_old_hires!(%{source_url: source, images: [image]} = data) do
[serial] = Regex.run(@serial_regex, source, capture: :all_but_first)

base36 =
serial
|> String.to_integer()
|> Integer.to_string(36)
|> String.downcase()

built_url = "http://orig01.deviantart.net/x_by_x-d#{base36}.png"

case PhilomenaProxy.Http.get(built_url) do
{:ok, %Tesla.Env{status: 301, headers: headers}} ->
# Location header provides URL of high res image.
{_location, link} = Enum.find(headers, fn {header, _val} -> header == "location" end)

%{
data
| images: [
%{
url: link,
camo_url: image.camo_url
}
]
}

_ ->
# Nothing to be found here, move along...
data
end
end

# Workaround for benoitc/hackney#273
defp follow_redirect(_url, 0), do: nil

defp follow_redirect(url, max_times) do
case PhilomenaProxy.Http.get(url) do
{:ok, %Tesla.Env{headers: headers, status: code}} when code in [301, 302] ->
location = Enum.find_value(headers, &location_header/1)
follow_redirect(location, max_times - 1)

response ->
response
end
end

defp location_header({"Location", location}), do: location
defp location_header({"location", location}), do: location
defp location_header(_), do: nil
end
2 changes: 1 addition & 1 deletion lib/philomena_proxy/scrapers/pillowfort.ex
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ defmodule PhilomenaProxy.Scrapers.Pillowfort do
|> process_response!(url)
end

defp json!({:ok, %Tesla.Env{body: body, status: 200}}),
defp json!({:ok, %{body: body, status: 200}}),
do: Jason.decode!(body)

defp process_response!(post_json, url) do
Expand Down
Loading

0 comments on commit 884f467

Please sign in to comment.