new parser
This commit is contained in:
188
lib/something_erlang/awful_api/awful_api.ex
Normal file
188
lib/something_erlang/awful_api/awful_api.ex
Normal file
@ -0,0 +1,188 @@
|
||||
defmodule SomethingErlang.AwfulApi do
|
||||
require Logger
|
||||
|
||||
alias SomethingErlang.AwfulApi.Client
|
||||
alias SomethingErlang.AwfulApi.Thread
|
||||
|
||||
@doc """
Extracts the poster details of every post in `thread`.

Looks at each `dl.userinfo` below a `td.userinfo` cell and harvests three
kinds of nodes: a `dt` whose first attribute is `class` (stored as `:name`,
text only), `dd.registered` (stored as `:regdate`, text only) and `dd.title`
(stored as `:title`, raw HTML of its children).

Returns a list of maps built from consecutive groups of three harvested
fields.
"""
def parse_thread_userinfo(thread) do
  harvest = fn
    {"dt", [{"class", _class} | _], kids}, fields ->
      {nil, [{:name, Floki.text(kids)} | fields]}

    {"dd", [{"class", "registered"} | _], kids}, fields ->
      {nil, [{:regdate, Floki.text(kids)} | fields]}

    {"dd", [{"class", "title"} | _], kids}, fields ->
      {nil, [{:title, Floki.raw_html(kids)} | fields]}

    node, fields ->
      {node, fields}
  end

  {_tree, fields} =
    thread
    |> Floki.find("td.userinfo dl.userinfo")
    |> Floki.traverse_and_update([], harvest)

  # `fields` was built by prepending, so it is in reverse encounter order.
  # Grouping by three and prepending each map again restores post order.
  # NOTE(review): assumes every user contributes exactly three fields; a
  # missing title or regdate would misalign the groups - confirm.
  fields
  |> Enum.chunk_every(3)
  |> Enum.reduce([], fn triple, users -> [Map.new(triple) | users] end)
end
|
||||
|
||||
@doc """
Returns the text of every `td.postdate` cell in `thread`, in document order.

Only cells whose sole attribute is `class="postdate"` are collected; the
match on `[]` asserts that every cell found was consumed.
"""
def parse_thread_postdate(thread) do
  collect = fn
    {"td", [{"class", "postdate"}], kids}, dates -> {nil, [Floki.text(kids) | dates]}
    node, dates -> {node, dates}
  end

  {[], collected} =
    thread
    |> Floki.find("td.postdate")
    |> Floki.traverse_and_update([], collect)

  # The accumulator was built by prepending; flip it back.
  Enum.reverse(collected)
end
|
||||
|
||||
@doc """
Returns the rewritten HTML of every `td.postbody` cell in `thread`.

Each body is first passed through `transform/3` (childless images and links
are rewritten, comments are removed), then the children of every cell whose
sole attribute is `class="postbody"` are rendered with `Floki.raw_html/1`.
The result is a list of HTML strings in document order.
"""
def parse_thread_postbody(thread) do
  rewrite = fn
    {"img", attrs, []} -> transform(:img, attrs)
    {"a", attrs, children} -> transform(:a, attrs, children)
    {:comment, _} -> nil
    other -> other
  end

  collect = fn
    {"td", [{"class", "postbody"}], kids}, bodies -> {nil, [Floki.raw_html(kids) | bodies]}
    node, bodies -> {node, bodies}
  end

  rewritten =
    thread
    |> Floki.find("td.postbody")
    |> Floki.traverse_and_update(rewrite)

  # TODO: use Floki find or smth?
  {[], collected} = Floki.traverse_and_update(rewritten, [], collect)

  Enum.reverse(collected)
end
|
||||
|
||||
# Rewrites a single Floki node. `elem` selects the kind of node (`:img` or
# `:a`), `attrs` is its attribute list and `children` its child nodes.
defp transform(elem, attrs, children \\ [])

# Images: smilies (`class="sa-smilie"`) are returned untouched. Every other
# image gets `loading="lazy"` and its class set to `img-responsive`.
# `List.keystore/4` is used instead of `List.keyreplace/4` so that images
# without any `class` attribute receive the class as well.
defp transform(:img, attrs, _children) do
  case List.keyfind(attrs, "class", 0) do
    {"class", "sa-smilie"} ->
      {"img", attrs, []}

    _other ->
      t_attrs = List.keystore(attrs, "class", 0, {"class", "img-responsive"})
      {"img", [{"loading", "lazy"} | t_attrs], []}
  end
end
|
||||
|
||||
# Links: decides from the `href` whether the anchor is kept as a plain link
# (reduced to its `href` attribute) or replaced by an embed built with
# `transform_link/2`. A missing `href` is treated as the empty string.
# The checks run in a fixed order: site-internal path, `.mp4`, `.gifv`,
# long YouTube URL, short YouTube URL.
defp transform(:a, attrs, children) do
  {"href", href} = List.keyfind(attrs, "href", 0, {"href", ""})
  plain_link = {"a", [{"href", href}], children}

  kind =
    cond do
      String.starts_with?(href, "/") -> :internal
      String.ends_with?(href, ".mp4") -> :mp4
      String.ends_with?(href, ".gifv") -> :gifv
      String.starts_with?(href, "https://www.youtube.com/watch") -> :ytlong
      String.starts_with?(href, "https://youtu.be/") -> :ytshort
      true -> :unknown
    end

  case kind do
    # skip internal links
    :internal ->
      plain_link

    :unknown ->
      Logger.debug("no transform for #{href}")
      plain_link

    embed ->
      transform_link(embed, href)
  end
end
|
||||
|
||||
# Builds the replacement node for a link that should be embedded.
#
# `:mp4` wraps the URL in a `<video>` with one `video/mp4` source.
defp transform_link(:mp4, href) do
  sources = [{"source", [{"src", href}, {"type", "video/mp4"}], []}]
  video = {"video", [{"class", "img-responsive"}, {"controls", ""}], sources}

  {"div", [{"class", "responsive-embed"}], [video]}
end

# `:gifv` offers the same URL with `.gifv` swapped for `.webm` and `.mp4`.
defp transform_link(:gifv, href) do
  webm = {"source", [{"src", String.replace(href, ".gifv", ".webm")}, {"type", "video/webm"}], []}
  mp4 = {"source", [{"src", String.replace(href, ".gifv", ".mp4")}, {"type", "video/mp4"}], []}
  video = {"video", [{"class", "img-responsive"}, {"controls", ""}], [webm, mp4]}

  {"div", [{"class", "responsive-embed"}], [video]}
end

# `:ytlong` turns a `/watch?v=` URL into its `/embed/` form.
defp transform_link(:ytlong, href) do
  youtube_iframe(String.replace(href, "/watch?v=", "/embed/"))
end

# `:ytshort` turns a `youtu.be/` URL into a `www.youtube.com/embed/` URL.
defp transform_link(:ytshort, href) do
  youtube_iframe(String.replace(href, "youtu.be/", "www.youtube.com/embed/"))
end
|
||||
|
||||
# Wraps `src` in a lazily loaded, fullscreen-capable `<iframe>` inside a
# `div.responsive-embed` container.
defp youtube_iframe(src) do
  iframe_attrs = [
    {"class", "youtube-player"},
    {"loading", "lazy"},
    {"allow", "fullscreen"},
    {"src", src}
  ]

  {"div", [{"class", "responsive-embed"}], [{"iframe", iframe_attrs, []}]}
end
|
||||
|
||||
@doc """
Returns the parsed contents of one page of a thread.

Delegates to `SomethingErlang.AwfulApi.Thread.compile/3`, which downloads the
page with the cookies of `user` (its `id` and `hash` fields are read) and
returns a map with the keys `:id`, `:title`, `:page`, `:page_count` and
`:posts`.

## Examples

The call performs an HTTP request, so the example below is illustrative and
deliberately not written as a doctest:

    t = AwfulApi.parsed_thread(3945300, 1, user)
    length(t.posts)
    #=> 42
    t.page_count
    #=> 12

"""
def parsed_thread(id, page, user) do
  Thread.compile(id, page, user)
end
|
||||
|
||||
# Legacy implementation, selected by passing `:deprecated` as fourth argument.
#
# Downloads and parses the page, drops ignored posts, then parses user info,
# post dates and post bodies separately over the whole thread and zips the
# three lists back together into one map per post.
#
# Returns a map with `:id`, `:title`, `:page`, `:page_count` and `:posts`.
# The debugging `IO.inspect/1` tap that used to print the first two posts on
# every call has been removed.
def parsed_thread(id, page, user, :deprecated) do
  html =
    id
    |> Client.thread_doc(page, user)
    |> Floki.parse_document!()

  thread =
    html
    |> Floki.find("#thread")
    |> Floki.filter_out("table.post.ignored")

  title =
    html
    |> Floki.find("title")
    |> Floki.text()
    |> String.replace(" - The Something Awful Forums", "")

  # The last option of the top page selector carries the highest page
  # number; when no such option is found the thread has a single page.
  page_count =
    case html |> Floki.find("#content .pages.top option:last-of-type") |> Floki.text() do
      "" -> 1
      s -> String.to_integer(s)
    end

  posts =
    [
      parse_thread_userinfo(thread),
      parse_thread_postdate(thread),
      parse_thread_postbody(thread)
    ]
    |> Enum.zip()
    |> Enum.map(fn {ui, pd, pb} ->
      %{userinfo: ui, postdate: pd, postbody: pb}
    end)

  %{id: id, title: title, page: page, page_count: page_count, posts: posts}
end
|
||||
end
|
25
lib/something_erlang/awful_api/client.ex
Normal file
25
lib/something_erlang/awful_api/client.ex
Normal file
@ -0,0 +1,25 @@
|
||||
defmodule SomethingErlang.AwfulApi.Client do
  @moduledoc """
  Minimal HTTP client for the Something Awful forums, built on `Req`.
  """

  @base_url "https://forums.somethingawful.com/"

  @doc """
  Downloads page `page` of thread `id` and returns the response body.

  The request is authenticated with the `bbuserid` and `bbpassword` cookies,
  taken from `user.id` and `user.hash`. The body is converted from Latin-1 to
  a UTF-8 binary before it is returned.

  Raises if the HTTP request fails, because `Req.get!/2` is used.
  """
  def thread_doc(id, page, user) do
    resp =
      user
      |> new_request()
      |> get_thread(id, page)

    :unicode.characters_to_binary(resp.body, :latin1)
  end

  # Renders a map as a `Cookie` header value: `k1=v1; k2=v2`.
  defp cookies(args) when is_map(args) do
    Enum.map_join(args, "; ", fn {k, v} -> "#{k}=#{v}" end)
  end

  # Requests `showthread.php` for the given thread and page. The page has no
  # default value: the only caller always passes it, and an unused default
  # on a private function triggers a compiler warning.
  defp get_thread(req, id, page) do
    Req.get!(req, url: "showthread.php", params: [threadid: id, pagenumber: page])
  end

  # Base request carrying the forum URL and the session cookies of `user`.
  defp new_request(user) do
    Req.new(
      base_url: @base_url,
      headers: [cookie: [cookies(%{bbuserid: user.id, bbpassword: user.hash})]]
    )
  end
end
|
155
lib/something_erlang/awful_api/thread.ex
Normal file
155
lib/something_erlang/awful_api/thread.ex
Normal file
@ -0,0 +1,155 @@
|
||||
defmodule SomethingErlang.AwfulApi.Thread do
|
||||
require Logger
|
||||
|
||||
alias SomethingErlang.AwfulApi.Client
|
||||
|
||||
@doc """
Downloads page `page` of thread `id` for `user` and parses it.

Ignored posts (`table.post.ignored`) are dropped. Every remaining
`table.post` becomes a map with the keys `:userinfo`, `:postdate` and
`:postbody`, filled by `userinfo/1`, `postdate/1` and `postbody/1`.

Returns a map with `:id`, `:title`, `:page`, `:page_count` and `:posts`.
"""
def compile(id, page, user) do
  html =
    id
    |> Client.thread_doc(page, user)
    |> Floki.parse_document!()

  thread =
    html
    |> Floki.find("#thread")
    |> Floki.filter_out("table.post.ignored")

  title =
    html
    |> Floki.find("title")
    |> Floki.text()
    |> String.replace(" - The Something Awful Forums", "")

  # The last option of the top page selector carries the highest page
  # number; when no such option is found the thread has a single page.
  last_page = html |> Floki.find("#content .pages.top option:last-of-type") |> Floki.text()

  page_count =
    case last_page do
      "" -> 1
      s -> String.to_integer(s)
    end

  posts =
    thread
    |> Floki.find("table.post")
    |> Enum.map(fn post ->
      %{
        userinfo: userinfo(post),
        postdate: postdate(post),
        postbody: postbody(post)
      }
    end)

  %{id: id, title: title, page: page, page_count: page_count, posts: posts}
end
|
||||
|
||||
@doc """
Extracts the poster details from a single `post` node.

Returns a map with:

  * `:name` - text of the `dt` element inside `dl.userinfo`
  * `:regdate` - text of `dd.registered`
  * `:title` - inner HTML of `dd.title` (empty string when absent)

The title is the markup *inside* `dd.title` only. `Floki.find_and_update/3`
returns the whole updated tree, so rendering its result would repeat the
complete `dl.userinfo` block, name and registration date included. The
`IO.inspect/1` debugging calls have been removed as well.
"""
def userinfo(post) do
  user = Floki.find(post, "dl.userinfo")

  title =
    user
    |> Floki.find("dd.title")
    |> Enum.map_join(fn {_tag, _attrs, children} -> Floki.raw_html(children) end)

  %{
    name: user |> Floki.find("dt") |> Floki.text(),
    regdate: user |> Floki.find("dd.registered") |> Floki.text(),
    title: title
  }
end
|
||||
|
||||
@doc """
Returns the text content of the `td.postdate` cell of a single `post` node.

`Floki.text/1` is applied directly to the nodes found. `Floki.children/2`
only accepts a single element tuple and returns `nil` for the list that
`Floki.find/2` produces, so it must not sit in this pipeline.
"""
def postdate(post) do
  post
  |> Floki.find("td.postdate")
  |> Floki.text()
end
|
||||
|
||||
@doc """
Returns the rewritten inner HTML of the `td.postbody` cell of a single `post`.

Childless images and all links inside the body are rewritten with
`transform/3` and HTML comments are removed. The children of the cell are
then rendered with `Floki.raw_html/1`.

The result is one HTML string (empty when the post has no body cell), the
same shape the legacy whole-thread parser produced per post. Previously the
raw `{tree, acc}` tuple of `Floki.traverse_and_update/3` leaked out of this
function.
"""
def postbody(post) do
  post
  |> Floki.find("td.postbody")
  |> Floki.traverse_and_update(fn
    {"img", attrs, []} -> transform(:img, attrs)
    {"a", attrs, children} -> transform(:a, attrs, children)
    {:comment, _} -> nil
    other -> other
  end)
  |> Enum.map_join(fn {_tag, _attrs, children} -> Floki.raw_html(children) end)
end
|
||||
|
||||
# Rewrites a single Floki node. `elem` selects the kind of node (`:img` or
# `:a`), `attrs` is its attribute list and `children` its child nodes.
defp transform(elem, attrs, children \\ [])

# Images: smilies (`class="sa-smilie"`) are returned untouched. Every other
# image gets `loading="lazy"` and its class set to `img-responsive`.
# `List.keystore/4` is used instead of `List.keyreplace/4` so that images
# without any `class` attribute receive the class as well.
defp transform(:img, attrs, _children) do
  case List.keyfind(attrs, "class", 0) do
    {"class", "sa-smilie"} ->
      {"img", attrs, []}

    _other ->
      t_attrs = List.keystore(attrs, "class", 0, {"class", "img-responsive"})
      {"img", [{"loading", "lazy"} | t_attrs], []}
  end
end
|
||||
|
||||
# Links: decides from the `href` whether the anchor is kept as a plain link
# (reduced to its `href` attribute) or replaced by an embed built with
# `transform_link/2`. A missing `href` is treated as the empty string.
# The checks run in a fixed order: site-internal path, `.mp4`, `.gifv`,
# long YouTube URL, short YouTube URL.
defp transform(:a, attrs, children) do
  {"href", href} = List.keyfind(attrs, "href", 0, {"href", ""})
  plain_link = {"a", [{"href", href}], children}

  kind =
    cond do
      String.starts_with?(href, "/") -> :internal
      String.ends_with?(href, ".mp4") -> :mp4
      String.ends_with?(href, ".gifv") -> :gifv
      String.starts_with?(href, "https://www.youtube.com/watch") -> :ytlong
      String.starts_with?(href, "https://youtu.be/") -> :ytshort
      true -> :unknown
    end

  case kind do
    # skip internal links
    :internal ->
      plain_link

    :unknown ->
      Logger.debug("no transform for #{href}")
      plain_link

    embed ->
      transform_link(embed, href)
  end
end
|
||||
|
||||
# Builds the replacement node for a link that should be embedded.
#
# `:mp4` wraps the URL in a `<video>` with one `video/mp4` source.
defp transform_link(:mp4, href) do
  sources = [{"source", [{"src", href}, {"type", "video/mp4"}], []}]
  video = {"video", [{"class", "img-responsive"}, {"controls", ""}], sources}

  {"div", [{"class", "responsive-embed"}], [video]}
end

# `:gifv` offers the same URL with `.gifv` swapped for `.webm` and `.mp4`.
defp transform_link(:gifv, href) do
  webm = {"source", [{"src", String.replace(href, ".gifv", ".webm")}, {"type", "video/webm"}], []}
  mp4 = {"source", [{"src", String.replace(href, ".gifv", ".mp4")}, {"type", "video/mp4"}], []}
  video = {"video", [{"class", "img-responsive"}, {"controls", ""}], [webm, mp4]}

  {"div", [{"class", "responsive-embed"}], [video]}
end

# `:ytlong` turns a `/watch?v=` URL into its `/embed/` form.
defp transform_link(:ytlong, href) do
  youtube_iframe(String.replace(href, "/watch?v=", "/embed/"))
end

# `:ytshort` turns a `youtu.be/` URL into a `www.youtube.com/embed/` URL.
defp transform_link(:ytshort, href) do
  youtube_iframe(String.replace(href, "youtu.be/", "www.youtube.com/embed/"))
end
|
||||
|
||||
# Wraps `src` in a lazily loaded, fullscreen-capable `<iframe>` inside a
# `div.responsive-embed` container.
defp youtube_iframe(src) do
  iframe_attrs = [
    {"class", "youtube-player"},
    {"loading", "lazy"},
    {"allow", "fullscreen"},
    {"src", src}
  ]

  {"div", [{"class", "responsive-embed"}], [{"iframe", iframe_attrs, []}]}
end
|
||||
|
||||
end
|
@ -1,6 +1,7 @@
|
||||
defmodule SomethingErlang.Grover do
|
||||
use GenServer
|
||||
|
||||
alias SomethingErlang.AwfulApi
|
||||
require Logger
|
||||
|
||||
def mount(user, thread_id) do
|
||||
|
@ -43,7 +43,7 @@ defmodule SomethingErlangWeb.ThreadLive.Show do
|
||||
<aside class="userinfo bg-base-100 shrink-0 sm:w-[13em]">
|
||||
<h3 class="mb-4"><%= @info.name %></h3>
|
||||
<div class="title hidden sm:flex flex-col text-sm pr-4">
|
||||
<%= raw @info.title %>
|
||||
<%= raw @info.title %>
|
||||
</div>
|
||||
</aside>
|
||||
"""
|
||||
|
@ -12,6 +12,6 @@
|
||||
<%= link "Register", class: "link",
|
||||
to: Routes.user_registration_path(@conn, :new) %>
|
||||
<%= button "Log in", class: "btn btn-sm",
|
||||
to: Routes.user_session_path(@conn, :new) %>
|
||||
to: Routes.user_session_path(@conn, :new), method: :get %>
|
||||
<% end %>
|
||||
</div>
|
||||
|
Reference in New Issue
Block a user