217 lines
5.3 KiB
Elixir
217 lines
5.3 KiB
Elixir
defmodule SomethingErlang.AwfulApi.Thread do
|
|
import Meeseeks.CSS
|
|
|
|
require Logger
|
|
|
|
@doc """
Extracts thread metadata and the visible posts from a thread page.

`html` is passed as-is to `Meeseeks.one/2`.

Returns a map with these keys:

  * `:id` - integer parsed from the last `:`-separated segment of the
    `class` attribute of the `#thread` element
  * `:title` - text of `<title>` with the
    " - The Something Awful Forums" suffix removed
  * `:page` - number taken from the selected option of the top page selector
  * `:page_count` - number taken from the last option of the top page selector
  * `:posts` - one map (`:userinfo`, `:postdate`, `:postbody`) for every
    `table.post` inside `#thread` whose class does not contain "ignored"

`:page` and `:page_count` fall back to `1` when the page selector has no
matching option or the option has no text.
"""
def compile(html) do
  title =
    html
    |> Meeseeks.one(css("title"))
    |> Meeseeks.text()
    |> String.replace(" - The Something Awful Forums", "")

  thread = Meeseeks.one(html, css("#thread"))

  thread_id =
    thread
    |> Meeseeks.attr("class")
    |> String.split(":")
    |> List.last()
    |> String.to_integer()

  page =
    html
    |> Meeseeks.one(css("#content .pages.top option[selected]"))
    |> page_number()

  page_count =
    html
    |> Meeseeks.one(css("#content .pages.top option:last-of-type"))
    |> page_number()

  posts =
    for post <- Meeseeks.all(thread, css("table.post")),
        not String.contains?(Meeseeks.attr(post, "class"), "ignored") do
      %{
        userinfo: userinfo(post),
        postdate: postdate(post),
        postbody: postbody(post)
      }
    end

  %{id: thread_id, title: title, page: page, page_count: page_count, posts: posts}
end

# Reads a page number from an `<option>` element of the page selector.
#
# `option` is `nil` when the selector matched nothing. Meeseeks.text/1 is
# documented to return `nil` for `nil` input, so both `nil` and the empty
# string mean "no pagination" and yield page 1.
defp page_number(option) do
  case Meeseeks.text(option) do
    missing when missing in [nil, ""] -> 1
    digits -> String.to_integer(digits)
  end
end
|
|
|
|
# Builds the author summary of a post from its `dl.userinfo` element:
#
#   * `:name`    - text of the `dt`
#   * `:regdate` - text of `dd.registered`
#   * `:title`   - HTML of the first element matched by `dd.title > *`
defp userinfo(post) do
  info = Meeseeks.one(post, css("dl.userinfo"))

  # Text of the first element under `info` matching the given selector.
  text_of = fn selector ->
    info
    |> Meeseeks.one(selector)
    |> Meeseeks.text()
  end

  %{
    name: text_of.(css("dt")),
    regdate: text_of.(css("dd.registered")),
    title: Meeseeks.html(Meeseeks.one(info, css("dd.title > *")))
  }
end
|
|
|
|
# Parses the text of the post's `td.postdate` cell into a `NaiveDateTime`.
#
# The text is split on whitespace, commas and colons. The first two tokens
# are discarded and the remaining five are read, in order, as month
# abbreviation, day, year, hours and minutes. Seconds are always 0.
#
# Raises when the token count differs, when the month is not one of the
# listed three-letter abbreviations, or when a numeric token is not an integer.
defp postdate(post) do
  stamp = Meeseeks.text(Meeseeks.one(post, css("td.postdate")))

  [month_name, day, year, hours, minutes] =
    ~r{[\s,:]}
    |> Regex.split(stamp, trim: true)
    |> Enum.drop(2)

  month_names = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]

  # find_index/2 is zero-based, calendar months are one-based.
  month = Enum.find_index(month_names, &(&1 == month_name)) + 1

  [y, d, h, min] = Enum.map([year, day, hours, minutes], &String.to_integer/1)

  NaiveDateTime.new!(y, month, d, h, min, 0)
end
|
|
|
|
# Renders the post's `td.postbody` cell as an HTML string.
#
# Each direct child of the cell is passed through transform/1, children that
# come back as "" are dropped, and the rest are wrapped in a bare `<div>`
# before being serialised.
defp postbody(post) do
  {_tag, _attrs, children} = Meeseeks.tree(Meeseeks.one(post, css("td.postbody")))

  kept =
    Enum.flat_map(children, fn child ->
      case transform(child) do
        "" -> []
        node -> [node]
      end
    end)

  wrapper = {"div", [], kept}

  wrapper
  |> Meeseeks.parse(:tuple_tree)
  |> Meeseeks.html()
end
|
|
|
|
# Rewrites an `<img>` node of a post body.
#
# Images whose class is exactly "sa-smilie" keep their attributes. Every
# other image has its attachment link fixed (see fix_attachment_links/1),
# gets the class "img-responsive" and is marked `loading="lazy"`.
# Children are always replaced by an empty list.
defp transform({"img", attrs, _children}) do
  case List.keyfind(attrs, "class", 0) do
    {"class", "sa-smilie"} ->
      {"img", attrs, []}

    _other_or_missing ->
      responsive_attrs =
        attrs
        |> fix_attachment_links()
        # keystore (not keyreplace) so that an image that has no class
        # attribute at all still receives the responsive class.
        |> List.keystore("class", 0, {"class", "img-responsive"})

      {"img", [{"loading", "lazy"} | responsive_attrs], []}
  end
end
|
|
|
|
# Rewrites an `<a>` node of a post body.
#
# Links to .mp4 / .gifv files and to YouTube are replaced by the embed built
# in transform_link/2. All other links are kept, but reduced to their `href`
# attribute (a missing `href` becomes ""). Links starting with "/" are
# treated as internal and never embedded.
defp transform({"a", attrs, children}) do
  href =
    case List.keyfind(attrs, "href", 0) do
      {"href", value} -> value
      nil -> ""
    end

  kind =
    cond do
      # skip internal links
      String.starts_with?(href, "/") -> :internal
      String.ends_with?(href, ".mp4") -> :mp4
      String.ends_with?(href, ".gifv") -> :gifv
      String.starts_with?(href, "https://www.youtube.com/watch") -> :ytlong
      String.starts_with?(href, "https://youtu.be/") -> :ytshort
      true -> :unknown
    end

  case kind do
    :internal ->
      {"a", [{"href", href}], children}

    :unknown ->
      Logger.debug("no transform for #{href}")
      {"a", [{"href", href}], children}

    embeddable ->
      transform_link(embeddable, href)
  end
end
|
|
|
|
# HTML comments are turned into "", which postbody/1 then filters out.
defp transform({:comment, _text}) do
  ""
end

# Any other element is returned unchanged; its children are not visited.
defp transform({_tag, _attrs, _children} = element) do
  element
end

# Text nodes lose their leading and trailing whitespace.
defp transform(text) when is_binary(text) do
  String.trim(text)
end
|
|
|
|
# Builds the embed markup (a tuple-tree node) that replaces a link.
#
#   * `:mp4`     - `<video>` with the link itself as its only source
#   * `:gifv`    - `<video>` with .webm and .mp4 sources derived from the link
#   * `:ytlong`  - YouTube iframe for a `youtube.com/watch` link
#   * `:ytshort` - YouTube iframe for a `youtu.be/` link
#
# Every embed is wrapped in `<div class="responsive-embed">`.
defp transform_link(:mp4, href) do
  video_embed([video_source(href, "video/mp4")])
end

defp transform_link(:gifv, href) do
  # Only the trailing extension is swapped, so a ".gifv" that happens to
  # appear earlier in the URL is left alone.
  video_embed([
    video_source(String.replace_suffix(href, ".gifv", ".webm"), "video/webm"),
    video_source(String.replace_suffix(href, ".gifv", ".mp4"), "video/mp4")
  ])
end

defp transform_link(:ytlong, href) do
  # Pick the `v` parameter wherever it sits in the query string, so that
  # links such as `watch?feature=share&v=ID` or `watch?v=ID&t=42` produce a
  # clean `/embed/ID` URL. Other query parameters are not carried over.
  video_id =
    href
    |> String.split(["?", "&", "#"])
    |> Enum.find_value(fn
      "v=" <> id when id != "" -> id
      _segment -> nil
    end)

  case video_id do
    nil ->
      # No usable video id: keep the plain textual rewrite.
      href
      |> String.replace("/watch?v=", "/embed/")
      |> youtube_iframe()

    id ->
      youtube_iframe("https://www.youtube.com/embed/" <> id)
  end
end

defp transform_link(:ytshort, href) do
  href
  |> String.replace("youtu.be/", "www.youtube.com/embed/")
  |> youtube_iframe()
end

# Wraps the given `<source>` nodes in a responsive `<video controls>` element.
defp video_embed(sources) do
  {"div", [{"class", "responsive-embed"}],
   [
     {"video", [{"class", "img-responsive"}, {"controls", ""}], sources}
   ]}
end

# A single `<source>` node for a `<video>`.
defp video_source(src, mime_type) do
  {"source", [{"src", src}, {"type", mime_type}], []}
end

# Responsive, lazily loaded YouTube player iframe pointing at `src`.
defp youtube_iframe(src) do
  iframe_attrs = [
    {"class", "youtube-player"},
    {"loading", "lazy"},
    {"allow", "fullscreen"},
    {"src", src}
  ]

  {"div", [{"class", "responsive-embed"}], [{"iframe", iframe_attrs, []}]}
end
|
|
|
|
# Makes forum attachment images loadable from outside the forum.
#
# When the `src` attribute starts with "attachment.php" it is prefixed with
# "https://forums.somethingawful.com/" and the `alt` attribute is removed.
# Any other attribute list, including one without a `src`, is returned
# unchanged.
defp fix_attachment_links(attrs) do
  case List.keyfind(attrs, "src", 0) do
    {"src", "attachment.php" <> _rest = src} ->
      attrs
      |> List.keyreplace("src", 0, {"src", "https://forums.somethingawful.com/" <> src})
      |> List.keydelete("alt", 0)

    _no_src_or_other_host ->
      attrs
  end
end
|
|
end
|