2022-08-01 15:58:55 +02:00
|
|
|
defmodule SomethingErlang.AwfulApi.Thread do
|
|
|
|
require Logger
|
|
|
|
|
|
|
|
alias SomethingErlang.AwfulApi.Client
|
|
|
|
|
|
|
|
@doc """
Fetches one page of a thread and reduces it to a plain map.

The markup is obtained from `Client.thread_doc/3` and parsed with Floki.
Posts matching `table.post.ignored` are removed before extraction.

Returns a map with the keys `:id`, `:title`, `:page`, `:page_count` and
`:posts`. Each entry of `:posts` is a map with the keys `:userinfo`,
`:postdate` and `:postbody`.

Raises if the document cannot be parsed or if the page selector text is
not an integer.
"""
def compile(id, page, user) do
  html =
    id
    |> Client.thread_doc(page, user)
    |> Floki.parse_document!()

  # Remove the forum-name text from the document title.
  title =
    html
    |> Floki.find("title")
    |> Floki.text()
    |> String.replace(" - The Something Awful Forums", "")

  # Text of the last <option> in the top page selector; presumably the
  # highest page number (confirm against the forum markup).
  last_page_label =
    html
    |> Floki.find("#content .pages.top option:last-of-type")
    |> Floki.text()

  # No selector text at all is treated as a single-page thread.
  page_count =
    if last_page_label == "" do
      1
    else
      String.to_integer(last_page_label)
    end

  posts =
    html
    |> Floki.find("#thread")
    |> Floki.filter_out("table.post.ignored")
    |> Floki.find("table.post")
    |> Enum.map(fn post ->
      %{
        userinfo: userinfo(post),
        postdate: postdate(post),
        postbody: postbody(post)
      }
    end)

  %{id: id, title: title, page: page, page_count: page_count, posts: posts}
end
|
|
|
|
|
2022-08-02 10:28:16 +02:00
|
|
|
# Extracts the poster details from a post's `dl.userinfo` element.
#
# Returns a map with:
#   * `:name`    - text of the `dt` element(s)
#   * `:regdate` - text of the `dd.registered` element(s)
#   * `:title`   - inner HTML of the first `dd.title` element, or "" when
#                  the post has no such element
defp userinfo(post) do
  user = Floki.find(post, "dl.userinfo")

  title =
    case Floki.find(user, "dd.title") do
      # No title element: answer with an empty string instead of handing
      # nil on to Floki.children/1 and Floki.raw_html/1.
      [] ->
        ""

      [title_node | _rest] ->
        title_node
        |> Floki.children()
        |> Floki.raw_html()
    end

  %{
    name: user |> Floki.find("dt") |> Floki.text(),
    regdate: user |> Floki.find("dd.registered") |> Floki.text(),
    title: title
  }
end
|
|
|
|
|
2022-08-02 10:28:16 +02:00
|
|
|
# Parses the timestamp shown in a post's `td.postdate` cell.
#
# The cell text is split on whitespace, commas and colons. After the first
# token is dropped, exactly five tokens must remain: abbreviated English
# month name, day, year, hours and minutes (e.g. "Aug", "1", "2022", "15",
# "58"). Seconds are always set to 0.
#
# Returns a NaiveDateTime. Raises MatchError when the token count differs,
# ArgumentError when the month name is unknown or a numeric token is not an
# integer, and whatever NaiveDateTime.new!/6 raises for an invalid date.
defp postdate(post) do
  [month_text, day, year, hours, minutes] =
    post
    |> Floki.find("td.postdate")
    |> Floki.text()
    |> String.split(~r{[\s,:]}, trim: true)
    # The leading token is not part of the date (presumably the text of the
    # links that precede it in the cell; confirm against the forum markup).
    |> Enum.drop(1)

  month_names = ~w(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)

  # find_index/2 is zero-based and returns nil for an unknown name; fail
  # with a readable message instead of an arithmetic error on `1 + nil`.
  month_index =
    Enum.find_index(month_names, &(&1 == month_text)) ||
      raise ArgumentError, "unrecognised month #{inspect(month_text)} in post date"

  NaiveDateTime.new!(
    String.to_integer(year),
    month_index + 1,
    String.to_integer(day),
    String.to_integer(hours),
    String.to_integer(minutes),
    0
  )
end
|
|
|
|
|
2022-08-02 10:28:16 +02:00
|
|
|
# Renders the content of a post's first `td.postbody` cell as an HTML string.
#
# HTML comments are filtered out, childless `img` nodes and all `a` nodes
# are rewritten through transform/3, and only the children of the cell (not
# the `td` element itself) are serialised.
defp postbody(post) do
  rewrite_node = fn
    {"img", attrs, []} -> transform(:img, attrs)
    {"a", attrs, children} -> transform(:a, attrs, children)
    untouched -> untouched
  end

  post
  |> Floki.find("td.postbody")
  |> List.first()
  |> Floki.filter_out(:comment)
  |> Floki.traverse_and_update(rewrite_node)
  |> Floki.children()
  |> Floki.raw_html()
end
|
|
|
|
|
|
|
|
# Rewrites a single parsed HTML node given in pieces.
#
#   * `elem`     - node kind to rewrite (`:img` or `:a`)
#   * `attr`     - the node's attribute list of `{name, value}` tuples
#   * `children` - the node's child nodes, defaults to `[]`
#
# Returns the replacement node as a `{tag, attributes, children}` tuple.
defp transform(elem, attr, children \\ [])

# Images whose class is exactly "sa-smilie" are returned unchanged. Every
# other image gets `loading="lazy"` prepended and its class set to
# "img-responsive".
defp transform(:img, attrs, _children) do
  case List.keyfind(attrs, "class", 0) do
    {"class", "sa-smilie"} ->
      {"img", attrs, []}

    _other_or_missing ->
      # keystore replaces an existing class and, unlike keyreplace, also
      # appends the attribute when the image carried no class at all.
      responsive_attrs = List.keystore(attrs, "class", 0, {"class", "img-responsive"})
      {"img", [{"loading", "lazy"} | responsive_attrs], []}
  end
end
|
|
|
|
|
|
|
|
# Rewrites an anchor node according to its href (a missing href counts as "").
#
# Links starting with "/" and links that match no embed rule are reduced to
# an `a` node carrying only the href and the original children; the latter
# case is also logged at debug level. Recognised media links are replaced by
# the embed markup built by transform_link/2.
defp transform(:a, attrs, children) do
  {"href", href} = List.keyfind(attrs, "href", 0, {"href", ""})

  case link_kind(href) do
    :internal ->
      {"a", [{"href", href}], children}

    :unmatched ->
      Logger.debug("no transform for #{href}")
      {"a", [{"href", href}], children}

    embed_kind ->
      transform_link(embed_kind, href)
  end
end

# Classifies an href for transform/3. The checks run top to bottom and the
# first one that holds decides the result, so their order is significant.
defp link_kind(href) do
  cond do
    String.starts_with?(href, "/") -> :internal
    String.ends_with?(href, ".mp4") -> :mp4
    String.ends_with?(href, ".gifv") -> :gifv
    String.starts_with?(href, "https://www.youtube.com/watch") -> :ytlong
    String.starts_with?(href, "https://youtu.be/") -> :ytshort
    true -> :unmatched
  end
end
|
|
|
|
|
|
|
|
# Builds the embed markup for an .mp4 link: a `div.responsive-embed`
# wrapping a `video` element (with controls) whose single `source` points
# at `href` with type "video/mp4".
defp transform_link(:mp4, href) do
  source = {"source", [{"src", href}, {"type", "video/mp4"}], []}
  video = {"video", [{"class", "img-responsive"}, {"controls", ""}], [source]}

  {"div", [{"class", "responsive-embed"}], [video]}
end
|
|
|
|
|
|
|
|
# Builds the embed markup for a .gifv link: a `div.responsive-embed`
# wrapping a `video` element (with controls) that offers two sources, the
# same URL with a .webm extension and with a .mp4 extension, in that order.
#
# Only the trailing ".gifv" is swapped; any other ".gifv" occurrence in the
# URL (for example in the host name) is left as it is.
defp transform_link(:gifv, href) do
  sources =
    for {extension, mime_type} <- [{".webm", "video/webm"}, {".mp4", "video/mp4"}] do
      src = String.replace_suffix(href, ".gifv", extension)
      {"source", [{"src", src}, {"type", mime_type}], []}
    end

  {"div", [{"class", "responsive-embed"}],
   [{"video", [{"class", "img-responsive"}, {"controls", ""}], sources}]}
end
|
|
|
|
|
|
|
|
# Turns a long-form YouTube link into an embedded player by replacing every
# "/watch?v=" in the URL with "/embed/" and wrapping the result with
# youtube_iframe/1. The rest of the URL is passed through unchanged.
defp transform_link(:ytlong, href) do
  embed_src = String.replace(href, "/watch?v=", "/embed/")
  youtube_iframe(embed_src)
end
|
|
|
|
|
|
|
|
# Turns a short-form YouTube link into an embedded player by replacing every
# "youtu.be/" in the URL with "www.youtube.com/embed/" and wrapping the
# result with youtube_iframe/1. The rest of the URL is passed through
# unchanged.
defp transform_link(:ytshort, href) do
  embed_src = String.replace(href, "youtu.be/", "www.youtube.com/embed/")
  youtube_iframe(embed_src)
end
|
|
|
|
|
|
|
|
# Builds the player markup for an embed URL: a `div.responsive-embed`
# containing one childless `iframe` with class "youtube-player", lazy
# loading, the fullscreen permission and `src` as its source.
defp youtube_iframe(src) do
  iframe_attrs = [
    {"class", "youtube-player"},
    {"loading", "lazy"},
    {"allow", "fullscreen"},
    {"src", src}
  ]

  {"div", [{"class", "responsive-embed"}], [{"iframe", iframe_attrs, []}]}
end
|
|
|
|
|
|
|
|
end
|