defmodule SomethingErlang.AwfulApi.Thread do
  @moduledoc """
  Turns the HTML of one thread page, as returned by `Client.thread_doc/3`,
  into a plain map holding the thread title, page information and posts.
  """

  require Logger

  alias SomethingErlang.AwfulApi.Client

  @title_suffix " - The Something Awful Forums"

  @doc """
  Fetches page `page` of thread `id` on behalf of `user` and parses it.

  Returns a map with the keys `:id`, `:title`, `:page`, `:page_count` and
  `:posts`. Each entry of `:posts` is a map with `:userinfo`, `:postdate`
  and `:postbody`, built by `userinfo/1`, `postdate/1` and `postbody/1`.

  Posts matching `table.post.ignored` are dropped. `:page_count` is `1` when
  the page selector yields no text. Raises if the document cannot be parsed
  (`Floki.parse_document!/1`) or the page count text is not an integer.
  """
  def compile(id, page, user) do
    html =
      id
      |> Client.thread_doc(page, user)
      |> Floki.parse_document!()

    thread =
      html
      |> Floki.find("#thread")
      |> Floki.filter_out("table.post.ignored")

    title =
      html
      |> Floki.find("title")
      |> Floki.text()
      |> String.replace(@title_suffix, "")

    posts =
      for post <- Floki.find(thread, "table.post") do
        %{
          userinfo: userinfo(post),
          postdate: postdate(post),
          postbody: postbody(post)
        }
      end

    %{id: id, title: title, page: page, page_count: page_count(html), posts: posts}
  end

  # Reads the text of the last <option> of the top page selector.
  # An empty (or whitespace-only) result means there is no selector: one page.
  defp page_count(html) do
    text =
      html
      |> Floki.find("#content .pages.top option:last-of-type")
      |> Floki.text()
      |> String.trim()

    case text do
      "" -> 1
      count -> String.to_integer(count)
    end
  end

  @doc """
  Extracts the poster information from a post node.

  Returns a map with:

    * `:name` - text of the `dt` elements inside `dl.userinfo`
    * `:regdate` - text of `dd.registered`
    * `:title` - the `dl.userinfo` markup rendered with `Floki.raw_html/1`
      after every `dd.title` element has been renamed to `div`
  """
  def userinfo(post) do
    user = Floki.find(post, "dl.userinfo")

    name = user |> Floki.find("dt") |> Floki.text()
    regdate = user |> Floki.find("dd.registered") |> Floki.text()

    title =
      user
      |> Floki.find_and_update("dd.title", fn {"dd", attrs} -> {"div", attrs} end)
      |> Floki.raw_html()

    %{name: name, regdate: regdate, title: title}
  end

  @doc """
  Returns the text content of the post's `td.postdate` cell.
  """
  def postdate(post) do
    # Floki.text/1 already descends into the matched nodes, so the found
    # list is passed to it directly instead of going through
    # Floki.children/2, which expects a single node rather than a list.
    post
    |> Floki.find("td.postdate")
    |> Floki.text()
  end

  @doc """
  Rewrites the post's `td.postbody` cell and collects its inner HTML.

  First pass: `img` and `a` elements are rewritten by `transform/3` and
  comment nodes are removed. Second pass: each
  `{"td", [{"class", "postbody"}], children}` node is replaced by `nil` and
  the raw HTML of its children is pushed onto the accumulator.

  Returns the result of `Floki.traverse_and_update/3`, i.e. a
  `{tree, accumulator}` tuple where the accumulator is the list of
  collected HTML strings.
  """
  def postbody(post) do
    post
    |> Floki.find("td.postbody")
    |> Floki.traverse_and_update(fn
      {"img", attrs, []} -> transform(:img, attrs)
      {"a", attrs, children} -> transform(:a, attrs, children)
      {:comment, _} -> nil
      other -> other
    end)
    |> Floki.traverse_and_update([], fn
      {"td", [{"class", "postbody"}], children}, acc ->
        {nil, [Floki.raw_html(children) | acc]}

      other, acc ->
        {other, acc}
    end)
  end

  # Rewrites a single element; `children` is only used for links.
  defp transform(elem, attr, children \\ [])

  # Smilies (class exactly "sa-smilie") are left untouched. Every other image
  # is lazy-loaded and gets the "img-responsive" class.
  defp transform(:img, attrs, _children) do
    case List.keyfind(attrs, "class", 0) do
      {"class", "sa-smilie"} ->
        {"img", attrs, []}

      _ ->
        # keystore (not keyreplace) so that images that carry no class
        # attribute at all still receive the responsive class.
        responsive = List.keystore(attrs, "class", 0, {"class", "img-responsive"})
        {"img", [{"loading", "lazy"} | responsive], []}
    end
  end

  # Links are reduced to their href; media links become embedded players.
  defp transform(:a, attrs, children) do
    {"href", href} = List.keyfind(attrs, "href", 0, {"href", ""})

    cond do
      # skip internal links
      String.starts_with?(href, "/") ->
        {"a", [{"href", href}], children}

      # mp4
      String.ends_with?(href, ".mp4") ->
        transform_link(:mp4, href)

      # gifv
      String.ends_with?(href, ".gifv") ->
        transform_link(:gifv, href)

      # youtube
      String.starts_with?(href, "https://www.youtube.com/watch") ->
        transform_link(:ytlong, href)

      String.starts_with?(href, "https://youtu.be/") ->
        transform_link(:ytshort, href)

      true ->
        Logger.debug("no transform for #{href}")
        {"a", [{"href", href}], children}
    end
  end

  # Builds the replacement node for a media link of the given kind.
  defp transform_link(:mp4, href) do
    video_embed([{href, "video/mp4"}])
  end

  defp transform_link(:gifv, href) do
    # Only the trailing extension is swapped; a ".gifv" appearing elsewhere
    # in the URL (e.g. in the host name) must stay as it is.
    video_embed([
      {String.replace_suffix(href, ".gifv", ".webm"), "video/webm"},
      {String.replace_suffix(href, ".gifv", ".mp4"), "video/mp4"}
    ])
  end

  defp transform_link(:ytlong, href) do
    href
    |> String.replace("/watch?v=", "/embed/")
    |> youtube_iframe()
  end

  defp transform_link(:ytshort, href) do
    href
    |> String.replace("youtu.be/", "www.youtube.com/embed/")
    |> youtube_iframe()
  end

  # Wraps a <video controls> element with one <source> per {src, mime} pair.
  defp video_embed(sources) do
    source_nodes =
      for {src, type} <- sources do
        {"source", [{"src", src}, {"type", type}], []}
      end

    {"div", [{"class", "responsive-embed"}],
     [{"video", [{"class", "img-responsive"}, {"controls", ""}], source_nodes}]}
  end

  # Wraps a lazy-loaded YouTube player iframe pointing at `src`.
  defp youtube_iframe(src) do
    {"div", [{"class", "responsive-embed"}],
     [
       {"iframe",
        [
          {"class", "youtube-player"},
          {"loading", "lazy"},
          {"allow", "fullscreen"},
          {"src", src}
        ], []}
     ]}
  end
end