defmodule SomethingErlang.AwfulApi.Thread do
  @moduledoc """
  Turns the HTML of a Something Awful forum thread page into a plain map.

  The page is queried with Meeseeks CSS selectors. Post bodies are rewritten
  so that images are lazy-loaded and links to videos (`.mp4`, `.gifv`,
  YouTube) are replaced by embedded players.
  """

  import Meeseeks.CSS

  require Logger

  # Suffix the forum appends to every page <title>; stripped from the result.
  @title_suffix " - The Something Awful Forums"

  # Abbreviated month names in calendar order; index + 1 is the month number.
  @months ~w(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)

  @doc """
  Extracts thread metadata and posts from a thread page.

  `html` is handed to `Meeseeks.one/2` unchanged, so it may be anything
  Meeseeks can query.

  Returns a map with the keys:

    * `:id` - integer taken from the text after the last `:` in the `class`
      attribute of the `#thread` element
    * `:title` - page title without the forum suffix
    * `:page` - number of the currently selected page (`1` when no page
      selector is found)
    * `:page_count` - number of the last page (`1` when no page selector is
      found)
    * `:posts` - list of maps with `:userinfo`, `:postdate` and `:postbody`
      for every post that is not marked `ignored`

  Raises if the page does not have the expected structure (for example a
  missing `#thread` element or an unparsable post date).
  """
  def compile(html) do
    title =
      html
      |> Meeseeks.one(css("title"))
      |> Meeseeks.text()
      |> String.replace(@title_suffix, "")

    thread = Meeseeks.one(html, css("#thread"))

    thread_id =
      thread
      |> Meeseeks.attr("class")
      |> String.split(":")
      |> List.last()
      |> String.to_integer()

    page =
      html
      |> Meeseeks.one(css("#content .pages.top option[selected]"))
      |> Meeseeks.text()
      |> parse_page_number()

    page_count =
      html
      |> Meeseeks.one(css("#content .pages.top option:last-of-type"))
      |> Meeseeks.text()
      |> parse_page_number()

    posts =
      for post <- Meeseeks.all(thread, css("table.post:not(.ignored)")) do
        %{
          userinfo: userinfo(post),
          postdate: postdate(post),
          postbody: postbody(post)
        }
      end

    %{id: thread_id, title: title, page: page, page_count: page_count, posts: posts}
  end

  # Converts the text of a page <option> into a page number, defaulting to 1.
  #
  # NOTE(review): when the selector matches nothing, Meeseeks.one/2 returns nil
  # and Meeseeks.text/1 is expected to pass that nil through - confirm against
  # the Meeseeks docs. Both nil and "" are treated as "no pager, single page".
  defp parse_page_number(nil), do: 1
  defp parse_page_number(""), do: 1
  defp parse_page_number(text), do: String.to_integer(text)

  # Reads the poster's name, registration date text and custom title HTML from
  # the post's `dl.userinfo` element.
  defp userinfo(post) do
    user = Meeseeks.one(post, css("dl.userinfo"))

    name = user |> Meeseeks.one(css("dt")) |> Meeseeks.text()
    regdate = user |> Meeseeks.one(css("dd.registered")) |> Meeseeks.text()
    title = user |> Meeseeks.one(css("dd.title > *")) |> Meeseeks.html()

    %{
      name: name,
      regdate: regdate,
      title: title
    }
  end

  # Parses the text of `td.postdate` into a NaiveDateTime (seconds set to 0).
  #
  # The text is split on whitespace, commas and colons; the first two tokens
  # are discarded and the remainder must be exactly
  # [month abbreviation, day, year, hours, minutes], otherwise the match fails.
  defp postdate(post) do
    date = post |> Meeseeks.one(css("td.postdate")) |> Meeseeks.text()

    [month_text, day, year, hours, minutes] =
      date
      |> String.split(~r{[\s,:]}, trim: true)
      |> Enum.drop(2)

    # find_index is zero-based; an unknown abbreviation yields nil and raises
    # here rather than producing a bogus date.
    month = 1 + Enum.find_index(@months, fn m -> m == month_text end)

    NaiveDateTime.new!(
      String.to_integer(year),
      month,
      String.to_integer(day),
      String.to_integer(hours),
      String.to_integer(minutes),
      0
    )
  end

  # Rebuilds the post body as an HTML string: every direct child of
  # `td.postbody` goes through transform/1, empty strings are dropped and the
  # rest is wrapped in a plain <div>.
  defp postbody(post) do
    {_, _, body} =
      post
      |> Meeseeks.one(css("td.postbody"))
      |> Meeseeks.tree()

    body
    |> Enum.map(&transform/1)
    |> Enum.reject(fn x -> x == "" end)
    |> then(&{"div", [], &1})
    |> Meeseeks.parse(:tuple_tree)
    |> Meeseeks.html()
  end

  # Rewrites a single tuple-tree node of a post body.
  #
  # Smilies are kept as they are; every other image is lazy-loaded and gets
  # the class "img-responsive".
  defp transform({"img", attrs, _children}) do
    {"class", class} = List.keyfind(attrs, "class", 0, {"class", ""})

    if class == "sa-smilie" do
      {"img", attrs, []}
    else
      # keystore (not keyreplace) so the class is also added when the image
      # had no class attribute at all.
      t_attrs = List.keystore(attrs, "class", 0, {"class", "img-responsive"})
      {"img", [{"loading", "lazy"} | t_attrs], []}
    end
  end

  # Links keep only their href; links to known media are replaced by an
  # embedded player.
  defp transform({"a", attrs, children}) do
    {"href", href} = List.keyfind(attrs, "href", 0, {"href", ""})

    cond do
      # skip internal links
      String.starts_with?(href, "/") ->
        {"a", [{"href", href}], children}

      # mp4
      String.ends_with?(href, ".mp4") ->
        transform_link(:mp4, href)

      # gifv
      String.ends_with?(href, ".gifv") ->
        transform_link(:gifv, href)

      # youtube
      String.starts_with?(href, "https://www.youtube.com/watch") ->
        transform_link(:ytlong, href)

      String.starts_with?(href, "https://youtu.be/") ->
        transform_link(:ytshort, href)

      true ->
        Logger.debug("no transform for #{href}")
        {"a", [{"href", href}], children}
    end
  end

  # Comments become "" so that postbody/1 filters them out.
  defp transform({:comment, _}), do: ""

  # Any other element is passed through untouched (children are not visited).
  defp transform({tag, attrs, children}), do: {tag, attrs, children}

  # Text nodes lose their surrounding whitespace.
  defp transform(text) when is_binary(text), do: String.trim(text)

  # Builds the embed markup for a media link of the given kind.
  defp transform_link(:mp4, href),
    do:
      {"div", [{"class", "responsive-embed"}],
       [
         {"video", [{"class", "img-responsive"}, {"controls", ""}],
          [{"source", [{"src", href}, {"type", "video/mp4"}], []}]}
       ]}

  # A .gifv link is offered as both a .webm and an .mp4 source.
  defp transform_link(:gifv, href),
    do:
      {"div", [{"class", "responsive-embed"}],
       [
         {"video", [{"class", "img-responsive"}, {"controls", ""}],
          [
            {"source",
             [{"src", String.replace(href, ".gifv", ".webm")}, {"type", "video/webm"}], []},
            {"source",
             [{"src", String.replace(href, ".gifv", ".mp4")}, {"type", "video/mp4"}], []}
          ]}
       ]}

  defp transform_link(:ytlong, href) do
    href
    |> String.replace("/watch?v=", "/embed/")
    |> youtube_iframe()
  end

  defp transform_link(:ytshort, href) do
    href
    |> String.replace("youtu.be/", "www.youtube.com/embed/")
    |> youtube_iframe()
  end

  # Wraps an embed URL in a lazy-loaded iframe inside a responsive container.
  defp youtube_iframe(src),
    do:
      {"div", [{"class", "responsive-embed"}],
       [
         {"iframe",
          [
            {"class", "youtube-player"},
            {"loading", "lazy"},
            {"allow", "fullscreen"},
            {"src", src}
          ], []}
       ]}
end