meeseeks in

This commit is contained in:
2024-06-04 00:06:23 +02:00
parent 757c1fe063
commit 4a25eae6c6
3 changed files with 66 additions and 32 deletions

View File

@ -1,23 +1,43 @@
defmodule SomethingErlang.AwfulApi.Thread do
import Meeseeks.CSS
require Logger
alias SomethingErlang.AwfulApi.Client
def compile(html) do
title =
Meeseeks.one(html, css("title"))
|> Meeseeks.text()
|> String.replace(" - The Something Awful Forums", "")
def compile(doc) do
html = Floki.parse_document!(doc)
thread = Floki.find(html, "#thread") |> Floki.filter_out("table.post.ignored")
thread =
Meeseeks.one(html, css("#thread"))
title = Floki.find(html, "title") |> Floki.text()
title = title |> String.replace(" - The Something Awful Forums", "")
# Floki.find(html, "#thread") |> Floki.filter_out("table.post.ignored")
thread_id =
Meeseeks.attr(thread, "class")
|> String.split(":")
|> List.last()
|> String.to_integer()
page =
Meeseeks.one(html, css("#content .pages.top option[selected]"))
|> Meeseeks.text()
|> case do
"" -> 1
s -> String.to_integer(s)
end
page_count =
case Floki.find(html, "#content .pages.top option:last-of-type") |> Floki.text() do
Meeseeks.one(html, css("#content .pages.top option:last-of-type"))
|> Meeseeks.text()
|> case do
"" -> 1
s -> String.to_integer(s)
end
posts =
for post <- Floki.find(thread, "table.post") do
for post <- Meeseeks.all(thread, css("table.post")) do
%{
userinfo: post |> userinfo(),
postdate: post |> postdate(),
@ -25,14 +45,18 @@ defmodule SomethingErlang.AwfulApi.Thread do
}
end
%{id: id, title: title, page: page, page_count: page_count, posts: posts}
%{id: thread_id, title: title, page: page, page_count: page_count, posts: posts}
end
defp userinfo(post) do
user = Floki.find(post, "dl.userinfo")
name = user |> Floki.find("dt") |> Floki.text()
regdate = user |> Floki.find("dd.registered") |> Floki.text()
title = user |> Floki.find("dd.title") |> List.first() |> Floki.children() |> Floki.raw_html()
user = Meeseeks.one(post, css("dl.userinfo"))
name = user |> Meeseeks.one(css("dt")) |> Meeseeks.text()
regdate = user |> Meeseeks.one(css("dd.registered")) |> Meeseeks.text()
title =
user
|> Meeseeks.one(css("dd.title > *"))
|> Meeseeks.html()
%{
name: name,
@ -42,12 +66,16 @@ defmodule SomethingErlang.AwfulApi.Thread do
end
defp postdate(post) do
date = Floki.find(post, "td.postdate") |> Floki.find("td.postdate") |> Floki.text()
date =
post
|> Meeseeks.one(css("td.postdate"))
|> Meeseeks.text()
[month_text, day, year, hours, minutes] =
date
|> String.split(~r{[\s,:]}, trim: true)
|> Enum.drop(1)
|> Enum.drop(2)
|> dbg()
month =
1 +
@ -67,23 +95,20 @@ defmodule SomethingErlang.AwfulApi.Thread do
end
# Builds the rendered body of a single post from its `td.postbody` cell.
#
# NOTE(review): this span appears to come from a diff view without +/- markers,
# so the Floki-based statements and the Meeseeks-based statements are
# presumably the removed and added versions of the same function rather than
# code meant to run together — confirm against the real file before editing.
defp postbody(post) do
# Floki variant: first `td.postbody` match with comment nodes filtered out.
body =
Floki.find(post, "td.postbody")
|> List.first()
|> Floki.filter_out(:comment)
# Meeseeks variant: convert the `td.postbody` node to a tuple tree and keep
# only its third element (the children), rebinding `body`.
{_, _, body} =
post
|> Meeseeks.one(css("td.postbody"))
|> Meeseeks.tree()
# Floki variant: rewrite childless `img` nodes and `a` nodes through the
# atom-tagged `transform` clauses, leaving every other node untouched.
Floki.traverse_and_update(body, fn
{"img", attrs, []} -> transform(:img, attrs)
{"a", attrs, children} -> transform(:a, attrs, children)
other -> other
end)
|> Floki.children()
|> Floki.raw_html()
# Meeseeks variant: run `transform/1` over each direct child of the post body
# (nested nodes are not visited here), drop the results that are empty
# strings, wrap what is left in a bare `div`, and serialise it through
# Meeseeks.
body
|> Enum.map(&transform/1)
|> Enum.reject(fn x -> x == "" end)
|> then(&{"div", [], &1})
|> Meeseeks.parse(:tuple_tree)
|> Meeseeks.html()
end
defp transform(elem, attr, children \\ [])
defp transform(:img, attrs, _children) do
defp transform({"img", attrs, _children}) do
{"class", class} = List.keyfind(attrs, "class", 0, {"class", ""})
if class == "sa-smilie" do
@ -94,7 +119,7 @@ defmodule SomethingErlang.AwfulApi.Thread do
end
end
defp transform(:a, attrs, children) do
defp transform({"a", attrs, children}) do
{"href", href} = List.keyfind(attrs, "href", 0, {"href", ""})
cond do
@ -123,6 +148,12 @@ defmodule SomethingErlang.AwfulApi.Thread do
end
end
# Fallback clauses of transform/1 for nodes that need no special rewriting.

# A comment node is replaced by the empty string.
defp transform({:comment, _comment}) do
  ""
end

# Any remaining three-element node tuple is returned unchanged.
defp transform({_tag, _attrs, _children} = element) do
  element
end

# A text node loses its leading and trailing whitespace.
defp transform(node) when is_binary(node) do
  String.trim(node)
end
defp transform_link(:mp4, href),
do:
{"div", [{"class", "responsive-embed"}],