meeseeks in
This commit is contained in:
@ -1,23 +1,43 @@
|
||||
defmodule SomethingErlang.AwfulApi.Thread do
|
||||
import Meeseeks.CSS
|
||||
|
||||
require Logger
|
||||
|
||||
alias SomethingErlang.AwfulApi.Client
|
||||
def compile(html) do
|
||||
title =
|
||||
Meeseeks.one(html, css("title"))
|
||||
|> Meeseeks.text()
|
||||
|> String.replace(" - The Something Awful Forums", "")
|
||||
|
||||
def compile(doc) do
|
||||
html = Floki.parse_document!(doc)
|
||||
thread = Floki.find(html, "#thread") |> Floki.filter_out("table.post.ignored")
|
||||
thread =
|
||||
Meeseeks.one(html, css("#thread"))
|
||||
|
||||
title = Floki.find(html, "title") |> Floki.text()
|
||||
title = title |> String.replace(" - The Something Awful Forums", "")
|
||||
# Floki.find(html, "#thread") |> Floki.filter_out("table.post.ignored")
|
||||
|
||||
thread_id =
|
||||
Meeseeks.attr(thread, "class")
|
||||
|> String.split(":")
|
||||
|> List.last()
|
||||
|> String.to_integer()
|
||||
|
||||
page =
|
||||
Meeseeks.one(html, css("#content .pages.top option[selected]"))
|
||||
|> Meeseeks.text()
|
||||
|> case do
|
||||
"" -> 1
|
||||
s -> String.to_integer(s)
|
||||
end
|
||||
|
||||
page_count =
|
||||
case Floki.find(html, "#content .pages.top option:last-of-type") |> Floki.text() do
|
||||
Meeseeks.one(html, css("#content .pages.top option:last-of-type"))
|
||||
|> Meeseeks.text()
|
||||
|> case do
|
||||
"" -> 1
|
||||
s -> String.to_integer(s)
|
||||
end
|
||||
|
||||
posts =
|
||||
for post <- Floki.find(thread, "table.post") do
|
||||
for post <- Meeseeks.all(thread, css("table.post")) do
|
||||
%{
|
||||
userinfo: post |> userinfo(),
|
||||
postdate: post |> postdate(),
|
||||
@ -25,14 +45,18 @@ defmodule SomethingErlang.AwfulApi.Thread do
|
||||
}
|
||||
end
|
||||
|
||||
%{id: id, title: title, page: page, page_count: page_count, posts: posts}
|
||||
%{id: thread_id, title: title, page: page, page_count: page_count, posts: posts}
|
||||
end
|
||||
|
||||
defp userinfo(post) do
|
||||
user = Floki.find(post, "dl.userinfo")
|
||||
name = user |> Floki.find("dt") |> Floki.text()
|
||||
regdate = user |> Floki.find("dd.registered") |> Floki.text()
|
||||
title = user |> Floki.find("dd.title") |> List.first() |> Floki.children() |> Floki.raw_html()
|
||||
user = Meeseeks.one(post, css("dl.userinfo"))
|
||||
name = user |> Meeseeks.one(css("dt")) |> Meeseeks.text()
|
||||
regdate = user |> Meeseeks.one(css("dd.registered")) |> Meeseeks.text()
|
||||
|
||||
title =
|
||||
user
|
||||
|> Meeseeks.one(css("dd.title > *"))
|
||||
|> Meeseeks.html()
|
||||
|
||||
%{
|
||||
name: name,
|
||||
@ -42,12 +66,16 @@ defmodule SomethingErlang.AwfulApi.Thread do
|
||||
end
|
||||
|
||||
defp postdate(post) do
|
||||
date = Floki.find(post, "td.postdate") |> Floki.find("td.postdate") |> Floki.text()
|
||||
date =
|
||||
post
|
||||
|> Meeseeks.one(css("td.postdate"))
|
||||
|> Meeseeks.text()
|
||||
|
||||
[month_text, day, year, hours, minutes] =
|
||||
date
|
||||
|> String.split(~r{[\s,:]}, trim: true)
|
||||
|> Enum.drop(1)
|
||||
|> Enum.drop(2)
|
||||
|> dbg()
|
||||
|
||||
month =
|
||||
1 +
|
||||
@ -67,23 +95,20 @@ defmodule SomethingErlang.AwfulApi.Thread do
|
||||
end
|
||||
|
||||
# Renders the body of a single post as an HTML string.
#
# Takes the children of the post's `td.postbody` element as a tuple tree, runs
# each direct child through `transform/1`, drops every child that
# `transform/1` reduced to "" (e.g. comments and whitespace-only text), wraps
# what is left in a fresh `div`, and serialises that back to HTML.
#
# Only the direct children of `td.postbody` are passed to `transform/1`.
defp postbody(post) do
  # The matched element itself is discarded; only its children are kept.
  # NOTE(review): presumably this match raises when the post has no
  # `td.postbody` cell — confirm that crashing is the intended failure mode.
  {_tag, _attrs, children} =
    post
    |> Meeseeks.one(css("td.postbody"))
    |> Meeseeks.tree()

  children
  |> Enum.map(&transform/1)
  |> Enum.reject(&(&1 == ""))
  |> then(&{"div", [], &1})
  |> Meeseeks.parse(:tuple_tree)
  |> Meeseeks.html()
end
|
||||
|
||||
defp transform(elem, attr, children \\ [])
|
||||
|
||||
defp transform(:img, attrs, _children) do
|
||||
defp transform({"img", attrs, _children}) do
|
||||
{"class", class} = List.keyfind(attrs, "class", 0, {"class", ""})
|
||||
|
||||
if class == "sa-smilie" do
|
||||
@ -94,7 +119,7 @@ defmodule SomethingErlang.AwfulApi.Thread do
|
||||
end
|
||||
end
|
||||
|
||||
defp transform(:a, attrs, children) do
|
||||
defp transform({"a", attrs, children}) do
|
||||
{"href", href} = List.keyfind(attrs, "href", 0, {"href", ""})
|
||||
|
||||
cond do
|
||||
@ -123,6 +148,12 @@ defmodule SomethingErlang.AwfulApi.Thread do
|
||||
end
|
||||
end
|
||||
|
||||
# Comment nodes are replaced by the empty string.
defp transform({:comment, _text}) do
  ""
end

# Any other three-element node is handed back exactly as it came in.
defp transform({_tag, _attrs, _children} = element), do: element

# Text nodes lose their leading and trailing whitespace.
defp transform(text) when is_binary(text) do
  String.trim(text)
end
|
||||
|
||||
defp transform_link(:mp4, href),
|
||||
do:
|
||||
{"div", [{"class", "responsive-embed"}],
|
||||
|
Reference in New Issue
Block a user