bookmark parsing, let's go

This commit is contained in:
2022-08-02 15:51:58 +02:00
parent 606f5df100
commit e7da164b3f
4 changed files with 97 additions and 4 deletions

View File

@ -2,6 +2,7 @@ defmodule SomethingErlang.AwfulApi do
require Logger
alias SomethingErlang.AwfulApi.Thread
alias SomethingErlang.AwfulApi.Bookmarks
@doc """
Returns a list of all posts on page of a thread.
@ -17,4 +18,8 @@ defmodule SomethingErlang.AwfulApi do
# Thin delegate: hands the thread id, page number and user straight to
# Thread.compile/3 and returns whatever it produces.
def parsed_thread(id, page, user), do: Thread.compile(id, page, user)
@doc """
Returns the parsed bookmarked threads for `user`.

`page` selects which bookmarks page is requested and defaults to `1`, so
existing `bookmarks(user)` callers keep fetching the first page. The result
is whatever `Bookmarks.compile/2` returns for that page and user.
"""
def bookmarks(user, page \\ 1) do
  # Bookmarks.compile/2 takes the page first, then the user.
  Bookmarks.compile(page, user)
end
end

View File

@ -0,0 +1,51 @@
defmodule SomethingErlang.AwfulApi.Bookmarks do
  @moduledoc """
  Turns the bookmarked-threads listing fetched through
  `SomethingErlang.AwfulApi.Client` into a list of maps, one per thread row.
  """

  require Logger

  alias SomethingErlang.AwfulApi.Client

  @doc """
  Fetches bookmarks page `page` for `user` and parses every `tr.thread` row.

  Returns a list with one map per row, each built by `parse/1`. Raises if the
  fetched document cannot be parsed (`Floki.parse_document!/1`).
  """
  def compile(page, user) do
    page
    |> Client.bookmarks_doc(user)
    |> Floki.parse_document!()
    |> Floki.find("tr.thread")
    |> Enum.map(&parse/1)
  end

  @doc """
  Converts one thread row into a map keyed by column.

  Every `td` whose first attribute is `class` is inspected, and the class
  prefix decides the key and how the cell content is extracted:

    * `:title`, `:icon`, `:rating`, `:lastpost` - raw HTML of the cell children
    * `:author` - text content of the cell
    * `:replies`, `:views` - text content converted to an integer

  Cells with any other class (including the `star` column) are skipped, so an
  unexpected extra column does not abort parsing of the whole page. Raises
  `ArgumentError` when a `replies` or `views` cell does not hold an integer.
  """
  def parse(thread) do
    # The pattern in the generator silently drops tds whose first attribute is
    # not `class`; parse_cell/2 yields zero or one {key, value} pairs per cell.
    for {"td", [{"class", class} | _attrs], children} <- Floki.find(thread, "td"),
        {key, value} <- parse_cell(class, children),
        into: %{} do
      {key, value}
    end
  end

  # Maps a cell's class prefix to a single-entry keyword list, or to [] when
  # the column is not one we extract.
  defp parse_cell("title" <> _rest, children), do: [title: Floki.raw_html(children)]
  defp parse_cell("icon" <> _rest, children), do: [icon: Floki.raw_html(children)]
  defp parse_cell("author" <> _rest, children), do: [author: Floki.text(children)]
  defp parse_cell("replies" <> _rest, children), do: [replies: cell_integer(children)]
  defp parse_cell("views" <> _rest, children), do: [views: cell_integer(children)]
  defp parse_cell("rating" <> _rest, children), do: [rating: Floki.raw_html(children)]
  defp parse_cell("lastpost" <> _rest, children), do: [lastpost: Floki.raw_html(children)]
  defp parse_cell(_other_class, _children), do: []

  # Reads the cell text as an integer; surrounding whitespace is trimmed first
  # because String.to_integer/1 rejects it.
  defp cell_integer(children) do
    children
    |> Floki.text()
    |> String.trim()
    |> String.to_integer()
  end
end

View File

@ -7,8 +7,9 @@ defmodule SomethingErlang.AwfulApi.Client do
:unicode.characters_to_binary(resp.body, :latin1)
end
defp cookies(args) when is_map(args) do
Enum.map_join(args, "; ", fn {k, v} -> "#{k}=#{v}" end)
# Requests bookmarks page `page` on behalf of `user` and returns the response
# body, treating it as Latin-1 input for :unicode.characters_to_binary/2.
def bookmarks_doc(page, user) do
  response =
    user
    |> new_request()
    |> get_bookmarks(page)

  :unicode.characters_to_binary(response.body, :latin1)
end
defp get_thread(req, id, page \\ 1) do
@ -17,6 +18,12 @@ defmodule SomethingErlang.AwfulApi.Client do
Req.get!(req, url: url, params: params)
end
# Issues a GET for "bookmarkthreads.php" on the prepared request, passing the
# page as the `pagenumber` query parameter. Raises on failure (Req.get!/2).
defp get_bookmarks(req, page \\ 1) do
  Req.get!(req, url: "bookmarkthreads.php", params: [pagenumber: page])
end
defp new_request(user) do
Req.new(
base_url: @base_url,
@ -26,4 +33,8 @@ defmodule SomethingErlang.AwfulApi.Client do
)
# |> Req.Request.append_request_steps(inspect: &IO.inspect/1)
end
# Renders a map as a "name=value; name=value" string, one pair per map entry.
defp cookies(args) when is_map(args) do
  Enum.map_join(args, "; ", fn {name, value} ->
    to_string(name) <> "=" <> to_string(value)
  end)
end
end