lv upgrade done; home.html form needs changed; test migrations

This commit is contained in:
2024-06-02 14:44:53 +02:00
parent 443081e086
commit 31c126a394
25 changed files with 1452 additions and 284 deletions

View File

@ -0,0 +1,25 @@
defmodule SomethingErlang.AwfulApi do
  @moduledoc """
  Entry point for reading forum data as parsed Elixir terms.

  Each function is a thin wrapper that delegates to a dedicated submodule:
  `SomethingErlang.AwfulApi.Thread` for thread pages and
  `SomethingErlang.AwfulApi.Bookmarks` for the bookmark list.
  """

  require Logger

  alias SomethingErlang.AwfulApi.Bookmarks
  alias SomethingErlang.AwfulApi.Thread

  @doc """
  Returns the parsed contents of page `page` of thread `id`.

  `user` is forwarded unchanged to `SomethingErlang.AwfulApi.Thread.compile/3`,
  which uses it to authenticate the request. The result is a map with the keys
  `:id`, `:title`, `:page`, `:page_count` and `:posts`.

  ## Examples

  The call performs a live HTTP request, so the values below are illustrative:

      thread = AwfulApi.parsed_thread(3945300, 1, user)
      length(thread.posts)  #=> 42
      thread.page_count     #=> 12
  """
  @spec parsed_thread(term(), term(), map()) :: map()
  def parsed_thread(id, page, user) do
    Thread.compile(id, page, user)
  end

  @doc """
  Returns the bookmarked threads of `user` as a list of maps.

  `page` selects which page of the bookmark list is fetched and defaults to
  the first page. See `SomethingErlang.AwfulApi.Bookmarks.parse/1` for the
  keys of each map.
  """
  @spec bookmarks(map(), term()) :: [map()]
  def bookmarks(user, page \\ 1) do
    Bookmarks.compile(page, user)
  end
end

View File

@ -0,0 +1,59 @@
defmodule SomethingErlang.AwfulApi.Bookmarks do
  @moduledoc """
  Parses the bookmarked-threads page into a list of plain maps.
  """

  require Logger

  alias SomethingErlang.AwfulApi.Client

  @doc """
  Fetches bookmark page `page` for `user` and parses every `tr.thread` row.

  Returns a list with one map per row, in document order. See `parse/1` for
  the shape of each map.
  """
  def compile(page, user) do
    page
    |> Client.bookmarks_doc(user)
    |> Floki.parse_document!()
    |> Floki.find("tr.thread")
    |> Enum.map(&parse/1)
  end

  @doc """
  Converts a single `tr.thread` row into a map keyed by column.

  Each `td` cell is dispatched on the prefix of its `class` attribute:

    * `:title`, `:icon`, `:rating`, `:lastpost` - the cell's children as raw HTML
    * `:author` - the cell's text
    * `:replies`, `:views` - the cell's text converted to an integer

  Cells whose class starts with `"star"` are ignored, as are cells whose first
  attribute is not `class` (they do not match the generator pattern).

  Raises `CaseClauseError` for a cell with any other class, and `ArgumentError`
  when a `replies` or `views` cell does not contain a plain integer.
  """
  def parse(thread) do
    for {"td", [{"class", class} | _attrs], children} <- Floki.find(thread, "td"),
        not String.starts_with?(class, "star"),
        into: %{} do
      case class do
        "title" <> _rest -> {:title, Floki.raw_html(children)}
        "icon" <> _rest -> {:icon, Floki.raw_html(children)}
        "author" <> _rest -> {:author, Floki.text(children)}
        "replies" <> _rest -> {:replies, children |> Floki.text() |> String.to_integer()}
        "views" <> _rest -> {:views, children |> Floki.text() |> String.to_integer()}
        "rating" <> _rest -> {:rating, Floki.raw_html(children)}
        "lastpost" <> _rest -> {:lastpost, Floki.raw_html(children)}
      end
    end
  end
end

View File

@ -0,0 +1,95 @@
defmodule SomethingErlang.AwfulApi.Client do
  @moduledoc """
  HTTP client for the forum, built on `Req`.

  Authenticated requests send the `bbuserid` and `bbpassword` cookies, which
  are taken from the `id` and `hash` fields of the `user` argument.
  """

  require Logger

  @base_url "https://forums.somethingawful.com/"
  @user_agent "SomethingErlangClient/0.1"

  @doc """
  Fetches page `page` of thread `id` and returns the response body as UTF-8.

  The body is treated as Latin-1 and re-encoded. The round-trip time measured
  by the request is logged at debug level.
  """
  def thread_doc(id, page, user) do
    resp = user |> new() |> get_thread(id, page)
    Logger.debug("Client reply in #{resp.private.time}ms ")
    :unicode.characters_to_binary(resp.body, :latin1)
  end

  @doc """
  Returns the page number the forum redirects to for the newest unread post.

  The request is made with `goto=newpost` and without following redirects; the
  page is read from the `pagenumber` query parameter of the `location` header.

  Raises `MatchError` when the response is not a 302, carries no `location`
  header, or the redirect URL has no `pagenumber` parameter.
  """
  def thread_lastseen_page(id, user) do
    %{status: 302} = resp = user |> new() |> get_thread_newpost(id)

    # Go through Req's accessor instead of the raw `headers` field so the
    # lookup does not depend on how Req represents headers internally.
    [redir_url | _] = Req.Response.get_header(resp, "location")
    [_, page] = Regex.run(~r/pagenumber=(\d+)/, redir_url)
    String.to_integer(page)
  end

  @doc """
  Fetches page `page` of the bookmark list and returns the body as UTF-8.

  The body is treated as Latin-1 and re-encoded.
  """
  def bookmarks_doc(page, user) do
    resp = user |> new() |> get_bookmarks(page)
    :unicode.characters_to_binary(resp.body, :latin1)
  end

  @doc """
  Posts the login form and returns the `bb`-prefixed cookies from the response.

  The result is a map from cookie name (as an existing atom) to cookie value.
  It is empty when the response sets no such cookie.

  Raises `ArgumentError` if a `bb`-prefixed cookie name is not an existing atom.
  """
  def login(username, password) do
    form = [action: "login", username: username, password: password]

    new()
    |> Req.post!(url: "account.php", form: form)
    |> extract_cookies()
  end

  defp get_thread(req, id, page) do
    Req.get!(req, url: "showthread.php", params: [threadid: id, pagenumber: page])
  end

  # The redirect must not be followed: the caller reads the target page number
  # from the `location` header of the 302 itself.
  defp get_thread_newpost(req, id) do
    Req.get!(req,
      url: "showthread.php",
      params: [threadid: id, goto: "newpost"],
      redirect: false
    )
  end

  defp get_bookmarks(req, page) do
    Req.get!(req, url: "bookmarkthreads.php", params: [pagenumber: page])
  end

  # Builds `%{name => value}` from every `set-cookie` header starting with "bb".
  # Only the leading `name=value` pair of each cookie is kept; attributes after
  # the first ";" are dropped.
  defp extract_cookies(%Req.Response{} = response) do
    for cookie <- Req.Response.get_header(response, "set-cookie"),
        String.starts_with?(cookie, "bb"),
        into: %{} do
      [pair | _attributes] = String.split(cookie, ";", parts: 2)
      # `parts: 2` keeps values that themselves contain "=" intact.
      [name, value] = String.split(pair, "=", parts: 2)
      {String.to_existing_atom(name), value}
    end
  end

  # Request template for authenticated calls. Adds the login cookies and a pair
  # of steps that store the elapsed milliseconds in `response.private.time`.
  defp new(user) do
    Req.new(
      base_url: @base_url,
      user_agent: @user_agent,
      cache: true,
      headers: [cookie: [cookies(%{bbuserid: user.id, bbpassword: user.hash})]]
    )
    |> Req.Request.append_request_steps(
      # Monotonic clock: unaffected by midnight wrap-around or clock changes.
      time: fn req ->
        Req.Request.put_private(req, :time, System.monotonic_time(:millisecond))
      end
    )
    |> Req.Request.prepend_response_steps(
      time: fn {req, res} ->
        elapsed = System.monotonic_time(:millisecond) - req.private.time
        {req, Req.Response.put_private(res, :time, elapsed)}
      end
    )
  end

  # Request template for unauthenticated calls (login); redirects are not
  # followed so the cookies of the immediate response can be read.
  defp new() do
    Req.new(
      base_url: @base_url,
      user_agent: @user_agent,
      redirect: false
    )
  end

  # Serialises a map into a `Cookie` header value: "k1=v1; k2=v2".
  defp cookies(args) when is_map(args) do
    Enum.map_join(args, "; ", fn {k, v} -> "#{k}=#{v}" end)
  end
end

View File

@ -0,0 +1,170 @@
defmodule SomethingErlang.AwfulApi.Thread do
  @moduledoc """
  Parses one page of a forum thread into a map of posts.

  Post bodies are rewritten on the way: images are marked for lazy loading and
  links to videos or YouTube are replaced by embedded players.
  """

  require Logger

  alias SomethingErlang.AwfulApi.Client

  @title_suffix " - The Something Awful Forums"
  @months ~w(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)

  @doc """
  Fetches page `page` of thread `id` on behalf of `user` and parses it.

  Returns a map with:

    * `:id` and `:page` - the arguments, unchanged
    * `:title` - the document title without the forum-name suffix
    * `:page_count` - taken from the last option of the top page selector,
      or `1` when there is none
    * `:posts` - one map per `table.post` (ignored posts excluded) with the
      keys `:userinfo`, `:postdate` and `:postbody`
  """
  def compile(id, page, user) do
    html = id |> Client.thread_doc(page, user) |> Floki.parse_document!()

    thread = html |> Floki.find("#thread") |> Floki.filter_out("table.post.ignored")

    title =
      html
      |> Floki.find("title")
      |> Floki.text()
      |> String.replace(@title_suffix, "")

    posts =
      for post <- Floki.find(thread, "table.post") do
        %{userinfo: userinfo(post), postdate: postdate(post), postbody: postbody(post)}
      end

    %{id: id, title: title, page: page, page_count: page_count(html), posts: posts}
  end

  # Reads the number of pages from the last <option> of the top page selector.
  defp page_count(html) do
    case html |> Floki.find("#content .pages.top option:last-of-type") |> Floki.text() do
      "" -> 1
      count -> String.to_integer(count)
    end
  end

  # Extracts the poster's name, registration date (both as text) and custom
  # title (as raw HTML) from the post's `dl.userinfo`.
  defp userinfo(post) do
    user = Floki.find(post, "dl.userinfo")

    title =
      user
      |> Floki.find("dd.title")
      |> List.first()
      |> Floki.children()
      |> Floki.raw_html()

    %{
      name: user |> Floki.find("dt") |> Floki.text(),
      regdate: user |> Floki.find("dd.registered") |> Floki.text(),
      title: title
    }
  end

  # Parses the text of `td.postdate` into a NaiveDateTime with seconds set to 0.
  # The text is split on whitespace, "," and ":"; the first token is discarded
  # and the remainder must be exactly [month, day, year, hours, minutes].
  defp postdate(post) do
    [month_text, day, year, hours, minutes] =
      post
      |> Floki.find("td.postdate")
      |> Floki.text()
      |> String.split(~r{[\s,:]}, trim: true)
      |> Enum.drop(1)

    month = 1 + Enum.find_index(@months, &(&1 == month_text))

    NaiveDateTime.new!(
      String.to_integer(year),
      month,
      String.to_integer(day),
      String.to_integer(hours),
      String.to_integer(minutes),
      0
    )
  end

  # Returns the inner HTML of the first `td.postbody`, with HTML comments
  # removed and every childless <img> and every <a> passed through transform/3.
  defp postbody(post) do
    post
    |> Floki.find("td.postbody")
    |> List.first()
    |> Floki.filter_out(:comment)
    |> Floki.traverse_and_update(fn
      {"img", attrs, []} -> transform(:img, attrs)
      {"a", attrs, children} -> transform(:a, attrs, children)
      other -> other
    end)
    |> Floki.children()
    |> Floki.raw_html()
  end

  defp transform(elem, attr, children \\ [])

  # Smilies are left untouched. Every other image is lazy-loaded and gets the
  # class "img-responsive".
  defp transform(:img, attrs, _children) do
    case List.keyfind(attrs, "class", 0) do
      {"class", "sa-smilie"} ->
        {"img", attrs, []}

      _other ->
        # keystore (not keyreplace) so images without a class attribute
        # receive the class as well.
        responsive = List.keystore(attrs, "class", 0, {"class", "img-responsive"})
        {"img", [{"loading", "lazy"} | responsive], []}
    end
  end

  # Links keep only their href. Links to .mp4/.gifv files and to YouTube are
  # replaced by an embedded player; the first matching rule wins.
  defp transform(:a, attrs, children) do
    {"href", href} = List.keyfind(attrs, "href", 0, {"href", ""})

    cond do
      # skip internal links
      String.starts_with?(href, "/") ->
        {"a", [{"href", href}], children}

      # mp4
      String.ends_with?(href, ".mp4") ->
        transform_link(:mp4, href)

      # gifv
      String.ends_with?(href, ".gifv") ->
        transform_link(:gifv, href)

      # youtube
      String.starts_with?(href, "https://www.youtube.com/watch") ->
        transform_link(:ytlong, href)

      String.starts_with?(href, "https://youtu.be/") ->
        transform_link(:ytshort, href)

      true ->
        Logger.debug("no transform for #{href}")
        {"a", [{"href", href}], children}
    end
  end

  # Builds the replacement node for a link of the given kind.
  defp transform_link(:mp4, href) do
    video_embed([{"source", [{"src", href}, {"type", "video/mp4"}], []}])
  end

  # A .gifv link is offered as both a .webm and an .mp4 source.
  defp transform_link(:gifv, href) do
    video_embed([
      {"source", [{"src", String.replace(href, ".gifv", ".webm")}, {"type", "video/webm"}], []},
      {"source", [{"src", String.replace(href, ".gifv", ".mp4")}, {"type", "video/mp4"}], []}
    ])
  end

  defp transform_link(:ytlong, href) do
    href
    |> String.replace("/watch?v=", "/embed/")
    |> youtube_iframe()
  end

  defp transform_link(:ytshort, href) do
    href
    |> String.replace("youtu.be/", "www.youtube.com/embed/")
    |> youtube_iframe()
  end

  # Wraps the given <source> nodes in a <video controls> inside the
  # responsive-embed container.
  defp video_embed(sources) do
    {"div", [{"class", "responsive-embed"}],
     [{"video", [{"class", "img-responsive"}, {"controls", ""}], sources}]}
  end

  # Wraps a YouTube embed URL in a lazy-loaded iframe inside the
  # responsive-embed container.
  defp youtube_iframe(src) do
    {"div", [{"class", "responsive-embed"}],
     [
       {"iframe",
        [
          {"class", "youtube-player"},
          {"loading", "lazy"},
          {"allow", "fullscreen"},
          {"src", src}
        ], []}
     ]}
  end
end