From 3f7aaadd273596ff01c6f597f0ee73d1260ab515 Mon Sep 17 00:00:00 2001 From: rdiedrich Date: Tue, 30 Jun 2020 15:57:07 +0200 Subject: [PATCH] refactorrr --- src/clojsa/handler.clj | 25 ++++---- src/clojsa/saparser.clj | 124 +++++++++++++++++++++------------------- 2 files changed, 78 insertions(+), 71 deletions(-) diff --git a/src/clojsa/handler.clj b/src/clojsa/handler.clj index 55c8663..ed84c02 100644 --- a/src/clojsa/handler.clj +++ b/src/clojsa/handler.clj @@ -17,6 +17,19 @@ tresp (client/text-response session turl)] (parser/thread-map id page tresp))) +(defn thread-page [session id page] + (let [thread (get-thread session id page) + {:keys [id page page-count title]} thread + login-part (views/login-form + ["/thread/%d?page=%d" id page] + (get session :loggedin false)) + header-part (views/header-fragment login-part) + thread-part (views/thread-page thread) + paginate-part (views/paginate + (str "/thread/" id) page page-count)] + (views/main-template {:title title} + header-part thread-part paginate-part))) + (defn get-bookmarks [session page] (let [burl (client/bookmarks-url page) bresp (client/text-response session burl)] @@ -51,17 +64,7 @@ (GET "/thread/:id" [id :<< as-int page :<< as-int :as {session :session}] - (let [thread (get-thread session id page) - {:keys [id page page-count title]} thread - login-part (views/login-form - ["/thread/%d?page=%d" id page] - (get session :loggedin false)) - header-part (views/header-fragment login-part) - thread-part (views/thread-page thread) - paginate-part (views/paginate - (str "/thread/" id) page page-count)] - (views/main-template {:title title} - header-part thread-part paginate-part))) + (thread-page session id page)) (GET "/thread/:id" [id] {:status 302 :headers {"Location" (str "/thread/" id "?page=1")}}) diff --git a/src/clojsa/saparser.clj b/src/clojsa/saparser.clj index fc0bb1c..9160f12 100644 --- a/src/clojsa/saparser.clj +++ b/src/clojsa/saparser.clj @@ -14,88 +14,92 @@ :tag :div :content content}) -(defn parse-title [htree] - (-> (s/select (s/child (s/tag :title)) htree) - first :content first - (string/replace #" - The Something Awful Forums" ""))) +(defn element-node [kw htree] + (case kw -(defn parse-pagecount [htree] - (-> (s/select (s/descendant - (s/class :pages) (s/tag :option)) htree) - last :content first Integer/parseInt)) + :author + (-> (s/select (s/child (s/class :author)) htree) + first :content first) -(defn parse-thread [htree] - (-> (s/select (s/descendant - (s/id :thread)) - htree) - first)) + :bookmarks + (s/select (s/descendant + (s/id :forum) (s/tag :tbody) (s/tag :tr)) htree) -(defn select-td [class-key htree] - (s/select (s/descendant - (s/and (s/tag :td) (s/class class-key))) htree)) + :pagecount + (-> (s/select (s/descendant + (s/class :pages) (s/tag :option)) htree) + last :content first Integer/parseInt) -(defn parse-ui [ui] - (let [ui (first (s/select (s/descendant (s/tag :dl)) ui)) - author (-> (s/select (s/descendant (s/class :author)) ui) - first :content first) - regdate (-> (s/select (s/descendant (s/class :registered)) ui) - first :content first) - title (-> (s/select (s/descendant (s/class :title)) ui) first) - avatar (-> (s/select (s/descendant (s/tag :img)) title) first)] - {:author author - :regdate regdate - :avatar-title (hickory-to-hiccup - (hickory-div (:content title) "avatar-title")) - :avatar (when avatar (hickory-to-hiccup avatar))})) + :title + (-> (s/select (s/child (s/tag :title)) htree) + first :content first + (string/replace #" - The Something Awful Forums" "")) -(defn parse-pd [pd] - (string/trim (last (hickory-to-hiccup pd)))) + :thread + (let [thread-tree (first (s/select (s/descendant + (s/id :thread)) htree)) + td-classes [:userinfo :postdate :postbody]] + (for [class-key td-classes] + (s/select (s/descendant + (s/and (s/tag :td) (s/class class-key))) thread-tree))))) -(defn parse-pb [pb] - (let [pb (-> pb :content)] - (hickory-to-hiccup (hickory-div pb "postbody")))) +(defn processed-element [kw elem] + (case kw + :bookmark + (when-let [link (first + (s/select (s/descendant + (s/class :info) (s/tag :a)) elem))] + (let [thread-id (re-find #"\d+$" (:href (:attrs link))) + title (-> link :content first string/trim)] + {:id thread-id :title title})) + + :postbody + (hickory-to-hiccup (hickory-div (:content elem) "postbody")) + + :postdate + (string/trim (last (hickory-to-hiccup elem))) + + :userinfo + (let [ui (first (s/select (s/descendant (s/tag :dl)) elem)) + author (-> (s/select (s/descendant (s/class :author)) ui) + first :content first) + regdate (-> (s/select (s/descendant (s/class :registered)) ui) + first :content first) + title (-> (s/select (s/descendant (s/class :title)) ui) first) + avatar (-> (s/select (s/descendant (s/tag :img)) title) first)] + {:author author + :regdate regdate + :avatar-title (hickory-to-hiccup + (hickory-div (:content title) "avatar-title")) + :avatar (when avatar (hickory-to-hiccup avatar))}))) (defn thread-map [id page doc] (let [htree (hickory-doc doc) - title (parse-title htree) - page-count (parse-pagecount htree) - thread-tree (parse-thread htree) - userinfo (select-td :userinfo thread-tree) - postdate (select-td :postdate thread-tree) - postbody (select-td :postbody thread-tree)] + title (element-node :title htree) + page-count (element-node :pagecount htree) + thread-tree (element-node :thread htree) + [userinfo postdate postbody] thread-tree] {:title title :id id :page page :page-count page-count :content (for [[ui pd pb] (partition 3 (interleave userinfo postdate postbody)) - :when (not= "Adbot" (-> (s/select (s/child (s/class :author)) ui) - first :content first))] - {:ui (parse-ui ui) :pd (parse-pd pd) :pb (parse-pb pb)})})) - -(defn parse-bookmarks [htree] - (s/select (s/descendant - (s/id :forum) - (s/tag :tbody) - (s/tag :tr)) - htree)) - -(defn parse-row [htree] - (when-let [link (-> (s/select (s/descendant (s/class :info) (s/tag :a)) htree) first)] - (let [thread-id (re-find #"\d+$" (:href (:attrs link))) - title (-> link :content first string/trim)] - {:id thread-id :title title}))) + :when (not= "Adbot" (element-node :author ui))] + {:ui (processed-element :userinfo ui) + :pd (processed-element :postdate pd) + :pb (processed-element :postbody pb)})})) (defn bookmarks-map [page doc] (let [htree (hickory-doc doc) - title (parse-title htree) - page-count (parse-pagecount htree) - bookmarks-tree (parse-bookmarks htree)] + title (element-node :title htree) + page-count (element-node :pagecount htree) + bookmarks-tree (element-node :bookmarks htree)] {:title title :page page :page-count page-count :content (for [row bookmarks-tree - :let [parsed-row (parse-row row)] + :let [parsed-row (processed-element :bookmark row)] :when parsed-row] parsed-row)}))