From fc3ce481c3b8ada9150e3aaec398d05ac21fcf8a Mon Sep 17 00:00:00 2001 From: Janos Erdos Date: Fri, 26 Jul 2024 14:35:33 +0200 Subject: [PATCH] refactor stencil.merger namespace (#152) --- src/stencil/merger.clj | 240 ++++++++++++++++------------------- src/stencil/tokenizer.clj | 2 +- src/stencil/util.clj | 9 +- test/stencil/merger_test.clj | 92 ++++++-------- test/stencil/util_test.clj | 20 --- 5 files changed, 152 insertions(+), 211 deletions(-) diff --git a/src/stencil/merger.clj b/src/stencil/merger.clj index ff002af0..1abb30d3 100644 --- a/src/stencil/merger.clj +++ b/src/stencil/merger.clj @@ -1,144 +1,120 @@ (ns stencil.merger - "Token listaban a text tokenekbol kiszedi a parancsokat es action tokenekbe teszi." + "Given a seq of tokens, parses Stencil expressions and creates :action tokens." (:require [clojure.data.xml :as xml] - [clojure.string :refer [index-of ends-with?]] [stencil.postprocess.ignored-tag :as ignored-tag] [stencil [types :refer [open-tag close-tag]] - [tokenizer :as tokenizer] - [util :refer [prefixes suffixes subs-last string parsing-exception]]])) + [util :refer [parsing-exception]] + [tokenizer :as tokenizer]])) (set! *warn-on-reflection* true) -;; only fragment includes are evaluated +;; When true, only fragment includes are parsed and evaluated (def ^:dynamic *only-includes* false) -(defn peek-next-text - "Returns a lazy seq of text content characters from the token list." - [tokens] - ((fn f [stack tokens] - (when-let [[t & ts] (seq tokens)] - (if-let [text (:text t)] - (concat (for [[t & trs] (suffixes text)] - {:char t - :stack stack - :text-rest trs - :rest ts}) - (lazy-seq (f stack ts))) - (recur (cons t stack) ts)))) - nil tokens)) - -(defn find-first-code [^String s] - (assert (string? s)) - (when-let [ind (index-of s (str open-tag))] - (if-let [after-idx (index-of s (str close-tag) ind)] - (cond-> {:action (subs s (+ ind (count open-tag)) after-idx)} - (pos? ind) (assoc :before (subs s 0 ind)) - (not= (+ (count close-tag) after-idx) (count s)) - (assoc :after (subs s (+ (count close-tag) after-idx)))) - (cond-> {:action-part (subs s (+ ind (count open-tag)))} - (not (zero? ind)) (assoc :before (subs s 0 ind)))))) - -(defn text-split-tokens [^String s] - (assert (string? s)) - (loop [s s - output []] - (if-let [x (some-> s find-first-code)] - (if (:action-part x) - {:tokens (if-let [b (:before x)] (conj output {:text b}) output) - :action-part (:action-part x)} - (recur (:after x) - (if (seq (:before x)) - (conj output {:text (:before x)} {:action (:action x)}) - (conj output {:action (:action x)})))) - (if (seq s) - {:tokens (conj output {:text s})} - {:tokens output})))) - -;; returns a map of {:char :stack :text-rest :rest} -(defn -find-open-tag [last-chars-count next-token-list] - (assert (integer? last-chars-count)) - (assert (pos? last-chars-count)) - (assert (sequential? next-token-list)) - (let [next-text (peek-next-text next-token-list) - n (- (count open-tag) last-chars-count)] - (when (= (drop last-chars-count open-tag) - (take n (map :char next-text))) - (nth next-text (dec n))))) - -(defn -last-chars-count [sts-tokens] - (assert (sequential? sts-tokens)) - (when-let [last-text (some-> sts-tokens last :text string)] - (some #(when (ends-with? 
last-text (string %)) - (count %)) - (prefixes open-tag)))) - -(defn map-action-token [token] - (if-let [action (:action token)] - (let [parsed (tokenizer/text->cmd action) - parsed (assoc parsed :raw (str open-tag action close-tag))] - (if (and *only-includes* - (not= :cmd/include (:cmd parsed))) - {:text (str open-tag action close-tag)} - {:action parsed})) - token)) - -(declare cleanup-runs) - -(defn cleanup-runs-1 [[first-token & rest-tokens]] - (assert (:text first-token)) - (let [sts (text-split-tokens (:text first-token))] - - (if (:action-part sts) - ;; Ha van olyan akcio resz, amit elkezdtunk de nem irtunk vegig... - (let [next-token-list (cons {:text (:action-part sts)} rest-tokens) - [this that] (split-with #(not= (seq close-tag) - (take (count close-tag) (map :char %))) - (suffixes (peek-next-text next-token-list))) - that (if (empty? that) - (throw (parsing-exception "" (str "Stencil tag is not closed. Reading " open-tag - (string (comp (take 20) (map first) (map :char)) this)))) - ;; (throw (ex-info "Tag is not closed? " {:read (first this)})) - (first (nth that (dec (count close-tag))))) - ; action-content (apply str (map (comp :char first) this)) - ] - (concat - (map map-action-token (:tokens sts)) - (let [ap (map-action-token {:action (string (map (comp :char first)) this)})] - (if (:action ap) - (concat - [ap] - (reverse (:stack that)) - (if (seq (:text-rest that)) - (lazy-seq (cleanup-runs-1 (cons {:text (string (:text-rest that))} (:rest that)))) - (lazy-seq (cleanup-runs (:rest that))))) - (list* {:text (str open-tag (:action-part sts))} - (lazy-seq (cleanup-runs rest-tokens))))))) - - ;; If the current :text node ends with a prefix of open-tag: - (if-let [last-chars-count (-last-chars-count (:tokens sts))] - (if-let [this (-find-open-tag last-chars-count rest-tokens)] - (concat - (map map-action-token (butlast (:tokens sts))) - (when-let [s (seq (drop-last last-chars-count (:text (last (:tokens sts)))))] - [{:text (apply str s)}]) - - (let [tail (cleanup-runs-1 - (concat [{:text (apply str open-tag (:text-rest this))}] - (reverse (:stack this)) - (:rest this)))] - (if (:action (first tail)) - tail - (cons {:text (subs-last (:text (last (:tokens sts))) last-chars-count)} - (lazy-seq (cleanup-runs rest-tokens)))))) - (concat (map map-action-token (:tokens sts)) (cleanup-runs rest-tokens))) - (concat (map map-action-token (:tokens sts)) (cleanup-runs rest-tokens)))))) - -(defn cleanup-runs [token-list] - (when-let [[t & ts] (seq token-list)] - (if (:text t) - (cleanup-runs-1 token-list) - (cons t (lazy-seq (cleanup-runs ts)))))) +(defn map-action-token [{:keys [action]}] + (let [parsed (tokenizer/text->cmd action) + source (str open-tag action close-tag) + parsed (assoc parsed :raw source)] + (if (and *only-includes* + (not= :cmd/include (:cmd parsed))) + {:text source} + {:action parsed}))) + +;; Transducer that unwraps {:text .} objects. eg.: [1 2 {:text ab} 3] => [1 2 \a \b 3] +(defn- map-text-nodes [] + (fn [rf] + (fn ([acc] (rf acc)) + ([acc x] + (if (:text x) + (reduce rf acc (:text x)) + (rf acc x)))))) + +(declare parse-upto-open-tag) + +;; Constructs a function that reads the inside of a stencil expression until close-tag is reached. +;; The fn returns a collection when read fully or itself when there are characters left to read. +(defn- parse-until-close-tag [chars-and-tokens-to-append] + (let [expected-close-tag-chars (volatile! 
close-tag) + buffer-nonclose-chars-only (new java.util.ArrayList) + buffer-all-read (new java.util.ArrayList)] + (fn self + ([] + (when (seq buffer-all-read) + (throw (parsing-exception + "" (apply str "Stencil tag is not closed. Reading " open-tag buffer-nonclose-chars-only))))) + ([token] + (.add buffer-all-read token) + (if (= token (first @expected-close-tag-chars)) + (when-not (vswap! expected-close-tag-chars next) + (let [action (map-action-token {:action (apply str buffer-nonclose-chars-only)})] + (if (:action action) + (parse-upto-open-tag (concat [action] + (remove char? chars-and-tokens-to-append) + (remove char? buffer-all-read))) + (parse-upto-open-tag (concat (vec chars-and-tokens-to-append) + (vec buffer-all-read)))))) + (when (char? token) + (vreset! expected-close-tag-chars close-tag) + (.clear buffer-nonclose-chars-only) + (.addAll buffer-nonclose-chars-only (filter char? buffer-all-read)) + self)))))) + +;; Similar to the fn above. Consumes tokens up to the first open tag, then returns another parser (trampoline style). +(defn- parse-upto-open-tag [prepend] + (let [expected-open-tag-chars (volatile! open-tag) + buffer (new java.util.ArrayList ^java.util.Collection prepend)] + (fn self + ([] buffer) + ([token] + (if (= token (first @expected-open-tag-chars)) + (if (= open-tag @expected-open-tag-chars) + (let [already-read (vec buffer)] + (.clear buffer) + (.add buffer token) + (vswap! expected-open-tag-chars next) + already-read) + (do (.add buffer token) + (when-not (vswap! expected-open-tag-chars next) ; for cases when |open-tag|>2 + (parse-until-close-tag buffer)))) + (if (= open-tag @expected-open-tag-chars) + (let [result (concat (vec buffer) [token])] + (.clear buffer) ;; reading an open-tag from start => we dump the content of buffer + result) + (if (char? token) + (let [out (vec buffer)] + (vreset! expected-open-tag-chars open-tag) + (.clear buffer) + (if (= token (first @expected-open-tag-chars)) + (do (.add buffer token) + (vswap! expected-open-tag-chars next) + out) + (concat out [token]))) + (do (.add buffer token) + self)))))))) + +;; Constructs a transducer that uses the trampoline function to process elements +(defn- parser-trampoline [initial-trampoline] + (fn [rf] + (let [trampoline (volatile! initial-trampoline)] + (fn ([acc] (rf (reduce rf acc (@trampoline)))) + ([acc token] + (let [result (@trampoline token)] + (if (fn? result) + (do (vreset! trampoline result) acc) + (reduce rf acc result)))))))) + +;; Transducer that merges consecutive characters into a text token, eg.: (1 \a \b \c 2) to (1 {:text "abc"} 2) +(defn- unmap-text-nodes [] + (let [state (volatile! true)] + (comp (partition-by (fn [x] (when-not (char? x) (vswap! state not)))) + (map (fn [x] (if (char? (first x)) {:text (apply str x)} (first x))))))) + +(defn cleanup-runs [tokens-seq] + (eduction (comp (map-text-nodes) + (parser-trampoline (parse-upto-open-tag [])) + (unmap-text-nodes)) + tokens-seq)) (defn- map-token [token] (:action token token)) @@ -150,6 +126,6 @@ (ignored-tag/map-ignored-attr) (tokenizer/structure->seq) (cleanup-runs) - (map map-token))) + (eduction (map map-token)))) :OK diff --git a/src/stencil/tokenizer.clj b/src/stencil/tokenizer.clj index 697c6b20..f9d5e09a 100644 --- a/src/stencil/tokenizer.clj +++ b/src/stencil/tokenizer.clj @@ -8,7 +8,7 @@ (set! *warn-on-reflection* true) (defn- text->cmd-impl [^String text] - (assert (string? text)) + (assert (string? 
text) (str "Not string: " (pr-str text))) (let [text (trim text) pattern-elseif #"^(else\s*if|elif|elsif)(\(|\s+)"] (cond diff --git a/src/stencil/util.clj b/src/stencil/util.clj index 53d3bd3c..148cc6e3 100644 --- a/src/stencil/util.clj +++ b/src/stencil/util.clj @@ -52,9 +52,6 @@ (defn assoc-if-val [m k v] (if (some? v) (assoc m k v) m)) -(defn suffixes [xs] (take-while seq (iterate next xs))) -(defn prefixes [xs] (take-while seq (iterate butlast xs))) - (defmacro fail [msg obj] (assert (string? msg)) (assert (map? obj)) @@ -66,8 +63,6 @@ (number? x) (int x) :else (fail "Unexpected type of input" {:type (:type x) :input x}))) -(defn subs-last [^String s ^long n] (.substring s (- (.length s) n))) - (defn parsing-exception [expression message] (ParsingException/fromMessage (str expression) (str message))) @@ -126,9 +121,7 @@ `(let [b# ~body] (when (~pred b#) b#))) -(defn ^String string - ([values] (apply str values)) - ([xform coll] (transduce xform (fn ([^Object s] (.toString s)) ([^StringBuilder b v] (.append b v))) (StringBuilder.) coll))) +(defn string ^String [xform coll] (transduce xform (fn ([^Object s] (.toString s)) ([^StringBuilder b v] (.append b v))) (StringBuilder.) coll)) (defmacro whitespace?? [c] `(case ~c (\tab \space \newline diff --git a/test/stencil/merger_test.clj b/test/stencil/merger_test.clj index 28393aaa..6c4d2f38 100644 --- a/test/stencil/merger_test.clj +++ b/test/stencil/merger_test.clj @@ -1,46 +1,12 @@ (ns stencil.merger-test (:require [stencil.merger :refer :all] + [stencil.types] [clojure.test :refer [deftest testing is are use-fixtures]])) (def map-action-token' map-action-token) (use-fixtures :each (fn [f] (with-redefs [map-action-token identity] (f)))) -(deftest peek-next-text-test - (testing "Simple case" - (is (= nil (peek-next-text nil))) - (is (= nil (peek-next-text []))) - (is (= nil (peek-next-text [{:open 1} {:open 2} {:close 2}]))) - (is (= '({:char \a, :stack nil, :text-rest (\b), :rest ({:text "cd"})} - {:char \b, :stack nil, :text-rest nil, :rest ({:text "cd"})} - {:char \c, :stack nil, :text-rest (\d), :rest nil} - {:char \d, :stack nil, :text-rest nil, :rest nil}) - (peek-next-text [{:text "ab"} {:text "cd"}]))))) - -(deftest find-first-code-test - (testing "Simple cases" - (are [x res] (is (= res (find-first-code x))) - "asdf{%xy%}gh" {:action "xy" :before "asdf" :after "gh"} - "{%xy%}gh" {:action "xy" :after "gh"} - "asdf{%xy%}" {:action "xy" :before "asdf"} - "{%xy%}" {:action "xy"} - "a{%xy" {:action-part "xy" :before "a"} - "a{%x%" {:action-part "x%" :before "a"} - "{%xy" {:action-part "xy"}))) - -(deftest text-split-tokens-test - (testing "Simple cases" - (are [x expected] (is (= expected (text-split-tokens x))) - - "a{%a%}b{%d" - {:tokens [{:text "a"} {:action "a"} {:text "b"}] :action-part "d"} - - "{%a%}{%x%}" - {:tokens [{:action "a"} {:action "x"}]} - - "" - {:tokens []}))) - (deftest cleanup-runs-test (testing "Simple cases" (are [x expected] (= expected (cleanup-runs x)) @@ -80,14 +46,29 @@ [{:text "asdf"} {:action "123456"} {:text "ghi"}])) (testing "Complex case" - (are [x expected] (= expected (cleanup-runs x)) + (are [x expected] (= expected (cleanup-runs x)) [{:text "a{"} {:text "%"} {:text "="} {:text "1"} {:text "%"} {:text "}b"}] [{:text "a"} {:action "=1"} {:text "b"}])) (testing "Unchanged" (are [x expected] (= expected (cleanup-runs x)) [{:text "asdf{"} {:text "{aaa"}] - [{:text "asdf{"} {:text "{aaa"}]))) + [{:text "asdf{{aaa"}]))) + +(deftest cleanup-runs-test-redefined-tags + (testing "Redefining open-close tags 
does not affect parsing logic" + (testing "Redefined tags consist of repeating characters" + (with-redefs [stencil.types/open-tag "{{" + stencil.types/close-tag "}}"] + (are [x expected] (= expected (vec (cleanup-runs x))) + [{:text "asdf{{1234}}ghi"}] + [{:text "asdf"} {:action "1234"} {:text "ghi"}]))) + (testing "Redefined tags are longer" + (with-redefs [stencil.types/open-tag "<%!" + stencil.types/close-tag "!%>"] + (are [x expected] (= expected (vec (cleanup-runs x))) + [{:text "asdf<%!1234!%>ghi"}] + [{:text "asdf"} {:action "1234"} {:text "ghi"}]))))) (defmacro are+ [argv [& exprs] & bodies] (list* 'do (for [e exprs] `(are ~argv ~e ~@bodies)))) @@ -97,46 +78,57 @@ (def O4 {:open 4}) (def O5 {:open 5}) -(deftest ^:map-action-token cleanup-runs_fragments-only +(deftest cleanup-runs_fragments-only (testing "text token has full expression" (with-redefs [map-action-token map-action-token'] (are+ [x expected-literal expected-parsed] - [(= expected-literal (binding [*only-includes* true] (doall (cleanup-runs x)))) - (= expected-parsed (binding [*only-includes* false] (doall (cleanup-runs x))))] + [(= expected-literal (binding [*only-includes* true] (vec (cleanup-runs x)))) + (= expected-parsed (binding [*only-includes* false] (vec (cleanup-runs x))))] [{:text "{%=1%}"}] [{:text "{%=1%}"}] [{:action {:cmd :cmd/echo, :expression 1 :raw "{%=1%}"}}] [{:text "{{%=1%}"}] - [{:text "{"} {:text "{%=1%}"}] + [{:text "{{%=1%}"}] [{:text "{"} {:action {:cmd :cmd/echo, :expression 1, :raw "{%=1%}"}}] + [{:text "{a{%=1%}"}] + [{:text "{a{%=1%}"}] + [{:text "{a"} {:action {:cmd :cmd/echo, :expression 1, :raw "{%=1%}"}}] + [{:text "{%=x%2%}"}] [{:text "{%=x%2%}"}] [{:action {:cmd :cmd/echo, :expression '(:mod x 2), :raw "{%=x%2%}"}}] [{:text "abc{%=1%}b"}] - [{:text "abc"} {:text "{%=1%}"} {:text "b"}] + [{:text "abc{%=1%}b"}] [{:text "abc"} {:action {:cmd :cmd/echo, :expression 1 :raw "{%=1%}"}} {:text "b"}] [{:text "abc{%="} O1 O2 {:text "1"} O3 O4 {:text "%}b"}] - [{:text "abc"} {:text "{%="} O1 O2 {:text "1"} O3 O4 {:text "%}b"}] + [{:text "abc{%="} O1 O2 {:text "1"} O3 O4 {:text "%}b"}] [{:text "abc"} {:action {:cmd :cmd/echo, :expression 1 :raw "{%=1%}"}} O1 O2 O3 O4 {:text "b"}] [{:text "abc{%="} O1 O2 {:text "1%"} O3 O4 {:text "}b"}] - [{:text "abc"} {:text "{%="} O1 O2 {:text "1%"} O3 O4 {:text "}b"}] + [{:text "abc{%="} O1 O2 {:text "1%"} O3 O4 {:text "}b"}] [{:text "abc"} {:action {:cmd :cmd/echo, :expression 1 :raw "{%=1%}"}} O1 O2 O3 O4 {:text "b"}] [{:text "abcd{%="} O1 {:text "1"} O2 {:text "%"} O3 {:text "}"} O4 {:text "b"}] - [{:text "abcd"} {:text "{%="} O1 {:text "1"} O2 {:text "%"} O3 {:text "}"} O4 {:text "b"}] - [{:text "abcd"} {:action {:cmd :cmd/echo, :expression 1 :raw "{%=1%}"}} O1 O2 O3 O4{:text "b"}] + [{:text "abcd{%="} O1 {:text "1"} O2 {:text "%"} O3 {:text "}"} O4 {:text "b"}] + [{:text "abcd"} {:action {:cmd :cmd/echo, :expression 1 :raw "{%=1%}"}} O1 O2 O3 O4 {:text "b"}] [{:text "abc{"} O1 {:text "%"} O2 {:text "=1"} O3 {:text "2"} O4 {:text "%"} O5 {:text "}"} {:text "b"}] - [{:text "abc"} {:text "{"} O1 {:text "%"} O2 {:text "=1"} O3 {:text "2"} O4 {:text "%"} O5 {:text "}"} {:text "b"}] + [{:text "abc{"} O1 {:text "%"} O2 {:text "=1"} O3 {:text "2"} O4 {:text "%"} O5 {:text "}b"}] [{:text "abc"} {:action {:cmd :cmd/echo, :expression 12 :raw "{%=12%}"}} O1 O2 O3 O4 O5 {:text "b"}] [O1 {:text "{%if p"} O2 O3 {:text "%}one{%end%}"} O4] - [O1 {:text "{%if p"} O2 O3 {:text "%}one"} {:text "{%end%}"} O4] - [O1 {:action {:cmd :cmd/if, :condition 'p :raw "{%if p%}"}} O2 O3 
{:text "one"} {:action {:cmd :cmd/end :raw "{%end%}"}} O4] - )))) + [O1 {:text "{%if p"} O2 O3 {:text "%}one{%end%}"} O4] + [O1 {:action {:cmd :cmd/if, :condition 'p :raw "{%if p%}"}} O2 O3 {:text "one"} {:action {:cmd :cmd/end :raw "{%end%}"}} O4])))) + +(deftest test-unmap-text-nodes + (let [unmap-text-nodes @#'stencil.merger/unmap-text-nodes] + (is (= [] (into [] (unmap-text-nodes) []))) + (is (= [1 2 3] (into [] (unmap-text-nodes) [1 2 3]))) + (is (= [{:text "abc"}] (into [] (unmap-text-nodes) "abc"))) + (is (= [1 2 {:text "bc"} 3 {:text "d"} 4 5 {:text "e"} 6] + (into [] (unmap-text-nodes) [1 2 \b \c 3 \d 4 5 \e 6]))))) diff --git a/test/stencil/util_test.clj b/test/stencil/util_test.clj index 853b59af..c95d910a 100644 --- a/test/stencil/util_test.clj +++ b/test/stencil/util_test.clj @@ -55,18 +55,6 @@ (testing "Difference clojure core" (is (not (zip/branch? (xml-zip 42)))))))) -(deftest test-suffixes - (is (= [] (suffixes nil))) - (is (= [] (suffixes []))) - (is (= [[1]] (suffixes [1]))) - (is (= [[1 2 3] [2 3] [3]] (suffixes [1 2 3])))) - -(deftest test-prefixes - (is (= [] (prefixes nil))) - (is (= [] (prefixes []))) - (is (= [[1]] (prefixes [1]))) - (is (= [[1 2 3] [1 2] [1]] (prefixes [1 2 3])))) - (deftest test-->int (is (= nil (->int nil))) (is (= 23 (->int 23))) @@ -97,14 +85,6 @@ (deftest fail-test (is (thrown? clojure.lang.ExceptionInfo (fail "test error" {})))) -(deftest prefixes-test - (is (= [] (prefixes []) (prefixes nil))) - (is (= [[1 2 3] [1 2] [1]] (prefixes [1 2 3])))) - -(deftest suffixes-test - (is (= [] (suffixes []) (suffixes nil))) - (is (= [[1 2 3] [2 3] [3]] (suffixes [1 2 3])))) - (deftest whitespace?-test (is (= true (whitespace? \space))) (is (= true (whitespace? \tab)))