fixing issues to parse code blocks in markdown + supporting created_at and updated_at based on git dates

2025-12-10 17:21:03 +00:00 · 2025-12-10 17:21:03 +00:00 · 781a86b8d5
commit 781a86b8d5
parent b89e8de06f
4 changed files with 100 additions and 57 deletions
--- a/76
+++ b/76
@ -55,29 +55,36 @@ init()
    # _index "$blog" "$lang" "$dark"
 }

+get_git_dates() 
+{
+    local mdfile="$1"
+    created_fmt=$(git log --diff-filter=A --format=%cd --date=format:'%Y-%m-%d %H:%M:%S' -- "$mdfile" | tail -1)
+    created_epoch=$(git log --diff-filter=A --format=%ct -- "$mdfile" | tail -1)
+    updated_fmt=$(git log -1 --format=%cd --date=format:'%Y-%m-%d %H:%M:%S' -- "$mdfile")
+    updated_epoch=$(git log -1 --format=%ct -- "$mdfile")
+}
+
 update_index()
 {
    # Build lines with creation/modification epochs and formatted timestamps, safely handling spaces.
    sorted_posts=$(
        find ./posts -maxdepth 1 -type f -name '*.html' -print0 |
        while IFS= read -r -d '' file; do
-            # Try birth time; fallback to mtime if birth not available
-            creation_epoch=$(stat -c %W "$file" 2>/dev/null)
-            if [ -z "$creation_epoch" ] || [ "$creation_epoch" -le 0 ]; then
-                creation_epoch=$(stat -c %Y "$file")
-            fi
-
-            modification_epoch=$(stat -c %Y "$file")
-
-            # Human-readable timestamps
-            created_fmt=$(date -d @"$creation_epoch" '+%Y-%m-%d %H:%M:%S')
-            modified_fmt=$(date -d @"$modification_epoch" '+%Y-%m-%d %H:%M:%S')

+            # Getting only the file name without the preceding path
            name=$(basename "$file")

+            # Remove extension
+            title="$name"
+            title=${title%.html}
+
+            mdfile="./drafts/published/${title}.md"
+
+            get_git_dates "$mdfile"
+
            # Use pipe as delimiter (filename already basename so cannot contain '/'; could contain '|', but that's rare;
            # if paranoid, choose NUL and handle differently). This is simple and readable.
-            printf '%s|%s|%s|%s|%s\n' "$creation_epoch" "$created_fmt" "$modification_epoch" "$modified_fmt" "$name"
+            printf '%s|%s|%s|%s|%s\n' "$created_epoch" "$created_fmt" "$updated_epoch" "$updated_fmt" "$name"
        done |
        # sort by numeric creation epoch descending (field 1)
        sort -t'|' -k1,1nr |
@ -90,7 +97,7 @@ update_index()
            sub(/\.html$/, "", display)
            gsub(/[_-]/, " ", display)
            # escape filename/display if needed (not done here); keep simple
-            printf "<li><a href=\"./posts/%s\">%s</a><div><p>created at %s</p><p>updated at %s</p></div></li>\n", filename, display, $2, $4
+            printf "<li><div><a href=\"./posts/%s\">%s</a><div><p>created at %s</p><p>updated at %s</p></div></div></li>\n", filename, display, $2, $4
        }
        END { print "</ul>" }'
    )
@ -105,34 +112,6 @@ update_index()
    }' "$template" > "./index.html"
 }

-
-publish()
-{ 
-    # Storing the path of the post/article to publish 
-    article_path=$1 
-
-    # from the relative path, only retrieving the name of the article (without file extension)
-    article_name=$(echo $article_path | cut -d '/' -f 2 | cut -d '.' -f 1)
-
-    # Convert the markdown draft into an html article and storing it locally
-    post=$(awk -f ${BOB_LIB}/markdown.awk ./$article_path)
-
-    # Retrieving the html article template
-    template="${BOB_LIB}/template/post.html"
-
-    # Escaping the & for next step to not confuse awk
-    escaped_post=$(echo "$post" | sed 's/&/\\&/g')
-
-    # In the template, replacing the string {{article}} by the actual content parsed above
-    awk -v content="$escaped_post" '{gsub(/\{\{article\}\}/, content); print}' "$template" > "./posts/$article_name.html"
-
-    # moving the draft into the "published" folder
-    mv ./$article_path ./drafts/published/$article_name.md
-
-    # updating the index.html as a new article shall appear in the list
-    update_index
-}
-
 publish_one()
 { 
    # Storing the path of the post/article to publish 
@ -145,14 +124,25 @@ publish_one()
    # Convert the markdown draft into an html article and storing it locally
    post=$(awk -f ${BOB_LIB}/markdown.awk ./$article_path)

+    get_git_dates "$article_path"
+    
    # Retrieving the html article template
    template="${BOB_LIB}/template/post.html"

    # Escaping the & for next step to not confuse awk
-    escaped_post=$(echo "$post" | sed 's/&/\\&/g')
+    escaped_post=$(echo "$post" | sed 's/&/\\\\&/g')

    # In the template, replacing the string {{article}} by the actual content parsed above
-    awk -v content="$escaped_post" '{gsub(/\{\{article\}\}/, content); print}' "$template" > "./posts/$article_name.html"
+    awk \
+        -v content="$escaped_post" \
+        -v created="$created_fmt" \
+        -v updated="$updated_fmt" \
+        '{
+            gsub(/{{article}}/, content);
+            gsub(/{{created}}/, created);
+            gsub(/{{updated}}/, updated);
+            print
+        }' "$template" > "./posts/$article_name.html"
 }

 publish_all()
--- a/lib/markdown.awk
+++ b/lib/markdown.awk
@ -61,16 +61,24 @@ function replaceInline(line,   result, start, end) {
        line = substr(line, 1, start-1) "<a href=\"" matched_url "\">" matched_link "</a>" substr(line, end+1)
    }

-    while (match(line, /`([^*]+)`/)) {
+    while (match(line, /`([^`]+)`/)) {
        start = RSTART
        end = RSTART + RLENGTH - 1
        # Build the result: before match, <code>, content, </code>, after match
-        line = substr(line, 1, start-1) "<code>" substr(line, start+1, RLENGTH-2) "</code>" substr(line, end+1)
+        line = substr(line, 1, start-1) "<code>" escape_html(substr(line, start+1, RLENGTH-2)) "</code>" substr(line, end+1)
    }

    return line
 }

+# Return str with the characters that must not reach the HTML output verbatim
+# replaced by entities. Called on the content of inline code spans and of
+# code-block lines.
+# In a gsub() replacement a bare "&" means "the matched text"; "\\&" (which
+# gsub sees as \&) is how a literal ampersand is written, hence "\\&lt;" etc.
+function escape_html(str) {
+    # NOTE(review): ampersand escaping is disabled, so a literal "&" inside
+    # code is emitted unescaped. If re-enabled it must stay the first gsub,
+    # otherwise the entities produced below would be escaped a second time.
+    # gsub(/&/, "\\&amp;", str)  # Must be done first!
+    gsub(/</, "\\&lt;", str)
+    gsub(/>/, "\\&gt;", str)
+    # "*" is escaped in the regex: a bare leading "*" is only taken literally
+    # by some awk implementations and is a syntax error in others.
+    # Presumably "*" becomes an entity so it is not read as emphasis markup
+    # later on -- confirm against the rest of replaceInline.
+    gsub(/\*/, "\\&#42;", str)
+    # NOTE(review): the effect of a "\\\\" replacement differs between awk
+    # implementations/modes (kept or collapsed backslashes) -- confirm intent.
+    gsub(/\\/, "\\\\", str)
+    return str
+}

 function closeOne() {
    env = pop()
@ -131,34 +139,71 @@ function closeOne() {
    n = RLENGTH;
    if (env == "code") {
        # In a code block, print a new item 
-        print  substr($0, n+1)
+        print escape_html(substr($0, n+1))
    } else {
        # Otherwise, init the code block 
        push("pre")
        push("code")
-        print "<pre><code>" substr($0, n+1)
+        print "<pre><code>" escape_html(substr($0, n+1))
    }
+    next
 }


 # Matching a simple paragraph
-!/^(#|\*|-|\+|>|`|$|\t|    )/ {
-    env = last() 
-    if (env == "none") {
-        # If no block, print a paragraph
-        print "<p>" replaceInline($0) "</p>"
+# Any line that does not start with a block marker (heading, list bullet,
+# quote, backtick, tab / four-space indent) and is not empty is plain text.
+# "*" and "+" are escaped in the pattern: a bare leading "*" or "+" is only
+# accepted as a literal by some awk implementations and rejected by others.
+!/^(#|\*|-|\+|>|`|$|\t|    )/ {
+    env = last()
+
+    # Plain text ends any open list, blockquote or code block: unwind the
+    # stack, printing each closing tag, until a paragraph or nothing is on top.
+    while (env != "none") {
+        if (env == "ul" || env == "ol" || env == "blockquote" || env == "code" || env == "pre") {
+            env = pop()
+            print "</" env ">"
+            env = last()
+        } else {
+            # "p" or any other non-container entry: stop closing
+            break
+        }
+    }
+
+    if (env == "none" || env == "p") {
+        if (env == "none") {
+            # No paragraph open yet: start one. Its closing tag is not
+            # printed by this rule, the paragraph stays on the stack.
+            push("p")
+            printf "<p>"
+        } else {
+            # Continuing the open paragraph: separate the lines with a newline
+            printf "\n"
+        }
+
+        # Print the line content with inline markup converted
+        printf "%s", replaceInline($0)
+
    } else if (env == "blockquote") {
+        # NOTE(review): the loop above pops every "blockquote" entry, so this
+        # branch looks unreachable -- confirm whether quotes should continue here.
        print $0
    }
 }

 $0 == "" {
    env = last()
-    while (env != "none") {
-        env = pop()
-        print "</" env ">"
+    while (env != "none" && env !="code") {
+        # A blank line closes open lists, blockquotes and paragraphs (ul, ol, blockquote, p)
+        if (env == "ul" || env == "ol" || env == "blockquote" || env == "p") {
+            env = pop()
+            print "</" env ">"
+        } else {
+            # Any other entry on top of the stack is left open by a blank line.
+            # Nothing is popped in that case, so last() would keep returning the
+            # same value: leave the loop here instead of spinning forever.
+            break 
+        }
        env = last()
    }
+    next
 }


--- a/lib/template/post.html
+++ b/lib/template/post.html
@ -12,6 +12,10 @@
 <body>
    <h1 class='title'><a href="../index.html">simpet</a></h1>
    <article>
+        <div class='dates'>
+            <p>Created at: <time datetime="{{created}}">{{created}}</time></p>
+            <p>Updated at: <time datetime="{{updated}}">{{updated}}</time></p>
+        </div>
        {{article}}
        <footer>
            <div></div>
--- a/test/parser/test_md_parser.sh
+++ b/test/parser/test_md_parser.sh
@ -88,6 +88,10 @@ declare -a tests=(
  $'\tcode1'
  "<pre><code>code1</code></pre>"

+  "Code Block 3: html code"
+  $'\t<h1>Title</h1>'
+  "<pre><code>&lt;h1&gt;Title&lt;/h1&gt;</code></pre>"
+
  "Paragraph 1"
  "paragraph 1"
  "<p>paragraph 1</p>"
@ -117,8 +121,8 @@ declare -a tests=(
  "<p>A link to <a href=\"https://www.wikipedia.org\">wikipedia</a></p>"

  "Inline Code"
-  $'A tiny variable `x`'
-  "<p>A tiny variable <code>x</code></p>"
+  $'A tiny variable `<h1>x</h1>`'
+  "<p>A tiny variable <code>&lt;h1&gt;x&lt;/h1&gt;</code></p>"
  # You can add more test cases following the same format...
 )