Fix parsing of code blocks in Markdown and add created_at / updated_at timestamps based on git commit dates

2025-12-10 17:21:03 +00:00 · 2025-12-10 17:21:03 +00:00 · 781a86b8d5
commit 781a86b8d5
parent b89e8de06f
4 changed files with 100 additions and 57 deletions
--- a/76
+++ b/76
@ -55,29 +55,36 @@ init()
    # _index "$blog" "$lang" "$dark"
 }
 # Populate the creation/update timestamps of a file from its git history.
 #
 # Arguments:
 #   $1 - path of the file to inspect
 # Globals written (callers read them after the call):
 #   created_epoch / created_fmt - commit time of the oldest commit that added the file
 #   updated_epoch / updated_fmt - commit time of the latest commit touching the file
 #   *_epoch is seconds since the Unix epoch, *_fmt is 'YYYY-MM-DD HH:MM:SS'.
 # When git knows nothing about the path (e.g. a draft that was never
 # committed) the file's modification time is used instead; if the file does
 # not exist either, the variables are left empty.
 # Returns: always 0 (the lookup is best-effort).
 get_git_dates()
 {
    local mdfile="$1"
    local date_fmt='%Y-%m-%d %H:%M:%S'
    local first_add last_change mtime_epoch mtime_fmt

    # One git call per lookup, each line being "<epoch> <formatted date>".
    # git lists newest commits first, so the oldest "add" is the last line.
    first_add=$(git log --diff-filter=A --format='%ct %cd' --date=format:"$date_fmt" -- "$mdfile" | tail -n 1)
    last_change=$(git log -1 --format='%ct %cd' --date=format:"$date_fmt" -- "$mdfile")

    # Split on the first space: epoch before it, formatted date after it.
    # An empty git answer leaves both halves empty.
    created_epoch=${first_add%% *}
    created_fmt=${first_add#* }
    updated_epoch=${last_change%% *}
    updated_fmt=${last_change#* }

    # Fallback for files without git history: use the filesystem mtime
    if [ -z "$created_epoch" ] || [ -z "$updated_epoch" ]; then
        mtime_epoch=$(stat -c %Y -- "$mdfile" 2>/dev/null)
        if [ -n "$mtime_epoch" ]; then
            mtime_fmt=$(date -d "@$mtime_epoch" "+$date_fmt")
            if [ -z "$created_epoch" ]; then
                created_epoch=$mtime_epoch
                created_fmt=$mtime_fmt
            fi
            if [ -z "$updated_epoch" ]; then
                updated_epoch=$mtime_epoch
                updated_fmt=$mtime_fmt
            fi
        fi
    fi
    return 0
 }
 update_index()
 {
    # Build lines with creation/modification epochs and formatted timestamps, safely handling spaces.
    sorted_posts=$(
        find ./posts -maxdepth 1 -type f -name '*.html' -print0 |
        while IFS= read -r -d '' file; do
            # Try birth time; fallback to mtime if birth not available
            creation_epoch=$(stat -c %W "$file" 2>/dev/null)
            if [ -z "$creation_epoch" ] || [ "$creation_epoch" -le 0 ]; then
                creation_epoch=$(stat -c %Y "$file")
            fi
            modification_epoch=$(stat -c %Y "$file")
            # Human-readable timestamps
            created_fmt=$(date -d @"$creation_epoch" '+%Y-%m-%d %H:%M:%S')
            modified_fmt=$(date -d @"$modification_epoch" '+%Y-%m-%d %H:%M:%S')
            # Getting only the file name without the preceding path
            name=$(basename "$file")
            # Remove extension
            title="$name"
            title=${title%.html}
            mdfile="./drafts/published/${title}.md"
            get_git_dates "$mdfile"
            # Use pipe as delimiter (filename already basename so cannot contain '/'; could contain '|', but that's rare;
            # if paranoid, choose NUL and handle differently). This is simple and readable.
-            printf '%s|%s|%s|%s|%s\n' "$creation_epoch" "$created_fmt" "$modification_epoch" "$modified_fmt" "$name"
+            printf '%s|%s|%s|%s|%s\n' "$created_epoch" "$created_fmt" "$updated_epoch" "$updated_fmt" "$name"
        done |
        # sort by numeric creation epoch descending (field 1)
        sort -t'|' -k1,1nr |
@ -90,7 +97,7 @@ update_index()
            sub(/\.html$/, "", display)
            gsub(/[_-]/, " ", display)
            # escape filename/display if needed (not done here); keep simple
-            printf "<li><a href=\"./posts/%s\">%s</a><div><p>created at %s</p><p>updated at %s</p></div></li>\n", filename, display, $2, $4
+            printf "<li><div><a href=\"./posts/%s\">%s</a><div><p>created at %s</p><p>updated at %s</p></div></div></li>\n", filename, display, $2, $4
        }
        END { print "</ul>" }'
    )
@ -105,34 +112,6 @@ update_index()
    }' "$template" > "./index.html"
 }
 # Publish a draft: render it to HTML under ./posts, archive the markdown
 # source under ./drafts/published, then rebuild the index.
 #
 # Arguments:
 #   $1 - draft path relative to the current directory, e.g. drafts/my-post.md
 # Globals read:    BOB_LIB (directory holding markdown.awk and template/post.html)
 # Globals written: article_path, article_name, post, template
 # Returns: 0 on success; 1 when the conversion, the rendering or the move
 #          fails, in which case the index is not rebuilt.
 publish()
 {
    # Storing the path of the post/article to publish
    article_path=$1
    # The article name is the second path component up to its first dot
    # (drafts/my-post.md -> my-post)
    article_name=$(printf '%s\n' "$article_path" | cut -d '/' -f 2 | cut -d '.' -f 1)
    # Convert the markdown draft into an html article; stop here on failure so
    # the draft is not moved away without a rendered post
    post=$(awk -f "${BOB_LIB}/markdown.awk" "./$article_path") || return 1
    # Retrieving the html article template
    template="${BOB_LIB}/template/post.html"
    # Replace every {{article}} in the template by the rendered html.
    # The html travels through the environment and is spliced in with
    # index()/substr(): unlike `awk -v` + gsub(), this interprets neither
    # backslash escapes nor "&" in the content.
    BOB_POST_CONTENT="$post" awk '
        BEGIN {
            content = ENVIRON["BOB_POST_CONTENT"]
            marker = "{{article}}"
            marker_len = length(marker)
        }
        {
            rest = $0
            rendered = ""
            while ((at = index(rest, marker)) > 0) {
                rendered = rendered substr(rest, 1, at - 1) content
                rest = substr(rest, at + marker_len)
            }
            print rendered rest
        }' "$template" > "./posts/$article_name.html" || return 1
    # moving the draft into the "published" folder
    mv -- "./$article_path" "./drafts/published/$article_name.md" || return 1
    # updating the index.html as a new article shall appear in the list
    update_index
 }
 publish_one()
 { 
    # Storing the path of the post/article to publish 
@ -145,14 +124,25 @@ publish_one()
    # Convert the markdown draft into an html article and storing it locally
    post=$(awk -f ${BOB_LIB}/markdown.awk ./$article_path)
    get_git_dates "$article_path"
    # Retrieving the html article template
    template="${BOB_LIB}/template/post.html"
    # Escaping the & for next step to not confuse awk
-    escaped_post=$(echo "$post" | sed 's/&/\\&/g')
+    escaped_post=$(echo "$post" | sed 's/&/\\\\&/g')
    # In the template, replacing the string {{article}} by the actual content parsed above
-    awk -v content="$escaped_post" '{gsub(/\{\{article\}\}/, content); print}' "$template" > "./posts/$article_name.html"
+    awk \
        -v content="$escaped_post" \
        -v created="$created_fmt" \
        -v updated="$updated_fmt" \
        '{
            gsub(/{{article}}/, content);
            gsub(/{{created}}/, created);
            gsub(/{{updated}}/, updated);
            print
        }' "$template" > "./posts/$article_name.html"
 }
 publish_all()
--- a/lib/markdown.awk
+++ b/lib/markdown.awk
@ -61,16 +61,24 @@ function replaceInline(line,   result, start, end) {
        line = substr(line, 1, start-1) "<a href=\"" matched_url "\">" matched_link "</a>" substr(line, end+1)
    }
-    while (match(line, /`([^*]+)`/)) {
+    while (match(line, /`([^`]+)`/)) {
        start = RSTART
        end = RSTART + RLENGTH - 1
        # Build the result: before match, <code>, content, </code>, after match
-        line = substr(line, 1, start-1) "<code>" substr(line, start+1, RLENGTH-2) "</code>" substr(line, end+1)
+        line = substr(line, 1, start-1) "<code>" escape_html(substr(line, start+1, RLENGTH-2)) "</code>" substr(line, end+1)
    }
    return line
 }
 # Escape characters of str before it is emitted inside <code> output and
 # return the escaped copy. awk passes scalars by value, so the caller's
 # variable is not modified; callers must use the return value.
 function escape_html(str) {
    # NOTE(review): "&" escaping is disabled, so ampersands in str pass through
    # unchanged. If it is re-enabled it has to stay first, otherwise it would
    # re-escape the "&" of the entities inserted by the calls below.
    # gsub(/&/, "\\&amp;", str)  # Must be done first!
    # In a gsub() replacement a bare "&" stands for the matched text;
    # "\\&" produces a literal "&".
    gsub(/</, "\\&lt;", str)
    gsub(/>/, "\\&gt;", str)
    # "*" becomes its numeric entity - presumably so the asterisk is not picked
    # up as markdown emphasis later on; TODO confirm.
    # NOTE(review): a regex starting with an unescaped "*" is not portable
    # across awk implementations - verify against the original file.
    gsub(/*/, "\\&#42;", str)
    # NOTE(review): how "\\\\" is interpreted as a replacement differs between
    # awk implementations (a backslash may be kept as-is or doubled) - confirm
    # the intended result.
    gsub(/\\/, "\\\\", str)
    return str
 }
 function closeOne() {
    env = pop()
@ -131,34 +139,71 @@ function closeOne() {
    n = RLENGTH;
    if (env == "code") {
        # In a code block, print a new item 
-        print  substr($0, n+1)
+        print escape_html(substr($0, n+1))
    } else {
        # Otherwise, init the code block 
        push("pre")
        push("code")
-        print "<pre><code>" substr($0, n+1)
+        print "<pre><code>" escape_html(substr($0, n+1))
    }
    next
 }
 # Matching a simple paragraph: any non-empty line that does not start with a
 # block marker (#, *, -, +, >, backtick, tab or four spaces).
-!/^(#|\*|-|\+|>|`|$|\t|    )/ {
+!/^(#|*|-|+|>|`|$|\t|    )/ {
 env = last() 
    # Plain text ends any open list, blockquote or code block: pop each one off
    # the environment stack and print its closing tag.
    while (env != "none") {
        # Container environments that un-indented text closes
        if (env == "ul" || env == "ol" || env == "blockquote" || env == "code" || env == "pre") {
            env = pop()
            print "</" env ">"
            env = last()
        } else {
            # Anything else (e.g. an open 'p') stays open, so stop closing
            break
        }
    }
    # With the containers closed, either open a new paragraph or extend the current one
    if (env == "none" || env == "p") {
        if (env == "none") {
-        # If no block, print a paragraph
+            push("p")
-        print "<p>" replaceInline($0) "</p>"
+            printf "<p>"
        } else {
            # Already inside a paragraph: separate this line from the previous one with a newline
            printf "\n"
        }
        # Emit the line content without a trailing newline; the closing tag is not printed by this rule
        printf "%s", replaceInline($0)
    } else if (env == "blockquote") {
        # NOTE(review): the loop above already pops every open "blockquote",
        # so this branch looks unreachable - confirm before relying on it.
        print $0
    }
 }
 # Blank line: unwind the environment stack, printing the closing tag of every
 # open paragraph, list or blockquote, then skip to the next input line.
 # An open "code" environment is never closed here.
 $0 == "" {
    env = last()
-    while (env != "none") {
+    while (env != "none" && env !="code") {
        # Environments that a blank line closes: ul, ol, blockquote and p
        if (env == "ul" || env == "ol" || env == "blockquote" || env == "p") {
            env = pop()
            print "</" env ">"
        } else {
            # Any other environment stays open: stop unwinding the stack here
            break 
        }
        env = last()
    }
    next
 }
--- a/lib/template/post.html
+++ b/lib/template/post.html
@ -12,6 +12,10 @@
 <body>
    <h1 class='title'><a href="../index.html">simpet</a></h1>
    <article>
        <div class='dates'>
            <p>Created at: <time datetime="{{created}}">{{created}}</time></p>
            <p>Updated at: <time datetime="{{updated}}">{{updated}}</time></p>
        </div>
        {{article}}
        <footer>
            <div></div>
--- a/test/parser/test_md_parser.sh
+++ b/test/parser/test_md_parser.sh
@ -88,6 +88,10 @@ declare -a tests=(
  $'\tcode1'
  "<pre><code>code1</code></pre>"
  "Code Block 3: html code"
  $'\t<h1>Title</h1>'
  "<pre><code>&lt;h1&gt;Title&lt;/h1&gt;</code></pre>"
  "Paragraph 1"
  "paragraph 1"
  "<p>paragraph 1</p>"
@ -117,8 +121,8 @@ declare -a tests=(
  "<p>A link to <a href=\"https://www.wikipedia.org\">wikipedia</a></p>"
  "Inline Code"
-  $'A tiny variable `x`'
+  $'A tiny variable `<h1>x</h1>`'
-  "<p>A tiny variable <code>x</code></p>"
+  "<p>A tiny variable <code>&lt;h1&gt;x&lt;/h1&gt;</code></p>"
  # You can add more test cases following the same format...
 )