bob/lib/markdown.awk
simonpetit 781a86b8d5
Some checks failed
continuous-integration/drone/push Build is failing
continuous-integration/drone/tag Build is failing
fixing issues to parse code blocks in markdown + supporting created_at and updated_at based on git dates
2025-12-10 17:21:03 +00:00

218 lines
5.9 KiB
Awk
Executable File

#!/usr/bin/awk -f
# The "-f" is required: without it awk receives the script's path as the
# program text instead of reading the program from the file.

# Initialise the environment stack before any input line is read.
BEGIN {
    # "none" is the bottom-of-stack sentinel: the rules below stop closing
    # environments as soon as last() reports it.
    env = "none"
    stack_pointer = 0
    push(env)
}
# Put `value` on top of the environment stack.
# The global `stack_pointer` is advanced first, so it always holds the index
# of the current top element.
function push(value) {
    stack[++stack_pointer] = value
}
# Remove and return the top of the environment stack (LIFO).
# Returns the string "empty" when there is nothing left to pop.
# `value` is declared as an extra parameter so that it is local to the
# function (awk's only way to get a local); callers pass no arguments.
function pop(    value) {
    if (stack_pointer <= 0) {
        return "empty"
    }
    value = stack[stack_pointer]
    delete stack[stack_pointer]
    stack_pointer--
    return value
}
# Peek at the top of the environment stack without removing it.
# Reads the element at index `stack_pointer`; the stack itself is unchanged.
function last() {
    return (stack[stack_pointer])
}
# Convert the inline Markdown in `line` to HTML and return the result.
#
# Applied in this order:
#   **text**     -> <strong>text</strong>
#   *text*       -> <em>text</em>
#   [text](url)  -> <a href="url">text</a>
#   `text`       -> <code>text</code>   (content passed through escape_html)
#
# Parameters:
#   line - the text to convert
# Everything after `line` is an awk-style local; callers pass only `line`.
# `result` is unused and kept only so the existing parameter order is unchanged.
function replaceInline(line, result, start, end, matched, matched_link, matched_url, sep) {
    # Replace occurrences of **...** with <strong>...</strong>
    while (match(line, /\*\*([^*]+)\*\*/)) {
        start = RSTART
        end = RSTART + RLENGTH - 1
        # before match, <strong>, content without the two-char markers, </strong>, after match
        line = substr(line, 1, start-1) "<strong>" substr(line, start+2, RLENGTH-4) "</strong>" substr(line, end+1)
    }
    # Replace occurrences of *...* with <em>...</em>
    while (match(line, /\*([^*]+)\*/)) {
        start = RSTART
        end = RSTART + RLENGTH - 1
        # before match, <em>, content without the one-char markers, </em>, after match
        line = substr(line, 1, start-1) "<em>" substr(line, start+1, RLENGTH-2) "</em>" substr(line, end+1)
    }
    # Replace occurrences of [link](url) with <a href="url">link</a>
    while (match(line, /\[([^\]]+)\]\([^\)]+\)/)) {
        start = RSTART
        end = RSTART + RLENGTH - 1
        # Slice the match out of `line`, the string that was actually searched.
        # `line` is not $0 here: callers pass a substring of it and the loops
        # above may already have changed its length.
        matched = substr(line, start, RLENGTH)
        # The link text cannot contain "]", so the first "](" is the boundary
        # between text and URL even when the text contains parentheses.
        sep = index(matched, "](")
        matched_link = substr(matched, 2, sep - 2)
        matched_url = substr(matched, sep + 2, length(matched) - sep - 2)
        # before match, <a href="url">, link text, </a>, after match
        line = substr(line, 1, start-1) "<a href=\"" matched_url "\">" matched_link "</a>" substr(line, end+1)
    }
    # Replace occurrences of `...` with <code>...</code>
    while (match(line, /`([^`]+)`/)) {
        start = RSTART
        end = RSTART + RLENGTH - 1
        # before match, <code>, escaped content, </code>, after match
        line = substr(line, 1, start-1) "<code>" escape_html(substr(line, start+1, RLENGTH-2)) "</code>" substr(line, end+1)
    }
    return line
}
# Return `str` with the characters that must not appear literally in the
# generated HTML replaced: "<" -> "&lt;", ">" -> "&gt;", "*" -> "&#42;".
# In a gsub() replacement a bare "&" means "the matched text", hence the
# "\\&" spelling to produce a literal ampersand.
function escape_html(str) {
    # NOTE(review): "&" itself is not escaped; the line below is disabled in
    # the original. If it is re-enabled it must run before the other
    # substitutions so the "&" they insert is not escaped a second time.
    # gsub(/&/, "\\&amp;", str) # Must be done first!
    gsub(/</, "\\&lt;", str)
    gsub(/>/, "\\&gt;", str)
    # The "*" is escaped explicitly: an ERE that begins with a bare "*" has
    # undefined behaviour in POSIX and is rejected by some awk implementations.
    gsub(/\*/, "\\&#42;", str)
    # NOTE(review): the replacement reaches gsub() as two backslashes; whether
    # that yields one or two backslashes differs between awk implementations.
    # Left as is - confirm the intended output.
    gsub(/\\/, "\\\\", str)
    return str
}
# Pop the innermost environment, remember it in the global `env`, and print
# its closing tag.
function closeOne() {
    print "</" (env = pop()) ">"
}
# Matching headers: one or more "#" followed by a space.
/^#+ / {
    # Close every environment that is still open before emitting the heading.
    # Blank lines never close a code block, so without this a heading that
    # follows one would be printed inside <pre><code>; the same applies to a
    # heading that directly follows paragraph or list text.
    while (stack_pointer > 0 && last() != "none") {
        env = pop()
        print "</" env ">"
    }
    # RLENGTH counts the "#" characters plus the trailing space, so the
    # heading level is RLENGTH - 1 and the text starts right after the match.
    match($0, /^#+ /)
    n = RLENGTH
    print "<h" (n - 1) ">" substr($0, n + 1) "</h" (n - 1) ">"
    next
}
# Matching blockquotes: lines starting with "> ".
/^> / {
    # Close any other environment first. Blank lines never close a code
    # block, so without this a quote that follows one would be opened inside
    # <pre><code>; the same applies to an open paragraph or list.
    while (stack_pointer > 0 && last() != "none" && last() != "blockquote") {
        env = pop()
        print "</" env ">"
    }
    env = last()
    if (env != "blockquote") {
        # Not inside a quote yet: open one
        push("blockquote")
        print "<blockquote>"
    }
    # Inside the quote only the text after the "> " marker is printed
    print substr($0, 3)
    next
}
# Matching unordered lists: "-", "+" or "*" followed by a space.
/^[-+*] / {
    # Close any other environment first. Blank lines never close a code
    # block, so without this a list that follows one would be opened inside
    # <pre><code>; the same applies to an open paragraph, quote or ordered list.
    while (stack_pointer > 0 && last() != "none" && last() != "ul") {
        env = pop()
        print "</" env ">"
    }
    env = last()
    if (env != "ul") {
        # Not inside an unordered list yet: open one
        push("ul")
        print "<ul>"
    }
    # The item text starts after the two-character marker
    print "<li>" replaceInline(substr($0, 3)) "</li>"
    next
}
# Matching ordered lists: digits, a dot, then a space or tab ("1. item").
# Requiring the whitespace keeps text such as "3.14 is pi" out of this rule.
/^[0-9]+\.[ \t]/ {
    # Close any other environment first. Blank lines never close a code
    # block, so without this a list that follows one would be opened inside
    # <pre><code>; the same applies to an open paragraph, quote or unordered list.
    while (stack_pointer > 0 && last() != "none" && last() != "ol") {
        env = pop()
        print "</" env ">"
    }
    env = last()
    if (env != "ol") {
        # Not inside an ordered list yet: open one
        push("ol")
        print "<ol>"
    }
    # Measure the real marker width so multi-digit numbers ("10. item") are
    # stripped completely; RLENGTH is read before replaceInline() runs.
    match($0, /^[0-9]+\.[ \t]+/)
    print "<li>" replaceInline(substr($0, RLENGTH + 1)) "</li>"
    # Stop here: the paragraph pattern does not exclude lines starting with a
    # digit, so without "next" the item would also be rendered as a paragraph
    # and the list closed immediately.
    next
}
# Matching code block: any line whose first character is a space or a tab.
/^( |\t)/ {
    env = last()
    # n is the length of the indent token matched at the start of the line;
    # everything after it is the code text.
    match($0, /( |\t)/)
    n = RLENGTH
    if (env != "code") {
        # First line of a code block: open <pre><code> on the same output
        # line as the first line of code.
        push("pre")
        push("code")
        printf "%s", "<pre><code>"
    }
    print escape_html(substr($0, n+1))
    # Code lines are never handled by any later rule
    next
}
# Matching a simple paragraph: any non-empty line whose first character is
# not one of  - # * + > `  space or tab.
# The markers are listed in a bracket expression because "*" or "+" directly
# after "|" is undefined in POSIX EREs and rejected by some awk implementations.
# NOTE(review): a line that starts with one of these characters but matches no
# other rule (e.g. "*emphasis* first" or "`code` first") produces no output.
!/^([-#*+>` ]|\t|$)/ {
    env = last()
    # Plain, un-indented text ends any open list, blockquote or code block.
    while (env == "ul" || env == "ol" || env == "blockquote" || env == "code" || env == "pre") {
        env = pop()
        print "</" env ">"
        env = last()
    }
    if (env == "none") {
        # Nothing open: start a new paragraph. No newline is written, so the
        # closing tag printed later lands on the same output line.
        push("p")
        printf "<p>%s", replaceInline($0)
    } else if (env == "p") {
        # Already in a paragraph: continue it on a new output line
        printf "\n%s", replaceInline($0)
    }
}
# A blank line closes every open list, blockquote and paragraph, innermost
# first. It stops at the first environment of any other kind, so an open
# code block survives blank lines.
$0 == "" {
    for (env = last(); env == "ul" || env == "ol" || env == "blockquote" || env == "p"; env = last()) {
        env = pop()
        print "</" env ">"
    }
    next
}
# At end of input, close every environment that is still open, innermost
# first, until the "none" sentinel is on top of the stack.
END {
    for (env = last(); env != "none"; env = last()) {
        env = pop()
        print "</" env ">"
    }
}