#!/usr/bin/awk -f
# Initialise the block-environment stack before any input is read.
BEGIN {
    # "none" is the bottom-of-stack sentinel: the rules compare last()
    # against it to know that no block-level element is currently open.
    env = "none"
    stack_pointer = 0
    push(env)
}
# Place `value` on top of the global stack.
# The pre-increment makes stack_pointer always index the newest element.
function push(value) {
    stack[++stack_pointer] = value
}
# Remove and return the value on top of the stack (LIFO).
# Returns the string "empty" when the stack holds no elements.
# `value` is listed as an extra parameter so that it is local to the
# function (awk has no other way to declare locals); callers still
# invoke pop() with no arguments.
function pop(    value) {
    if (stack_pointer <= 0) {
        return "empty"
    }
    value = stack[stack_pointer]
    delete stack[stack_pointer]
    stack_pointer--
    return value
}
# Peek at the top of the stack: return the most recently pushed value
# without removing it.
function last() {
    return (stack[stack_pointer])
}
# Convert inline Markdown in `line` to HTML and return the converted string.
# Processed in this order: **bold**, *emphasis*, [text](url) links, `code`.
#
#   line    - the text to convert (callers pass only this argument)
#   result, start, end, matched, matched_link, matched_url, sep
#           - awk-style locals; `result` is unused but kept so the
#             parameter list stays compatible
#
# NOTE(review): the tag literals (<strong>, <em>, <a href>, <code>) were
# empty strings in the reviewed copy, apparently stripped in transit --
# confirm the exact tag names against the upstream script.
function replaceInline(line, result, start, end,    matched, matched_link, matched_url, sep) {
    # **...** -> <strong>...</strong>
    while (match(line, /\*\*([^*]+)\*\*/)) {
        start = RSTART
        end = RSTART + RLENGTH - 1
        # before match, opening tag, content without the ** markers, closing tag, after match
        line = substr(line, 1, start - 1) "<strong>" substr(line, start + 2, RLENGTH - 4) "</strong>" substr(line, end + 1)
    }
    # *...* -> <em>...</em>
    while (match(line, /\*([^*]+)\*/)) {
        start = RSTART
        end = RSTART + RLENGTH - 1
        line = substr(line, 1, start - 1) "<em>" substr(line, start + 1, RLENGTH - 2) "</em>" substr(line, end + 1)
    }
    # [text](url) -> <a href="url">text</a>
    while (match(line, /\[([^\]]+)\]\([^\)]+\)/)) {
        start = RSTART
        end = RSTART + RLENGTH - 1
        # Slice the match out of `line` (not $0): `line` may be a substring
        # of the record and has already been rewritten by the loops above.
        matched = substr(line, start, RLENGTH)
        # The text part cannot contain "]", so the first "](" is the
        # boundary between text and URL even if the text has parentheses.
        sep = index(matched, "](")
        matched_link = substr(matched, 2, sep - 2)
        matched_url = substr(matched, sep + 2, length(matched) - sep - 2)
        line = substr(line, 1, start - 1) "<a href=\"" matched_url "\">" matched_link "</a>" substr(line, end + 1)
    }
    # `code` -> <code>escaped code</code>
    while (match(line, /`([^`]+)`/)) {
        start = RSTART
        end = RSTART + RLENGTH - 1
        line = substr(line, 1, start - 1) "<code>" escape_html(substr(line, start + 1, RLENGTH - 2)) "</code>" substr(line, end + 1)
    }
    return line
}
# Return `str` with characters that are special to HTML (or to the inline
# Markdown pass) replaced by character entities.
# In a gsub() replacement a bare "&" means "the matched text", so every
# literal ampersand in the entities is written as "\\&".
# NOTE(review): the entity strings are reconstructed; the reviewed copy had
# them decoded back to the raw characters -- confirm against upstream.
function escape_html(str) {
    # NOTE(review): ampersand escaping is disabled in the original; confirm
    # whether that is intentional before enabling it.
    # gsub(/&/, "\\&amp;", str) # Must be done first!
    gsub(/</, "\\&lt;", str)
    gsub(/>/, "\\&gt;", str)
    gsub(/\*/, "\\&#42;", str)
    gsub(/\\/, "\\&#92;", str)
    return str
}
# Close the innermost open block: pop its tag name off the stack into the
# global `env` and print the matching HTML closing tag.
function closeOne() {
    env = pop()
    # The "</" prefix is required; without it the output is e.g. "ul>".
    print "</" env ">"
}
# ---------------------------------------------------------------------------
# Block-level rules. Each rule inspects the innermost open block via last()
# and either continues it or opens a new one with push().
#
# NOTE(review): the reviewed copy of this section had lost its line breaks
# and its HTML tag literals. The header print statement and the head of the
# blockquote rule were rebuilt from the surviving fragments, and the code
# indent is assumed to be four spaces -- confirm all three against upstream.
# ---------------------------------------------------------------------------

# Matching headers: "# Title" -> <h1>, "## Title" -> <h2>, ...
/^#+ / {
    match($0, /#+ /)
    n = RLENGTH                 # number of '#' characters plus the space
    level = n - 1
    print "<h" level ">" substr($0, n + 1) "</h" level ">"
}

# Matching blockquotes
/^> / {
    env = last()
    if (env == "blockquote") {
        # In a blockquote, print the next quoted line
        print substr($0, 3)
    } else {
        # Otherwise, init the blockquote
        push("blockquote")
        print "<blockquote>\n" substr($0, 3)
    }
}

# Matching unordered lists
/^[-+*] / {
    env = last()
    if (env == "ul") {
        # In an unordered list block, print a new item
        print "<li>" replaceInline(substr($0, 3)) "</li>"
    } else {
        # Otherwise, init the unordered list block
        push("ul")
        print "<ul>\n<li>" replaceInline(substr($0, 3)) "</li>"
    }
}

# Matching ordered lists
/^[0-9]+\.[ \t]+/ {
    # Strip the whole "N. " marker, however many digits it has.
    match($0, /^[0-9]+\.[ \t]+/)
    item = replaceInline(substr($0, RLENGTH + 1))
    env = last()
    if (env == "ol") {
        # In an ordered list block, print a new item
        print "<li>" item "</li>"
    } else {
        # Otherwise, init the ordered list block
        push("ol")
        print "<ol>\n<li>" item "</li>"
    }
    # The paragraph rule does not exclude lines starting with a digit, so
    # stop here or the item would also be emitted as a paragraph.
    next
}

# Matching code block (indented by four spaces or one tab)
/^(    |\t)/ {
    env = last()
    match($0, /^(    |\t)/)
    n = RLENGTH                 # width of the indent to strip
    if (env == "code") {
        # In a code block, print the next line
        print escape_html(substr($0, n + 1))
    } else {
        # Otherwise, init the code block
        push("pre")
        push("code")
        print "<pre><code>" escape_html(substr($0, n + 1))
    }
    next
}

# Matching a simple paragraph: any line that does not start with a block
# marker, an indent, or nothing at all.
!/^([-#*+>`\t]|    |$)/ {
    env = last()
    # Un-indented text closes any open code, pre, list or quote block.
    while (env != "none") {
        if (env == "ul" || env == "ol" || env == "blockquote" || env == "code" || env == "pre") {
            env = pop()
            print "</" env ">"
            env = last()
        } else {
            # A 'p' or other non-container tag: stop closing
            break
        }
    }
    if (env == "none" || env == "p") {
        if (env == "none") {
            # Start a new paragraph
            push("p")
            printf "<p>"
        } else {
            # Continue the open paragraph on a new output line
            printf "\n"
        }
        printf "%s", replaceInline($0)
    }
}

# A blank line closes paragraphs, lists and quotes, but not code blocks.
$0 == "" {
    env = last()
    while (env != "none" && env != "code") {
        if (env == "ul" || env == "ol" || env == "blockquote" || env == "p") {
            env = pop()
            print "</" env ">"
        } else {
            # Not a closable environment (e.g. 'pre' under 'code'): stop
            break
        }
        env = last()
    }
    next
}

# At end of input, close everything that is still open.
END {
    env = last()
    # The stack_pointer guard prevents an endless loop should the "none"
    # sentinel ever have been popped.
    while (env != "none" && stack_pointer > 0) {
        env = pop()
        print "</" env ">"
        env = last()
    }
}