quick let's make a somewhat markdowny thing that isn't good!

I don't know! It's an example of something. And it's not incredibly large. Like I guess I kind of want to have a smallish example that touches on some of the things I usually run into with these kinds of things...

Anyway we're going to translate from our thing to the Hypertext Markup Language. So:

-- Characters that are special in HTML, each mapped to the entity that
-- replaces it. Serves as the gsub replacement table in escape().
escapechar = {
  ['&'] = "&amp;",
  ['<'] = "&lt;",
  ['>'] = "&gt;",
  ['"'] = "&quot;",
  ["'"] = "&apos;",
}

--- Replace the HTML-special characters < > " ' & with their entities.
-- @param str text to escape; nil is treated as the empty string
-- @return the escaped text (a single value)
function escape(str)
  local text = str or ""
  -- The parentheses drop gsub's second result (the substitution count).
  return (text:gsub("[<>\"'&]", escapechar))
end

-- strhtml is the hook used to turn a piece of text into HTML. It starts out
-- as plain escaping and is reassigned further down to switch renderers.
strhtml = escape

--- Build a function that wraps rendered text in an HTML element.
-- @param tag element name, e.g. "p"
-- @param inner optional renderer for the wrapped text; when omitted, the
--   global strhtml is looked up each time the returned function is called
-- @return function(s) producing "<tag>" .. rendered text .. "</tag>"
function tagged(tag, inner)
  local function wrap(s)
    -- Resolve the renderer here, not in tagged(), so a later reassignment of
    -- strhtml is picked up by wrappers that were created earlier.
    local render = inner or strhtml
    local body = render(s)
    return "<" .. tag .. ">" .. body .. "</" .. tag .. ">"
  end
  return wrap
end

We can test it:

-- Every special character in the sample should come out as an entity,
-- wrapped in <p>...</p>.
local paragraph = tagged("p")
print(paragraph([["3 is < 4" & '4 > 3']]))

Okay.

A ton of functionality:

In some kind of order (the first pattern that matches a line wins, so the catch-all paragraph pattern goes last):

-- Line kinds, tried top to bottom by parseline; the first matching pattern wins.
--   pattern  Lua pattern whose single capture is the text of the line
--   f        renders the captured text to HTML
--   open     emitted by parse when a run of lines of this kind starts (optional)
--   close    emitted by parse when a run of lines of this kind ends (optional)
--   br       emitted by parse between consecutive lines of this kind (optional)
states = {
  { -- code: line indented by two spaces
    pattern = "^  (.*)$",
    open = "<pre><code>",
    close = "</code></pre>",
    br = "\n",
    f = escape,
  },
  { -- blank line; also the state parse starts in
    pattern = "^(%s*)$",
    f = function() return "" end,
  },
  { -- second-level heading (must be tried before the "#" pattern)
    pattern = "^## *(.*)$",
    close = "\n",
    f = tagged("h2"),
  },
  { -- first-level heading
    pattern = "^# *(.*)$",
    close = "\n",
    f = tagged("h1"),
  },
  { -- list item
    pattern = "^[*] *(.*)$",
    open = "<ul>",
    close = "</ul>\n",
    f = tagged("li"),
  },
  { -- paragraph text: catch-all, matches any line
    pattern = "^ *(.*)$",
    open = "<p>",
    close = "</p>\n",
    br = "<br>",
    -- Wrapped in a function so the current value of strhtml is used on each call.
    f = function(text) return strhtml(text) end,
  },
}

--- Classify one line and render its text.
-- Walks the global states list in order and uses the first entry whose
-- pattern matches.
-- @param str a single line, without its newline
-- @return table { state = matching entry of states, text = rendered HTML }
function parseline(str)
  for i = 1, #states do
    local candidate = states[i]
    local captured = str:match(candidate.pattern)
    if captured then
      return { state = candidate, text = candidate.f(captured) }
    end
  end
  -- Only raised when no entry of states matches the line.
  error("unreachable")
end

Test:

--- Debug helper: print which pattern matched a line, and the rendered text.
-- The pattern is right-aligned in a column of width 10 (longer patterns are
-- printed in full).
-- @param s a single line to classify
function printline(s)
  local parsed = parseline(s)
  local pattern = parsed.state.pattern
  local padding = string.rep(" ", 10 - #pattern)
  print(padding .. pattern .. ": " .. parsed.text)
end

-- One sample line per kind: h1, h2, code, blank, paragraph.
for _, sample in ipairs({
  [[# H1!]],
  [[## H2?]],
  [[  some code]],
  [[]],
  [[normal text :)]],
}) do
  printline(sample)
end

Looks probably good.

We're only doing "document structure" stuff here, so that's almost it. (We can pretend that we're doing something more sophisticated with the bits of text, like maybe we're having special text formatting syntax and maybe support for links. Won't bother actually doing any of that. But we can pretend.)

Opening and closing things and switching between our states:

--- Turn an iterator of lines into an iterator of HTML fragments.
-- Tracks the kind of the previous line. When the kind changes, the old kind's
-- close string and the new kind's open string are emitted; when it stays the
-- same, the kind's br string is emitted between lines. The last kind is
-- closed after the final line.
-- @param lines iterator function returning one line per call
-- @return iterator function (a wrapped coroutine) returning HTML fragments
function parse(lines)
  -- Start in the blank-line state, which has neither open nor close.
  local current = states[2]
  local emit = coroutine.yield

  return coroutine.wrap(function()
    for line in lines do
      local parsed = parseline(line)
      if parsed.state == current then
        if current.br then emit(current.br) end
      else
        if current.close then emit(current.close) end
        current = parsed.state
        if current.open then emit(current.open) end
      end
      emit(parsed.text)
    end
    if current.close then emit(current.close) end
  end)
end

--- Parse a whole document given as a single string.
-- @param str document text with "\n" as the line separator
-- @return iterator over HTML fragments, as returned by parse
function parsestring(str)
  -- A pattern that can match the empty string ("[^\n]*") makes gmatch yield a
  -- spurious "" after every line on older Lua versions (5.1, 5.2, LuaJIT).
  -- Each of those looks like a blank line, so every paragraph and code block
  -- would end after a single line. Appending a terminator and requiring it in
  -- the pattern yields each line exactly once on every version.
  return parse((str .. "\n"):gmatch("([^\n]*)\n"))
end

--- Render a whole document to one HTML string.
-- @param str document text
-- @return all fragments produced by parsestring(str), concatenated
function html(str)
  local pieces = {}
  for piece in parsestring(str) do
    pieces[#pieces + 1] = piece
  end
  return table.concat(pieces)
end

Let's try:

-- Sample document covering both heading levels, paragraphs, an indented code
-- block and a list.
teststring = [[
# Hello

Meep moop.

## Some code

Okay.

  if this then
    that
  else
    nope
  end

More regular text.

* list
* of
* things

]]

-- Render once, then show the markup as text and hand it to web.html.
local rendered = html(teststring)
print(rendered)
web.html(rendered)

Mlep.

The code is Lua code btw.

actually let's do formatting and links too

More finicky and less interesting. But, for "completeness" I guess.

We'll have links:

--- Render "URL optional description" as an HTML anchor.
-- The first run of non-space characters is the URL; whatever follows, with
-- surrounding whitespace trimmed, is the link text. Both are HTML-escaped.
-- @param str link source, e.g. "https://example.org/ some text"
-- @return an <a> element, or just the escaped input when it contains no URL
--   (empty or whitespace only)
function link(str)
  local url, desc = str:match("^%s*(%S+)%s*(.-)%s*$")
  if not url then return escape(str) end

  url = escape(url)
  if desc == "" then
    -- No description given: show the URL itself as the link text.
    desc = url
  else
    desc = escape(desc)
  end
  return '<a href="' .. url .. '">' .. desc .. '</a>'
end
-- Everything after the first run of non-space characters is the link text.
local bunnylink = link("https://dailybunny.org/ here be bunnies")
print(bunnylink)

And code (`) and emphasis (_).

-- Renderers for inline text, keyed by the character that opens and closes a
-- span. The empty key handles plain text outside any span.
formatting = {}
formatting[""] = escape
formatting["`"] = tagged("code", escape)
formatting["_"] = tagged("em", escape)
formatting["^"] = link

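This is the finicky bit. It's probably not done very well. But the idea is to loop through str by looking for the next interesting character in the string (str:find("[_`\\^]", startpos)) and handle it: a backslash escapes whatever character follows, a formatting character opens a mode or closes the one we're in, and everything in between is collected as text: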

--- Render inline formatting in a piece of text to HTML.
-- Scans for the characters _ ` ^ and \. A formatting character opens a span
-- when no span is open and closes the span it opened; inside a span, the
-- other formatting characters are literal. A backslash makes the following
-- character literal. A span still open at the end of the text is closed
-- implicitly. Each segment is rendered by the matching entry of the global
-- formatting table.
-- @param str text to render; nil is treated as the empty string, matching escape
-- @return the rendered HTML
function finickystrhtml(str)
  str = str or ""
  local out = {}      -- rendered segments
  local pending = {}  -- raw pieces of the segment being collected
  local mode = ""     -- formatting character of the open span, "" for none

  -- Render what was collected under the current mode and enter nextmode.
  local function flush(nextmode)
    out[#out + 1] = formatting[mode](table.concat(pending))
    pending = {}
    mode = nextmode
  end

  local startpos = 1
  while true do
    local pos = str:find("[_`\\^]", startpos)
    if not pos then
      pending[#pending + 1] = str:sub(startpos)
      flush("")
      return table.concat(out)
    end

    pending[#pending + 1] = str:sub(startpos, pos - 1)

    local char = str:sub(pos, pos)
    if char == "\\" then
      -- Take the escaped character verbatim; a trailing backslash adds "".
      pending[#pending + 1] = str:sub(pos + 1, pos + 1)
      startpos = pos + 2
    else
      if mode == "" then
        flush(char)
      elseif char == mode then
        flush("")
      else
        pending[#pending + 1] = char
      end
      startpos = pos + 1
    end
  end
end

Oh well. We'll try with and without using the new function:

-- Same document shape as the earlier sample, now with inline emphasis, inline
-- code and links, including inside a heading and a list item.
teststring = [[
# Hel_lo_

Meep moop. `Beep boop.`

## Some code

Okay, I mean _okay_:

  if this then
    that
  else
    nope
  end

More regular text and also `more code` and ^https://dailybunny.org/ a bunnylink^.

* list
* with
* ^https://dailyotter.org/ otter^

]]

-- First pass: strhtml is plain escaping, so the formatting characters in the
-- sample are left in the output as-is.
-- NOTE(review): web is not defined in the visible source; presumably the host
-- environment provides it. Confirm.
print("more texty:")

strhtml = escape
web.html(html(teststring))

-- Second pass: swap the strhtml hook so paragraph, heading and list text goes
-- through finickystrhtml. Code lines still use escape directly.
print("more hypertexty:")
strhtml = finickystrhtml
web.html(html(teststring))

Blep.