-- Credit: https://gist.github.com/phi-gamma/2622252
packagedata = packagedata or { } -- namespace proposal for packages

dofile(kpse.find_file"char-def.lua")      -- unicode tables
dofile(kpse.find_file"lualibs-table.lua") -- old Context table code

-- Cache library functions and node type ids in locals; the code below
-- refers to them by these short names.
local utf                       = unicode.utf8
local node                      = node
local type                      = type
local lower, utfchar, utfvalues = string.lower, utf.char, string.utfvalues
local tableconcat, iowrite      = table.concat, io.write
local stringformat, texprint    = string.format, tex.print
local traverse_nodes            = node.traverse
-- presumably populated by char-def.lua loaded above; verify
local chardata                  = characters.data
local glyph_code                = node.id"glyph"
local disc_code                 = node.id"disc"
local kern_code                 = node.id"kern"
local kerning_code              = 0 -- from font

-- * LaTeX counters interface

local latex = latex or {}

-- This reflects the standard Lua(La)TeX way
-- Credit: https://tex.stackexchange.com/a/330403/238079
--
-- latex.count is a proxy table: reading latex.count[name] reads the TeX
-- count register \c@<name>, and assigning to it writes that register.
-- Without the __newindex handler an assignment would be stored in the
-- proxy table itself and would hide the real register from later reads.
latex.count = {}
setmetatable(latex.count, {
  __index = function(t, counter)
    return tex.count['c@' .. counter]
  end,
  __newindex = function(t, counter, value)
    tex.setcount('c@' .. counter, value)
  end,
})

--- Return the value of the count register \c@<counter>.
function latex.getcount (counter)
  return tex.getcount('c@' .. counter)
end

--- Set the count register \c@<counter> to value.
-- NOTE(review): tex.setcount is called without a "global" prefix, so the
-- assignment is presumably local to the current TeX group, whereas LaTeX's
-- own \setcounter assigns globally -- confirm this is intended.
function latex.setcount (counter, value)
  return tex.setcount('c@' .. counter, value)
end

--- Add one to the count register \c@<counter>.
function latex.inccount (counter)
  return latex.setcount(counter, latex.count[counter] + 1)
end

-- * Global wordcount state

-- This table holds the counters we are counting into. Keys are names
-- of LaTeX counters, values are true (we are counting into it) or
-- false (we aren't).
local counters = {}

-- Least number of characters required to count as a word.
local threshold = 1 -- default: any word of at least one character counts

-- Lua 5.1 provides unpack as a global, Lua 5.2+ as table.unpack.
local unpack = table.unpack or unpack
-- Length in UTF-8 characters (the # operator would give bytes).
local utflen = utf.len

--- Start counting words into the LaTeX counter named `counter`.
function enable_counter (counter)
  counters[counter] = true
end

--- Stop counting words into the LaTeX counter named `counter`.
function disable_counter (counter)
  counters[counter] = false
end

--- Set the least number of characters a word needs in order to be counted.
-- Values that cannot be converted to a number (including nil) leave the
-- current threshold untouched; numeric strings are converted so that the
-- later numeric comparison cannot fail.
function set_threshold (n)
  local value = tonumber(n)
  if value then
    threshold = value
  end
end

-- * Counting

-- Character categories treated as letters.
-- table.tohash is presumably provided by lualibs-table.lua; verify.
local is_letter = table.tohash { "ll", "lm", "lo", "lt", "lu" }

local charcache = { } --- memo without metatable

--- Return the lowercase form of a code point as a UTF-8 string.
-- Uses the `lccode` field of the character data when present (a number or
-- a table of numbers); otherwise a numeric code is converted as is and any
-- other value is passed through. Results are memoised in charcache.
-- Returns nil when code is nil or false.
local lcchar = function(code)
  if code then
    if charcache[code] then
      return charcache[code]
    end
    local c = chardata[code]
    c = c and c.lccode
    if c then --utfstring
      if type(c) == "table" then
        c = utfchar(unpack(c))
      else
        c = utfchar(c)
      end
    else
      if type(code) == "number" then
        c = utfchar(code)
      else
        c = code
      end
    end
    charcache[code] = c
    return c
  end
end

--- Lowercase a UTF-8 string character by character using lcchar.
local lowerchar = function (str)
  local new, n = { }, 0
  for val in utfvalues(str) do
    n = n + 1
    new[n] = lcchar(val) -- could be inlined here as well ..
  end
  return tableconcat(new)
end

--- Walk a node list, collect runs of letter glyphs into words and hand
-- each word to `whenfound`.
-- @param head       first node of the list
-- @param whenfound  function(word); if it returns a function, that
--                   function is called on every node collected for the word
-- @return head (unchanged) and a boolean telling whether any word was marked
local function mark_words (head, whenfound)
  local current, done = head, false
  -- str[1..s] holds the characters of the word being collected,
  -- nds[1..n] the nodes it was collected from.
  local str, s, nds, n = { }, 0, { }, 0

  -- Flush the collected word (if any) and reset both buffers.
  local function action()
    if s > 0 then
      local word = tableconcat(str, "", 1, s)
      local mark = whenfound(word)
      if mark then
        done = true
        for i = 1, n do
          mark(nds[i])
        end
      end
    end
    n, s = 0, 0
  end

  while current do -- iterate
    local id = current.id
    if id == glyph_code then
      local components = current.components
      if components then
        -- Glyph with components: record the node once and take the
        -- characters from its component list.
        n = n + 1
        nds[n] = current
        for g in traverse_nodes(components) do
          s = s + 1
          str[s] = utfchar(g.char)
        end
      else
        local code = current.char
        local data = chardata[code]
        if data and is_letter[data.category] then
          n = n + 1
          nds[n] = current
          s = s + 1
          str[s] = utfchar(code)
        elseif s > 0 then
          -- A non-letter glyph ends the current word.
          action()
        end
      end
    elseif id == disc_code then -- take the replace
      -- A discretionary inside a word is kept with the word's nodes.
      if n > 0 then
        n = n + 1
        nds[n] = current
      end
    elseif id == kern_code and current.subtype == kerning_code and s > 0 then
      -- ok: a font kern inside a word does not end it
    elseif s > 0 then
      action()
    end
    current = current.next
  end

  if s > 0 then
    action()
  end
  return head, done
end

--- Word handler: bump every enabled counter when the word is long enough.
-- The length is measured in UTF-8 characters, not bytes, so that words with
-- non-ASCII letters are compared correctly against the threshold.
-- Returns nothing, so mark_words never marks nodes for it.
local function insert_word (str)
  if utflen(str) >= threshold then
    -- For each enabled counter...
    for counter, enabled in pairs(counters) do
      -- ...increment the counter.
      if enabled then
        latex.inccount(counter)
      end
    end
  end
end

--- Node-list callback: count the words found in the list starting at head.
local callback = function (head)
  return mark_words(head, insert_word)
end

-- * Export module

return {
  latex = latex,
  enable_counter = enable_counter,
  disable_counter = disable_counter,
  callback = callback,
  set_threshold = set_threshold
}