Mòdulu:dati Unicode

La ducumintazzioni di stu mòdulu si pò criari nta Mòdulu:dati Unicode/doc

local m_tabbella = require("Mòdulu:tabbella")
local m_params = require("Mòdulu:paràmitri")
local p = {}
--- Load the Unicode block data.
-- @return whatever `mw.loadData` yields for the "data/blocks" sub-page
--   (presumably a table keyed by each block's lower code point; see p.getBlock)
function p.getBlocks()
  local blocks = mw.loadData("Mòdulu:dati Unicode/data/blocks")
  return blocks
end
--- Load the Unicode script data.
-- @return whatever `mw.loadData` yields for the "data/scripts" sub-page
--   (indexed by script code elsewhere in this module)
function p.getScripts()
  local scripts = mw.loadData("Mòdulu:dati Unicode/data/scripts")
  return scripts
end
--- Load the code point ranges associated with each script.
-- Loaded with `require` rather than `mw.loadData`; p.getScriptForChar iterates
-- the result as `range -> script code` pairs where `range` is a two-element
-- table {lower, upper}.
-- @return the value returned by the "data/script ranges" sub-module
function p.getScriptRanges()
  local ranges = require("Mòdulu:dati Unicode/data/script ranges")
  return ranges
end
--- Load the numeral system definitions.
-- @return whatever `mw.loadData` yields for the "data/numbers" sub-page
--   (indexed by numeral system name in p.convertNumber)
function p.getNumberSystems()
  local systems = mw.loadData("Mòdulu:dati Unicode/data/numbers")
  return systems
end
--- Look up a Unicode block by the key used in the blocks table.
-- @param lowerCodepoint key into the blocks table (by its name, the block's
--   first code point -- TODO confirm against the data page)
-- @return the block entry, or nil when the key is absent
function p.getBlock(lowerCodepoint)
  local blocks = p.getBlocks()
  return blocks[lowerCodepoint]
end
--- Find the Unicode block that contains a single character.
-- @param char a string holding exactly one character
-- @return the first block whose [lower, upper] interval contains the
--   character's code point, or nil when no block matches
-- Raises an error when `char` is not exactly one character long.
function p.getBlockForChar(char)
  local charCount = mw.ustring.len(char)
  if charCount ~= 1 then
    error(mw.ustring.format('Sulu nu caràttiri privistu, %d dati ("%s")', charCount, char))
  end

  local codepoint = mw.ustring.codepoint(char)
  for _, candidate in pairs(p.getBlocks()) do
    -- A block matches unless the code point lies strictly outside its bounds.
    if not (codepoint < candidate.lower or candidate.upper < codepoint) then
      return candidate
    end
  end
  return nil
end
--- Look up a script by its code.
-- @param code script code used as key in the scripts table (e.g. "Unknown")
-- @return the script entry, or nil when the code is absent
function p.getScript(code)
  local scripts = p.getScripts()
  return scripts[code]
end
--- Find the script a single character belongs to.
-- @param char a string holding exactly one character
-- @return the script entry of the first range containing the character's
--   code point; the "Unknown" script entry when no range matches
-- Raises an error when `char` is not exactly one character long.
function p.getScriptForChar(char)
  local charCount = mw.ustring.len(char)
  if charCount ~= 1 then
    error(mw.ustring.format("Sulu nu caràttiri privistu, %d dati", charCount))
  end

  local codepoint = mw.ustring.codepoint(char)
  local scripts = p.getScripts()
  -- Keys of the ranges table are {lower, upper} pairs, values are script codes.
  for bounds, scriptCode in pairs(p.getScriptRanges()) do
    local lower, upper = bounds[1], bounds[2]
    if not (codepoint < lower or upper < codepoint) then
      return scripts[scriptCode]
    end
  end
  return scripts["Unknown"]
end
--- Determine the single script a text is written in.
-- "Common" and "Inherited" are only used as fallbacks: they are returned when
-- no other script occurs in the text ("Inherited" taking precedence).
-- An "Unknown" script is replaced by any other specific script found.
-- @param text the text to analyse
-- @return the script entry; the "Unknown" entry when two different specific
--   scripts occur; nil when p.getScriptsForText finds no script at all
function p.getScriptForText(text)
  local sawInherited = false
  local sawCommon = false
  local found

  for _, candidate in pairs(p.getScriptsForText(text)) do
    local code = candidate.code
    if code == "Common" then
      sawCommon = true
    elseif code == "Inherited" then
      sawInherited = true
    elseif found == nil or found.code == "Unknown" then
      -- First specific script, or a better candidate than "Unknown".
      found = candidate
    elseif code ~= "Unknown" and code ~= found.code then
      -- Two distinct specific scripts: the text has no single script.
      return p.getScript("Unknown")
    end
  end

  if found ~= nil then
    return found
  end
  if sawInherited then
    return p.getScript("Inherited")
  end
  if sawCommon then
    return p.getScript("Common")
  end
  return nil
end
--- Collect every script occurring in a text.
-- Spans starting at "<" and ending at the next ">" are treated as tags: they
-- are skipped for script detection and recorded as a range whose `script` is
-- nil. A "<" without a following ">" is analysed as an ordinary character.
-- @param text the text to analyse
-- @param getRanges when truthy, also return the list of ranges
-- @return a table mapping script code -> script entry
-- @return (only when `getRanges` is truthy) a list of
--   `{ script = <code or nil>, from = <index>, to = <index> }` entries covering
--   the text in order, with character (not byte) indices
function p.getScriptsForText(text, getRanges)
  local res = {}
  local scriptsRanges = {}
  -- The text is never modified in the loop, so its length is computed once
  -- instead of on every iteration.
  local length = mw.ustring.len(text)
  local i = 1
  while i <= length do
    local c = mw.ustring.sub(text, i, i)
    local tagEnd = nil
    if c == "<" then
      -- Plain (non-pattern) search for the closing bracket.
      tagEnd = mw.ustring.find(text, ">", i, true)
    end

    if tagEnd ~= nil then
      table.insert(scriptsRanges, { script = nil, from = i, to = tagEnd })
      i = tagEnd
    else
      local script = p.getScriptForChar(c)
      local name = script.code
      if not res[name] then
        res[name] = script
      end
      -- Extend the previous range when it is contiguous and of the same
      -- script; otherwise start a new one. Tag ranges have a nil script and
      -- therefore never match.
      local lastRange = scriptsRanges[#scriptsRanges]
      if lastRange and lastRange.script == name and lastRange.to + 1 == i then
        lastRange.to = i
      else
        table.insert(scriptsRanges, { script = name, from = i, to = i })
      end
    end
    i = i + 1
  end

  if getRanges then
    return res, scriptsRanges
  end
  return res
end
--- Tell whether a text contains at least one character of a given script.
-- @param text the text to analyse
-- @param scriptCode code of the script to look for
-- @return true when the script is known and occurs in the text, false otherwise
function p.textHasScript(text, scriptCode)
  local script = p.getScript(scriptCode)
  if script == nil then
    return false
  end
  local scriptsInText = p.getScriptsForText(text)
  return scriptsInText[script.code] ~= nil
end
--- Tell whether a text should be rendered in italics.
-- @param text the text to analyse
-- @return true when the text's script is "Latin", "Common" or "Inherited";
--   false otherwise, including when no script can be determined
function p.shouldItalicize(text)
  local script = p.getScriptForText(text)
  -- p.getScriptForText returns nil when the text yields no script at all
  -- (e.g. an empty string or a text made only of tags); indexing that nil
  -- would raise an error, so treat it as "nothing to italicize".
  if script == nil then
    return false
  end
  local name = script.code
  return name == "Latin" or name == "Common" or name == "Inherited"
end
-- CSS `writing-mode` value for each script direction code.
local directionToCss = {
  lr = "horizontal-tb",
  rl = "horizontal-tb",
  tb = "vertical-lr",
  i = "inherit",
  m = "inherit",
}
-- HTML `dir` attribute value for each script direction code.
-- The "i" and "m" codes have no entry here.
local directionToDir = {
  lr = "ltr",
  rl = "rtl",
  tb = "ltr",
}
--- Wrap runs of text in `<span>` tags carrying the writing direction of
-- their script.
-- Scripts whose direction is "lr" or "rl" are emitted unchanged. Scripts whose
-- direction is "i" or "m" (or missing) never open or close a span by
-- themselves, except at the very start of the text where they join the span
-- of the following script. Tag ranges (see p.getScriptsForText) close any
-- open span and are copied verbatim.
-- @param text the text to process
-- @return the text with direction spans inserted
function p.setWritingDirection(text)
  local dirsToIgnore = { lr = true, rl = true }
  -- Output is accumulated in a buffer and joined once at the end.
  local parts = {}
  local scripts, intervals = p.getScriptsForText(text, true)
  -- Name of the script the currently open span was created for.
  local prevScriptName
  local inSpan = false

  for i, interval in ipairs(intervals) do
    local substr = mw.ustring.sub(text, interval.from, interval.to)
    if interval.script then
      local script = scripts[interval.script]
      local scriptName = script.name
      local scriptDir = script.direction or "i"
      local neutral = scriptDir == "i" or scriptDir == "m"

      local nextInterval = intervals[i + 1]
      -- A following tag range has no script, hence no "next script".
      local nextScript = nextInterval and nextInterval.script and scripts[nextInterval.script] or nil
      local nextScriptDir = nextScript and (nextScript.direction or "i") or nil

      if inSpan and not neutral and scriptName ~= prevScriptName then
        parts[#parts + 1] = "</span>"
        inSpan = false
        -- The remembered script belonged to the span just closed. Forget it,
        -- otherwise a later run of that same script would be mistaken for a
        -- continuation of a span that no longer exists and be left unwrapped.
        prevScriptName = nil
      end

      local nextNeedsNoSpan = dirsToIgnore[nextScriptDir] or nextScriptDir == "i" or nextScriptDir == "m"
      local emitPlain = dirsToIgnore[scriptDir] or prevScriptName == scriptName
          or (neutral and ((i == 1 and nextNeedsNoSpan) or i > 1 or not nextScript))

      if emitPlain then
        parts[#parts + 1] = substr
      else
        local dir
        local cssDir
        if not neutral then
          dir = directionToDir[scriptDir]
          cssDir = scriptDir
          prevScriptName = scriptName
        elseif nextScriptDir then
          -- Leading neutral run: open the span of the following script early.
          dir = directionToDir[nextScriptDir]
          cssDir = nextScriptDir
          prevScriptName = nextScript.name
        end
        if dir then
          local writingMode = directionToCss[cssDir]
          parts[#parts + 1] = mw.ustring.format('<span %s style="writing-mode:%s">', 'dir="' .. dir .. '"', writingMode)
          inSpan = true
        end
        -- Always emit the text itself, even when the direction has no `dir`
        -- mapping and no span could be opened; it must never be dropped.
        parts[#parts + 1] = substr
      end
    else
      if inSpan then
        parts[#parts + 1] = "</span>"
        inSpan = false
      end
      parts[#parts + 1] = substr
      prevScriptName = nil
    end
  end

  if inSpan then
    parts[#parts + 1] = "</span>"
  end
  return table.concat(parts)
end
-- Roman numeral symbols. For the power of ten 10^k, entry 2k+1 is the unit
-- symbol and entry 2k+2 the "five units" symbol; entry 2k+3 (the next unit)
-- is used to write 9.
local romanSymbols = {
  "I", "V",
  "X", "L",
  "C", "D",
  "M", "ↁ",
  "ↂ", "ↇ",
  "ↈ",
}

-- Greek numeral letters: three rows of nine for units, tens and hundreds.
local greekSymbols = {
  "α", "β", "γ", "δ", "ε", "ϛ", "ζ", "η", "θ",
  "ι", "κ", "λ", "μ", "ν", "ξ", "ο", "π", "ϟ",
  "ρ", "σ", "τ", "υ", "φ", "χ", "ψ", "ω", "ϡ",
}

-- Han digits for 0 to 9; entry d+1 is the digit d.
local hanDigits = { "〇", "一", "二", "三", "四", "五", "六", "七", "八", "九" }

-- Write n in a positional system whose digit d is the character at code
-- point `offset + d`.
local function toPositional(n, base, offset)
  if n == 0 then
    return mw.ustring.char(offset)
  end
  local res = ""
  local rest = n
  while rest > 0 do
    res = mw.ustring.char(rest % base + offset) .. res
    rest = math.floor(rest / base)
  end
  return res
end

-- Write n as a Roman numeral, one decimal digit at a time (lowest first).
local function toRoman(n)
  local res = ""
  local exp = 0
  local rest = n
  while rest > 0 do
    local d = rest % 10
    local unit1 = romanSymbols[exp * 2 + 1]
    local unit5 = romanSymbols[exp * 2 + 2]
    local unit10 = romanSymbols[exp * 2 + 3]
    local str = ""
    if d ~= 0 then
      if d <= 3 then
        str = string.rep(unit1, d)
      elseif d == 4 then
        str = unit1 .. unit5
      elseif d == 5 then
        str = unit5
      elseif d < 9 then
        str = unit5 .. string.rep(unit1, d - 5)
      else
        str = unit1 .. unit10
      end
    end
    res = str .. res
    rest = math.floor(rest / 10)
    exp = exp + 1
  end
  return res
end

-- Write n as a Greek numeral, one decimal digit at a time (lowest first).
local function toGreek(n)
  local res = ""
  local exp = 1
  local rest = n
  while rest > 0 do
    local d = rest % 10
    if d > 0 then
      -- Select the tens or hundreds row of the symbol table.
      if exp == 10 or exp == 10000 then
        d = d + 9
      elseif exp == 100 or exp == 100000 then
        d = d + 18
      end
      res = greekSymbols[d] .. res
      -- Letters standing for thousands and above are prefixed with a mark.
      if exp >= 1000 then
        res = "͵" .. res
      end
    end
    exp = exp * 10
    rest = math.floor(rest / 10)
  end
  -- A closing mark is appended when the number has a part below 1000.
  if n % 1000 ~= 0 then
    res = res .. "ʹ"
  end
  return res
end

-- Write n with Han digits, positionally in base 10.
local function toHan(n)
  if n == 0 then
    return hanDigits[1]
  end
  local res = ""
  local rest = n
  while rest > 0 do
    res = hanDigits[(rest % 10) + 1] .. res
    rest = math.floor(rest / 10)
  end
  return res
end

-- Conversion routine for each non-positional numeral system name.
local nonPositionalConverters = {
  ["rumanu"] = toRoman,
  ["grecu"] = toGreek,
  ["sinugrammi"] = toHan,
}

--- Convert a non-negative integer to its representation in a numeral system.
-- @param n the number to convert; must be a non-negative integer
-- @param system name of the numeral system, a key of p.getNumberSystems()
-- @return the converted number as a string; nothing when the system is
--   neither positional nor one of "rumanu", "grecu", "sinugrammi"
-- Raises an error when `n` is not a non-negative integer, when `system` is
-- unknown, or when `n` lies outside the system's `min_value`/`max_value`.
function p.convertNumber(n, system)
  -- Reject non-numbers, negatives and non-integers up front. `n % 1` is NaN
  -- for infinities and NaN, so those are rejected too instead of looping
  -- forever or failing later with an unrelated error.
  if type(n) ~= "number" or n < 0 or n % 1 ~= 0 then
    error("Le nùmmiru s'havi a èssiri nu nteru pusitivu o nuḍḍu!")
  end
  local s = p.getNumberSystems()[system]
  if not s then
    error("Sistema numèricu nu vàliddu: " .. tostring(system))
  end
  if (s.min_value and n < s.min_value) or (s.max_value and n > s.max_value) then
    error("Valuri nun vàlidda: " .. tostring(n))
  end

  if s.positional then
    return toPositional(n, s.base, s.zero_offset)
  end
  local convert = nonPositionalConverters[system]
  if convert then
    return convert(n)
  end
  -- No algorithm is available for this system: nothing is returned.
end
--- Template entry point: build a bibliographic reference to a Unicode block.
-- The block is selected by the first argument when given; otherwise it is the
-- block containing the current page title (which must then be one character).
-- @param frame frame object whose `args` are processed through `m_params`
-- @return the formatted reference as wikitext
-- Raises an error when no block is found.
function p.blockReference(frame)
  local spec = {
    [1] = { type = m_params.INT },
  }
  local args = m_params.process(frame.args, spec)
  local blockCode = args[1]

  local block
  if not blockCode then
    block = p.getBlockForChar(mw.title.getCurrentTitle().text)
  else
    block = p.getBlock(blockCode)
  end

  if not block then
    error(mw.ustring.format('Blocco Unicode "%s" nun vàliddu', blockCode or ""))
  end
  return mw.ustring.format("Unicode, Inc., ''[%s %s]'', The Unicode Standard, version %s, %d",
      block.url, block.name.en, block.version, block.year)
end
--- Template entry point for p.setWritingDirection.
-- @param frame frame object whose `args` are processed through `m_params`
-- @return the first argument with direction spans inserted, or an empty
--   string when the processed argument is nil or false
function p.writingDirection(frame)
  local spec = {
    [1] = { required = true, allow_empty = true },
  }
  local text = m_params.process(frame.args, spec)[1]
  if not text then
    return ""
  end
  return p.setWritingDirection(text)
end
--- Template entry point: return the code point of a single character.
-- @param frame frame object whose `args` are processed through `m_params`;
--   argument 1 is the character, argument "hexa" selects hexadecimal output
-- @return the code point as a number, or as an upper-case hexadecimal string
--   padded to at least four digits when "hexa" is set
function p.codepoint(frame)
  -- Argument 1 must be exactly one character long.
  local function isSingleCharacter(value)
    return mw.ustring.len(value) == 1
  end

  local args = m_params.process(frame.args, {
    [1] = { required = true, checker = isSingleCharacter },
    ["hexa"] = { type = m_params.BOOLEAN, default = false },
  })

  local code = mw.ustring.codepoint(args[1])
  if not args["hexa"] then
    return code
  end
  return string.format("%04X", code)
end
--- Template entry point: return the character for a code point.
-- @param frame frame object whose `args` are processed through `m_params`;
--   argument 1 is the code point
-- @return the character as a string
-- Raises an error when the argument is not a number or when
-- `mw.ustring.char` rejects it.
function p.character(frame)
  local invalidMessage = "Puntu di còdici nun bonu"
  local args = m_params.process(frame.args, {
    [1] = { required = true, type = m_params.INT },
  })

  local code = tonumber(args[1])
  if code == nil then
    error(invalidMessage)
  end
  -- mw.ustring.char raises on invalid code points; convert that to our error.
  local ok, char = pcall(mw.ustring.char, code)
  if not ok then
    error(invalidMessage)
  end
  return char
end
--- Template entry point for p.convertNumber.
-- @param frame frame object whose `args` are processed through `m_params`;
--   argument 1 is the number, argument "script" names the numeral system
--   (one of the keys of p.getNumberSystems(), "latin" by default)
-- @return the number written in the requested numeral system
function p.number(frame)
  -- Argument 1 must not be negative.
  local function isNonNegative(i)
    return tonumber(i) >= 0
  end
  local systemNames = m_tabbella.keysToList(p.getNumberSystems())

  local args = m_params.process(frame.args, {
    [1] = { required = true, type = m_params.INT, checker = isNonNegative },
    ["script"] = { enum = systemNames, default = "latin" },
  })
  return p.convertNumber(args[1], args["script"])
end
return p