Mòdulu:Sicilian
La ducumintazzioni di stu mòdulu si pò criari nta Mòdulu:Sicilian/doc
-- Stu mòdulu cunteni funzioni utili pi manipulari palori in Sicilianu,
-- in particulari è adupiratu pi criari autumaticamenti tavuli dâ
-- cugnugazzioni dî verbi rigulari e dâ flissioni di aggittivi e sustantivi
-- rigulari.
-- This module contains functions useful to manipulate Sicilian words,
-- in particular it's used to automatically create tables for the
-- conjugation of regular verbs and for the inflection of regular adjectives
-- and nouns.
-- Table collecting the functions exported by this module; the p.* wrappers
-- defined further down are attached to it and it is returned at the end of
-- the file.
local p = {}
-- Strip the grave accent from every vowel of the given word.
-- Following the Sicilian rule for vowels that lose their stress, an accented
-- E becomes I and an accented O becomes U; an accented I with diaeresis keeps
-- its diaeresis and only loses the accent.
-- @param word  the word (or word fragment) to process
-- @return      the word with all its accents removed
function remove_accents(word)
    -- Each entry is { accented sequence, unaccented replacement }; the
    -- substitutions are applied one after the other, in this order.
    local replacements = {
        { "à", "a" },
        { "è", "i" },
        { "ì", "i" },
        { "ò", "u" },
        { "ù", "u" },
        { "ï̀", "ï" },
    }
    local plain = word
    for _, pair in ipairs(replacements) do
        -- Only the first result of gsub (the new string) is kept.
        plain = mw.ustring.gsub(plain, pair[1], pair[2])
    end
    return plain
end
-- Return the given word with its accents left in place, except for the
-- accented diaeresis, which is not an acceptable character and is replaced
-- with a plain accented I.
-- @param word  the word (or word fragment) to process
-- @return      the resulting word, and nothing else
local function leave_accents(word)
    -- gsub returns both the new string and the number of substitutions; the
    -- parentheses drop the count so that it cannot leak into a caller's
    -- argument list when this call is the last argument.
    return (mw.ustring.gsub(word, "ï̀", "ì"))
end
-- Return true if the given word contains any accented vowel, false otherwise.
-- A vowel counts as accented when it is one of the precomposed grave vowels
-- or when it is followed by a combining grave accent (which is how the
-- accented I with diaeresis is written).
-- @param word  the word (or word fragment) to inspect
-- @return      true or false
function is_accented(word)
    -- The marks are searched one by one as plain strings rather than through
    -- a single character class: inside a class, an accented I with diaeresis
    -- is split into "ï" and the combining accent, and a plain unaccented "ï"
    -- would then wrongly be reported as accented.
    local accent_marks = {
        "à", "è", "ì", "ò", "ù",
        "\204\128", -- U+0300 COMBINING GRAVE ACCENT, UTF-8 encoded
    }
    for _, mark in ipairs(accent_marks) do
        if mw.ustring.find(word, mark, 1, true) then
            return true
        end
    end
    return false
end
-- Cut the given word after the character at position pos and return the
-- leading fragment.
-- The cut preserves the phonetic value of a final "c" or "g", so that the
-- fragment can later be glued to any tail with join_words():
--   * a fragment ending with a "soft" consonant ends with the bare consonant,
--     without the "i" that followed it;
--   * a fragment ending with a "hard" consonant ends with an added "h".
-- @param word  the word to truncate
-- @param pos   1-based index of the last character to keep
-- @return      the truncated fragment
function truncate_word(word, pos)
    local usub = mw.ustring.sub

    -- Soft consonant: the fragment would end with "ci" or "gi", so the final
    -- "i" is dropped. Only an *unaccented* "i" is matched: an accented one is
    -- often written instead of "cij", "gij" or a diaeresis to prevent a
    -- diphthong (e.g. "caccì-ari" conjugates to "caccì-u", not "càcc-iu",
    -- which is a different verb), so it has to be kept.
    if pos > 1 then
        local ending = usub(word, pos - 1, pos)
        if ending == "ci" or ending == "gi" then
            return usub(word, 1, pos - 1)
        end
    end

    -- Hard consonant: the fragment ends with "c" or "g" and the letter that
    -- followed it in the word was neither "e" nor "i", so an "h" is appended.
    if pos < mw.ustring.len(word) then
        local last = usub(word, pos, pos)
        if last == "c" or last == "g" then
            local follower = remove_accents(usub(word, pos + 1, pos + 1))
            if not (follower == "e" or follower == "i") then
                return usub(word, 1, pos) .. "h"
            end
        end
    end

    -- Nothing special to preserve.
    return usub(word, 1, pos)
end
-- Glue two word fragments together so that the result is phonetically
-- equivalent to the two fragments pronounced one after the other.
-- The rules, tried in this order, are:
--   1. word1 ends with a "soft" "c"/"g": an "i" is inserted unless word2
--      already begins with "e" or "i";
--   2. word1 ends with a "hard" "ch"/"gh": the "h" is removed unless word2
--      begins with "e" or "i";
--   3. unaccented "i" + unaccented "i" collapse into "î";
--   4. unaccented "i" + accented "ì" collapse into the accented "ì";
--   5. word1 ends with "ï": the diaeresis is dropped unless word2 begins
--      with "a", "e", "o" or "u" (tested after remove_accents()).
-- @param word1  the leading fragment
-- @param word2  the trailing fragment
-- @return       the joined word
function join_words(word1, word2)
    local usub = mw.ustring.sub
    local last = usub(word1, -1)

    -- Rule 1: soft consonant.
    if last == "c" or last == "g" then
        local lead = remove_accents(usub(word2, 1, 1))
        if lead ~= "e" and lead ~= "i" then
            return word1 .. "i" .. word2
        end
        return word1 .. word2
    end

    -- Rule 2: hard consonant.
    local last_two = usub(word1, -2)
    if last_two == "ch" or last_two == "gh" then
        local lead = remove_accents(usub(word2, 1, 1))
        if lead ~= "e" and lead ~= "i" then
            return usub(word1, 1, -2) .. word2
        end
        return word1 .. word2
    end

    if last == "i" then
        local lead = usub(word2, 1, 1)
        if lead == "i" then
            -- Rule 3: Sicilian orthography never writes two consecutive
            -- unaccented "i" as-is, they become an "i" with circumflex.
            return usub(word1, 1, -2) .. "î" .. usub(word2, 2)
        elseif lead == "ì" then
            -- Rule 4: an "i" with both circumflex and grave is not an
            -- acceptable spelling of "jì". Replacing the first "i" with a
            -- "j" would be more accurate, but it is uncommon in modern
            -- typography, so the two letters simply merge.
            return usub(word1, 1, -2) .. word2
        end
    end

    -- Rule 5: no diphthong is possible, so the diaeresis is useless.
    if last == "ï" then
        local lead = remove_accents(usub(word2, 1, 1))
        if mw.ustring.find(lead, "[aeou]") then
            return word1 .. word2
        end
        return usub(word1, 1, -2) .. "i" .. word2
    end

    -- The common case.
    return word1 .. word2
end
-- Utility function used by a wikicode template.
-- It strips the infinitive ending from the given verb and glues the stem to
-- one of three candidate endings, chosen from the ending of the infinitive:
--   "àri" -> ari,   "iri" -> iri,   "ìri" -> iiri.
-- An infinitive in "îri" is handled as a stem ending with "i" followed by
-- "iri". When the chosen ending carries an accent of its own, the accent is
-- removed from the stem.
-- @param verb      the verb in the infinitive
-- @param ari       ending to use for verbs in "àri"
-- @param iri       ending to use for verbs in "iri"
-- @param iiri      ending to use for verbs in "ìri"
-- @param override  if a non-empty string, it is returned as-is
-- @return          the built verb form
-- Raises an error when the verb has none of the recognized endings.
local function build_verb(verb, ari, iri, iiri, override)
    if override and mw.ustring.len(override) > 0 then
        return override
    end

    local verb_len = mw.ustring.len(verb)
    local tail = mw.ustring.sub(verb, verb_len - 2)
    local head = truncate_word(verb, verb_len - 3)

    -- "îri" stands for "iiri": give the first "i" back to the stem.
    if tail == "îri" then
        head, tail = head .. "i", "iri"
    end

    -- Pick the ending that matches the conjugation.
    local ending
    if tail == "àri" then
        ending = ari
    elseif tail == "iri" then
        ending = iri
    elseif tail == "ìri" then
        ending = iiri
    else
        error "did not specify a verb in infinitive mode"
    end

    -- The stem keeps its accent only when the ending has none.
    local stem
    if is_accented(ending) then
        stem = remove_accents(head)
    else
        stem = leave_accents(head)
    end
    return join_words(stem, ending)
end
-- Inflect a regular adjective, given in its masculine singular form, into
-- the requested gender and number.
-- @param adj_sm  the adjective in the masculine singular
-- @param gennum  the desired form:
--                  "sm" masculine singular,  "sf" feminine singular,
--                  "pm" masculine plural,    "pf" feminine plural
-- @return        the inflected adjective, or adj_sm unchanged when it does
--                not match any of the known regular patterns
-- Raises an error when gennum is not one of the four values above.
function turn_adjective(adj_sm, gennum)
    assert(gennum == "sm" or gennum == "sf" or gennum == "pm" or gennum == "pf",
        "invalid argument as gender/number")
    if gennum == "sm" then
        return adj_sm
    end

    local len = mw.ustring.len(adj_sm)
    local stem_end, endings
    if mw.ustring.sub(adj_sm, -3) == "icu" then
        -- Adjectives in "-icu": the masculine plural has a soft "c", the
        -- feminine plural a hard one.
        stem_end = len - 2
        endings = { sf = "ca", pm = "ci", pf = "chi" }
    elseif mw.ustring.sub(adj_sm, -3) == "ìsi"
        or mw.ustring.sub(adj_sm, -1) == "u"
    then
        -- Adjectives in "-ìsi" or "-u": only the final vowel changes.
        stem_end = len - 1
        endings = { sf = "a", pm = "i", pf = "i" }
    else
        -- Unknown pattern.
        return adj_sm
    end

    return join_words(truncate_word(adj_sm, stem_end), endings[gennum])
end
-- Utility function used by a wikicode template.
-- It works just like the turn_adjective function but takes an additional
-- argument, override, which is returned as-is whenever it contains a
-- non-empty string.
-- @param adj_sm    the adjective in the masculine singular
-- @param gennum    the desired form ("sm", "sf", "pm" or "pf")
-- @param override  optional explicit result
-- @return          override if non-empty, the inflected adjective otherwise
local function build_adjective(adj_sm, gennum, override)
    local has_override = override and mw.ustring.len(override) > 0
    if not has_override then
        return turn_adjective(adj_sm, gennum)
    end
    return override
end
-- Worker functions used by turn_noun() to turn regular nouns across various
-- gender/number combinations. Not all combinations are implemented, only those
-- required by currently used templates, but adding new ones is trivial.
-- Turn a regular noun from singular masculine into plural masculine.
-- Recognized patterns:
--   "-icu" -> "-ici"   (miccànicu -> miccànici)
--   "-a"   -> "-i"     (puèta -> puèti)
--   "-u"   -> "-i"     (lùpu -> lùpi)
-- @param noun  the noun in the masculine singular
-- @return      the plural, or the noun unchanged if no pattern matches
function turn_noun_sm_to_pm(noun)
    local len = mw.ustring.len(noun)
    local last = mw.ustring.sub(noun, -1)

    local dropped, ending
    if mw.ustring.sub(noun, -3) == "icu" then
        dropped, ending = 2, "ci"
    elseif last == "a" or last == "u" then
        dropped, ending = 1, "i"
    else
        return noun
    end

    return join_words(truncate_word(noun, len - dropped), ending)
end
-- Turn a regular noun from singular masculine into singular feminine.
-- Recognized patterns:
--   "-ìsi" -> "-ìsa"   (catanìsi -> catanìsa)
--   "-u"   -> "-a"     (lùpu -> lùpa)
-- @param noun  the noun in the masculine singular
-- @return      the feminine, or the noun unchanged if no pattern matches
function turn_noun_sm_to_sf(noun)
    local len = mw.ustring.len(noun)
    local matches = mw.ustring.sub(noun, -3) == "ìsi"
        or mw.ustring.sub(noun, -1) == "u"
    if not matches then
        return noun
    end
    -- In both patterns only the final vowel is replaced.
    return join_words(truncate_word(noun, len - 1), "a")
end
-- Turn a regular noun from singular neuter into plural neuter.
-- Recognized patterns:
--   "-u" -> "-a"   (pùgnu -> pùgna)
--   "-i" -> "-a"   (casciùni -> casciùna)
-- @param noun  the noun in the neuter singular
-- @return      the plural, or the noun unchanged if no pattern matches
function turn_noun_sn_to_pn(noun)
    local len = mw.ustring.len(noun)
    local final = mw.ustring.sub(noun, -1)
    if final == "u" or final == "i" then
        local stem = truncate_word(noun, len - 1)
        return join_words(stem, "a")
    end
    return noun
end
-- Turn a regular noun from singular masculine into plural feminine.
-- Recognized pattern:
--   "-u" -> "-i"   (mèdicu -> mèdichi)
-- @param noun  the noun in the masculine singular
-- @return      the feminine plural, or the noun unchanged if it does not
--              end with "u"
function turn_noun_sm_to_pf(noun)
    local len = mw.ustring.len(noun)
    if mw.ustring.sub(noun, -1) ~= "u" then
        return noun
    end
    local stem = truncate_word(noun, len - 1)
    return join_words(stem, "i")
end
-- Turn a regular noun from singular feminine into plural feminine.
-- Recognized pattern:
--   "-a" -> "-i"   (ròsa -> ròsi)
-- @param noun  the noun in the feminine singular
-- @return      the plural, or the noun unchanged if it does not end with "a"
function turn_noun_sf_to_pf(noun)
    local len = mw.ustring.len(noun)
    if mw.ustring.sub(noun, -1) ~= "a" then
        return noun
    end
    local stem = truncate_word(noun, len - 1)
    return join_words(stem, "i")
end
-- Turn a regular noun, given in one of its singular forms, into the requested
-- gender and number. The result is only correct if the noun is regular and
-- actually has the requested form.
-- @param noun    the noun to inflect
-- @param source  the form the noun is currently in:
--                  "sm" singular masculine,
--                  "sn" singular neuter-masculine,
--                  "sf" singular feminine
-- @param dest    the form to switch to:
--                  "sm" or "sn" singular masculine,
--                  "sf"         singular feminine,
--                  "pm" or "pn" plural masculine,
--                  "pf"         plural feminine
-- @return        the inflected noun, or the noun unchanged when it does not
--                match any known regular pattern
-- Raises an error when source is not one of the three values above, or when
-- dest is not supported for the given source. Feminine to masculine
-- conversion is not implemented because no current user needs it.
function turn_noun(noun, source, dest)
    local function unchanged(n)
        return n
    end

    -- For each source form, the conversion to run for every accepted dest.
    local conversions
    if source == "sm" then
        conversions = {
            sm = unchanged,
            sf = turn_noun_sm_to_sf,
            pm = turn_noun_sm_to_pm,
            pn = turn_noun_sm_to_pm,
            pf = turn_noun_sm_to_pf,
        }
    elseif source == "sn" then
        conversions = {
            sm = unchanged,
            sn = unchanged,
            pm = turn_noun_sn_to_pn,
            pn = turn_noun_sn_to_pn,
        }
    elseif source == "sf" then
        -- NOTE(review): "sn" and "pn" are accepted here as synonyms of "sf"
        -- and "pf"; this mirrors the neuter branch and may be unintentional.
        conversions = {
            sf = unchanged,
            sn = unchanged,
            pf = turn_noun_sf_to_pf,
            pn = turn_noun_sf_to_pf,
        }
    else
        error "invalid argument as source gender / number"
    end

    local convert = conversions[dest]
    if not convert then
        error "invalid argument as destination gender / number"
    end
    return convert(noun)
end
-- Utility function used by a wikicode template.
-- It works just like the turn_noun function but takes an additional
-- argument, override, which is returned as-is whenever it contains a
-- non-empty string.
-- @param noun      the noun to inflect
-- @param source    the form the noun is currently in
-- @param dest      the form to switch to
-- @param override  optional explicit result
-- @return          override if non-empty, the inflected noun otherwise
local function build_noun(noun, source, dest, override)
    local has_override = override and mw.ustring.len(override) > 0
    if not has_override then
        return turn_noun(noun, source, dest)
    end
    return override
end
-- Set of the consonant groups that, when followed by a vowel, can begin a new
-- syllable in the Italian language. A group qualifies if some word begins
-- with it. The set is keyed by the group itself, every member mapping to
-- true; the examples next to each group are Italian words (or syllable
-- splits) that justify it.
local syll_starters_ita = {}
for _, group in ipairs({
    "bl",  -- blòcco, blù
    "br",  -- bràttea, bràvo
    "ch",  -- chièsa, chìna
    "cl",  -- clàsse, clòro
    "cr",  -- créscere, cròce
    "ḍḍ",  -- needed for Sicilian words with Italian rules
    "dr",  -- drìtto, dràgo
    "fl",  -- flèmma, flùsso
    "fr",  -- frèddo, Frància
    "gh",  -- ghiànda, ghìro
    "gl",  -- glòria, glàssa
    "gn",  -- gnòcco, gnòmo
    "gr",  -- grèco, grànde
    "pl",  -- plàgio, plàstica
    "pn",  -- pneumàtico
    "pr",  -- pròprio, pràtico
    "ps",  -- psicòlogo
    "sb",  -- sbàrra, sbàttere
    "sbl", -- sbloccàre
    "sbr", -- sbrinàre, sbrottàre
    "sc",  -- scìvolo, scèndere
    "sch", -- schìfo, schèma
    "scl", -- sclèra
    "scr", -- scrostàre, scremàre
    "sd",  -- sdentàto, sdoganàre
    -- sdl: no words
    "sdr", -- sdraiàre, sdrùcciolo
    "sf",  -- sfondàre, sfìda
    "sfl", -- (tran-SFLu-èn-za)
    "sfr", -- sfrontàto, sfruttàre
    "sg",  -- sgòzzare, sguazzàre
    "sgh", -- sghèrro, sghèmbo
    -- sgl: no words
    "sgr", -- sgranàre, sgrassàre
    "sl",  -- Slovènia, slàvo
    "sm",  -- smèttere, smània
    "sn",  -- snòdo, snaturàre
    "sp",  -- sperànza, sparàre
    "spl", -- splèndere, splènico
    "spr", -- sprèmere, spropòsito
    "sq",  -- squàdra, squàllido
    "sr",  -- sradicàre, sregolàto
    "st",  -- stùfo, stèmma
    -- stl: no words, "post-lu-dio" would break but it's semantic anyway
    "str", -- stràda, strappàre
    "sv",  -- sventàre, svèndere
    "tl",  -- (a-TLè-ta)
    "tr",  -- tròppo, tràino
    "vr",  -- no example given
}) do
    syll_starters_ita[group] = true
end
-- Set of the consonant groups that, when followed by a vowel, can begin a new
-- syllable in the Sicilian language. A group qualifies if some word begins
-- with it. The set is keyed by the group itself, every member mapping to
-- true; the examples next to each group are Sicilian words that justify it.
-- NOTE(review): "sm" and "sn" are listed in the Italian set but not here;
-- confirm whether the omission is intended.
local syll_starters_scn = {}
for _, group in ipairs({
    "bl",  -- blòccu, blè
    "br",  -- bràtta, Bràsi
    "bbl", -- bblòccu, bblè
    "bbr", -- bbràtta, Bbràsi
    "ch",  -- chiànu, chèccu
    "cch", -- cchiù'
    "cl",  -- clàssi, clòru
    "cr",  -- crìsciri, crùci
    "dr",  -- drìtta, dràgu
    "ddr", -- ddrìtta, ddrummintàri
    "ḍḍ",  -- ḍḍumari, ḍḍù
    "fl",  -- flèmma, flùssu
    "fr",  -- frìddu, Frància
    "gh",  -- ghiànna, ghiàcciu
    "ggh", -- gghiòvu, gghiànu
    "gl",  -- glòria, glàssa
    "gn",  -- gnòccu, gnizziòni
    "gr",  -- grècu, grànni
    "mb",  -- mballàri, mbìviri
    "mbr", -- mbriàcu, mbrattàri
    "mp",  -- mpizzàri, mpajàri
    "mpr", -- mprìsa, mprinàri
    "nc",  -- nciràta, ncinnirìri
    "nch", -- nchianàri, nchiajàtu
    "ncl", -- nclùdiri, nclinàri
    "ncr", -- ncruccàri, ncrustàri
    "nd",  -- ndùja, nduràri
    "ndr", -- ndrizzàri, ndrìna
    "nf",  -- nfàmi, nfussàri
    "nfl", -- nfluènza, nflatàrisi
    "nfr", -- nfriddulùtu, nfrascàti
    "ng",  -- ngissàri, ngignùsu
    "ngh", -- nghiùttiri
    "ngl", -- nglìssi
    "ngr", -- ngrasciàri, ngrìsi
    "nq",  -- nquatràri, nquilìnu
    "ns",  -- nsirtàri, nsivàtu
    "nt",  -- ntènniri, Ntòni
    "ntr", -- ntràsiri, ntrallàzzu
    "nv",  -- nvèci, nvìdia
    "nz",  -- nzavanàri, nzìgna
    "pl",  -- planèta, plàstica
    "pn",  -- pneumàticu
    "pr",  -- pròpia, prùppu
    "ps",  -- psicòlogu
    "sb",  -- sbàrra, sbàttiri
    "sbl", -- sbluccàri
    "sbr", -- sbrizziàri, sbarazzàri
    "sc",  -- sciàrra, scìnniri
    "sch", -- schìfu, schèma
    "scl", -- sclamàri, sclùdiri
    "scr", -- scrustàri, scrimàri
    "sd",  -- sdintàtu, sdisanuràtu
    "sdr", -- sdrajàri, sdrùcciulu
    "sf",  -- sfunnàri, sfìda
    "sfl", -- sflàvidu, sflavidìri
    "sfr", -- sfruntàtu, sfruttàri
    "sg",  -- sgàrru, sguazzàri
    "sgh", -- sghèrru, sghìcciu
    "sgr", -- sgranàri, sgrasciàri
    "sl",  -- Slùvenia, slàvu
    "sp",  -- spirànza, sparàri
    "spl", -- splènniri, splènnidu
    "spr", -- sprèmiri, spropòsitu
    "sq",  -- squatrïàri, squaddàtu
    "sr",  -- sradicàri, sregulàtu
    "st",  -- stutàri, stèmma
    "str", -- stràta, strazzàri
    "sv",  -- svintàri, svìnniri
    "tl",  -- (a-tlèta)
    "tr",  -- tròppu, tràsiri
    "ttr", -- ttraccàri
    "vr",  -- vràzzu, vròdu
}) do
    syll_starters_scn[group] = true
end
-- Return true if the argument is a single character that matches a Sicilian
-- vowel: unaccented, with a grave accent, with a circumflex or with a
-- diaeresis. Return false in all other cases, including the empty string and
-- strings longer than one character.
-- @param l  the string to test
-- @return   true or false (always a real boolean)
local function is_vowel(l)
    if mw.ustring.len(l) ~= 1 then
        return false
    end
    -- find() yields a position or nil; compare with nil so that callers get
    -- the boolean promised above instead of a number.
    return mw.ustring.find(l, "[aeiouàèìòùâêîôûäëïöü]") ~= nil
end
-- Return true if the argument is a single character that matches a Sicilian
-- consonant, except ḍ. Return false in all other cases, including the empty
-- string and strings longer than one character.
-- @param l  the string to test
-- @return   true or false (always a real boolean)
local function is_consonant(l)
    if mw.ustring.len(l) ~= 1 then
        return false
    end
    -- The ranges cover the lowercase Latin letters minus a, e, i, o, u.
    -- find() yields a position or nil; compare with nil so that callers get
    -- the boolean promised above instead of a number.
    return mw.ustring.find(l, "[b-df-hj-np-tv-z]") ~= nil
end
-- Return true if the passed string could be the beginning of a word, false
-- otherwise. A string qualifies when it starts with:
--   * a vowel;
--   * a consonant followed by a vowel;
--   * (Sicilian rules only) a geminated consonant followed by a vowel;
--   * a two- or three-letter group found in the set of known syllable
--     starters, followed by a vowel.
-- @param str       the string to test
-- @param scn_rule  true (the default) to use the Sicilian set of syllable
--                  starters, false to use the Italian one
-- @return          true or false
local function can_begin_word(str, scn_rule)
    if scn_rule == nil then
        scn_rule = true
    end

    local function letter(i)
        return mw.ustring.sub(str, i, i)
    end

    local first = letter(1)
    if is_vowel(first) then
        return true
    end

    if is_consonant(first) then
        -- Single consonant + vowel.
        if is_vowel(letter(2)) then
            return true
        end
        -- Geminated consonant + vowel.
        if scn_rule and first == letter(2) and is_vowel(letter(3)) then
            return true
        end
    end

    -- Look the leading two letters, then the leading three, up in the set of
    -- groups for the selected language; the group must be followed by a vowel.
    local starters = syll_starters_ita
    if scn_rule then
        starters = syll_starters_scn
    end
    for size = 2, 3 do
        if starters[mw.ustring.sub(str, 1, size)]
            and is_vowel(letter(size + 1))
        then
            return true
        end
    end

    -- None of the tests succeeded: the string can't begin a word.
    return false
end
-- Return true if the character at the 1-based position pos inside the string
-- str is the first vowel of a hiatus, false in all other cases.
-- Two adjacent vowels form a hiatus when:
--   1) both are strong vowels (A, E or O, accented or not);
--   2) one is an accented weak vowel (I or U) and the other one is an
--      unaccented strong vowel;
--   3) they are the same weak vowel, one accented and the other not;
--   4) the second one is an unaccented I which is itself followed by a vowel,
--      i.e. a semiconsonant I written in place of J;
--   5) either one carries a diaeresis.
-- A U that follows G or Q never starts a hiatus.
-- @param str  the word to inspect
-- @param pos  1-based index of the candidate first vowel
-- @return     true or false
-- Raises an error when pos does not fall inside str.
function is_hiatus(str, pos)
    assert(pos > 0 and pos <= mw.ustring.len(str),
        "invalid position specified")

    local function letter(i)
        return mw.ustring.sub(str, i, i)
    end
    local function among(ch, set)
        return mw.ustring.find(ch, set) ~= nil
    end

    local before = letter(pos - 1)
    local first = letter(pos)
    local second = letter(pos + 1)
    local after = letter(pos + 2)

    -- A hiatus can only happen between two vowels.
    if not (is_vowel(first) and is_vowel(second)) then
        return false
    end

    -- The "qu" and "gu" groups never break; this is part of the detection
    -- of the semiconsonant.
    if first == "u" and among(before, "[gq]") then
        return false
    end

    -- Case 1: strong vowel against strong vowel.
    local strong = "[aàeèoò]"
    if among(first, strong) and among(second, strong) then
        return true
    end

    -- Case 2: accented weak vowel against strong vowel, in either order.
    local accented_weak, plain_strong = "[ìù]", "[aeo]"
    if (among(first, accented_weak) and among(second, plain_strong))
        or (among(first, plain_strong) and among(second, accented_weak))
    then
        return true
    end

    -- Case 3: accented weak vowel against the same unaccented vowel.
    -- Both letters are single characters here, so the concatenation is
    -- unambiguous.
    local same_weak = {
        ["ìi"] = true,
        ["iì"] = true,
        ["ùu"] = true,
        ["uù"] = true,
    }
    if same_weak[first .. second] then
        return true
    end

    -- Case 4: semiconsonant I (unaccented I between two vowels).
    if second == "i" and is_vowel(after) then
        return true
    end

    -- Case 5: explicit diaeresis over either vowel.
    local diaeresis = "[äëïöü]"
    if among(first, diaeresis) or among(second, diaeresis) then
        return true
    end

    -- All checks failed, therefore this is not a hiatus.
    return false
end
-- Return true if the character at the 1-based position pos inside the string
-- str is the first vowel of a diphthong, false in all other cases.
-- Two adjacent vowels form a diphthong whenever they do not form a hiatus.
-- @param str  the word to inspect
-- @param pos  1-based index of the candidate first vowel
-- @return     true or false
function is_diphthong(str, pos)
    local here = mw.ustring.sub(str, pos, pos)
    local following = mw.ustring.sub(str, pos + 1, pos + 1)
    if is_vowel(here) and is_vowel(following) then
        -- Two vowels in a row: it is a diphthong unless it is a hiatus.
        return not is_hiatus(str, pos)
    end
    -- A diphthong can only happen between two vowels.
    return false
end
-- Find where the next syllable of a Sicilian word ends. Calling it
-- repeatedly splits a word into syllables. The function won't return
-- meaningful results for non-Sicilian words.
-- A syllable is the shortest segment, starting at start_index, such that:
--   a) it contains a vowel;
--   b) the breaking point does not fall inside a diphthong;
--   c) what follows it is legitimate as the beginning of a word, that is,
--      some word begins with those letters. Sicilian words are used as the
--      reference by default; since many use Italian words instead, that is
--      supported too.
-- @param word         the word to split
-- @param start_index  1-based index at which the syllable begins
-- @param scn_rule     optional; true (the default) to judge point c) against
--                     Sicilian words, false to use Italian words instead
-- @return             1-based index of the last character of the syllable
-- Raises an error when start_index does not fall inside the word.
function next_syllable(word, start_index, scn_rule)
    if scn_rule == nil then
        scn_rule = true
    end

    local len = mw.ustring.len(word)
    assert(start_index >= 1 and start_index <= len,
        "invalid start index specified")
    word = mw.ustring.lower(word)

    local seen_vowel = false
    -- Try every candidate breaking point, from the shortest segment on.
    for cut = start_index, len do
        if is_vowel(mw.ustring.sub(word, cut, cut)) then
            seen_vowel = true
        end
        local rest_ok = can_begin_word(mw.ustring.sub(word, cut + 1), scn_rule)
        local splits_diphthong = is_diphthong(word, cut)
        if seen_vowel and rest_ok and not splits_diphthong then
            return cut
        end
    end

    -- No valid breaking point: the syllable runs up to the end of the word.
    return len
end
-- Return a hyphenated version of the given word.
-- @param word       the word to hyphenate
-- @param separator  optional string inserted between the syllables of the
--                   word; defaults to "-"
-- @param scn_rule   optional; true to split the word into syllables using
--                   the Sicilian vocabulary as a template, false (the
--                   default) to use the Italian vocabulary instead
-- @return           the syllables of the word joined by the separator
function hyphenate(word, separator, scn_rule)
    if scn_rule == nil then
        scn_rule = false
    end
    separator = separator or "-"

    local len = mw.ustring.len(word)
    local syllables = {}
    local i = 1
    while i <= len do
        -- The rule chosen by the caller must be forwarded explicitly:
        -- next_syllable() falls back to the Sicilian rules when it gets nil.
        local n = next_syllable(word, i, scn_rule)
        syllables[#syllables + 1] = mw.ustring.sub(word, i, n)
        i = n + 1
    end
    return table.concat(syllables, separator)
end
-- Wikicode entry point for truncate_word().
-- frame.args[1] is the word and frame.args[2] the position, which is
-- converted to a number before the call.
function p.truncate_word(frame)
    local word = frame.args[1]
    local pos = tonumber(frame.args[2])
    return truncate_word(word, pos)
end
-- Wikicode entry point for join_words().
-- frame.args[1] and frame.args[2] are the two fragments to join.
function p.join_words(frame)
    local args = frame.args
    return join_words(args[1], args[2])
end
-- Wikicode entry point for remove_accents().
-- frame.args[1] is the word to strip the accents from.
function p.remove_accents(frame)
    local word = frame.args[1]
    return remove_accents(word)
end
-- Wikicode entry point for build_verb().
-- The positional arguments are, in order: the verb in the infinitive, the
-- endings for verbs in "àri", "iri" and "ìri", and the optional override.
function p.build_verb(frame)
    local args = frame.args
    return build_verb(args[1], args[2], args[3], args[4], args[5])
end
-- Wikicode entry point for build_adjective().
-- The positional arguments are, in order: the adjective in the masculine
-- singular, the desired gender/number and the optional override.
function p.build_adjective(frame)
    local args = frame.args
    return build_adjective(args[1], args[2], args[3])
end
-- Wikicode entry point for build_noun().
-- The positional arguments are, in order: the noun, the form it is currently
-- in, the form to switch to and the optional override.
function p.build_noun(frame)
    local args = frame.args
    return build_noun(args[1], args[2], args[3], args[4])
end
-- Wikicode entry point for hyphenate().
-- The positional arguments are, in order: the word, the optional separator
-- and the optional flag selecting the Sicilian rules.
-- The flag is read as text: a missing or blank value leaves the choice to
-- hyphenate()'s own default; "false", "0" and "no" (case-insensitive) select
-- the Italian rules; any other value selects the Sicilian rules.
function p.hyphenate(frame)
    local word = frame.args[1]
    local separator = frame.args[2]

    -- Any non-nil string is truthy in Lua, so the raw argument can't be
    -- passed on as a boolean: "false" would otherwise mean true.
    local scn_rule
    local raw = frame.args[3]
    if raw ~= nil then
        local flag = string.lower((string.gsub(raw, "^%s*(.-)%s*$", "%1")))
        if flag == "" then
            scn_rule = nil
        elseif flag == "false" or flag == "0" or flag == "no" then
            scn_rule = false
        else
            scn_rule = true
        end
    end

    return hyphenate(word, separator, scn_rule)
end
-- Hand the table of exported functions back to whoever loads this module.
return p