-- Module:grc-translit
-- Documentation for this module may be created at Module:grc-translit/doc
-- Table returned to callers of this module; public functions are attached to it.
local export = {}

-- Per-character transliteration map. Keys are single lower-case Greek letters
-- or single combining marks; values are the Latin text that replaces them.
-- An empty value means the character is removed from the output.
local tt = {
	-- Vowels
	["α"] = "a",
	["ε"] = "e",
	["η"] = "ē",
	["ι"] = "i",
	["ο"] = "o",
	["υ"] = "u",
	["ω"] = "ō",

	-- Consonants
	["β"] = "b",
	["γ"] = "g",
	["δ"] = "d",
	["ζ"] = "z",
	["θ"] = "th",
	["κ"] = "k",
	["λ"] = "l",
	["μ"] = "m",
	["ν"] = "n",
	["ξ"] = "x",
	["π"] = "p",
	["ρ"] = "r",
	["σ"] = "s",
	["ς"] = "s",
	["τ"] = "t",
	["φ"] = "ph",
	["χ"] = "kh",
	["ψ"] = "ps",

	-- Archaic letters
	["ϝ"] = "w",
	["ϻ"] = "ś",
	["ϙ"] = "q",
	["ϡ"] = "š",
	["ͷ"] = "v",

	-- Combining diacritics (the number is the code point noted by the original author)
	["̄"] = "̄", -- macron 304: kept
	["̆"] = "", -- breve 306: dropped
	["̓"] = "", -- psili 313: dropped
	["̔"] = "", -- dasia 314: dropped here
	["̈"] = "̈", -- trema 308: kept
	["̀"] = "̀", -- grave 300: kept
	["́"] = "́", -- acute 301: kept
	["͂"] = "̂", -- circumflex 342: replaced by a different circumflex mark
	["ͅ"] = "i", -- hypogegrammene 345: written out as a letter

	-- Placeholder used for internal processing of diaeresis; never shown in the output
	["+"] = "",
}

-- Character class matching any one of the combining marks listed in the map above.
local diacritics = "[̄̆̓̔̈̀́͂ͅ]"
--- Transliterate Greek-script text into Latin characters.
--
-- The text is decomposed (NFD), split into tokens (a base character plus its
-- combining diacritics, with ι/υ joined onto a preceding vowel token), and each
-- token is mapped through the `tt` table with a few contextual adjustments.
--
-- @param text  string to transliterate
-- @param lang  language code; only forwarded to Module:Cprt-translit
-- @param sc    script code; when it is "Cprt" the whole call is delegated to
--              Module:Cprt-translit
-- @return the transliterated string
function export.tr(text, lang, sc)
	-- If the script is given as Cprt, then forward the transliteration to that module
	if sc == "Cprt" then
		return require("Module:Cprt-translit").tr(text, lang, sc)
	end

	local ustring = mw.ustring
	local gsub, match = ustring.gsub, ustring.match
	local lower, upper = ustring.lower, ustring.upper
	local sub, toNFD = ustring.sub, ustring.toNFD

	-- Decompose text so every diacritic is its own combining character.
	text = toNFD(text)
	-- Put a "+" in front of ι/υ that carry a diaeresis. The "+" becomes a token
	-- of its own, which stops the letter from being joined onto a preceding
	-- vowel below; it is mapped to the empty string afterwards.
	text = gsub(text, '([ιυ])([̄̆]?)̈', '+%1%2̈')

	-- Tokenize.
	local tokens = {}
	local ti = 0 -- index of the most recent token; 0 while there is none
	for i = 1, ustring.len(text) do
		local ch = sub(text, i, i)
		local prev = tokens[ti]
		if ch == 'ι' and prev and match(prev, '[ΑΕΗΟΥΩαεηουω]') then
			tokens[ti] = prev .. 'ι'
		elseif ch == 'υ' and prev and match(prev, '[ΑΕΗΟΩαεηοω]') then
			tokens[ti] = prev .. 'υ'
		elseif prev and match(ch, diacritics) then
			tokens[ti] = prev .. ch
		else
			-- Ordinary character, or a diacritic at the very start of the text
			-- (there is no token to attach it to, so it starts its own instead
			-- of raising an error on a nil concatenation).
			ti = ti + 1
			tokens[ti] = ch
		end
	end

	-- Now read the tokens, in order.
	local out = {}
	for i, token in ipairs(tokens) do
		local lowered = lower(token)
		-- Characters without an entry in tt are left unchanged by gsub.
		local t = gsub(lowered, '.', function(x) return tt[x] end)

		-- elseif is misleading (these are independent) but it's more concise this way
		local following = tokens[i + 1]
		if token == 'γ' and following and match(following, '[κγχξ]') then
			t = 'n'
		elseif token == 'ρ' and tokens[i - 1] == 'ρ' then
			t = 'rh'
		elseif match(token, '[ΑΕΗΟΩαεηοω]υ') or match(token, '[Υυ]ι') then
			-- NOTE(review): tt maps υ to "u", so t should contain no "y" here and
			-- this substitution looks like a no-op. The branch is kept so that the
			-- elseif chain selects the same case as before.
			t = gsub(t, 'y', 'u')
		elseif match(token, '[αΑ].*ͅ') then
			-- Alpha carrying a hypogegrammene gets a macron.
			t = gsub(t, '([aA])', '%1̄')
		end

		-- Dasia: "h" goes after rho, before anything else.
		if match(token, '̔') then
			if match(token, '[Ρρ]') then
				t = t .. 'h'
			else
				t = 'h' .. t
			end
		end

		-- we can't manually enter them as e/o + macron in the table because it'll recombine apparently
		t = toNFD(t)
		-- A circumflex replaces the macron rather than stacking with it.
		if match(t, '̂') then
			t = gsub(t, '̄', '')
		end

		-- Token contained an upper-case character: capitalise the result.
		if token ~= lowered then
			t = upper(sub(t, 1, 1)) .. lower(sub(t, 2))
		end

		out[i] = t
	end

	return table.concat(out)
end

return export