-- Table of functions exported by this module.
local export = {}
-- Helper modules used for language-tagging and linking terms.
local m_script_utils = require("Module:script utilities")
local m_links = require("Module:links")
-- Language and script objects looked up by the codes "grc" and "polytonic".
local lang = require("Module:languages").getByCode("grc")
local sc = require("Module:scripts").getByCode("polytonic")
-- Read-only data shared by the functions below.
local m_data = mw.loadData("Module:grc-utilities/data")
-- groups: patterns iterated in order when rearranging a run of diacritics.
local groups = m_data.groups
-- conversions: pattern -> replacement pairs applied by standardDiacritics.
local conversions = m_data.conversions
local diacritics = m_data.diacritics
-- diacritic: a pattern fragment used below to match a single diacritic.
local diacritic = m_data.diacritic
-- Individual diacritics, picked out of the data table for convenience.
local diaeresis = diacritics.diaeresis
local macron = diacritics.macron
local breve = diacritics.breve
local spacing_macron = diacritics.spacing_macron
local spacing_breve = diacritics.spacing_breve
local circumflex = diacritics.circum
local subscript = diacritics.subscript
-- Patterns for a two-letter sequence ending in iota or upsilon; the tokenizer
-- anchors them with ^ and $ to recognise diphthongs.
local i_diphthong = "[ΑΕΗΟΥΩαεηουω]ι"
local u_diphthong = "[ΑΕΗΟΩαεηοω]υ"
-- Local aliases for the Unicode-aware string functions.
local find = mw.ustring.find
local match = mw.ustring.match
local gmatch = mw.ustring.gmatch
local sub = mw.ustring.sub
local gsub = mw.ustring.gsub
local U = mw.ustring.char
local toNFC = mw.ustring.toNFC
local toNFD = mw.ustring.toNFD
-- U+25CC DOTTED CIRCLE, placed before diacritics by addDottedCircle.
local dottedCircle = U(0x25CC)
-- Appends `chars` to the entry at list[index] (creating the entry when the
-- slot is still empty) and returns `text` with as many leading characters
-- removed as `chars` contains.
-- Raises an error when `chars` is nil or false.
local function add(list, index, chars, text)
	if not chars then
		error("The function add cannot act on a nil character.")
	end
	local existing = list[index]
	-- A concatenation result is always truthy, so and/or is safe here.
	list[index] = existing and (existing .. chars) or chars
	return sub(text, mw.ustring.len(chars) + 1)
end
-- Inserts a dotted circle in front of every character that matches the
-- diacritic pattern. Returns the modified string; returns nothing when
-- `text` is not a string.
function export.addDottedCircle(text)
	if type(text) ~= "string" then
		return
	end
	-- Assign to a local first so that only the string (and not gsub's
	-- substitution count) is returned.
	local circled = gsub(text, "(" .. diacritic .. ")", dottedCircle .. "%1")
	return circled
end
-- Passes `term` to the script-utilities tagger together with this module's
-- language ("grc") and script ("polytonic") objects and the optional `face`.
function export.tag(term, face)
	local tag_text = m_script_utils.tag_text
	return tag_text(term, lang, sc, face)
end
-- Builds a link to `term` through Module:links' full_link, using this
-- module's language and script objects.
-- `alt` and `tr` are forwarded in the link data; `face` is passed alongside.
function export.link(term, face, alt, tr)
	local data = {
		term = term,
		alt = alt,
		lang = lang,
		sc = sc,
		tr = tr,
	}
	return m_links.full_link(data, face)
end
-- Links `term` (displayed as `alt`) through Module:links' language_link,
-- passing only the language object and no script.
local function linkNoTag(term, alt)
	local data = { term = term, lang = lang, alt = alt }
	return m_links.language_link(data)
end
-- Decomposes `text` (NFD) and then applies every pattern -> replacement pair
-- from the conversions data table, turning spacing diacritics into combining
-- ones and nonstandard characters into standard polytonic Greek.
-- Returns the converted, still decomposed, string.
function export.standardDiacritics(text)
	local normalized = toNFD(text)
	for from, to in pairs(conversions) do
		normalized = gsub(normalized, from, to)
	end
	return normalized
end
--[=[ Rearranges every run of two or more diacritics into this order:
1. macron or breve
2. breathings or diaeresis
3. acute, circumflex, or grave
4. iota subscript
The text is decomposed (NFD) first and is returned decomposed.
Used by [[Module:typing-aids]].
]=]
function export.reorderDiacritics(text)
	local result = toNFD(text)
	local run_pattern = diacritic .. diacritic .. "+"
	-- Runs are located in the decomposed input; replacements accumulate in result.
	for run in gmatch(result, run_pattern) do
		local ordered = {}
		-- Take the first match of each group, in group order; a group that
		-- is absent from the run contributes an empty string.
		for position, group in ipairs(groups) do
			ordered[position] = match(run, group) or ""
		end
		result = gsub(result, run, table.concat(ordered))
	end
	return result
end
--[=[ Breaks a word into meaningful "tokens": individual letters or
diphthongs, each followed by its diacritics.

Parameters:
	text	string to tokenize; an error is raised (blamed on the caller)
			for any other type.
Returns:
	tokens	array of token strings
	err		"" normally, or a message when the number of tokens counted
			while scanning differs from the length of the returned array.

Used by [[Module:grc-accent]] and [[Module:grc-pronunciation]]. ]=]--
function export.tokenize(text)
	-- Validate before normalizing: the normalizers below would otherwise
	-- fail first (or coerce the value), so a check placed after them
	-- could never fire.
	if type(text) ~= "string" then
		error("Text is not a string", 2)
	end

	-- standardize, decompose, and reorder diacritics
	text = export.standardDiacritics(text)
	text = export.reorderDiacritics(text)

	-- Loop-invariant patterns.
	local i_diphthong_only = '^' .. i_diphthong .. '$'
	local u_diphthong_only = '^' .. u_diphthong .. '$'
	-- A diaeresis directly after the two letters, or after one macron or
	-- breve, means that the two letters do not form a diphthong.
	local diaeresis_next = "^[" .. macron .. breve .. "]?" .. diaeresis

	local tokens = {}
	-- i tracks our position in the table of tokens.
	local i = 0

	while mw.ustring.len(text) > 0 do
		local char = sub(text, 1, 1)
		local chars = sub(text, 1, 2)
		local nextchars = sub(text, 3, 4)

		if find(char, diacritic) then
			-- A diacritic belongs to the current token. (At the very start
			-- of the text this is index 0.)
			text = add(tokens, i, char, text)
		elseif ( find(chars, i_diphthong_only) or find(chars, u_diphthong_only) )
			and not match(nextchars, diaeresis_next)
		then
			-- The next two characters form a diphthong: start a new token
			-- holding both of them.
			i = i + 1
			text = add(tokens, i, chars, text)
		else
			-- Any other character starts a new token of its own.
			i = i + 1
			text = add(tokens, i, char, text)
		end
	end

	-- Kept from the original; NOTE(review): compressSparseArray presumably
	-- returns a new table without this field -- confirm against Module:table.
	tokens.maxindex = i
	tokens = require("Module:table").compressSparseArray(tokens)

	local err = ""
	-- Compare against the local counter. The original test,
	-- "not tokens.maxindex == #tokens", parsed as
	-- "(not tokens.maxindex) == #tokens" and was therefore always false.
	if i ~= #tokens then
		err = "There must have been a nil value in the tokens table."
	end

	return tokens, err
end
-- Template-invocable helper that shows how a string is tokenized.
-- frame.args[1] is the text to tokenize; an error is raised when it is absent.
-- Returns one wikitext table row: the tagged text, the tagged comma-separated
-- tokens (space tokens are shown on a light gray background), and the error
-- string returned by tokenize.
function export.printTokens(frame)
	-- Declared local: the original assignment created a global variable.
	local text = frame.args[1]
	if not text then
		error("Provide text to tokenize in first parameter.")
	end

	local tokens, err = export.tokenize(text)

	-- Make space tokens visible in the output.
	for i, token in ipairs(tokens) do
		if token == " " then
			tokens[i] = '<span style="background-color: lightgray;"> </span>'
		end
	end

	return "|-\n| " .. export.tag(text) .. " || " .. export.tag(table.concat(tokens, ", ")) .. " || " .. err
end
--[=[ Standardizes the diacritics of the text, then rearranges every run of
two or more diacritics into this order:
1. breathings or diaeresis
2. acute, circumflex, or grave
3. macron or breve
4. iota subscript
Afterwards combining macrons and breves are replaced with their spacing
counterparts and the result is returned in composed form (NFC).
Used by [[Module:grc-pronunciation]]. ]=]
function export.pronunciationOrder(text)
	local result = export.standardDiacritics(text)

	-- Indices into `groups`, listed in the desired output order.
	local group_order = { 2, 3, 1, 4 }

	-- Runs are located in the standardized input; replacements accumulate in result.
	for run in gmatch(result, diacritic .. diacritic .. "+") do
		local pieces = {}
		for slot, group_index in ipairs(group_order) do
			pieces[slot] = match(run, groups[group_index]) or ""
		end
		result = gsub(result, run, table.concat(pieces))
	end

	-- combining to spacing macron and breve
	result = gsub(result, macron, spacing_macron)
	result = gsub(result, breve, spacing_breve)

	return toNFC(result)
end
-- Collects the vowels of `text` whose length is ambiguous: tokens that consist
-- of α, ι or υ plus diacritics, with no macron, breve, circumflex or iota
-- subscript among those diacritics.
-- Returns two tables:
--   1. a list with one entry per ambiguous vowel: the vowel with its
--      diacritics, language-tagged, or the bare vowel letter when `noTag`
--      is truthy;
--   2. a set (letter -> true) of the ambiguous vowel letters, for
--      categorization purposes.
-- Raises an error when `text` is missing or is not a string.
function export.findAmbig(text, noTag)
	if type(text) ~= "string" then
		error("The input to function findAmbig is nonexistent or not a string")
	end

	-- breaks the word into units
	local tokens = export.tokenize(text)
	if not tokens then
		error("No tokens.")
	end
	if type(tokens) ~= "table" then
		error("tokens aren't a table.")
	end

	local ambiguous_pattern = "^([" .. "αιυ" .. "])(" .. diacritic .. "*)$"
	local output, vowels = {}, {}

	for _, token in pairs(tokens) do
		if not find(token, m_data.consonant) then
			local vowel, marks = match(token, ambiguous_pattern)
			-- The length is settled when one of these four marks is present.
			local length_known = vowel and marks and (
				find(marks, macron)
				or find(marks, breve)
				or find(marks, circumflex)
				or find(marks, subscript) )
			if vowel and not length_known then
				if noTag then
					output[#output + 1] = vowel
				else
					output[#output + 1] = export.tag(vowel .. marks)
				end
				-- Record the letter itself, lowercased, in the set.
				vowels[mw.ustring.lower(vowel)] = true
			end
		end
	end

	return output, vowels
end
-- Template-invocable helper that shows what one of this module's exported
-- functions does to a term.
-- frame.args[1]: name of the exported function to apply.
-- frame.args[2]: the term to pass to that function.
-- Returns " term (decomposition) → result (decomposition)", where each
-- decomposition shows the diacritics on dotted circles.
-- Raises an error when either argument is missing or when the first does not
-- name an exported function.
function export.printDiacritics(frame)
	local functionToPrint = frame.args[1] or error('Specify a function in the first parameter.')
	local term = frame.args[2] or error('Add text in the second parameter.')

	-- Fail with a readable message rather than "attempt to call a nil value".
	local func = export[functionToPrint]
	if type(func) ~= "function" then
		error('There is no function named "' .. functionToPrint .. '" in this module.')
	end

	local result = func(term)

	-- Show diacritics above or below a dotted circle.
	-- Declared local: the original assignment created a global variable.
	local content = {
		term = export.tag(term),
		term_decomposition = export.tag(export.addDottedCircle(toNFD(term))),
		result = export.tag(result),
		result_decomposition = export.tag(export.addDottedCircle(result)),
	}

	-- Every word in the template is a key of `content`; a word that is not
	-- a key is replaced with an empty string.
	local output = [[ term (term_decomposition) → result (result_decomposition)]]
	local function addContent(item)
		return content[item] or ""
	end
	output = gsub(output, "[%a_]+", addContent)

	return output
end
-- Template-invocable helper that displays a term followed, in parentheses,
-- by its decomposed form with diacritics placed on dotted circles.
-- Parameters (processed with Module:parameters):
--   1     the text to display; an error is raised when it is absent
--   link  boolean; when set, the term is linked as a whole and every
--         character of the decomposition is linked individually
-- Returns "composed (decomposed)" as a string.
function export.decompose(frame)
	local params = {
		[1] = {},
		["link"] = { type = "boolean" },
	}

	-- Declared local: the original assignment created a global variable.
	local args = require("Module:parameters").process(frame.args, params)

	local text = args[1]
	if not text then
		-- Otherwise the failure would come from toNFD with an unhelpful message.
		error("Provide text to decompose in the first parameter.")
	end
	text = toNFD(text)

	local link = args.link

	local composed
	if link then
		composed = export.link(text, nil, nil, "-")
	else
		composed = export.tag(text)
	end

	local decomposed = export.addDottedCircle(text)
	if link then
		local pieces = {}
		-- `seat` is the dotted circle in front of a character, or "" when
		-- there is none; `letter` is the character itself and is always
		-- captured, so no nil check is needed.
		for seat, letter in gmatch(decomposed, "(" .. dottedCircle .. "?)(.)") do
			-- Named so that it does not shadow the boolean `link` flag.
			local linked = linkNoTag(letter, seat .. letter)
			table.insert(pieces, linked)
		end
		decomposed = table.concat(pieces)
	end

	decomposed = export.tag(decomposed)

	return composed .. " (" .. decomposed .. ")"
end
return export