-- Module:Languages/data2

local u = mw.ustring.char

-- Private use characters for sortkey placement. local a, b, c, d, e, f, g, h = u(0xF000), u(0xF001), u(0xF002), u(0xF003), u(0xF004), u(0xF005), u(0xF006), u(0xF007)

-- UTF-8 encoded strings for some commonly-used diacritics.
-- Combining marks (U+03xx):
local GRAVE          = u(0x0300)
local ACUTE          = u(0x0301)
local CIRC           = u(0x0302)
local TILDE          = u(0x0303)
local MACRON         = u(0x0304)
local BREVE          = u(0x0306)
local DOTABOVE       = u(0x0307)
local DIAER          = u(0x0308)
local RINGABOVE      = u(0x030A)
local DACUTE         = u(0x030B)
local CARON          = u(0x030C)
local DGRAVE         = u(0x030F)
local INVBREVE       = u(0x0311)
local DOTBELOW       = u(0x0323)
local DIAERBELOW     = u(0x0324)
local RINGBELOW      = u(0x0325)
local CEDILLA        = u(0x0327)
local OGONEK         = u(0x0328)
local BREVEBELOW     = u(0x032E)
local CGJ            = u(0x034F) -- combining grapheme joiner
local DOUBLEINVBREVE = u(0x0361)
-- Code points in the U+06xx block:
local KASHIDA        = u(0x0640)
local FATHATAN       = u(0x064B)
local DAMMATAN       = u(0x064C)
local KASRATAN       = u(0x064D)
local FATHA          = u(0x064E)
local DAMMA          = u(0x064F)
local KASRA          = u(0x0650)
local SHADDA         = u(0x0651)
local SUKUN          = u(0x0652)
local NUNGHUNNA      = u(0x0658)
local SUPERALEF      = u(0x0670)

-- Punctuation characters shared by the standardChars fields below; entries
-- concatenate this onto their own character set.
local PUNCTUATION = ' !#%&*+,-./:;<=>?@^_`|~\'∅'

-- Shared script lists. Use these in "scripts" to save a little memory:
-- entries that name the same scripts reference one table instead of each
-- building their own.
local Arab           = {"Arab"}
local CansLatn       = {"Cans", "Latn"}
local Cyrl           = {"Cyrl"}
local CyrlGeorLatn   = {"Cyrl", "Geor", "Latn"}
local CyrlLatnArab   = {"Cyrl", "Latn", "Arab"}
local Deva           = {"Deva"}
local Ethi           = {"Ethi"}
local Latn           = {"Latn"}
local LatnArab       = {"Latn", "Arab"}
local LatnBrai       = {"Latn", "Brai"}
local LatnCyrlfaArab = {"Latn", "Cyrl", "fa-Arab"}
local LatnHani       = {"Latn", "Hani"}
local LatnLatg       = {"Latn", "Latg"}
local Tibt           = {"Tibt"}

-- Table of language entries, keyed by language code.
local m = {}

-- Entries "aa" .. "an". Each entry lists its positional fields on the first
-- line (the fourth is the script list) and keyed options after that.
m["aa"] = {
	"Afar", 27811, "cus-eas", Latn,
	entry_name = {remove_diacritics = ACUTE},
}

m["ab"] = {
	"Abkhaz", 5111, "cau-abz", CyrlGeorLatn,
	translit_module = "translit-redirect",
	override_translit = true,
	entry_name = "cau-entryname",
	-- Longer sequences are listed before shorter ones; `to` lines up with
	-- `from` item for item.
	sort_key = {
		from = {
			-- 3 chars
			"х'ә",
			-- 2 chars
			"гь", "гә", "ӷь", "ҕь", "ӷә", "ҕә", "дә", "жь", "жә", "ҙә", "ӡә", "ӡ'",
			"кь", "кә", "қь", "қә", "ҟь", "ҟә", "ҫә", "тә", "ҭә", "ф'", "хь", "хә",
			"х'", "ҳә", "ць", "цә", "ц'", "ҵә", "ҵ'", "шь", "шә", "џь",
			-- 1 char
			"ӷ", "ҕ", "ё", "ҙ", "ӡ", "қ", "ҟ", "ԥ", "ҧ", "ҫ", "ҭ", "ҳ", "ҵ", "ҷ",
			"ҽ", "ҿ", "ҩ", "џ", "ә",
		},
		to = {
			-- 3 chars
			"х" .. d,
			-- 2 chars
			"г" .. a, "г" .. b, "г" .. e, "г" .. f, "г" .. g, "г" .. h, "д" .. a, "ж" .. a, "ж" .. b, "з" .. b, "з" .. d, "з" .. e,
			"к" .. a, "к" .. b, "к" .. d, "к" .. e, "к" .. g, "к" .. h, "с" .. b, "т" .. a, "т" .. c, "ф" .. a, "х" .. a, "х" .. b,
			"х" .. c, "х" .. f, "ц" .. a, "ц" .. b, "ц" .. c, "ц" .. e, "ц" .. f, "ш" .. a, "ш" .. b, "ы" .. c,
			-- 1 char
			"г" .. c, "г" .. d, "е" .. a, "з" .. a, "з" .. c, "к" .. c, "к" .. f, "п" .. a, "п" .. b, "с" .. a, "т" .. b, "х" .. e, "ц" .. d, "ч" .. a,
			"ч" .. b, "ч" .. c, "ы" .. a, "ы" .. b, "ь" .. a,
		},
	},
}

m["ae"] = {
	"Avestan", 29572, "ira-cen", {"Avst", "Gujr"},
	translit_module = "Avst-translit",
	wikipedia_article = "Avestan",
}

m["af"] = {
	"Afrikaans", 14196, "gmw", LatnArab,
	ancestors = {"nl"},
	sort_key = {remove_diacritics = GRAVE .. ACUTE .. CIRC .. TILDE .. DIAER .. RINGABOVE .. CEDILLA .. "'"},
}

m["ak"] = {"Akan", 28026, "alv-ctn", Latn}

m["am"] = {
	"Amharic", 28244, "sem-eth", Ethi,
	translit_module = "Ethi-translit",
}

m["an"] = {
	"Aragonese", 8765, "roa-ibe", Latn,
	ancestors = {"roa-oan"},
}

-- Entries "ar" .. "az".
m["ar"] = {
	"Arabic", 13955, "sem-arb", {"Arab", "Hebr", "Brai"},
	translit_module = "ar-translit",
	entry_name = "ar-entryname",
	-- put Judeo-Arabic (Hebrew-script Arabic) under the category header
	-- U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after Arabic script titles
	sort_key = {
		from = {"^%f[" .. u(0x5D0) .. "-" .. u(0x5EA) .. "]"},
		to = {u(0xFB21)},
	},
}

m["as"] = {
	"Assamese", 29401, "inc-eas", {"as-Beng"},
	ancestors = {"inc-mas"},
	translit_module = "as-translit",
}

m["av"] = {
	"Avar", 29561, "cau-ava", CyrlLatnArab,
	ancestors = {"oav"},
	translit_module = "translit-redirect",
	override_translit = true,
	entry_name = "cau-entryname",
	sort_key = {
		from = {
			"гъ", "гь", "гӏ", "ё", "кк", "къ", "кь", "кӏ", "лъ",
			"лӏ", "тӏ", "хх", "хъ", "хь", "хӏ", "цӏ", "чӏ",
		},
		to = {
			"г" .. a, "г" .. b, "г" .. c, "е" .. a, "к" .. a, "к" .. b, "к" .. c, "к" .. d, "л" .. a,
			"л" .. b, "т" .. a, "х" .. a, "х" .. b, "х" .. c, "х" .. d, "ц" .. a, "ч" .. a,
		},
	},
}

m["ay"] = {"Aymara", 4627, "sai-aym", Latn}

m["az"] = {
	"Azerbaijani", 9292, "trk-ogz", LatnCyrlfaArab,
	ancestors = {"trk-oat"},
	dotted_dotless_i = true,
	sort_key = {
		from = {
			"i", -- Ensure "i" comes after "ı".
			"ç", "ə", "ğ", "x", "ı", "q", "ö", "ş", "ü", -- Latin
			"ғ", "ә", "ы", "ј", "ҝ", "ө", "ү", "һ", "ҹ", -- Cyrillic
		},
		to = {
			"i" .. a,
			"c" .. a, "e" .. a, "g" .. a, "h" .. a, "i", "k" .. a, "o" .. a, "s" .. a, "u" .. a,
			"г" .. a, "е" .. a, "и" .. a, "и" .. b, "к" .. a, "о" .. a, "у" .. a, "х" .. a, "ч" .. a,
		},
	},
}

-- Entries "ba" .. "br".
m["ba"] = {
	"Bashkir", 13389, "trk-kbu", Cyrl,
	translit_module = "ba-translit",
	override_translit = true,
	sort_key = {
		from = {"ғ", "ҙ", "ё", "ҡ", "ң", "ө", "ҫ", "ү", "һ", "ә"},
		to = {"г" .. a, "д" .. a, "е" .. a, "к" .. a, "н" .. a, "о" .. a, "с" .. a, "у" .. a, "х" .. a, "э" .. a},
	},
}

m["be"] = {
	"Belarusian", 9091, "zle", Cyrl,
	ancestors = {"zle-ort"},
	translit_module = "be-translit",
	entry_name = {remove_diacritics = GRAVE .. ACUTE},
	sort_key = {
		from = {"ґ", "ё", "і", "ў"},
		to = {"г" .. a, "е" .. a, "и" .. a, "у" .. a},
	},
}

m["bg"] = {
	"Bulgarian", 7918, "zls", Cyrl,
	ancestors = {"cu"},
	translit_module = "bg-translit",
	entry_name = {remove_diacritics = GRAVE .. ACUTE},
}

m["bh"] = {
	"Bihari", 135305, "inc-eas", Deva,
	ancestors = {"inc-mgd"},
}

m["bi"] = {
	"Bislama", 35452, "crp", Latn,
	ancestors = {"en"},
}

m["bm"] = {
	"Bambara", 33243, "dmn-emn", Latn,
	sort_key = {
		from = {"ɛ", "ɲ", "ŋ", "ɔ"},
		to = {"e" .. a, "n" .. a, "n" .. b, "o" .. a},
	},
}

m["bn"] = {
	"Bengali", 9610, "inc-eas", {"Beng", "Newa"},
	ancestors = {"inc-mbn"},
	translit_module = "bn-translit",
}

m["bo"] = {
	"Tibetan", 34271, "sit-tib", Tibt, -- sometimes Deva?
	ancestors = {"xct"},
	translit_module = "Tibt-translit",
	override_translit = true,
	entry_name = {
		from = {"ༀ", "༌", "།།", "༚༚", "༚༝", "༝༚", "༝༝", "ཷ", "ཹ", "ེེ", "ོོ"},
		to = {"ཨོཾ", "་", "༎", "༛", "༟", "࿎", "༞", "ྲཱྀ", "ླཱྀ", "ཻ", "ཽ"},
	},
	sort_key = "Tibt-sortkey",
}

m["br"] = {
	"Breton", 12107, "cel-bry", Latn,
	ancestors = {"xbm"},
	sort_key = {
		from = {"ch", "c'h"},
		to = {"c" .. a, "c" .. b},
	},
}

-- Entries "ca" .. "cv".
m["ca"] = {
	"Catalan", 7026, "roa-ocr", Latn,
	ancestors = {"roa-oca"},
	sort_key = {
		remove_diacritics = GRAVE .. ACUTE .. DIAER .. CEDILLA,
		from = {"l·l"},
		to = {"ll"},
	},
}

m["ce"] = {
	"Chechen", 33350, "cau-vay", CyrlLatnArab,
	translit_module = "translit-redirect",
	override_translit = true,
	entry_name = "cau-entryname",
	sort_key = {
		from = {
			"аь", "гӏ", "ё", "кх", "къ", "кӏ", "оь", "пӏ",
			"тӏ", "уь", "хь", "хӏ", "цӏ", "чӏ", "юь", "яь",
		},
		to = {
			"а" .. a, "г" .. a, "е" .. a, "к" .. a, "к" .. b, "к" .. c, "о" .. a, "п" .. a,
			"т" .. a, "у" .. a, "х" .. a, "х" .. b, "ц" .. a, "ч" .. a, "ю" .. a, "я" .. a,
		},
	},
}

m["ch"] = {
	"Chamorro", 33262, "poz-sus", Latn,
	sort_key = {
		remove_diacritics = "'",
		from = {"å", "ch", "ñ", "ng"},
		to = {"a" .. a, "c" .. a, "n" .. a, "n" .. b},
	},
}

m["co"] = {
	"Corsican", 33111, "roa-itd", Latn,
	sort_key = {
		from = {"chj", "ghj", "sc", "sg"},
		to = {"c" .. a, "g" .. a, "s" .. a, "s" .. b},
	},
}

m["cr"] = {
	"Cree", 33390, "alg", CansLatn,
	translit_module = "translit-redirect",
}

m["cs"] = {
	"Czech", 9056, "zlw", Latn,
	ancestors = {"zlw-ocs"},
	sort_key = {
		from = {
			"á", "č", "ď", "é", "ě", "ch", "í", "ň",
			"ó", "ř", "š", "ť", "ú", "ů", "ý", "ž",
		},
		to = {
			"a" .. a, "c" .. a, "d" .. a, "e" .. a, "e" .. b, "h" .. a, "i" .. a, "n" .. a,
			"o" .. a, "r" .. a, "s" .. a, "t" .. a, "u" .. a, "u" .. b, "y" .. a, "z" .. a,
		},
	},
}

m["cu"] = {
	"Old Church Slavonic", 35499, "zls", {"Cyrs", "Glag"},
	translit_module = "Cyrs-Glag-translit",
	entry_name = "Cyrs-entryname",
	sort_key = "Cyrs-sortkey",
}

m["cv"] = {
	"Chuvash", 33348, "trk-ogr", Cyrl,
	ancestors = {"xbo"},
	translit_module = "cv-translit",
	override_translit = true,
	sort_key = {
		from = {"ӑ", "ё", "ӗ", "ҫ", "ӳ"},
		to = {"а" .. a, "е" .. a, "е" .. b, "с" .. a, "у" .. a},
	},
}

-- Welsh. The previous sort_key here was a copy of the Chuvash (Cyrillic)
-- table, which can never match Latin-script Welsh text. The table below
-- instead treats the Welsh digraphs as single letters sorted directly after
-- their first component (c < ch, d < dd, f < ff, g < ng, l < ll, p < ph,
-- r < rh, t < th) and strips the diacritics and apostrophe before sorting.
m["cy"] = {
	"Welsh", 9309, "cel-bry", Latn,
	ancestors = {"wlm"},
	sort_key = {
		remove_diacritics = ACUTE .. GRAVE .. CIRC .. DIAER .. "'",
		from = {"ch", "dd", "ff", "ng", "ll", "ph", "rh", "th"},
		to = {"c" .. a, "d" .. a, "f" .. a, "g" .. a, "l" .. a, "p" .. a, "r" .. a, "t" .. a},
	},
	standardChars = "A-IL-PR-UWYa-il-pr-uwy0-9ÂâÊêÎîÔôÛûŴŵŶŷ" .. PUNCTUATION,
}

-- Entries "da" .. "eu".
m["da"] = {
	"Danish", 9035, "gmq", Latn,
	ancestors = {"gmq-oda"},
	sort_key = {
		from = {"æ", "ø", "å"},
		to = {"z" .. a, "z" .. b, "z" .. c},
	},
}

m["de"] = {
	"German", 188, "gmw", {"Latn", "Latf"},
	ancestors = {"gmh"},
	sort_key = {
		remove_diacritics = GRAVE .. ACUTE .. CIRC .. DIAER .. RINGABOVE,
		from = {"ß"},
		to = {"ss"},
	},
	standardChars = "A-Za-z0-9ÄäÖöÜüß" .. PUNCTUATION,
}

m["dv"] = {
	"Dhivehi", 32656, "inc-ins", {"Thaa", "Diak"},
	ancestors = {"elu-prk"},
	translit_module = "translit-redirect",
	override_translit = true,
}

m["dz"] = {
	"Dzongkha", 33081, "sit-tib", Tibt,
	ancestors = {"xct"},
	translit_module = "Tibt-translit",
	override_translit = true,
	entry_name = {
		from = {"ༀ", "༌", "།།", "༚༚", "༚༝", "༝༚", "༝༝", "ཷ", "ཹ", "ེེ", "ོོ"},
		to = {"ཨོཾ", "་", "༎", "༛", "༟", "࿎", "༞", "ྲཱྀ", "ླཱྀ", "ཻ", "ཽ"},
	},
	sort_key = "Tibt-sortkey",
}

m["ee"] = {
	"Ewe", 30005, "alv-gbe", Latn,
	sort_key = {
		remove_diacritics = TILDE,
		from = {"ɖ", "dz", "ɛ", "ƒ", "gb", "ɣ", "kp", "ny", "ŋ", "ɔ", "ts", "ʋ"},
		to = {"d" .. a, "d" .. b, "e" .. a, "f" .. a, "g" .. a, "g" .. b, "k" .. a, "n" .. a, "n" .. b, "o" .. a, "t" .. a, "v" .. a},
	},
}

m["el"] = {
	"Greek", 9129, "grk", {"Grek", "Brai"},
	ancestors = {"grc"},
	translit_module = "el-translit",
	override_translit = true,
	entry_name = {remove_diacritics = CARON .. DIAERBELOW .. BREVEBELOW},
	sort_key = "Grek-sortkey",
	standardChars = "ͺ;΄-ώϜϝ" .. PUNCTUATION,
}

m["en"] = {
	"English", 1860, "gmw",
	{"Latn", "Brai", "Shaw", "Dsrt"}, -- entries in Shaw or Dsrt might require prior discussion
	ancestors = {"enm"},
	sort_key = {
		remove_diacritics = GRAVE .. ACUTE .. CIRC .. TILDE .. MACRON .. DIAER .. RINGABOVE .. CARON .. CEDILLA .. "'",
		from = {"æ", "œ"},
		to = {"ae", "oe"},
	},
	wikimedia_codes = {"en", "simple"},
	standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF),
}

m["eo"] = {
	"Esperanto", 143, "art", Latn,
	sort_key = {
		remove_diacritics = GRAVE .. ACUTE,
		from = {"ĉ", "ĝ", "ĥ", "ĵ", "ŝ", "ŭ"},
		to = {"c" .. a, "g" .. a, "h" .. a, "j" .. a, "s" .. a, "u" .. a},
	},
	standardChars = "A-PRSTUVZa-prstuvzĉĈĝĜĵĴŝŜŭŬ0-9" .. PUNCTUATION,
}

m["es"] = {
	"Spanish", 1321, "roa-ibe", LatnBrai,
	ancestors = {"osp"},
	sort_key = {
		remove_diacritics = ACUTE .. DIAER .. CEDILLA,
		from = {"ñ"},
		to = {"n" .. a},
	},
	standardChars = "A-VXYZa-vxyz0-9ÁáÉéÍíÓóÚúÑñ¿¡" .. PUNCTUATION,
}

m["et"] = {
	"Estonian", 9072, "fiu-fin", Latn,
	sort_key = {
		from = {"š", "z", "ž", "õ", "ä", "ö", "ü"},
		to = {"s" .. a, "s" .. b, "s" .. c, "w" .. a, "w" .. b, "w" .. c, "w" .. d},
	},
}

m["eu"] = {
	"Basque", 8752, "euq", Latn,
	sort_key = {
		from = {"ç", "ñ"},
		to = {"c" .. a, "n" .. a},
	},
}

-- Entries "fa" .. "gv".
m["fa"] = {
	"Persian", 9168, "ira-swi", {"fa-Arab"},
	ancestors = {"pal"}, -- "ira-mid"
	entry_name = {remove_diacritics = KASHIDA .. FATHA .. DAMMA .. KASRA .. SHADDA .. SUKUN},
}

m["ff"] = {"Fula", 33454, "alv-fwo", {"Latn", "Adlm"}}

m["fi"] = {
	"Finnish", 1412, "fiu-fin", Latn,
	entry_name = {remove_diacritics = "ˣ"}, -- used to indicate gemination of the next consonant
	sort_key = {
		remove_diacritics = GRAVE .. ACUTE .. CIRC .. TILDE .. DACUTE .. CARON .. CEDILLA .. "':",
		from = {"ø", "æ", "œ", "ß"},
		to = {"o", "ae", "oe", "ss"},
	},
}

m["fj"] = {"Fijian", 33295, "poz-occ", Latn}

m["fo"] = {
	"Faroese", 25258, "gmq", Latn,
	ancestors = {"non"},
	sort_key = {
		from = {"á", "ð", "í", "ó", "ú", "ý", "æ", "ø"},
		to = {"a" .. a, "d" .. a, "i" .. a, "o" .. a, "u" .. a, "y" .. a, "z" .. a, "z" .. b},
	},
}

m["fr"] = {
	"French", 150, "roa-oil", LatnBrai,
	ancestors = {"frm"},
	sort_key = "fr-sortkey",
	standardChars = "A-Za-z0-9ÀÂÇÉÈÊËÎÏÔŒÛÙÜàâçéèêëîïôœûùü«»" .. PUNCTUATION,
}

m["fy"] = {
	"West Frisian", 27175, "gmw-fri", Latn,
	ancestors = {"ofs"},
	sort_key = {
		remove_diacritics = GRAVE .. ACUTE .. CIRC .. DIAER,
		from = {"y"},
		to = {"i"},
	},
	standardChars = "A-PR-WYZa-pr-wyz0-9Ææâäàéêëèïìôöòúûüùỳ" .. PUNCTUATION,
}

m["ga"] = {
	"Irish", 9142, "cel-gae", LatnLatg,
	ancestors = {"mga"},
	sort_key = {
		remove_diacritics = ACUTE,
		from = {"ḃ", "ċ", "ḋ", "ḟ", "ġ", "ṁ", "ṗ", "ṡ", "ṫ"},
		to = {"bh", "ch", "dh", "fh", "gh", "mh", "ph", "sh", "th"},
	},
	standardChars = "A-IL-PR-Ua-il-pr-u0-9ÁáÉéÍíÓóÚú" .. PUNCTUATION,
}

m["gd"] = {
	"Scottish Gaelic", 9314, "cel-gae", LatnLatg,
	ancestors = {"mga"},
	sort_key = {remove_diacritics = GRAVE .. ACUTE},
	standardChars = "A-IL-PR-Ua-il-pr-u0-9ÀàÈèÌìÒòÙù" .. PUNCTUATION,
}

m["gl"] = {
	"Galician", 9307, "roa-ibe", Latn,
	ancestors = {"roa-opt"},
	sort_key = {
		remove_diacritics = ACUTE,
		from = {"ñ"},
		to = {"n" .. a},
	},
}

m["gn"] = {"Guaraní", 35876, "tup-gua", Latn}

m["gu"] = {
	"Gujarati", 5137, "inc-wes", {"Gujr"},
	ancestors = {"inc-mgu"},
	translit_module = "gu-translit",
}

m["gv"] = {
	"Manx", 12175, "cel-gae", Latn,
	ancestors = {"mga"},
	sort_key = {remove_diacritics = CEDILLA .. "-"},
	standardChars = "A-WYÇa-wyç0-9" .. PUNCTUATION,
}

-- Entries "ha" .. "hu".
m["ha"] = {
	"Hausa", 56475, "cdc-wst", LatnArab,
	entry_name = {remove_diacritics = GRAVE .. ACUTE .. CIRC .. TILDE .. MACRON},
	sort_key = {
		from = {"ɓ", "b'", "ɗ", "d'", "ƙ", "k'", "sh", "ƴ", "'y"},
		to = {"b" .. a, "b" .. b, "d" .. a, "d" .. b, "k" .. a, "k" .. b, "s" .. a, "y" .. a, "y" .. b},
	},
}

m["he"] = {
	"Hebrew", 9288, "sem-can", {"Hebr", "Phnx", "Brai"},
	entry_name = {
		-- Two code-point ranges (U+0591-U+05BD, U+05BF-U+05C5), then U+05C7 and CGJ.
		remove_diacritics = u(0x0591) .. "-" .. u(0x05BD) .. u(0x05BF) .. "-" .. u(0x05C5) .. u(0x05C7) .. CGJ,
	},
}

m["hi"] = {
	"Hindi", 1568, "inc-hnd", {"Deva", "Kthi", "Newa"},
	ancestors = {"inc-ohi"},
	translit_module = "hi-translit",
	standardChars = "ँंअ-ऊएऐओ-घच-झट-नप-रलवशसहा-ूेैो-◌्।-॰ड़ढ़" .. PUNCTUATION,
}

m["ho"] = {
	"Hiri Motu", 33617, "crp", Latn,
	ancestors = {"meu"},
}

m["ht"] = {
	"Haitian Creole", 33491, "crp", Latn,
	ancestors = {"fr"},
	sort_key = {
		from = {
			"oun", -- 3 chars
			"an", "ch", "en", "ng", "on", "ou", "ui", -- 2 chars
			"è", "ò", -- 1 char
		},
		to = {
			"o" .. d,
			"a" .. a, "c" .. a, "e" .. b, "n" .. a, "o" .. b, "o" .. c, "u" .. a,
			"e" .. a, "o" .. a,
		},
	},
}

m["hu"] = {
	"Hungarian", 9067, "urj-ugr", {"Latn", "Hung"},
	ancestors = {"ohu"},
	sort_key = {
		from = {
			"dzs", -- 3 chars
			"cs", "dz", "gy", "ly", "ny", "sz", "ty", "zs", -- 2 chars
			"á", "é", "í", "ó", "ö", "ő", "ú", "ü", "ű", -- 1 char
		},
		to = {
			"d" .. b,
			"c" .. a, "d" .. a, "g" .. a, "l" .. a, "n" .. a, "s" .. a, "t" .. a, "z" .. a,
			"a" .. a, "e" .. a, "i" .. a, "o" .. a, "o" .. b, "o" .. c, "u" .. a, "u" .. b, "u" .. c,
		},
	},
}

-- Entries "hy" .. "iu".
m["hy"] = {
	"Armenian", 8785, "hyx", {"Armn", "Brai"},
	ancestors = {"axm"},
	translit_module = "Armn-translit",
	override_translit = true,
	entry_name = {
		remove_diacritics = "՞՜՛՟",
		-- NOTE(review): the last three pairs map a letter to itself in this
		-- copy of the data, so they have no effect as written. Confirm
		-- whether markup around the `from` items was lost.
		from = {"եւ", "յ", "ի", "է"},
		to = {"և", "յ", "ի", "է"},
	},
	sort_key = {
		from = {
			"ու", "եւ", -- 2 chars
			"և", -- 1 char
		},
		to = {
			"ւ", "եվ",
			"եվ",
		},
	},
}

m["hz"] = {"Herero", 33315, "bnt-swb", Latn}

m["ia"] = {"Interlingua", 35934, "art", Latn}

m["id"] = {
	"Indonesian", 9240, "poz-mly", Latn,
	ancestors = {"ms"},
}

m["ie"] = {
	"Interlingue", 35850, "art", Latn,
	type = "appendix-constructed",
	entry_name = {remove_diacritics = GRAVE .. ACUTE .. CIRC},
}

m["ig"] = {
	"Igbo", 33578, "alv-igb", Latn,
	entry_name = {remove_diacritics = GRAVE .. ACUTE .. MACRON},
	sort_key = {
		from = {"gb", "gh", "gw", "ị", "kp", "kw", "ṅ", "nw", "ny", "ọ", "sh", "ụ"},
		to = {"g" .. a, "g" .. b, "g" .. c, "i" .. a, "k" .. a, "k" .. b, "n" .. a, "n" .. b, "n" .. c, "o" .. a, "s" .. a, "u" .. a},
	},
}

m["ii"] = {
	"Sichuan Yi", 34235, "tbq-lol", {"Yiii"},
	translit_module = "ii-translit",
}

m["ik"] = {
	"Inupiaq", 27183, "esx-inu", Latn,
	sort_key = {
		from = {
			"ch", "dj", "ł̣", "ng", "r̂", "sr", "zr", -- 2 chars
			"ġ", "ḷ", "ł", "ñ", "ŋ", "ʼ", -- 1 char
		},
		to = {
			"c" .. a, "h" .. a, "l" .. c, "n" .. b, "r" .. a, "s" .. a, "z" .. a,
			"g" .. a, "l" .. a, "l" .. b, "n" .. a, "n" .. b, "z" .. b,
		},
	},
}

m["io"] = {"Ido", 35224, "art", Latn}

m["is"] = {
	"Icelandic", 294, "gmq", Latn,
	ancestors = {"non"},
	sort_key = {
		from = {"á", "ð", "é", "í", "ó", "ú", "ý", "þ", "æ", "ö"},
		to = {"a" .. a, "d" .. a, "e" .. a, "i" .. a, "o" .. a, "u" .. a, "y" .. a, "z" .. a, "z" .. b, "z" .. c},
	},
}

m["it"] = {
	"Italian", 652, "roa-itd", Latn,
	sort_key = {remove_diacritics = GRAVE .. ACUTE .. CIRC .. DIAER .. RINGABOVE},
	standardChars = "A-IL-VZa-il-vz0-9ÀàÈèÌìÒòÙùÉé" .. PUNCTUATION,
}

m["iu"] = {
	"Inuktitut", 29921, "esx-inu", CansLatn,
	translit_module = "translit-redirect",
	override_translit = true,
}

-- Entries "ja" and "jv". The Japanese sort_key is kept only as a
-- commented-out block; it has no effect at runtime.
m["ja"] = {
	"Japanese", 5287, "jpx", {"Jpan", "Brai"},
	ancestors = {"ojp"},
	--[=[
	-- Handled by jsort function in Module:ja.
	sort_key = {
		from = {"[ぁァア]", "[ぃィイ]", "[ぅゔゥウヴ]", "[ぇェエ]", "[ぉォオ]", "[がゕカガヵ]", "[ぎキギ]", "[ぐクグㇰ]", "[げゖケゲヶ]", "[ごコゴ]", "[ざサザ]", "[じシジㇱ]", "[ずスズㇲ]", "[ぜセゼ]", "[ぞソゾ]", "[だタダ]", "[ぢチヂ]", "[っづッツヅ]", "[でテデ]", "[どトドㇳ]", "ナ", "ニ", "[ヌㇴ]", "ネ", "ノ", "[ばぱハバパㇵ]", "[びぴヒビピㇶ]", "[ぶぷフブプㇷ]", "[べぺヘベペㇸ]", "[ぼぽホボポㇹ]", "マ", "ミ", "[ムㇺ]", "メ", "モ", "[ゃャヤ]", "[ゅュユ]", "[ょョヨ]", "[ラㇻ]", "[リㇼ]", "[ルㇽ]", "[レㇾ]", "[ロㇿ]", "[ゎヮワヷ]", "[ヰヸ]", "[ヱヹ]", "[ヲヺ]", "ン", "[゙゚゛゜ゝゞ・ヽヾ]", "𛀀"},
		to  = {"あ", "い", "う", "え", "お", "か", "き", "く", "け", "こ", "さ", "し", "す", "せ", "そ", "た", "ち", "つ", "て", "と", "な", "に", "ぬ", "ね", "の", "は", "ひ", "ふ", "へ", "ほ", "ま", "み", "む", "め", "も", "や", "ゆ", "よ", "ら", "り", "る", "れ", "ろ", "わ", "ゐ", "ゑ", "を", "ん", "", "え"}
	},
	--]=]
}

m["jv"] = {
	"Javanese", 33549, "poz-sus", {"Latn", "Java"},
	translit_module = "jv-translit",
	ancestors = {"kaw"},
	link_tr = true,
	sort_key = {
		from = {"dh", "é", "è", "ng", "ny", "th"},
		to = {"d" .. a, "e" .. a, "e" .. b, "n" .. a, "n" .. b, "t" .. a},
	},
}

-- Entries "ka" .. "ky".
m["ka"] = {
	"Georgian", 8108, "ccs-gzn",
	{"Geor", "Geok", "Hebr"}, -- Hebr is used to write Judeo-Georgian
	ancestors = {"oge"},
	translit_module = "Geor-translit",
	override_translit = true,
	entry_name = {remove_diacritics = CIRC},
}

m["kg"] = {"Kongo", 33702, "bnt-kng", Latn}

m["ki"] = {"Kikuyu", 33587, "bnt-kka", Latn}

m["kj"] = {"Kwanyama", 1405077, "bnt-ova", Latn}

m["kk"] = {
	"Kazakh", 9252, "trk-kno", {"Cyrl", "Latn", "kk-Arab"},
	translit_module = "kk-translit",
	override_translit = true,
	sort_key = {
		from = {"ә", "ғ", "ё", "қ", "ң", "ө", "ұ", "ү", "һ", "і"},
		to = {"а" .. a, "г" .. a, "е" .. a, "к" .. a, "н" .. a, "о" .. a, "у" .. a, "у" .. b, "х" .. a, "ы" .. a},
	},
}

m["kl"] = {
	"Greenlandic", 25355, "esx-inu", Latn,
	sort_key = {
		from = {"æ", "ø", "å"},
		to = {"z" .. a, "z" .. b, "z" .. c},
	},
}

m["km"] = {
	"Khmer", 9205, "mkh-kmr", {"Khmr"},
	ancestors = {"xhm"},
	translit_module = "km-translit",
}

m["kn"] = {
	"Kannada", 33673, "dra", {"Knda"},
	ancestors = {"dra-mkn"},
	translit_module = "kn-translit",
}

m["ko"] = {
	"Korean", 9176, "qfa-kor", {"Kore", "Brai"},
	ancestors = {"ko-ear"},
	translit_module = "ko-translit",
	entry_name = "Kore-entryname",
}

m["kr"] = {
	"Kanuri", 36094, "ssa-sah", LatnArab,
	-- the sortkey and entry_name are only for standard Kanuri; when dialectal
	-- entries get added, someone will have to work out how the dialects
	-- should be represented orthographically
	entry_name = {remove_diacritics = GRAVE .. ACUTE .. CIRC .. BREVE},
	sort_key = {
		from = {"ǝ", "ny", "ɍ", "sh"},
		to = {"e" .. a, "n" .. a, "r" .. a, "s" .. a},
	},
}

m["ks"] = {
	"Kashmiri", 33552, "inc-dar", {"ks-Arab", "Deva", "Shrd", "Latn"},
	translit_module = "translit-redirect",
}

-- "kv" IS TREATED AS "koi", "kpv", SEE WT:LT

m["kw"] = {
	"Cornish", 25289, "cel-bry", Latn,
	ancestors = {"cnx"},
	sort_key = {
		from = {"ch"},
		to = {"c" .. a},
	},
}

m["ky"] = {
	"Kyrgyz", 9255, "trk-kip", CyrlLatnArab,
	translit_module = "ky-translit",
	override_translit = true,
	sort_key = {
		from = {"ё", "ң", "ө", "ү"},
		to = {"е" .. a, "н" .. a, "о" .. a, "у" .. a},
	},
}

-- Entries "la" .. "lu".
m["la"] = {
	"Latin", 397, "itc", {"Latn", "Ital"},
	entry_name = {remove_diacritics = MACRON .. BREVE .. DIAER .. DOUBLEINVBREVE},
	sort_key = {
		from = {"æ", "œ"},
		to = {"ae", "oe"},
	},
	standardChars = "A-Za-z0-9ÆæŒœĀ-ăĒ-ĕĪ-ĭŌ-ŏŪ-ŭȲȳ" .. MACRON .. BREVE .. PUNCTUATION,
}

m["lb"] = {
	"Luxembourgish", 9051, "gmw", Latn,
	ancestors = {"gmw-cfr"},
	sort_key = {
		from = {"ä", "ë", "é"},
		to = {"z" .. a, "z" .. b, "z" .. c},
	},
}

m["lg"] = {
	"Luganda", 33368, "bnt-nyg", Latn,
	entry_name = {remove_diacritics = ACUTE .. CIRC},
	sort_key = {
		from = {"ŋ"},
		to = {"n" .. a},
	},
}

m["li"] = {
	"Limburgish", 102172, "gmw", Latn,
	ancestors = {"dum"},
}

m["ln"] = {
	"Lingala", 36217, "bnt-bmo", Latn,
	sort_key = {
		remove_diacritics = ACUTE .. CIRC .. CARON,
		from = {"ɛ", "gb", "mb", "mp", "nd", "ng", "nk", "ns", "nt", "ny", "nz", "ɔ"},
		to = {"e" .. a, "g" .. a, "m" .. a, "m" .. b, "n" .. a, "n" .. b, "n" .. c, "n" .. d, "n" .. e, "n" .. f, "n" .. g, "o" .. a},
	},
}

m["lo"] = {
	"Lao", 9211, "tai-swe", {"Laoo"},
	translit_module = "lo-translit",
	sort_key = "Laoo-sortkey",
	standardChars = "0-9ກຂຄງຈຊຍດຕຖທນບປຜຝພຟມຢຣລວສຫອຮຯ-ໝ" .. PUNCTUATION,
}

m["lt"] = {
	"Lithuanian", 9083, "bat", Latn,
	ancestors = {"olt"},
	entry_name = {remove_diacritics = GRAVE .. ACUTE .. TILDE},
	sort_key = {
		from = {"ą", "č", "ę", "ė", "į", "y", "š", "ų", "ū", "ž"},
		to = {"a" .. a, "c" .. a, "e" .. a, "e" .. b, "i" .. a, "i" .. b, "s" .. a, "u" .. a, "u" .. b, "z" .. a},
	},
}

m["lu"] = {"Luba-Katanga", 36157, "bnt-lub", Latn}

m["lv"] = {
	"Latvian", 9078, "bat", Latn,
	entry_name = {
		-- This attempts to convert vowels with tone marks to vowels either with
		-- or without macrons. Specifically, there should be no macrons if the
		-- vowel is part of a diphthong (including resonant diphthongs such as
		-- pìrksts -> pirksts not #pīrksts). What we do is first convert the
		-- vowel + tone mark to a vowel + tilde in a decomposed fashion,
		-- then remove the tilde in diphthongs, then convert the remaining
		-- vowel + tilde sequences to macroned vowels, then delete any other
		-- tilde. We leave already-macroned vowels alone: Both e.g. ar and ār
		-- occur before consonants. FIXME: This still might not be sufficient.
		--
		-- The stages below are order-dependent; `to` lines up with `from`
		-- item for item.
		from = {
			-- Letters rewritten to a plain base letter.
			"Ȩ", "ȩ",
			-- Precomposed vowel + tone mark -> vowel + TILDE (O/o and N/n lose the mark outright).
			"[ÂÃÀ]", "[âãà]", "[ÊẼÈ]", "[êẽè]", "[ÎĨÌ]", "[îĩì]", "[ÔÕÒ]", "[ôõò]", "[ÛŨÙ]", "[ûũù]", "[ÑǸ]", "[ñǹ]",
			-- Any remaining combining tone mark becomes TILDE.
			"[" .. CIRC .. TILDE ..GRAVE .."]",
			-- Drop the TILDE inside diphthongs (before a non-vowel, at end of string, and in "ie").
			"([aAeEiIoOuU])" .. TILDE .."?([lrnmuiLRNMUI])" .. TILDE .. "?([^aAeEiIoOuUāĀēĒīĪūŪ])",
			"([aAeEiIoOuU])" .. TILDE .."?([lrnmuiLRNMUI])" .. TILDE .."?$",
			"([iI])" .. TILDE .. "?([eE])" .. TILDE .. "?",
			-- Remaining vowel + TILDE -> macroned vowel.
			"A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "U" .. TILDE, "u" .. TILDE,
			-- Delete any TILDE left over.
			TILDE,
		},
		to = {
			"E", "e",
			"A" .. TILDE, "a" .. TILDE, "E" .. TILDE, "e" .. TILDE, "I" .. TILDE, "i" .. TILDE, "O", "o", "U" .. TILDE, "u" .. TILDE, "N", "n",
			TILDE,
			"%1%2%3",
			"%1%2",
			"%1%2",
			"Ā", "ā", "Ē", "ē", "Ī", "ī", "Ū", "ū",
			"",
		},
	},
	sort_key = {
		from = {"ā", "č", "ē", "ģ", "ī", "ķ", "ļ", "ņ", "š", "ū", "ž"},
		to = {"a" .. a, "c" .. a, "e" .. a, "g" .. a, "i" .. a, "k" .. a, "l" .. a, "n" .. a, "s" .. a, "u" .. a, "z" .. a},
	},
}

-- Entries "mg" .. "na".
m["mg"] = {"Malagasy", 7930, "poz-bre", Latn}

m["mh"] = {
	"Marshallese", 36280, "poz-mic", Latn,
	sort_key = {
		from = {"ā", "ļ", "m̧", "ņ", "n̄", "o̧", "ō", "ū"},
		to = {"a" .. a, "l" .. a, "m" .. a, "n" .. a, "n" .. b, "o" .. a, "o" .. b, "u" .. a},
	},
}

m["mi"] = {
	"Maori", 36451, "poz-pep", Latn,
	sort_key = {
		remove_diacritics = MACRON,
		from = {"ng", "wh"},
		to = {"z" .. a, "z" .. b},
	},
}

m["mk"] = {
	"Macedonian", 9296, "zls", Cyrl,
	translit_module = "mk-translit",
	entry_name = {
		remove_diacritics = ACUTE,
		remove_exceptions = {"Ѓ", "ѓ", "Ќ", "ќ"}, -- possibly unnecessary
	},
	sort_key = {
		remove_diacritics = GRAVE,
		from = {"ѓ", "ѕ", "ј", "љ", "њ", "ќ", "џ"},
		to = {"д" .. a, "з" .. a, "и" .. a, "л" .. a, "н" .. a, "т" .. a, "ч" .. a},
	},
}

m["ml"] = {
	"Malayalam", 36236, "dra", {"Mlym"},
	translit_module = "ml-translit",
	override_translit = true,
}

m["mn"] = {
	"Mongolian", 9246, "xgn", {"Cyrl", "Mong", "Latn"},
	ancestors = {"cmg"},
	translit_module = "mn-translit",
	override_translit = true,
	entry_name = {remove_diacritics = GRAVE .. ACUTE},
	sort_key = {
		remove_diacritics = GRAVE,
		from = {"ё", "ө", "ү"},
		to = {"е" .. a, "о" .. a, "у" .. a},
	},
	standardChars = "A-PR-UX-Za-pr-ux-zÇÖÜçöüŞşƟƵƶɵЁА-ШЫ-шы-яёҮүӨө—᠊-᠙ᠠ-ᡂ" .. PUNCTUATION,
}

-- "mo" IS TREATED AS "ro", SEE WT:LT

m["mr"] = {
	"Marathi", 1571, "inc-sou", {"Deva", "Modi"},
	ancestors = {"omr"},
	translit_module = "translit-redirect",
	entry_name = {
		from = {"च़", "ज़", "झ़"},
		to = {"च", "ज", "झ"},
	},
}

m["ms"] = {"Malay", 9237, "poz-mly", {"Latn", "ms-Arab"}}

m["mt"] = {
	"Maltese", 9166, "sem-arb", Latn,
	ancestors = {"sqr"},
	sort_key = {
		from = {
			"([cgz])", -- Ensure "c" comes after "ċ", "g" comes after "ġ" and "z" comes after "ż".
			"g" .. a .. "ħ", -- After initial conversion of "g".
			"ċ", "ġ", "ħ", "ie", "ż",
		},
		to = {
			"%1" .. a,
			"g" .. b,
			"c", "g", "h" .. a, "i" .. a, "z",
		},
	},
}

m["my"] = {
	"Burmese", 9228, "tbq-brm", {"Mymr"},
	ancestors = {"obr"},
	translit_module = "my-translit",
	override_translit = true,
	sort_key = {
		from = {"ျ", "ြ", "ွ", "ှ", "ဿ"},
		to = {"္ယ", "္ရ", "္ဝ", "္ဟ", "သ္သ"},
	},
}

m["na"] = {"Nauruan", 13307, "poz-mic", Latn}

-- Entries "nb" .. "pa".
m["nb"] = {
	"Norwegian Bokmål", 25167, "gmq", Latn,
	ancestors = {"gmq-mno"},
	sort_key = "no-sortkey",
	wikimedia_codes = {"no"},
}

m["nd"] = {
	"Northern Ndebele", 35613, "bnt-ngu", Latn,
	entry_name = {remove_diacritics = GRAVE .. ACUTE .. CIRC .. MACRON .. CARON},
}

m["ne"] = {
	"Nepali", 33823, "inc-pah", {"Deva", "Newa"},
	translit_module = "ne-translit",
}

m["ng"] = {"Ndonga", 33900, "bnt-ova", Latn}

m["nl"] = {
	"Dutch", 7411, "gmw", Latn,
	ancestors = {"dum"},
	sort_key = {remove_diacritics = GRAVE .. ACUTE .. CIRC .. TILDE .. DIAER .. RINGABOVE .. CEDILLA .. "'"},
	standardChars = "A-Za-z0-9" .. PUNCTUATION .. u(0x2800) .. "-" .. u(0x28FF),
}

m["nn"] = {
	"Norwegian Nynorsk", 25164, "gmq", Latn,
	ancestors = {"gmq-mno"},
	sort_key = "no-sortkey",
}

m["no"] = {
	"Norwegian", 9043, "gmq", Latn,
	ancestors = {"gmq-mno"},
	sort_key = "no-sortkey",
}

m["nr"] = {
	"Southern Ndebele", 36785, "bnt-ngu", Latn,
	entry_name = {remove_diacritics = GRAVE .. ACUTE .. CIRC .. MACRON .. CARON},
}

m["nv"] = {
	"Navajo", 13310, "apa", Latn,
	sort_key = {
		remove_diacritics = ACUTE .. OGONEK,
		from = {
			"chʼ", "tłʼ", "tsʼ", -- 3 chars
			"ch", "dl", "dz", "gh", "hw", "kʼ", "kw", "sh", "tł", "ts", "zh", -- 2 chars
			"ł", "ʼ", -- 1 char
		},
		to = {
			"c" .. b, "t" .. b, "t" .. d,
			"c" .. a, "d" .. a, "d" .. b, "g" .. a, "h" .. a, "k" .. a, "k" .. b, "s" .. a, "t" .. a, "t" .. c, "z" .. a,
			"l" .. a, "z" .. b,
		},
	},
}

m["ny"] = {
	"Chichewa", 33273, "bnt-nys", Latn,
	entry_name = {remove_diacritics = ACUTE .. CIRC},
	sort_key = {
		from = {"ng'"},
		to = {"ng"},
	},
}

m["oc"] = {
	"Occitan", 14185, "roa-ocr", {"Latn", "Hebr"},
	ancestors = {"pro"},
	sort_key = {
		remove_diacritics = GRAVE .. ACUTE .. DIAER .. CEDILLA,
		from = {"([lns])·h"},
		to = {"%1h"},
	},
}

m["oj"] = {
	"Ojibwe", 33875, "alg", CansLatn,
	sort_key = {
		from = {"aa", "ʼ", "ii", "oo", "sh", "zh"},
		to = {"a" .. a, "h" .. a, "i" .. a, "o" .. a, "s" .. a, "z" .. a},
	},
}

m["om"] = {"Oromo", 33864, "cus-eas", {"Latn", "Ethi"}}

m["or"] = {
	"Oriya", 33810, "inc-eas", {"Orya"},
	ancestors = {"inc-mor"},
	translit_module = "or-translit",
}

m["os"] = {
	"Ossetian", 33968, "xsc", CyrlGeorLatn,
	ancestors = {"oos"},
	translit_module = "os-translit",
	override_translit = true,
	entry_name = {remove_diacritics = GRAVE .. ACUTE},
	sort_key = {
		from = {"ӕ", "гъ", "дж", "дз", "ё", "къ", "пъ", "тъ", "хъ", "цъ", "чъ"},
		to = {"а" .. a, "г" .. a, "д" .. a, "д" .. b, "е" .. a, "к" .. a, "п" .. a, "т" .. a, "х" .. a, "ц" .. a, "ч" .. a},
	},
}

m["pa"] = {
	"Punjabi", 58635, "inc-pan", {"Guru", "pa-Arab"},
	ancestors = {"inc-opa"},
	translit_module = "translit-redirect",
	entry_name = {
		remove_diacritics = FATHATAN .. DAMMATAN .. KASRATAN .. FATHA .. DAMMA .. KASRA .. SHADDA .. SUKUN .. NUNGHUNNA,
		from = {u(0x0768), u(0x08C7)},
		to = {"ن", "ل"},
	},
}

-- Pali.
m["pi"] = {
	"Pali", 36727, "inc-mid",
	{"Latn", "Brah", "Deva", "Beng", "Sinh", "Mymr", "Thai", "Lana", "Laoo", "Khmr"},
	ancestors = {"sa"},
	translit_module = "translit-redirect",
	entry_name = {
		-- `from` has one more item than `to`: the trailing u(0xFE00) has no
		-- replacement (presumably it is deleted; confirm against the module
		-- that consumes this table).
		from = {"ึ", u(0xF700), u(0xF70F), u(0xFE00)},
		to = {"ิํ", "ฐ", "ญ"},
	},
	sort_key = {
		-- Latin-script letters with diacritics get the base letter plus one or
		-- more "~", so every letter has a distinct key. "ñ" and "ṇ" used to
		-- map to bare "n", colliding with plain n and with each other; they
		-- now get "n~~" and "n~~~", giving the order n, ṅ, ñ, ṇ.
		-- The last two `from` items (u(0xFE00), u(0x200D)) have no `to`
		-- counterpart.
		from = {
			"ā", "ī", "ū", "ḍ", "ḷ", "[ṁṃ]", "ṅ", "ñ", "ṇ", "ṭ",
			"([เโ])([ก-ฮ])", "([ເໂ])([ກ-ຮ])",
			"ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ",
			u(0xFE00), u(0x200D),
		},
		to = {
			"a~", "i~", "u~", "d~", "l~", "m~", "n~", "n~~", "n~~~", "t~",
			"%2%1", "%2%1",
			"ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ",
		},
	},
}

-- Entries "pl" .. "rw".
m["pl"] = {
	"Polish", 809, "zlw-lch", Latn,
	ancestors = {"zlw-opl"},
	sort_key = {
		from = {"ą", "ć", "ę", "ł", "ń", "ó", "ś", "ź", "ż"},
		to = {"a" .. a, "c" .. a, "e" .. a, "l" .. a, "n" .. a, "o" .. a, "s" .. a, "z" .. a, "z" .. b},
	},
}

m["ps"] = {"Pashto", 58680, "ira-pat", {"ps-Arab"}}

m["pt"] = {
	"Portuguese", 5146, "roa-ibe", LatnBrai,
	ancestors = {"roa-opt"},
	sort_key = {remove_diacritics = GRAVE .. ACUTE .. CIRC .. TILDE .. DIAER .. CEDILLA},
}

m["qu"] = {"Quechua", 5218, "qwe", Latn}

m["rm"] = {"Romansch", 13199, "roa-rhe", Latn}

m["ro"] = {
	"Romanian", 7913, "roa-eas", {"Latn", "Cyrl"},
	sort_key = {
		from = {
			"ă", "â", "î", "ș", "ț", -- Latin
			"ӂ", -- Cyrillic
		},
		to = {
			"a" .. a, "a" .. b, "i" .. a, "s" .. a, "t" .. a,
			"ж" .. a,
		},
	},
}

m["ru"] = {
	"Russian", 7737, "zle", {"Cyrl", "Brai"},
	translit_module = "ru-translit",
	entry_name = {
		remove_diacritics = GRAVE .. ACUTE .. DIAER,
		remove_exceptions = {"Ё", "ё"},
	},
	sort_key = {
		from = {"ё", "і", "ѣ", "ѳ", "ѵ"},
		to = {"е" .. a, "и" .. a, "ь" .. a, "я" .. a, "я" .. b},
	},
	standardChars = "ЁА-яё0-9—" .. PUNCTUATION,
}

m["rw"] = {
	"Rwanda-Rundi", 3217514, "bnt-glb", Latn,
	entry_name = {remove_diacritics = ACUTE .. CIRC .. MACRON .. CARON},
}

-- Sanskrit.
m["sa"] = {
	"Sanskrit", 11059, "inc-old",
	{
		"Deva", "Bali", "as-Beng", "Beng", "Bhks", "Brah", "Gran", "Gujr", "Guru", "Hani",
		"Java", "Kawi", "Khar", "Khmr", "Knda", "Lana", "Laoo", "Marc", "Mlym", "Modi",
		"Mong", "mnc-Mong", "xwo-Mong", "Mymr", "Nand", "Newa", "Orya", "Phag", "Ranj", "Saur",
		"Shrd", "Sidd", "Sinh", "Taml", "Tang", "Telu", "Thai", "Tibt", "Tirh",
	},
	translit_module = "translit-redirect",
	entry_name = {
		-- `from` has one more item than `to`: the trailing u(0xFE00) has no
		-- replacement (presumably it is deleted; confirm against the module
		-- that consumes this table).
		from = {
			"ึ", u(0xF700), u(0xF70F),
			"ༀ", "༌", "།།", "༚༚", "༚༝", "༝༚", "༝༝", "ཷ", "ཹ", "ེེ", "ོོ",
			u(0xFE00),
		},
		to = {
			"ิํ", "ฐ", "ญ",
			"ཨོཾ", "་", "༎", "༛", "༟", "࿎", "༞", "ྲཱྀ", "ླཱྀ", "ཻ", "ཽ",
		},
	},
	sort_key = {
		-- Latin-script letters with diacritics get the base letter plus one or
		-- more "~", so every letter has a distinct key. "ḹ", "ñ", "ṇ", "ṝ" and
		-- "ṣ" used to map to the bare base letter, colliding with the plain
		-- letter (and "ñ" with "ṇ"); each now gets its own suffix, giving
		-- l, ḷ, ḹ / n, ṅ, ñ, ṇ / r, ṛ, ṝ / s, ś, ṣ.
		-- The last two `from` items (u(0xFE00), u(0x200D)) have no `to`
		-- counterpart.
		from = {
			"ā", "ī", "ū", "ḍ", "ḷ", "ḹ", "[ṁṃ]", "ṅ", "ñ", "ṇ", "ṛ", "ṝ", "ś", "ṣ", "ṭ",
			"([เโไ])([ก-ฮ])", "([ເໂໄ])([ກ-ຮ])",
			"ᩔ", "ᩕ", "ᩖ", "ᩘ", "([ᨭ-ᨱ])ᩛ", "([ᨷ-ᨾ])ᩛ", "ᩤ",
			u(0xFE00), u(0x200D),
		},
		to = {
			"a~", "i~", "u~", "d~", "l~", "l~~", "m~", "n~", "n~~", "n~~~", "r~", "r~~", "s~", "s~~", "t~",
			"%2%1", "%2%1",
			"ᩈ᩠ᩈ", "᩠ᩁ", "᩠ᩃ", "ᨦ᩠", "%1᩠ᨮ", "%1᩠ᨻ", "ᩣ",
		},
	},
}

m["sc"] = { "Sardinian", 33976,	"roa", Latn, }

m["sd"] = { "Sindhi", 33997,	"inc-snd", {"sd-Arab", "Deva", "Sind", "Khoj"}, translit_module = "translit-redirect", entry_name = { remove_diacritics = KASHIDA .. FATHATAN .. DAMMATAN .. KASRATAN .. FATHA .. DAMMA .. KASRA .. SHADDA .. SUKUN .. SUPERALEF, from = {u(0x0671)}, to  = {u(0x0627)} },	ancestors = {"inc-vra"}, }

m["se"] = { "Northern Sami", 33947,	"smi", Latn, entry_name = {remove_diacritics = MACRON .. DOTBELOW .. "ˈ"}, sort_key = { from = {"á", "č", "đ", "ŋ", "š", "ŧ", "ž"}, to = {"a" .. a, "c" .. a, "d" .. a, "n" .. a, "s" .. a, "t" .. a, "z" .. a}	}, standardChars = "A-PR-VZa-pr-vz0-9ÁáČčĐđŊŋŠšŦŧŽž" .. PUNCTUATION, }

m["sg"] = { "Sango", 33954,	"crp", Latn, ancestors = {"ngb"}, }

m["sh"] = { "Serbo-Croatian", 9301,	"zls", {"Latn", "Cyrl", "Glag"}, entry_name = { remove_diacritics = GRAVE .. ACUTE .. TILDE .. MACRON .. DGRAVE .. INVBREVE, remove_exceptions = {"Ć", "ć", "З́", "з́", "С́", "с́"} -- Latin Ć and Cyrillic С́ },	sort_key = { from = { "č", "ć", "dž", "đ", "lj", "nj", "š", "ś", "ž", "ź", -- Latin "ђ", "з́", "ј", "љ", "њ", "с́", "ћ", "џ" -- Cyrillic },		to = { "c" .. a, "c" .. b, "d" .. a, "d" .. b, "l" .. a, "n" .. a, "s" .. a, "s" .. b, "z" .. a, "z" .. b, "д" .. a, "з" .. a, "и" .. a, "л" .. a, "н" .. a, "с" .. a, "т" .. a, "ч" .. a		} },	wikimedia_codes = {"sh", "bs", "hr", "sr"}, }

-- Sinhalese: Sinh script, transliterated by "si-translit" with override_translit set.
m["si"] = {
	"Sinhalese",
	13267,
	"inc-ins",
	{"Sinh"},
	ancestors = {"elu-prk"},
	translit_module = "si-translit",
	override_translit = true,
}

-- Slovak: sort keys ignore acute, circumflex and diaeresis.
m["sk"] = {
	"Slovak",
	9058,
	"zlw",
	Latn,
	sort_key = {
		remove_diacritics = ACUTE .. CIRC .. DIAER,
	},
}

-- Slovene: entry names strip the listed diacritics and map Ə/ə and Ł/ł to
-- plain E/e and L/l; sort keys strip a further set of diacritics and place
-- č, š, ž after c, s, z.
m["sl"] = {
	"Slovene",
	9063,
	"zls",
	Latn,
	entry_name = {
		remove_diacritics = GRAVE .. ACUTE .. CIRC .. MACRON .. DGRAVE .. INVBREVE .. DOTBELOW,
		from = {"Ə", "ə", "Ł", "ł"},
		to  = {"E", "e", "L", "l"}
	},
	sort_key = {
		remove_diacritics = TILDE .. DOTABOVE .. DIAER .. RINGABOVE .. RINGBELOW .. OGONEK,
		from = {"č", "š", "ž"},
		to = {"c" .. a, "s" .. a, "z" .. a}
	},
}

-- Samoan: Latin script; no additional rules.
m["sm"] = {
	"Samoan",
	34011,
	"poz-pnp",
	Latn,
}

-- Shona: the acute accent is removed from entry names.
m["sn"] = {
	"Shona",
	34004,
	"bnt-sho",
	Latn,
	entry_name = {
		remove_diacritics = ACUTE,
	},
}

-- Somali: three scripts; grave, acute and circumflex are removed from entry names.
m["so"] = {
	"Somali",
	13275,
	"cus-eas",
	{"Latn", "Arab", "Osma"},
	entry_name = {
		remove_diacritics = GRAVE .. ACUTE .. CIRC,
	},
}

-- Albanian: entry names drop the acute; sort keys additionally ignore
-- circumflex, tilde, diaeresis and cedilla.
m["sq"] = {
	"Albanian",
	8748,
	"sqj",
	{"Latn", "Grek", "Elba"},
	entry_name = {
		remove_diacritics = ACUTE,
	},
	sort_key = {
		remove_diacritics = CIRC .. TILDE .. DIAER .. CEDILLA,
	},
}

-- Swazi: grave, acute, circumflex, macron and caron are stripped from entry names.
m["ss"] = {
	"Swazi",
	34014,
	"bnt-ngu",
	Latn,
	entry_name = {
		remove_diacritics = GRAVE .. ACUTE .. CIRC .. MACRON .. CARON,
	},
}

-- Sotho: grave, acute, circumflex, macron and caron are stripped from entry names.
m["st"] = {
	"Sotho",
	34340,
	"bnt-sts",
	Latn,
	entry_name = {
		remove_diacritics = GRAVE .. ACUTE .. CIRC .. MACRON .. CARON,
	},
}

-- Sundanese: Latin and Sund scripts, transliterated by "su-translit".
m["su"] = {
	"Sundanese",
	34002,
	"poz-msa",
	{"Latn", "Sund"},
	ancestors = {"osn"},
	translit_module = "su-translit",
}

-- Swedish: Latin script, with "gmq-osw" listed as its ancestor.
m["sv"] = {
	"Swedish",
	9027,
	"gmq",
	Latn,
	ancestors = {"gmq-osw"},
}

-- Swahili: the trigraph "ng'" sorts as "ng" plus the placement character `a`.
m["sw"] = {
	"Swahili",
	7838,
	"bnt-swh",
	LatnArab,
	sort_key = {
		from = {"ng'"},
		to = {"ng" .. a}
	},
}

-- Tamil: Taml script, transliterated by "ta-translit" with override_translit set.
m["ta"] = {
	"Tamil",
	5885,
	"dra",
	{"Taml"},
	ancestors = {"oty"},
	translit_module = "ta-translit",
	override_translit = true,
}

-- Telugu: Telu script, transliterated by "te-translit" with override_translit set.
m["te"] = {
	"Telugu",
	8097,
	"dra",
	{"Telu"},
	translit_module = "te-translit",
	override_translit = true,
}

-- Tajik: entry names drop grave and acute; each extra Cyrillic letter sorts
-- as its base letter plus the placement character `a`.
m["tg"] = {
	"Tajik",
	9260,
	"ira-swi",
	{"Cyrl", "fa-Arab", "Latn"},
	ancestors = {"pal"}, -- same as "fa", see WT:T:AFA
	translit_module = "tg-translit",
	override_translit = true,
	entry_name = {
		remove_diacritics = GRAVE .. ACUTE,
	},
	sort_key = {
		from = {"ғ", "ё", "ӣ", "қ", "ӯ", "ҳ", "ҷ"},
		to = {"г" .. a, "е" .. a, "и" .. a, "к" .. a, "у" .. a, "х" .. a, "ч" .. a}
	},
}

-- Thai: sort_key is given as the string "Thai-sortkey" rather than an inline table.
m["th"] = {
	"Thai",
	9217,
	"tai-swe",
	{"Thai", "Brai"},
	translit_module = "th-translit",
	sort_key = "Thai-sortkey",
}

-- Tigrinya: shared Ethi script list, transliterated by "Ethi-translit".
m["ti"] = {
	"Tigrinya",
	34124,
	"sem-eth",
	Ethi,
	translit_module = "Ethi-translit",
}

-- Turkmen: the macron is removed from entry names; extra Latin and Cyrillic
-- letters sort as a base letter plus the placement character `a`.
m["tk"] = {
	"Turkmen",
	9267,
	"trk-ogz",
	{"Latn", "Cyrl", "Arab"},
	entry_name = {
		remove_diacritics = MACRON,
	},
	sort_key = {
		from = {
			"ç", "ä", "ž", "ň", "ö", "ş", "ü", "ý", -- Latin
			"ё", "җ", "ң", "ө", "ү", "ә" -- Cyrillic
		},
		to = {
			"c" .. a, "e" .. a, "j" .. a, "n" .. a, "o" .. a, "s" .. a, "u" .. a, "y" .. a,
			"е" .. a, "ж" .. a, "н" .. a, "о" .. a, "у" .. a, "э" .. a
		}
	},
}

-- Tagalog: Latin and Tglg scripts; grave, acute and circumflex are removed
-- from entry names.
m["tl"] = {
	"Tagalog",
	34057,
	"phi",
	{"Latn", "Tglg"},
	translit_module = "tl-translit",
	override_translit = true,
	entry_name = {
		remove_diacritics = GRAVE .. ACUTE .. CIRC,
	},
}

-- Tswana: Latin script; no additional rules.
m["tn"] = {
	"Tswana",
	34137,
	"bnt-sts",
	Latn,
}

-- Tongan: entry names drop the acute; sort keys ignore the macron.
m["to"] = {
	"Tongan",
	34094,
	"poz-pol",
	Latn,
	entry_name = {
		remove_diacritics = ACUTE,
	},
	sort_key = {
		remove_diacritics = MACRON,
	},
}

-- Turkish: dotted_dotless_i is set. In the sort key, dotted "i" gets the
-- placement suffix while dotless "ı" maps to bare "i", which puts "ı" first.
m["tr"] = {
	"Turkish",
	256,
	"trk-ogz",
	Latn,
	ancestors = {"ota"},
	dotted_dotless_i = true,
	sort_key = {
		from = {
			"i", -- Ensure "i" comes after "ı".
			"ç", "ğ", "ı", "ö", "ş", "ü"
		},
		to = {
			"i" .. a,
			"c" .. a, "g" .. a, "i", "o" .. a, "s" .. a, "u" .. a
		}
	},
}

-- Tsonga: Latin script; no additional rules.
m["ts"] = {
	"Tsonga",
	34327,
	"bnt-tsr",
	Latn,
}

-- Tatar: dotted_dotless_i is set. Sort keys handle dotted/dotless i as well as
-- the extra Latin and Cyrillic letters, using placement characters `a` and `b`
-- where two letters share a base.
m["tt"] = {
	"Tatar",
	25285,
	"trk-kbu",
	{"Cyrl", "Latn", "tt-Arab"},
	translit_module = "tt-translit",
	override_translit = true,
	dotted_dotless_i = true,
	sort_key = {
		from = {
			"i", -- Ensure "i" comes after "ı".
			"ä", "ə", "ç", "ğ", "ı", "ñ", "ŋ", "ö", "ɵ", "ş", "ü", -- Latin
			"ә", "ў", "ғ", "ё", "җ", "қ", "ң", "ө", "ү", "һ" -- Cyrillic
		},
		to = {
			"i" .. a,
			"a" .. a, "a" .. b, "c" .. a, "g" .. a, "i", "n" .. a, "n" .. b, "o" .. a, "o" .. b, "s" .. a, "u" .. a,
			"а" .. a, "в" .. a, "г" .. a, "е" .. a, "ж" .. a, "к" .. a, "н" .. a, "о" .. a, "у" .. a, "х" .. a
		}
	},
}

-- "tw" IS TREATED AS "ak", SEE WT:LT

-- Tahitian: Latin script; no additional rules.
m["ty"] = {
	"Tahitian",
	34128,
	"poz-pep",
	Latn,
}

-- Uyghur: three scripts, transliterated by "ug-translit" with override_translit set.
m["ug"] = {
	"Uyghur",
	13263,
	"trk-kar",
	{"ug-Arab", "Latn", "Cyrl"},
	ancestors = {"chg"},
	translit_module = "ug-translit",
	override_translit = true,
}

-- Ukrainian: entry names drop grave and acute; ґ, є, і, ї sort after г, е, и
-- (і and ї share the base и, distinguished by `a` and `b`).
m["uk"] = {
	"Ukrainian",
	8798,
	"zle",
	Cyrl,
	ancestors = {"zle-ort"},
	translit_module = "uk-translit",
	entry_name = {
		remove_diacritics = GRAVE .. ACUTE,
	},
	sort_key = {
		from = {"ґ", "є", "і", "ї"},
		to = {"г" .. a, "е" .. a, "и" .. a, "и" .. b}
	},
	standardChars = "ЄІЇА-ЩЬЮ-щьюяєії" .. PUNCTUATION,
}

-- Urdu: entry names drop kashida and the listed Arabic marks.
m["ur"] = {
	"Urdu",
	1617,
	"inc-hnd",
	{"ur-Arab"},
	ancestors = {"inc-ohi"},
	entry_name = {
		remove_diacritics = KASHIDA .. FATHATAN .. DAMMATAN .. KASRATAN .. FATHA .. DAMMA .. KASRA .. SHADDA .. SUKUN .. NUNGHUNNA,
	},
	-- put Judeo-Urdu (Hebrew-script Urdu) under the category header
	-- U+FB21 HEBREW LETTER WIDE ALEF so that it sorts after Arabic script titles
	sort_key = {
		-- The frontier pattern matches the empty position at the start of the
		-- string when the first character lies in U+05D0..U+05EA.
		from = {"^%f[" .. u(0x5D0) .. "-" .. u(0x5EA) .. "]"},
		to  = {u(0xFB21)},
	},
}

-- Uzbek: the Latin digraphs and modifier letters all sort after "z", in the
-- order listed; ё sorts after е and the remaining Cyrillic letters after я.
m["uz"] = {
	"Uzbek",
	9264,
	"trk-kar",
	LatnCyrlfaArab,
	ancestors = {"chg"},
	translit_module = "uz-translit",
	sort_key = {
		from = {
			"oʻ", "gʻ", "sh", "ch", "ng", -- Latin
			"ё", "ў", "қ", "ғ", "ҳ" -- Cyrillic
		},
		to = {
			"z" .. a, "z" .. b, "z" .. c, "z" .. d, "z" .. e,
			"е" .. a, "я" .. a, "я" .. b, "я" .. c, "я" .. d
		}
	},
}

-- Venda: Latin script; no additional rules.
m["ve"] = {
	"Venda",
	32704,
	"bnt-bso",
	Latn,
}

-- Vietnamese: Latin and Hani scripts; sort_key is given as the string "vi-sortkey".
m["vi"] = {
	"Vietnamese",
	9199,
	"mkh-vie",
	LatnHani,
	ancestors = {"mkh-mvi"},
	sort_key = "vi-sortkey",
}

-- Volapük: Latin script; no additional rules.
m["vo"] = {
	"Volapük",
	36986,
	"art",
	Latn,
}

-- Walloon: sort_key is given as the string "fr-sortkey".
m["wa"] = {
	"Walloon",
	34219,
	"roa-oil",
	Latn,
	ancestors = {"fro"},
	sort_key = "fr-sortkey",
}

-- Wolof: Latin and Arabic scripts; no additional rules.
m["wo"] = {
	"Wolof",
	34257,
	"alv-fwo",
	LatnArab,
}

-- Xhosa: grave, acute, circumflex, macron and caron are stripped from entry names.
m["xh"] = {
	"Xhosa",
	13218,
	"bnt-ngu",
	Latn,
	entry_name = {
		remove_diacritics = GRAVE .. ACUTE .. CIRC .. MACRON .. CARON,
	},
}

-- Yiddish: sort keys reduce pointed letters and the pointed ligature to their
-- unpointed forms.
m["yi"] = {
	"Yiddish",
	8641,
	"gmw",
	{"Hebr"},
	ancestors = {"gmh"},
	translit_module = "yi-translit",
	sort_key = {
		from = {"[אַאָ]", "בּ", "[וֹוּ]", "יִ", "ײַ", "פֿ"},
		to = {"א", "ב", "ו", "י",	"יי", "פ"}
	},
}

-- Yoruba: entry names drop grave, acute and macron; ẹ, gb, ọ, ṣ sort after
-- e, g, o, s respectively.
m["yo"] = {
	"Yoruba",
	34311,
	"alv-yor",
	LatnArab,
	entry_name = {
		remove_diacritics = GRAVE .. ACUTE .. MACRON,
	},
	sort_key = {
		from = {"ẹ", "gb", "ọ", "ṣ"},
		to = {"e" .. a, "g" .. a, "o" .. a, "s" .. a}
	},
}

-- Zhuang: Latin and Hani scripts; sort_key is given as the string "za-sortkey".
m["za"] = {
	"Zhuang",
	13216,
	"tai",
	LatnHani,
	sort_key = "za-sortkey",
}

-- Chinese: six scripts; sort_key is given as the string "zh-sortkey".
m["zh"] = {
	"Chinese",
	7850,
	"zhx",
	{"Hant", "Hans", "Hani", "Latn", "Brai", "Nshu"},
	ancestors = {"ltc"},
	sort_key = "zh-sortkey",
}

-- Zulu: grave, acute, circumflex, macron and caron are stripped from entry names.
m["zu"] = {
	"Zulu",
	10179,
	"bnt-ngu",
	Latn,
	entry_name = {
		remove_diacritics = GRAVE .. ACUTE .. CIRC .. MACRON .. CARON,
	},
}

-- Return the table of language entries, keyed by language code, as the module's value.
return m