-- Module:Lang

--[=[

Lua support for the {{lang}} and {{lang-xx}} templates and replacement of various supporting templates.

]=]

require('Module:No globals'); local p = {};

local getArgs = require ('Module:Arguments').getArgs; local lang_name_table = require ('Module:Language/name/data');

local lang_data = mw.loadData ('Module:Lang/data');							-- language name override and transliteration tool-tip tables

-- Namespace number of the page being rendered; make_category compares it with 0
-- so that only article-space pages are categorized.
-- getCurrentTitle is a function and must be called to obtain the title object.
local namespace = mw.title.getCurrentTitle().namespace;

--[[--< I S _ S E T >--

Returns true if argument is set; false otherwise. Argument is 'set' when it exists (not nil) and is not an empty string.

]]

-- Report whether a value counts as 'set'.
-- var: any value.
-- Returns false for nil and for the empty string; true for everything else.
local function is_set (var)
	if var == nil then
		return false;
	end
	return var ~= '';
end

--[[--< I N _ A R R A Y >--

Whether needle is in haystack

]]

-- Search the sequence part of haystack for needle.
-- needle: value to look for; a nil needle is never found.
-- haystack: table walked with ipairs (not touched when needle is nil).
-- Returns the index of the first element equal to needle, or false when absent.
local function in_array (needle, haystack)
	local position = false;

	if needle ~= nil then
		for index, candidate in ipairs (haystack) do
			if candidate == needle then
				position = index;
				break;
			end
		end
	end

	return position;
end

--[[--< F O R M A T _ I E T F _ T A G >

prettify ietf tags to use recommended subtag formats: code: lower case; script: sentence case; region: upper case; variant: lower case

]]

-- Assemble an IETF language tag from its subtags using the recommended letter case.
-- code: primary language subtag (required); rendered lower case.
-- script: optional script subtag; rendered sentence case.
-- region: optional region subtag; rendered upper case.
-- variant: optional variant subtag; rendered lower case.
-- Subtags that are nil or empty are skipped.
-- Returns the formatted subtags joined with '-'.
local function format_ietf_tag (code, script, region, variant)
	local out = {};

	table.insert (out, code:lower ());

	if is_set (script) then
		-- lower-case everything, then upper-case the leading letter; assigning to a
		-- single local discards gsub's second return value (the substitution count)
		local sentence_case = script:lower ():gsub ('^%a', string.upper, 1);
		table.insert (out, sentence_case);
	end

	if is_set (region) then
		table.insert (out, region:upper ());
	end

	if is_set (variant) then
		table.insert (out, variant:lower ());
	end

	return table.concat (out, '-');
end

--[[--< G E T _ I E T F _ P A R T S >--

extracts and returns IETF language tag parts: primary language subtag (required) - 2 or 3 character IANA language code script subtag - four character IANA script code region subtag - two-letter or three digit IANA region code variant subtag - four digit or 5-8 alnum variant code

in any one of these forms: lang, lang-variant, lang-script, lang-script-variant, lang-region, lang-region-variant, lang-script-region, lang-script-region-variant. Each of lang, script, region, and variant, when used, must be valid.

returns four values. Valid parts are returned as themselves; omitted parts are returned as empty strings, invalid parts are returned as nil.

see http://www.rfc-editor.org/rfc/bcp/bcp47.txt section 2.1

]]

-- Split an IETF language tag into its subtags and validate each one.
-- source: the tag text, e.g. 'll', 'll-Ssss', 'll-RR', 'll-Ssss-RR-variant'.
-- Returns four values: code, script, region, variant.
--   * a valid subtag is returned as itself (code is returned lower case)
--   * an omitted script, region or variant is returned as an empty string
--   * an invalid subtag, and every subtag after it, is returned as nil
--   * a missing, malformed or unknown-language tag returns nil for all four
local function get_ietf_parts (source)
	if not is_set (source) then
		return nil, nil, nil, nil;
	end

	-- capture pattern for each kind of optional subtag, and the result field it fills
	local subtag = {
		script = {field = 'script', pattern = '%-(%a%a%a%a)'},					-- four letters
		region_alpha = {field = 'region', pattern = '%-(%a%a)'},				-- two letters
		region_digit = {field = 'region', pattern = '%-(%d%d%d)'},				-- three digits
		variant_digit = {field = 'variant', pattern = '%-(%d%d%d%d)'},			-- four digits
		variant_alnum = {field = 'variant', pattern = '%-([%a%d][%a%d][%a%d][%a%d][%a%d][%a%d]?[%a%d]?[%a%d]?)'},	-- 5-8 alnum characters
	};

	-- accepted tag shapes, tried in this order; the language subtag is implied at the front
	local forms = {
		{'script', 'region_alpha', 'variant_digit'},							-- ll-Ssss-RR-variant (variant is 4 digits)
		{'script', 'region_digit', 'variant_digit'},							-- ll-Ssss-DDD-variant (variant is 4 digits)
		{'script', 'region_alpha', 'variant_alnum'},							-- ll-Ssss-RR-variant (variant is 5-8 alnum characters)
		{'script', 'region_digit', 'variant_alnum'},							-- ll-Ssss-DDD-variant (variant is 5-8 alnum characters)
		{'script', 'variant_digit'},											-- ll-Ssss-variant (variant is 4 digits)
		{'script', 'variant_alnum'},											-- ll-Ssss-variant (variant is 5-8 alnum characters)
		{'region_alpha', 'variant_digit'},										-- ll-RR-variant (variant is 4 digits)
		{'region_digit', 'variant_digit'},										-- ll-DDD-variant (variant is 4 digits)
		{'region_alpha', 'variant_alnum'},										-- ll-RR-variant (variant is 5-8 alnum characters)
		{'region_digit', 'variant_alnum'},										-- ll-DDD-variant (variant is 5-8 alnum characters)
		{'variant_digit'},														-- ll-variant (variant is 4 digits)
		{'variant_alnum'},														-- ll-variant (variant is 5-8 alnum characters)
		{'script', 'region_alpha'},												-- ll-Ssss-RR
		{'script', 'region_digit'},												-- ll-Ssss-DDD
		{'script'},																-- ll-Ssss
		{'region_alpha'},														-- ll-RR
		{'region_digit'},														-- ll-DDD
		{},																		-- ll
	};

	local code;
	local parts = {script = '', region = '', variant = ''};					-- omitted subtags stay as empty strings

	for _, form in ipairs (forms) do
		local pattern = {'^(%a%a%a?)'};											-- two or three letter language subtag
		for _, kind in ipairs (form) do
			table.insert (pattern, subtag[kind].pattern);
		end
		table.insert (pattern, '$');

		local captures = {source:match (table.concat (pattern))};
		if captures[1] then
			code = captures[1];
			for i, kind in ipairs (form) do
				parts[subtag[kind].field] = captures[i + 1];
			end
			break;
		end
	end

	if not code then
		return nil, nil, nil, nil;												-- don't know what we got but it is malformed
	end

	code = code:lower ();														-- ensure that we use and return lower case version of this
	if not (lang_data.override[code] or lang_name_table.lang[code]) then
		return nil, nil, nil, nil;												-- invalid language code, don't know about the others
	end

	local script, region, variant = parts.script, parts.region, parts.variant;

	if is_set (script) and not lang_name_table.script[script:lower ()] then
		return code, nil, nil, nil;												-- language code ok, invalid script
	end

	if is_set (region) and not lang_name_table.region[region:lower ()] then
		return code, script, nil, nil;
	end

	if is_set (variant) then
		local variant_data = lang_name_table.variant[variant:lower ()];
		-- the variant must be known and must list this language code among its prefixes
		if not variant_data or not in_array (code, variant_data['prefixes']) then
			return code, script, region, nil;
		end
	end

	return code, script, region, variant;										-- return the good bits
end

--[=[-< M A K E _ E R R O R _ M S G >--

]=]

-- Build the text returned to the page when a template call cannot be rendered.
-- msg: message body, placed after the ' error: ' prefix.
-- nocat: accepted from callers but not read here.
-- Returns the concatenated message string.
-- NOTE(review): these literals look as if markup was stripped from them, and the
-- original carries a commented-out guard that would have limited categorization
-- to article space; confirm the intended strings against the upstream module.
local function make_error_msg (msg, nocat)
	local pieces = {' error: '};

	pieces[#pieces + 1] = msg;
	pieces[#pieces + 1] = ' (help)';
	pieces[#pieces + 1] = ' ';

	return table.concat (pieces);
end

--[=[-< M A K E _ W I K I L I N K >

Makes a wikilink; when both link and display text are provided, returns a wikilink in the form [[L|D]]; if only link is provided, returns a wikilink in the form [[L]]; if neither is provided or link is omitted, returns an empty string.

]=]

-- Build wikilink markup.
-- link: link target; when nil or empty the result is an empty string.
-- display: optional display text.
-- Returns '[[link|display]]' when both are set, '[[link]]' when only link is set,
-- otherwise ''.
local function make_wikilink (link, display)
	if not is_set (link) then
		return '';
	end

	if is_set (display) then
		return table.concat ({'[[', link, '|', display, ']]'});
	end

	return table.concat ({'[[', link, ']]'});
end

--[[--< M A K E _ T E X T _ S P A N >--

]]

-- Wrap text in a <span> element that carries the language tag.
-- code: IETF language tag written into the lang attribute.
-- text: the text to display.
-- rtl: 'yes' adds dir="rtl" and appends a left-to-right mark after the span.
-- italic: 'yes' wraps text in wiki italic markup; any other value leaves it upright.
-- size: optional css font-size value; adds a style attribute when set.
-- Returns the assembled markup string.
-- NOTE(review): the opening-tag literals were missing from this block and have
-- been reconstructed from the parameter list and the surviving comments; confirm
-- the exact attribute markup against the upstream module.
local function make_text_span (code, text, rtl, italic, size)
	local span = {};

	table.insert (span, '<span lang="');										-- open <span> tag
	table.insert (span, code);													-- language attribute
	table.insert (span, '"');
	if 'yes' == rtl then
		table.insert (span, ' dir="rtl"');										-- for right to left languages
	end
	if is_set (size) then
		table.insert (span, table.concat ({' style="font-size:', size, ';"'}));
	end
	table.insert (span, '>');													-- close the opening span tag

	if 'yes' == italic then
		table.insert (span, table.concat ({"''", text, "''"}));					-- text with italic markup
	else
		table.insert (span, text);												-- DEFAULT: text is not italicized
	end

	table.insert (span, '</span>');												-- close the span
	if 'yes' == rtl then
		table.insert (span, '&lrm;');											-- make sure the browser knows that we're at the end of the rtl
	end

	return table.concat (span);													-- put it all together and done
end

--[[--< M A K E _ C A T E G O R Y >

]]

-- Build the category link for a page that contains text in a given language.
-- code: language code; 'en'/'eng' and 'art' get fixed category wording.
-- language_name: name used in the category title for every other code.
-- nocat: any truthy value suppresses categorization.
-- Returns '' outside namespace 0 or when nocat is truthy; otherwise
-- '[[Category:Articles containing <name>-language text]]'.
local function make_category (code, language_name, nocat)
	if (0 ~= namespace) or nocat then											-- only categorize in article space
		return '';																-- return empty string for concatenation
	end

	local cat = {};

	table.insert (cat, '[[Category:Articles containing ');
	if ('en' == code) or ('eng' == code) then
		table.insert (cat, 'explicitly cited English');
	elseif 'art' == code then
		table.insert (cat, 'constructed');
	else
		table.insert (cat, language_name);
	end
	table.insert (cat, '-language text]]');

	return table.concat (cat);
end

--[[--< M A K E _ T R A N S L I T >

return translit ... else return empty string

The value |script= is not used in {{transl}} for this purpose; instead it uses |code. Because language scripts are listed in the {{transl}} switches they are included in the data tables. The script parameter is introduced at {{Language with name and transliteration}}. If |script= is set, this function uses it in preference to code.

]]

-- Wrap transliterated text in a span whose title attribute is a tool tip naming
-- the transliteration standard, script or language.
-- code: language code, used for table lookups when script does not apply.
-- language_name: used for the generic '<language_name> transliteration' tool tip.
-- translit: the transliterated text.
-- std: optional transliteration standard; a key of lang_data.translit_title_table.
-- script: optional script; used in preference to code when set.
-- Returns the markup string, or '' when std (or the std/script pair) is not in
-- the table, which callers treat as an error.
-- NOTE(review): the leading italic markup and the closing </span> were missing
-- from the literals in this block and have been reconstructed; confirm against
-- the upstream module.
local function make_translit (code, language_name, translit, std, script)
	local tout = {};
	local title_table = lang_data.translit_title_table;						-- table of transliteration standards and the language codes and scripts that apply to those standards

	table.insert (tout, "''<span title=\"");

	if not is_set (std) and not is_set (script) then							-- when neither standard nor script specified
		table.insert (tout, language_name);										-- write a generic tool tip
		table.insert (tout, ' transliteration');
	elseif is_set (std) and is_set (script) then								-- when both are specified
		local std_titles = title_table[std];
		if std_titles and std_titles[script] then								-- and legitimate
			table.insert (tout, std_titles[script]);							-- add the appropriate text to the tool tip
		else
			return '';															-- one or both invalid, set up for an error message
		end
	elseif is_set (std) then													-- script not set, use language code
		local std_titles = title_table[std];
		if not std_titles then
			return '';															-- invalid standard, set up for an error message
		end
		if std_titles[code] then
			table.insert (tout, std_titles[code]);
		else																	-- code doesn't match
			table.insert (tout, std_titles['default']);							-- so use the standard's default
		end
	else																		-- here if script set but std not set
		local no_std_titles = title_table['NO_STD'];
		if no_std_titles[script] then
			table.insert (tout, no_std_titles[script]);							-- use script if set
		elseif no_std_titles[code] then
			table.insert (tout, no_std_titles[code]);							-- use language code
		else
			table.insert (tout, language_name);									-- write a generic tool tip
			table.insert (tout, ' transliteration');
		end
	end

	table.insert (tout, '" class="Unicode" style="white-space:normal; text-decoration:none">');
	table.insert (tout, translit);
	table.insert (tout, "</span>''");

	return table.concat (tout);
end

--[[--< L A N G >--


 * code = the BCP47 language code
 * text = the displayed text in language specified by code
 * rtl = boolean true identifies the language specified by code as a right-to-left language
 * size = css keyword appropriate for use with css font-size:
 * nocat = boolean true inhibits normal categorization; error categories are not affected

]]

-- Entry point for the lang template: renders text inside a language-tagged span
-- followed by the matching category link.
-- frame: the Scribunto frame; arguments are read through getArgs. The arguments
--   read here are code, text, rtl, italic, size and nocat.
-- Returns the rendered markup, or an error message when the language tag is
-- invalid, text is missing, or text already carries italic markup.
-- NOTE(review): the error messages begin with ': ', which suggests a template
-- name was stripped from the front of each literal; confirm against upstream.
function p.lang (frame)
	local args = getArgs (frame);
	local out = {};
	local language_name;
	local code, script, region, variant = get_ietf_parts (args.code);

	-- every subtag must be non-nil; omitted subtags come back as '' which is truthy
	if not (code and script and region and variant) then
		return make_error_msg (table.concat ({': unknown language code: ', args.code or 'missing'}), args.nocat);
	end

	if not is_set (args.text) then
		return make_error_msg (': no text', args.nocat);
	end

	if not is_set (args.italic) then
		args.italic = 'no';														-- DEFAULT for this template is to not italicize
	end

	if args.text:match ("%f[\']\'\'[^\']+\'\'%f[^\']") or args.text:match ("\'\'\'\'\'[^\']+\'\'\'\'\'") then	-- italic but not bold, or bold italic
		return make_error_msg (': text has italic markup', args.nocat);
	end

	args.code = format_ietf_tag (code, script, region, variant);				-- format to recommended subtag styles

	if lang_data.override[code] then
		language_name = lang_data.override[code][1];
	elseif lang_name_table.lang[code] then
		language_name = lang_name_table.lang[code][1];							-- table entries sometimes have multiple names, always take the first one
	end

	table.insert (out, make_text_span (args.code, args.text, args.rtl, args.italic, args.size));
	table.insert (out, make_category (args.code, language_name, args.nocat));

	return table.concat (out);													-- put it all together and done
end

--[[--< L A N G _ X X >


 * code = the BCP47 language code
 * text = the displayed text in language specified by code
 * link = boolean true (default) links language specified by code to associated language article
 * rtl = boolean true identifies the language specified by code as a right-to-left language
 * nocat = boolean true inhibits normal categorization; error categories are not affected
 * italic = boolean true (default) renders displayed text in italic font
 * lit = text that is a literal translation of text

for those templates that support transliteration:
 * translit = text that is a transliteration of text
 * std = the standard that applies to the transliteration
 * script = ISO 15924 script name; falls back to code

]]

-- Entry point for the lang-xx templates: renders the language name (linked
-- unless link is 'no'), the text in a language-tagged span, an optional
-- transliteration, an optional literal translation, and the category link.
-- frame: the Scribunto frame; arguments are read through getArgs. The arguments
--   read here are code, text, link, rtl, nocat, italic, lit, translit, std and
--   script. size is cleared because these templates do not support it.
-- Returns the rendered markup, or an error message when the language tag is
-- invalid, text is missing, text already carries italic markup, or the
-- transliteration standard/script is not recognised.
-- NOTE(review): the error messages begin with ': ', which suggests a template
-- name was stripped from the front of each literal; confirm against upstream.
function p.lang_xx (frame)
	local args = getArgs (frame);

	if not is_set (args.italic) then
		args.italic = 'yes';													-- DEFAULT for these templates is to italicize
	end
	args.size = nil;															-- size not supported here

	local out = {};
	local language_name;
	local code, script, region, variant = get_ietf_parts (args.code);
	local translit_script;
	local translit;
	local translit_title;

	-- every subtag must be non-nil; omitted subtags come back as '' which is truthy
	if not (code and script and region and variant) then
		return make_error_msg (table.concat ({': unknown language code: ', args.code or 'missing'}), args.nocat);
	end

	if not is_set (args.text) then
		return make_error_msg (': no text', args.nocat);
	end

	if args.text:match ("%f[\']\'\'[^\']+\'\'%f[^\']") or args.text:match ("\'\'\'\'\'[^\']+\'\'\'\'\'") then	-- italic but not bold, or bold italic
		return make_error_msg (': text has italic markup', args.nocat);
	end

	args.code = format_ietf_tag (code, script, region, variant);				-- format to recommended subtag styles

	if lang_data.override[code] then
		language_name = lang_data.override[code][1];
	elseif lang_name_table.lang[code] then
		language_name = lang_name_table.lang[code][1];							-- table entries sometimes have multiple names, always take the first one
	end

	translit_script = args.script or language_name;								-- for translit prefer script over language

	if 'no' == args.link then
		table.insert (out, language_name);										-- language name without wikilink
	else
		table.insert (out, make_wikilink (language_name .. ' language', language_name));	-- language name with wikilink
	end
	table.insert (out, ': ');													-- separator

	table.insert (out, make_text_span (args.code, args.text, args.rtl, args.italic, args.size));

	if is_set (args.translit) then												-- transliteration
		table.insert (out, ', ');
		-- NOTE(review): existence is checked for the language-name title while the
		-- link below targets translit_script; confirm which title is intended.
		translit_title = mw.title.makeTitle (0, 'Romanization of ' .. language_name);
		if translit_title and translit_title.exists and ('no' ~= args.link) then
			-- parenthesized so that the fallback applies to the name, not to the concatenation
			table.insert (out, make_wikilink ('Romanization of ' .. (translit_script or language_name), 'translit.'));
		else
			table.insert (out, 'translit. ');
		end
		table.insert (out, ' ');

		translit = make_translit (args.code, language_name, args.translit, args.std, args.script);
		if is_set (translit) then
			table.insert (out, translit);
		else
			return make_error_msg (table.concat ({': invalid translit std: \'', args.std or 'missing', '\' or script: \'', args.script or 'missing', '\''}), args.nocat);
		end
	end

	if is_set (args.lit) then													-- literal translation
		table.insert (out, ', ');
		if 'no' == args.link then
			table.insert (out, 'lit. ');
		else
			table.insert (out, make_wikilink ('Literal translation', 'lit.'));
		end
		table.insert (out, " '");
		table.insert (out, args.lit);
		table.insert (out, "'");
	end

	table.insert (out, make_category (args.code, language_name, args.nocat));

	return table.concat (out);													-- put it all together and done
end

return p;																	-- export the module table holding lang and lang_xx