Module:WikidataIB

-- Module to implement use of a blacklist and whitelist for infobox fields.
-- It can take a named parameter |qid, which is the Wikidata ID for the article. This will not normally be used.
-- Fields in the blacklist are never to be displayed, i.e. the module must return nil in all circumstances.
-- Fields in the whitelist return the local value if it exists, or the Wikidata value otherwise.
-- The name of the field that this function is called from is passed in named parameter |name.
-- The name is compulsory when the blacklist or whitelist is used, so the module returns nil if it is not supplied.
-- The blacklist is passed in named parameter |suppressfields.
-- The whitelist is passed in named parameter |fetchwikidata.

-- Table that collects the public entry points of this module.
local p = {}

-- Default (English) user-facing strings and formatting helpers.
-- These are passed to loadI18n right after this table, presumably so that a
-- localised sub-page can override them -- confirm against Module:i18n.
local i18n = {
	errors = {
		["property-not-found"] = "Property not found.",
		["entity-not-found"] = "Wikidata entity not found.",
		["unknown-claim-type"] = "Unknown claim type.",
		["unknown-entity-type"] = "Unknown entity type.",
		["qualifier-not-found"] = "Qualifier not found.",
		["site-not-found"] = "Wikimedia project not found.",
		["unknown-datetime-format"] = "Unknown datetime format.",
		["local-article-not-found"] = "Article is available on Wikidata, but not on Wikipedia",
	},
	-- month names, indexed 1..12
	months = {
		"January",
		"February",
		"March",
		"April",
		"May",
		"June",
		"July",
		"August",
		"September",
		"October",
		"November",
		"December",
	},
	century = "century",
	BC = "BC",
	BCE = "BCE",
	-- ordinal suffixes: entries 1..3 are the special cases, "default" is the rest
	ordinal = {
		"st",
		"nd",
		"rd",
		default = "th",
	},
	filespace = "File",
	editonwikidata = "Edit this on Wikidata",
	-- wraps an already rendered date for display as a "latest date" qualifier
	latestdatequalifier = function (date)
		return "before " .. date
	end,
}

require("Module:i18n").loadI18n("Module:WikidataIB/i18n", i18n)

-------------------------------------------------------------------------------
-- Private functions
-------------------------------------------------------------------------------
--
-------------------------------------------------------------------------------
-- makeOrdinal needs to be internationalised along with the i18n table:
-- it takes a cardinal number as a numeric and returns the ordinal as a string.
-- English needs the exceptions 1st, 2nd, 3rd, 21st, .. 31st, etc.,
-- while 11, 12, 13 (and 111, 112, ...) keep the default 'th'.
local function makeOrdinal (cardinal)
	local lastTwo = cardinal % 100
	local lastOne = cardinal % 10
	local suffix

	if lastTwo == 11 or lastTwo == 12 or lastTwo == 13 then
		-- the "teens" never take st/nd/rd
		suffix = i18n.ordinal.default
	elseif lastOne == 1 then
		suffix = i18n.ordinal[1]
	elseif lastOne == 2 then
		suffix = i18n.ordinal[2]
	elseif lastOne == 3 then
		suffix = i18n.ordinal[3]
	else
		suffix = i18n.ordinal.default
	end

	return tostring(cardinal) .. suffix
end

-------------------------------------------------------------------------------
-- formatDate takes a datetime of the usual format from mw.wikibase.entity:formatPropertyValues
-- like "1 August 30 BCE" as parameter 1
-- and formats it according to the df (date format) and bc parameters
-- df = ["dmy" / "mdy" / "y"] default will be "dmy"
-- bc = ["BC" / "BCE"] default will be "BCE"
-- Returns the formatted date as a string; an input without any letters or
-- digits yields the empty string instead of raising an error.
-- first the local version
local format_Date = function(datetime, dateformat, bc)
	-- Non-breaking space as an explicit HTML entity, so that the "nbsp"
	-- separators stay visible and unambiguous in the source.
	local nbsp = "&nbsp;"

	datetime = datetime or "1 August 30 BCE" -- in case of nil value
	-- chop off multiple values and/or any hours, mins, etc.
	-- keep anything before punctuation - we just want a single date.
	-- string.match returns nil when nothing matches, so fall back to "".
	local dateval = string.match(datetime, "[%w ]+") or ""

	dateformat = string.lower(dateformat or "dmy") -- default to dmy

	bc = string.upper(bc or "") -- can't use nil for bc
	-- we only want to accept two possibilities: BC or default to BCE
	local era
	if bc == "BC" then
		era = nbsp .. i18n["BC"] -- prepend a non-breaking space.
	else
		era = nbsp .. i18n["BCE"]
	end

	local postchrist = true -- start by assuming no BCE
	local dateparts = {}
	for word in string.gmatch(dateval, "%w+") do
		if word == "BCE" or word == "BC" then -- **internationalise later**
			postchrist = false
		else
			-- we'll keep the parts that are not 'BCE' in a table
			dateparts[#dateparts + 1] = word
		end
	end
	if postchrist then era = "" end -- set AD dates to no suffix **internationalise later**

	local sep = nbsp -- separator is nbsp
	local fdate = table.concat(dateparts, " ") -- formatted date defaults to same order as input

	-- if we have day month year, check dateformat
	if #dateparts == 3 then
		if dateformat == "y" then
			fdate = dateparts[3]
		elseif dateformat == "mdy" then
			fdate = dateparts[2] .. sep .. dateparts[1] .. "," .. sep .. dateparts[3]
		end
	elseif #dateparts == 2 and dateformat == "y" then
		-- month and year only: the year is the second part
		fdate = dateparts[2]
	end

	return fdate .. era
end

-------------------------------------------------------------------------------
-- parseParam takes a parameter, e.g. from the list of frame arguments,
-- and makes "false", "no", and "0" (in any letter case) into the (boolean) false
-- it makes the empty string and nil into the (boolean) value passed as default
-- allowing the parameter to be true or false by default.
-- Any other non-empty value yields true.
local parseParam = function(param, default)
	if param and param ~= "" then
		-- tostring() lets numeric input such as 0 be handled like "0";
		-- lower() must be *called* to obtain the lower-cased text.
		param = tostring(param):lower()
		if (param == "false") or (param == "no") or (param == "0") then
			return false
		else
			return true
		end
	else
		return default
	end
end

-------------------------------------------------------------------------------
-- The label in a Wikidata item is subject to vulnerabilities
-- that an attacker might try to exploit.
-- It needs to be 'sanitised' by removing any wikitext before use.
-- If it doesn't exist, just return the id for the item
local labelOrId = function (id)
	local rawLabel = mw.wikibase.label(id)
	if not rawLabel then
		-- no label available: fall back to the id itself
		return id
	end
	return mw.text.nowiki(rawLabel)
end

-------------------------------------------------------------------------------
-- sourced takes a table representing a statement that may or may not have references
-- it counts how many references render to text that does not contain "Wikipedia"
-- it returns a boolean = true if there is at least one such reference.
local sourced = function(claim)
	local references = claim.references
	if not references then
		-- a statement without references is never sourced
		return false
	end

	local usable = 0
	for _, reference in pairs(references) do
		-- the rendered text of the reference is handy when debugging
		local rendered = mw.wikibase.renderSnaks(reference.snaks)
		if rendered:find("Wikipedia") == nil then
			usable = usable + 1
		end
	end
	return usable > 0
end

-------------------------------------------------------------------------------
-- parseInput processes the Q-id, the blacklist and the whitelist
-- if an input parameter is supplied, it returns that and ends the call.
-- Parameters:
--   frame       - the frame; reads args.qid, args.suppressfields, args.fetchwikidata, args.name
--   input_parm  - the locally supplied value (may be nil or blank)
--   property_id - the Wikidata property to look up, e.g. "P569"
-- Returns three values:
--   true,  entity, claims     when the value has to be fetched from Wikidata
--   false, input_parm, nil    when the (trimmed, non-blank) local value is to be used
--   false, nil, nil           when nothing is to be displayed
local parseInput = function(frame, input_parm, property_id)
	-- Field names are looked up as plain text (4th argument of string.find),
	-- so names containing pattern characters such as "-" match literally.
	local function listed(list, name)
		return list:find(name, 1, true) ~= nil
	end

	-- There may be a local parameter supplied, if it's blank, set it to nil
	input_parm = mw.text.trim(input_parm or "")
	if input_parm == "" then input_parm = nil end

	-- can take a named parameter |qid which is the Wikidata ID for the article.
	-- This will not normally be used because it's an expensive call.
	local qid = frame.args.qid
	if qid == "" then qid = nil end

	-- The blacklist is passed in named parameter |suppressfields
	local blacklist = frame.args.suppressfields

	-- The whitelist is passed in named parameter |fetchwikidata
	local whitelist = frame.args.fetchwikidata

	-- The name of the field that this function is called from is passed in named parameter |name
	-- A blank name is treated the same as a missing one.
	local fieldname = frame.args.name
	if fieldname == "" then fieldname = nil end

	if blacklist then
		-- The name is compulsory when blacklist is used, so return nil if it is not supplied
		if not fieldname then return false, nil, nil end
		-- If this field is on the blacklist, then return nil
		if listed(blacklist, fieldname) then return false, nil, nil end
	end

	-- If we got this far then we're not on the blacklist
	-- The blacklist overrides any locally supplied parameter as well
	-- If a non-blank input parameter was supplied return it
	if input_parm then return false, input_parm, nil end

	-- Otherwise see if this field is on the whitelist.
	-- 'ALL' whitelists every field; otherwise a name is required.
	local whitelisted = false
	if whitelist then
		if whitelist == 'ALL' then
			whitelisted = true
		elseif fieldname then
			whitelisted = listed(whitelist, fieldname)
		end
	end
	if not whitelisted then
		-- not on the whitelist, and there is no local value either
		return false, nil, nil
	end

	-- See what's on Wikidata:
	local entity = mw.wikibase.getEntityObject(qid)
	if entity and entity.claims then
		local props = entity.claims[property_id]
		if props and props[1] then
			return true, entity, props
		end
	end
	-- no property on Wikidata
	return false, nil, nil
end

-------------------------------------------------------------------------------
-- _getvalue renders the claims in props as wikitext.
-- Parameters:
--   frame      - the frame; its args supply the display options read below
--   entity     - the Wikidata entity the claims belong to
--   props      - the list of claims for propertyID (must have at least one entry)
--   filter     - function(claim) returning true for claims that may be shown
--   propertyID - the property being rendered, e.g. "P569"
-- Returns the rendered string, or nil when no claim qualified.
local function _getvalue(frame, entity, props, filter, propertyID)
	local args = frame.args
	local nbsp = "&nbsp;"

	-- any double-quotes " are stripped out, so that spaces may be passed, e.g. |sep=" - "
	local function unquoted(value)
		return (string.gsub(value or "", '"', ''))
	end

	-- onlysourced is a boolean passed to return only values sourced to other than Wikipedia
	-- if nothing or an empty string is passed set it true
	-- if "false" or "no" or "0" is passed set it false
	local onlysrc = parseParam(args.onlysourced, true)

	-- noicon is a boolean passed to suppress the trailing "edit at Wikidata" icon
	-- for use when the value is processed further by the infobox
	-- if nothing or an empty string is passed set it false
	local noic = parseParam(args.noicon, false)

	-- wdlinks is a boolean passed to enable links to Wikidata when no article exists
	-- if nothing or an empty string is passed set it false
	local wdl = parseParam(args.wdlinks, false)

	-- sorted is a boolean passed to enable sorting of the values returned
	-- if nothing or an empty string is passed set it false
	local sorted = parseParam(args.sorted, false)

	-- separator is a string that is used to separate multiple returned values
	-- if nothing or an empty string is passed set it to the default
	local sepdefault = ", " -- **internationalise later**
	local separator = unquoted(args.sep)
	if #separator == 0 then separator = sepdefault end

	-- list is a string that may be "", "hlist" or "ubl"
	-- this controls whether multiple values are output as comma-separated
	-- as a horizontal list (hlist) or unbulleted list (ubl)
	local list = args.list or ""
	if list ~= "hlist" and list ~= "ubl" then list = "" end

	-- prefix / postfix are put before / after each string-typed value
	local prefix = unquoted(args.prefix)
	local postfix = unquoted(args.postfix)

	-- linkprefix / linkpostfix are put before / after each string-typed value
	-- to build a link target; linking is enabled by a non-empty linkprefix
	local lprefix = unquoted(args.linkprefix)
	local lpostfix = unquoted(args.linkpostfix)

	-- maxvals determines how many items may be returned when multiple values
	-- are available; 0 (or anything non-numeric) means no limit
	local maxvals = tonumber(args.maxvals) or 0

	-- So now we have something to return: deal with:
	-- (1) ["datatype"] = "wikibase-item";
	-- (2) ["datatype"] = "time";
	-- (3) ["datatype"] = "commonsMedia", "external-id", "string", "url"
	-- (4) ["datatype"] = "quantity"
	-- (5) ["datatype"] = "globe-coordinate", or anything else

	-- getContentLanguage is a function and has to be called
	local lang = mw.language.getContentLanguage():getCode()
	local thisQid = entity.id
	-- table 'out' is going to store the return value(s):
	local out = {}
	-- NOTE(review): the image name "Blue pencil.svg" is an assumption; confirm
	-- which file the "edit on Wikidata" icon is meant to use.
	local icon = " [[" .. i18n["filespace"] .. ":Blue pencil.svg |frameless |text-top |10px |alt=" .. i18n["editonwikidata"] .. "|link=https://www.wikidata.org/wiki/" .. thisQid .. "?uselang=" .. lang .. "#" .. propertyID .. "|" .. i18n["editonwikidata"] .. "]]"
	local datatype = props[1].mainsnak.datatype

	-- ipairs keeps the claims in their stored order, which matters for maxvals
	for _, v in ipairs(props) do
		local snak = v.mainsnak
		local datavalue = snak.datavalue
		datavalue = datavalue and datavalue.value
		if (not filter(v)) or (onlysrc and not sourced(v)) then
			-- do nothing; either rejected by the filter,
			-- or isn't sourced when onlysourced=true
		elseif snak.snaktype == "somevalue" then -- check for value is unknown
			out[#out + 1] = "Unknown"
		elseif snak.snaktype == "novalue" then -- check for value is none
			-- don't return a value for this

		-- data type is a wikibase item:
		elseif datatype == "wikibase-item" then
			-- it's wiki-linked value, so output as link if possible
			local qnumber = "Q" .. datavalue["numeric-id"]
			local sitelink = mw.wikibase.sitelink(qnumber)
			local label = labelOrId(qnumber)
			if sitelink then
				out[#out + 1] = "[[" .. sitelink .. "|" .. label .. "]]"
			else
				-- no sitelink, so check first for a redirect with that label
				-- mw.title.new returns nil for text that is not a valid title
				local artitle = mw.title.new(label, 0)
				if artitle and artitle.id > 0 then
					if artitle.isRedirect then
						-- no sitelink, but there's a redirect with the same title as the label
						-- let's link to that
						out[#out + 1] = "[[" .. label .. "]]"
					else
						-- no sitelink and not a redirect but an article exists with the same title as the label
						-- that's probably a dab page, so output the plain label
						out[#out + 1] = label
					end
				elseif wdl then
					-- no article or redirect with the same title as the label:
					-- show that there's a Wikidata entry available
					out[#out + 1] = "[[:d:" .. qnumber .. "|" .. label .. "]]"
				else
					-- no wikidata links required, so just give the plain label
					out[#out + 1] = label
				end
			end
			-- If the property has a qualifier of latest date, add that in all cases:
			if v.qualifiers then
				local quals = v.qualifiers["P1326"] -- latest date qualifier
				if quals then
					out[#out] = out[#out] .. " (" .. i18n.latestdatequalifier(mw.wikibase.renderSnaks(quals)) .. ")"
				end
			end

		-- data type is time:
		elseif datatype == "time" then
			-- it's a date value, so output according to formatting preferences
			local timestamp = datavalue.time
			-- A year can be stored like this: "+1872-00-00T00:00:00Z",
			-- which is processed here as if it were the day before "+1872-01-01T00:00:00Z",
			-- and that's the last day of 1871, so the year is wrong.
			-- So fix the month 0, day 0 timestamp to become 1 January instead:
			timestamp = timestamp:gsub("%-00%-00T", "-01-01T")
			local dateprecision = datavalue.precision
			local beforeEra = timestamp:sub(1, 1) == "-" -- leading "-" marks a BCE date
			local fpvdate = tonumber(timestamp:sub(2, 5))
			local fdate
			if dateprecision >= 9 then -- 9 is year precision
				local dateformat = "y"
				if dateprecision >= 10 then -- prepend month
					fpvdate = i18n.months[tonumber(timestamp:sub(7, 8))] .. " " .. fpvdate
					dateformat = args.df
					if dateprecision >= 11 then -- prepend day
						fpvdate = tonumber(timestamp:sub(10, 11)) .. " " .. fpvdate
					end
				end
				-- mark BCE for every precision handled here, year-only dates included
				if beforeEra then
					fpvdate = fpvdate .. " BCE"
				end
				fdate = format_Date(fpvdate, dateformat, args.bc)
			elseif dateprecision == 7 then -- century
				local century = math.floor((fpvdate - 1) / 100) + 1
				fdate = makeOrdinal(century) .. " " .. i18n["century"]
				if beforeEra then -- date is BC
					local bc = string.upper(args.bc or "") -- can't use nil for bc
					-- we only want to accept two possibilities: BC or default to BCE
					if bc == "BC" then
						fdate = fdate .. nbsp .. i18n["BC"] -- use non-breaking space.
					else
						fdate = fdate .. nbsp .. i18n["BCE"]
					end
				end
			else
				-- date precisions 0 to 6 (billion years to millenium) and 8: TODO
			end
			-- fdate is nil for unhandled precisions, which adds nothing
			out[#out + 1] = fdate

		-- data types which are strings:
		elseif datatype == "commonsMedia" or datatype == "external-id" or datatype == "string" or datatype == "url" then
			-- all have mainsnak.datavalue.value as string
			if lprefix ~= "" then
				-- link requested: target is built from linkprefix/linkpostfix,
				-- the displayed text from prefix/postfix
				out[#out + 1] = "[[" .. lprefix .. datavalue .. lpostfix .. "|" .. prefix .. datavalue .. postfix .. "]]"
			else
				out[#out + 1] = prefix .. datavalue .. postfix
			end

		-- data types which are quantities:
		elseif datatype == "quantity" then
			-- quantities have mainsnak.datavalue.value.amount and mainsnak.datavalue.value.unit
			-- the unit is of the form http://www.wikidata.org/entity/Q829073
			local amount = tonumber(datavalue.amount)
			local amt
			if amount then
				-- check if upper and/or lower bounds are given and construct a string if so
				local upb = tonumber(datavalue.upperBound)
				local lowb = tonumber(datavalue.lowerBound)
				if upb then
					amt = amount .. " +" .. (upb - amount)
				end
				if lowb then
					amt = (amt or amount) .. " -" .. (amount - lowb)
				end
				-- check if bounds are symmetrical and re-write string if so
				if upb and lowb and ((upb - amount) == (amount - lowb)) then
					amt = amount .. " ±" .. (amount - lowb)
				end
			end
			-- extract the qid in the form 'Qnnn' from the value.unit url and then fetch the label from that
			local unit = ""
			local unitqid = string.match(datavalue.unit, "(Q%d+)")
			if unitqid then
				local unitlabel = mw.wikibase.label(unitqid)
				if unitlabel then
					-- labels are user-editable, so neutralise any wikitext
					unit = " " .. mw.text.nowiki(unitlabel)
				end
			end
			out[#out + 1] = (amt or amount or "Not a number") .. unit

		else
			-- some other data type
			-- e.g. globe-coordinate where mainsnak.datavalue.value is a table
			-- either write a specific handler
			-- or we can use formatPropertyValues as a temporary measure.
			-- This won't work with multiple valid values.
			out[#out + 1] = entity:formatPropertyValues(propertyID).value
			break
		end -- of datatype/unknown value/sourced check
		if maxvals > 0 and #out >= maxvals then break end
	end -- of for each value loop

	-- if there's anything to return, then return a list
	-- comma-separated by default, but may be specified by the sep parameter
	-- optionally specify a hlist or ubl
	if #out == 0 then
		return nil -- no items had valid reference
	end
	if sorted then table.sort(out) end
	if list == "hlist" then
		return frame:expandTemplate{title = 'Hlist', args = out}
	elseif list == "ubl" then
		return frame:expandTemplate{title = 'Unbulleted list', args = out}
	elseif noic then
		return table.concat(out, separator)
	else
		return table.concat(out, separator) .. icon
	end
end

--- -- Public functions --- -- -- getValue is used to get a value, or a comma separated list of them if multiple values exist --

p.getValue = function(frame)
	-- first unnamed argument: the property; second: the local value, if any
	local args = frame.args
	local propertyID = mw.text.trim(args[1] or "")

	local ok, result, claims = parseInput(frame, args[2], propertyID)
	if ok then
		-- getValue applies no rank filtering: every claim is acceptable
		local acceptAll = function(claim)
			return true
		end
		return _getvalue(frame, result, claims, acceptAll, propertyID)
	end
	-- nothing to fetch: result is the local value or nil
	return result
end

-------------------------------------------------------------------------------
-- getPreferredValue is used to get a value, or a comma separated list of them if multiple values exist
-- If preferred ranks are set, it will return those values, otherwise values of any rank
--
p.getPreferredValue = function(frame)
	local propertyID = mw.text.trim(frame.args[1] or "")

	local ok, result, claims = parseInput(frame, frame.args[2], propertyID)
	if not ok then
		-- nothing to fetch: result is the local value or nil
		return result
	end

	-- find out whether any claim carries the preferred rank
	local anyPreferred = false
	for _, claim in pairs(claims) do
		if claim.rank == "preferred" then
			anyPreferred = true
			break
		end
	end

	-- with preferred claims present only those pass; otherwise everything passes
	local function wanted(claim)
		return (not anyPreferred) or claim.rank == "preferred"
	end

	return _getvalue(frame, result, claims, wanted, propertyID)
end

-------------------------------------------------------------------------------
-- getSourcedValue is used to get a value, or a comma separated list of them if multiple values exist
-- but only values that are sourced are returned
-- redundant to getValue with onlysourced=true but kept for backwards compatibility
-- now defined via getValue
--
p.getSourcedValue = function(frame)
	-- force the option on, then let getValue do all the work
	local args = frame.args
	args.onlysourced = "yes"
	return p.getValue(frame)
end

-------------------------------------------------------------------------------
-- getCoords is used to get coordinates for display in an infobox
-- whitelist and blacklist are implemented
-- optional 'display' parameter is allowed, defaults to "inline, title"
--
p.getCoords = function(frame)
	local propertyID = "P625"

	-- if there is a 'display' parameter supplied, use it
	-- otherwise default to "inline, title"
	local disp = frame.args.display
	if not disp or disp == "" then
		disp = "inline, title"
	end

	local ok, result = parseInput(frame, frame.args[1], propertyID)
	if not ok then
		-- nothing to fetch: result is the local value or nil
		return result
	end

	-- Appends the numbers found in one half of the coordinate text to target,
	-- followed by the last character of that half (the N/S or E/W letter).
	local function appendHalf(target, raw)
		-- clean out the numeric html entities so they don't get parsed as numbers
		local cleaned = raw:gsub("&#%d%d;", ":")
		for num in mw.ustring.gmatch(cleaned, "%d+%.?%d*") do
			target[#target + 1] = num
		end
		target[#target + 1] = cleaned:sub(-1)
	end

	-- the latitude and longitude are returned like this: nn°nn&#39;nn.n&#34;
	local coords = result:formatPropertyValues(propertyID).value
	local latText = mw.ustring.match(coords, "^[^,]*") -- everything from the start to before the comma
	local longText = mw.ustring.match(coords, "[^ ]*$") -- everything from after the last space to the end

	local templateArgs = {}
	appendHalf(templateArgs, latText)
	appendHalf(templateArgs, longText)
	-- add named parameter for display
	templateArgs["display"] = disp

	-- invoke template Coord with the values stored in the table
	return frame:expandTemplate{title = 'coord', args = templateArgs}
end

-------------------------------------------------------------------------------
-- getQualifierValue is used to get a formatted value of a qualifier
--
-- The call needs:	a property (the unnamed parameter or 1=)
-- 					a target value for that property (pval=)
--					a qualifier for that target value (qual=)
-- The usual whitelisting and blacklisting of the property is implemented
-- The boolean onlysourced= parameter can be set to return nothing
-- when the property is unsourced (or only sourced to Wikipedia)
-- Returns the local value if one is supplied, otherwise the rendered
-- qualifier value(s), or nothing when there is no matching qualifier.
--
p.getQualifierValue = function(frame)
	local propertyID = mw.text.trim(frame.args[1] or "")

	-- The ID of the target value of the property
	-- whose qualifier is to be returned is passed in named parameter |pval=
	local propvalue = frame.args.pval

	-- The PropertyID of the qualifier
	-- whose value is to be returned is passed in named parameter |qual=
	local qualifierID = frame.args.qual

	-- onlysourced is a boolean passed to return qualifiers
	-- only when property values are sourced to something other than Wikipedia
	-- if nothing or an empty string is passed set it false
	-- if "false" or "no" or 0 is passed set it false
	local onlysrc = parseParam(frame.args.onlysourced, false)

	local success, errorOrEntity, props = parseInput(frame, frame.args[2], propertyID)
	if not success then
		return errorOrEntity
	end

	-- Scan through the values of the property
	-- we want something like property is P793, significant event (in propertyID)
	-- whose value is something like Q385378, construction (in propvalue)
	-- then we can return the value(s) of a qualifier such as P580, start time (in qualifierID)
	for _, v1 in pairs(props) do
		local snak = v1.mainsnak
		-- only wiki-linked values that match the target and carry qualifiers are of interest
		if snak.snaktype == "value"
			and snak.datavalue.type == "wikibase-entityid"
			and snak.datavalue.value.id == propvalue
			and v1.qualifiers then
			if onlysrc and not sourced(v1) then
				return
			end
			-- if we've got this far, we have a (sourced) claim with qualifiers
			-- which matches the target, so find the value(s) of the qualifier we want
			local quals = v1.qualifiers[qualifierID]
			if quals then
				if quals[1].datatype ~= "wikibase-item" then
					return mw.wikibase.renderSnaks(quals)
				end
				local out = {}
				for _, v2 in pairs(quals) do
					-- "unknown value" / "no value" qualifiers carry no datavalue
					if v2.datavalue then
						local valueID = v2.datavalue.value.id
						local sitelink = mw.wikibase.sitelink(valueID)
						local label = labelOrId(valueID)
						if sitelink then
							-- link to the local article
							out[#out + 1] = "[[" .. sitelink .. "|" .. label .. "]]"
						else
							-- no local article: link to the Wikidata item instead
							out[#out + 1] = "[[:d:" .. valueID .. "|" .. label .. "]]"
						end
					end
				end
				return table.concat(out, ", ")
			end
		end
	end -- of loop through values of propertyID
	return nil
end

-------------------------------------------------------------------------------
-- getLink returns the label for a Qid wiki-linked to the local article (if the article exists)
-- if label doesn't exist, it returns the Qid wiki-linked to the local article (if the article exists)
-- without a local article the plain label (or Qid) is returned
-- nothing is returned when no Qid is supplied
--
p.getLink = function(frame)
	local itemID = mw.text.trim(frame.args[1] or "")
	if itemID == "" then return end
	local sitelink = mw.wikibase.sitelink(itemID)
	local label = labelOrId(itemID)
	if sitelink then
		-- piped link: target is the local article, text is the sanitised label
		return "[[" .. sitelink .. "|" .. label .. "]]"
	else
		return label
	end
end

-------------------------------------------------------------------------------
-- getLabel returns the label for a Qid
-- if label doesn't exist, it returns the Qid
-- the Qid is taken from the first unnamed parameter, or else from |qid=
--
p.getLabel = function(frame)
	local args = frame.args
	local itemID = mw.text.trim(args[1] or args.qid or "")
	if itemID ~= "" then
		return labelOrId(itemID)
	end
	-- no Qid supplied: return nothing
end

-------------------------------------------------------------------------------
-- getAT returns the article title for a Qid
-- or nothing if it doesn't exist
-- the Qid is taken from the first unnamed parameter, or else from |qid=
--
p.getAT = function(frame)
	local args = frame.args
	local itemID = mw.text.trim(args[1] or args.qid or "")
	if itemID ~= "" then
		return mw.wikibase.sitelink(itemID)
	end
	-- no Qid supplied: return nothing
end

-------------------------------------------------------------------------------
-- getDescription returns the article description for a Qid
-- Qid defaults to the current article if omitted
-- Any local parameter supplied (other than "Wikidata") becomes the return value
-- Nothing is returned if the description doesn't exist or 'none' is passed locally
-- The keywords "Wikidata" and "none" are recognised in any letter case
--
p.getDescription = function(frame)
	local desc = mw.text.trim(frame.args[1] or "")
	local itemID = mw.text.trim(frame.args.qid or "")
	if itemID == "" then itemID = nil end
	-- lower() must be called; do it once and compare against both keywords
	local keyword = desc:lower()
	if keyword == 'wikidata' then
		return mw.wikibase.description(itemID)
	elseif keyword == 'none' then
		return nil
	else
		return desc
	end
end

-------------------------------------------------------------------------------
-- formatDate is a wrapper to export the private function format_Date
-- it passes on the first unnamed parameter together with |df= and |bc=
--
p.formatDate = function(frame)
	local args = frame.args
	local datetime, dateformat, era = args[1], args.df, args.bc
	return format_Date(datetime, dateformat, era)
end

-------------------------------------------------------------------------------
-- checkBlacklist allows a test to check whether a named field is suppressed
-- the blacklist is passed in |suppressfields= and the field name in |name=
-- it returns nil when the field is blacklisted and true when it is not
-- nothing is returned when either parameter is missing
--
p.checkBlacklist = function(frame)
	local blacklist = frame.args.suppressfields
	local fieldname = frame.args.name
	if blacklist and fieldname then
		-- plain-text search: characters such as "-" or "." in a field name
		-- must not be interpreted as Lua pattern syntax
		if blacklist:find(fieldname, 1, true) then
			return nil
		end
		return true
	end
end

-------------------------------------------------------------------------------
-- emptyor returns nil if its first unnamed argument is just punctuation, whitespace or html tags
-- otherwise it returns the argument unchanged (including leading/trailing space)
-- unless the argument may contain "=" when it must be called explicitly:
-- |1=arg
-- (when leading/trailing spaces are trimmed)
p.emptyor = function(frame)
	local original = frame.args[1]
	if not original or original == "" then
		return nil
	end

	-- strip, in this order: whitespace, anything that looks like a tag, punctuation
	local residue = string.gsub(original, "%s", "")
	residue = string.gsub(residue, "<[^>]*>", "")
	residue = string.gsub(residue, "%p", "")

	if residue ~= "" then
		-- something meaningful is left, so hand back the untouched argument
		return original
	end
	return nil
end

return p