Module:UtilsString

From Zelda Wiki, the Zelda encyclopedia
Jump to: navigation, search

This module provides utility functions for manipulating Lua strings in general. For string manipulation and formatting that is specific to wikitext, see Module:UtilsMarkup.

This module re-implements some of the functions in the mw.text library. Use these functions whenever possible — mw.text is an order of magnitude slower because it uses mw.ustring.

Like Module:UtilsTable, some functions have both procedural and functional variants.

This module exports the following functions.

isEmpty

isEmpty(str)

Returns
  • true if and only if the value is nil or ""
Examples
Input | Output | Status
isEmpty(nil)
true
Green check.svg
isEmpty("")
true
Green check.svg
isEmpty(" ")
false
Green check.svg

notEmpty

notEmpty(str)

Returns
  • true if and only if str is neither nil nor an empty string.
Examples
Input | Output | Status
notEmpty(" ")
true
Green check.svg
notEmpty("")
false
Green check.svg
notEmpty(nil)
false
Green check.svg

isBlank

isBlank(str)

Returns
  • true if and only if str is nil, the empty string, or made up entirely of whitespace.
Examples
Input | Output | Status
isBlank("  ")
true
Green check.svg
isBlank("\n\n\n")
true
Green check.svg
isBlank(nil)
true
Green check.svg
isBlank("foo")
false
Green check.svg

nilIfEmpty

nilIfEmpty(str)

Returns
  • nil if value is nil or empty string, otherwise returns the given value.
Examples
Input | Output | Status
nilIfEmpty("")
nil
Green check.svg
nilIfEmpty(nil)
nil
Green check.svg
nilIfEmpty(" ")
" "
Green check.svg

startsWith

startsWith | _startsWith
startsWith(str, pattern)
Returns
  • true if str starts with pattern, else false.
Examples
Input | Output | Status
startsWith("Fooloo Limpah", "Foo")
true
Green check.svg
startsWith("Fooloo Limpah", "foo")
false
Green check.svg
startsWith("Fooloo Limpah", "")
true
Green check.svg
startsWith("[[foo]]", "[[")
true
Green check.svg
_startsWith(pattern)(str)
Returns
  • true if str starts with pattern, else false.
Examples
Input | Output | Status
_startsWith("Foo")("Fooloo Limpah")
true
Green check.svg
_startsWith("foo")("Fooloo Limpah")
false
Green check.svg
_startsWith("")("Fooloo Limpah")
true
Green check.svg
_startsWith("[[")("[[foo]]")
true
Green check.svg

startsWithRegex

startsWithRegex | _startsWithRegex
startsWithRegex(str, pattern)
Returns
  • true if str starts with regular expression pattern, else false.
Examples
Input | Output | Status
startsWithRegex("foo", "[af]")
true
Green check.svg
startsWithRegex("aoo", "[af]")
true
Green check.svg
startsWithRegex("boo", "[af]")
false
Green check.svg
_startsWithRegex(pattern)(str)
Returns
  • true if str starts with regular expression pattern, else false.
Examples
Input | Output | Status
_startsWithRegex("[af]")("foo")
true
Green check.svg
_startsWithRegex("[af]")("aoo")
true
Green check.svg
_startsWithRegex("[af]")("boo")
false
Green check.svg

endsWith

endsWith | _endsWith
endsWith(str, pattern)
Returns
  • true if str ends with pattern, else false.
Examples
Input | Output | Status
endsWith("Fooloo Limpah", "Limpah")
true
Green check.svg
endsWith("Fooloo Limpah", "limpah")
false
Green check.svg
endsWith("Fooloo Limpah", "")
true
Green check.svg
endsWith("Wood (Character)", ")")
true
Green check.svg
_endsWith(pattern)(str)
Returns
  • true if str ends with pattern, else false.
Examples
Input | Output | Status
_endsWith("Limpah")("Fooloo Limpah")
true
Green check.svg
_endsWith("limpah")("Fooloo Limpah")
false
Green check.svg
_endsWith("")("Fooloo Limpah")
true
Green check.svg
_endsWith(")")("Wood (Character)")
true
Green check.svg

endsWithRegex

endsWithRegex | _endsWithRegex
endsWithRegex(str, pattern)
Returns
  • true if str ends with regular expression pattern, else false.
Examples
Input | Output | Status
endsWithRegex("Wood (Character)", "%([^)]*%)")
true
Green check.svg
endsWithRegex("Wood", "%([^)]*%)")
false
Green check.svg
_endsWithRegex(pattern)(str)
Returns
  • true if str ends with regular expression pattern, else false.
Examples
Input | Output | Status
_endsWithRegex("%([^)]*%)")("Wood (Character)")
true
Green check.svg
_endsWithRegex("%([^)]*%)")("Wood")
false
Green check.svg

split

split | _split
split(str, [pattern], [plain])

A performant alternative to mw.text.split.

Parameters
  • str (string, required)
  • [pattern] (string, default "%s*,%s*")
  • [plain] (boolean)
Returns
  • A table of the split strings.
Examples
Input | Output | Status
split(" foo,    bar,baz ")
{" foo", "bar", "baz "}
Green check.svg
split("foo bar baz", " ")
{"foo", "bar", "baz"}
Green check.svg
Limited support for Unicode strings
split("アイウエオ", "")
{"ア", "イ", "ウ", "エ", "オ"}
Green check.svg
_split([pattern], [plain])(str)

A performant alternative to mw.text.split.

Parameters
  • [pattern] (string, default "%s*,%s*")
  • [plain] (boolean)
  • str (string, required)
Returns
  • A table of the split strings.
Examples
Input | Output | Status
_split(nil)(" foo,    bar,baz ")
{" foo", "bar", "baz "}
Green check.svg
_split(" ")("foo bar baz")
{"foo", "bar", "baz"}
Green check.svg
Limited support for Unicode strings
_split("")("アイウエオ")
{"ア", "イ", "ウ", "エ", "オ"}
Green check.svg

sub

sub | _sub
sub(str, startIndex, [endIndex])

Equivalent to string.sub.

Parameters
  • str (string, required)
  • startIndex (number, required)
  • [endIndex] (number, default #str)
Returns
  • A substring of str from startIndex to endIndex (inclusive).
Examples
Input | Output | Status
sub("Fooloo Limpah", 8)
"Limpah"
Green check.svg
sub("Fooloo Limpah", 1, 6)
"Fooloo"
Green check.svg
sub("Fooloo Limpah", 20)
""
Green check.svg
sub("Fooloo Limpah", -20)
"Fooloo Limpah"
Green check.svg
sub("Fooloo Limpah", 8, 20)
"Limpah"
Green check.svg
_sub(startIndex, [endIndex])(str)

Equivalent to string.sub.

Parameters
  • startIndex (number, required)
  • [endIndex] (number, default #str)
  • str (string, required)
Returns
  • A substring of str from startIndex to endIndex (inclusive).
Examples
Input | Output | Status
_sub(8)("Fooloo Limpah")
"Limpah"
Green check.svg
_sub(1, 6)("Fooloo Limpah")
"Fooloo"
Green check.svg
_sub(20)("Fooloo Limpah")
""
Green check.svg
_sub(-20)("Fooloo Limpah")
"Fooloo Limpah"
Green check.svg
_sub(8, 20)("Fooloo Limpah")
"Limpah"
Green check.svg

trim

trim | _trim
trim(str, [pattern])

A performant alternative to mw.text.trim.

Parameters
  • str (string, required)
  • [pattern] (string)
Returns
  • The trimmed string.
Examples
Input | Output | Status
trim("  foo")
"foo"
Green check.svg
trim(":Category:Link", ":")
"Category:Link"
Green check.svg
_trim([pattern])(str)

A performant alternative to mw.text.trim.

Parameters
  • [pattern] (string)
  • str (string, required)
Returns
  • The trimmed string.
Examples
Input | Output | Status
_trim(nil)("  foo")
"foo"
Green check.svg
_trim(":")(":Category:Link")
"Category:Link"
Green check.svg

interpolate

interpolate(formatStr, args)

An approximation of string interpolation.

Parameters
  • formatStr (string, required)
  • args (map of string keys to string values, required)
Returns
  • The formatted string.
Examples
Input | Output | Status
interpolate(
  "${wiki} is a ${franchise} encyclopedia that anyone can edit.",
  {
    wiki = "Zelda Wiki",
    franchise = "''Zelda''",
  }
)
"Zelda Wiki is a ''Zelda'' encyclopedia that anyone can edit."
Green check.svg

-- `p` collects the public functions that this module returns to its callers.
-- `h` collects internal helpers; nothing stored on it is exported.
local p, h = {}, {}

-- Reports whether `str` carries no text at all.
-- @param str value to inspect (normally a string or nil)
-- @return true when `str` is nil or the empty string, false otherwise
function p.isEmpty(str)
	if str == nil then
		return true
	end
	return str == ""
end

-- Logical opposite of isEmpty.
-- @param str value to inspect (normally a string or nil)
-- @return true when `str` is neither nil nor the empty string, false otherwise
function p.notEmpty(str)
	return str ~= nil and str ~= ""
end

-- Reports whether `str` is nil, empty, or consists solely of whitespace.
-- The whitespace set (tab, CR, LF, form feed, space) is the same default set
-- this module's own trim uses. The check is done with a byte-level
-- string.find instead of mw.text.trim, which goes through the much slower
-- mw.ustring library and has to build a trimmed copy of the string.
-- @param str value to inspect (normally a string or nil)
-- @return true when `str` is nil or contains nothing but whitespace
function p.isBlank(str)
	if str == nil then
		return true
	end
	return string.find(str, "^[\t\r\n\f ]*$") ~= nil
end

-- Normalises "no text" to nil.
-- @param str value to inspect (normally a string or nil)
-- @return `str` unchanged when it is a non-empty value; otherwise nil.
--         nil is returned explicitly (rather than returning no value at all)
--         so the result can be forwarded straight into functions that
--         require an argument, e.g. tostring(p.nilIfEmpty(x)).
function p.nilIfEmpty(str)
	if not str or str == "" then
		return nil
	end
	return str
end

-- Tests whether `str` begins with the literal text `pattern`.
-- Pattern magic characters are not interpreted (plain matching).
-- @param str     string to inspect
-- @param pattern literal prefix to look for
-- @return true if `str` starts with `pattern`, else false
function p.startsWith(str, pattern)
	local usePlainMatching = true
	return h.startsWith(str, pattern, usePlainMatching)
end
-- Tests whether `str` begins with a match for the Lua pattern `pattern`.
-- @param str     string to inspect
-- @param pattern Lua pattern to look for at the start of `str`
-- @return true if a match begins at the first character, else false
function p.startsWithRegex(str, pattern)
	local usePlainMatching = false
	return h.startsWith(str, pattern, usePlainMatching)
end
-- Curried form of startsWith: fixes the literal prefix first.
-- @param pattern literal prefix to look for
-- @return a function(str) that returns true if `str` starts with `pattern`
function p._startsWith(pattern)
	local function hasPrefix(str)
		return h.startsWith(str, pattern, true)
	end
	return hasPrefix
end
-- Curried form of startsWithRegex: fixes the Lua pattern first.
-- @param pattern Lua pattern to look for at the start of the string
-- @return a function(str) that returns true if `str` starts with a match
function p._startsWithRegex(pattern)
	local function hasPatternPrefix(str)
		return h.startsWith(str, pattern, false)
	end
	return hasPatternPrefix
end
-- Internal: shared implementation behind the startsWith variants.
-- @param str     string to inspect
-- @param pattern literal text or Lua pattern, depending on `plain`
-- @param plain   forwarded to string.find; true disables pattern matching
-- @return true if the first match found begins at position 1
function h.startsWith(str, pattern, plain)
	local matchStart = str:find(pattern, 1, plain)
	return matchStart == 1
end

-- Tests whether `str` finishes with the literal text `pattern`.
-- Pattern magic characters are not interpreted (plain matching).
-- @param str     string to inspect
-- @param pattern literal suffix to look for
-- @return true if `str` ends with `pattern`, else false
function p.endsWith(str, pattern)
	local usePlainMatching = true
	return h.endsWith(str, pattern, usePlainMatching)
end
-- Tests whether `str` finishes with a match for the Lua pattern `pattern`.
-- @param str     string to inspect
-- @param pattern Lua pattern to look for at the end of `str`
-- @return true if a match ends on the last character, else false
function p.endsWithRegex(str, pattern)
	local usePlainMatching = false
	return h.endsWith(str, pattern, usePlainMatching)
end
-- Curried form of endsWith: fixes the literal suffix first.
-- @param pattern literal suffix to look for
-- @return a function(str) that returns true if `str` ends with `pattern`
function p._endsWith(pattern)
	local function hasSuffix(str)
		return h.endsWith(str, pattern, true)
	end
	return hasSuffix
end
-- Curried form of endsWithRegex: fixes the Lua pattern first.
-- @param pattern Lua pattern to look for at the end of the string
-- @return a function(str) that returns true if `str` ends with a match
function p._endsWithRegex(pattern)
	local function hasPatternSuffix(str)
		return h.endsWith(str, pattern, false)
	end
	return hasPatternSuffix
end
-- Internal: shared implementation behind the endsWith variants.
--
-- Plain mode compares the tail of `str` with `pattern` directly.
-- Pattern mode looks for a match that finishes on the last byte of `str`.
-- Each new search starts one position after the START of the previous match
-- (not after its end), so overlapping candidates are considered: "banana"
-- ends with "ana" even though the first "ana" match overlaps the final one.
-- The strictly increasing start position also guarantees termination for
-- patterns that can match the empty string.
--
-- @param str     string to inspect
-- @param pattern literal text (plain) or Lua pattern (not plain)
-- @param plain   when truthy, `pattern` is compared literally
-- @return true if `str` ends with `pattern`, else false
function h.endsWith(str, pattern, plain)
	local strLength = #str
	if plain then
		-- string.find accepts numbers as patterns; keep accepting them here.
		if type(pattern) == "number" then
			pattern = tostring(pattern)
		end
		local patternLength = #pattern
		-- str:sub(-0) would yield the whole string, so the empty suffix
		-- (which every string ends with) is handled explicitly.
		if patternLength == 0 then
			return true
		end
		return str:sub(-patternLength) == pattern
	end

	local init = 1
	-- Position strLength + 1 is still a valid start: an empty match there
	-- ends exactly at strLength.
	while init <= strLength + 1 do
		local matchStart, matchEnd = str:find(pattern, init)
		if not matchStart then
			return false
		end
		if matchEnd == strLength then
			return true
		end
		init = matchStart + 1
	end
	return false
end


-- Curried form of split: fixes the separator arguments first.
-- @param pattern separator, forwarded unchanged to p.split
-- @param plain   forwarded unchanged to p.split
-- @return a function(str) that returns the table produced by p.split
function p._split(pattern, plain)
	local function splitWith(str)
		return p.split(str, pattern, plain)
	end
	return splitWith
end
-- Original source: https://phabricator.wikimedia.org/diffusion/ELUA/browse/master/includes/engines/LuaCommon/lualib/mw.text.lua
--
-- Splits `text` into a table of pieces.
-- @param text    string to split
-- @param pattern separator; "" means "split into individual characters",
--                anything else is handed to h.gsplit
-- @param plain   forwarded to h.gsplit
-- @return array-like table of the pieces, in order
function p.split(text, pattern, plain)
	local pieces = {}
	local nextPiece
	if pattern == "" then
		-- Splitting on "" byte by byte would cut multi-byte UTF-8 characters
		-- apart, so walk the text one character at a time instead: a lead
		-- byte followed by any continuation bytes. Lua 5.3's utf8 library is
		-- not available here; the pattern comes from
		-- http://lua-users.org/wiki/LuaUnicode
		nextPiece = string.gmatch(text, "([%z\1-\127\194-\244][\128-\191]*)")
	else
		nextPiece = h.gsplit(text, pattern, plain)
	end
	for piece in nextPiece do
		pieces[#pieces + 1] = piece
	end
	return pieces
end
-- Internal: builds an iterator over the pieces of `text` between separators.
-- @param text    string to split
-- @param pattern separator; when nil or false, '%s*,%s*' is used
-- @param plain   forwarded to string.find; true disables pattern matching
-- @return an iterator function (plus two nils) suitable for a generic `for`;
--         each call yields the next piece, then nothing once exhausted
function h.gsplit(text, pattern, plain)
	pattern = pattern or '%s*,%s*'
	-- `cursor` is where the next piece starts; nil once the text is used up.
	local cursor = 1
	local textLength = text:len()

	local function nextPiece()
		if not cursor then
			return
		end
		local sepStart, sepEnd = text:find( pattern, cursor, plain )
		if not sepStart then
			-- No separator left: the remainder is the final piece.
			local tail = text:sub( cursor )
			cursor = nil
			return tail
		end
		if sepEnd < sepStart then
			-- The separator matched the empty string: emit up to and
			-- including the position of the match and step past it.
			local piece = text:sub( cursor, sepStart )
			cursor = ( sepStart < textLength ) and ( sepStart + 1 ) or nil
			return piece
		end
		local piece = ''
		if sepStart > cursor then
			piece = text:sub( cursor, sepStart - 1 )
		end
		cursor = sepEnd + 1
		return piece
	end

	return nextPiece, nil, nil
end

-- Procedural form of _sub; behaves like string.sub.
-- @param str string to slice
-- @param s   start index
-- @param e   optional end index
-- @return the substring of `str` from `s` to `e`
function p.sub(str, s, e)
	local slice = p._sub(s, e)
	return slice(str)
end
-- Curried form of sub: fixes the index range first.
-- @param s start index, passed to string.sub
-- @param e optional end index, passed to string.sub
-- @return a function(str) that returns string.sub(str, s, e)
function p._sub(s, e)
	local function slice(str)
		return string.sub(str, s, e)
	end
	return slice
end


-- Formats the first byte of `c` as a percent sign followed by two uppercase
-- hexadecimal digits (for example " " becomes "%20").
-- @param c string whose first byte is encoded
-- @return the three-character escape sequence
local function char_to_hex(c)
	local code = string.byte(c)
	return ("%%%02X"):format(code)
end

-- Source: https://phabricator.wikimedia.org/diffusion/ELUA/browse/master/includes/engines/LuaCommon/lualib/mw.text.lua
--
-- Procedural form of _trim.
-- @param s       string to trim
-- @param charset optional set of characters to strip from both ends;
--                see _trim for the default
-- @return the trimmed string
function p.trim(s, charset)
	local trimmer = p._trim(charset)
	return trimmer(s)
end
-- Curried form of trim: fixes the character set first.
-- @param charset characters to strip, inserted as-is into a pattern
--                character class; defaults to tab, CR, LF, form feed, space
-- @return a function(s) that returns `s` with leading and trailing
--         characters from the set removed
function p._trim(charset)
	charset = charset or '\t\r\n\f '
	return function(s)
		local edge = '[' .. charset .. ']*'
		-- gsub also returns a substitution count; keep only the string.
		local trimmed = s:gsub( '^' .. edge .. '(.-)' .. edge .. '$', '%1' )
		return trimmed
	end
end

-- By http://lua-users.org/wiki/RiciLake
--
-- Replaces each ${name} placeholder in `formatStr` with tab[name].
-- A placeholder whose lookup yields nil or false is left in the text as-is.
-- @param formatStr template string containing ${name} placeholders
-- @param tab       table mapping placeholder names to replacement values
-- @return the formatted string
function p.interpolate(formatStr, tab)
	local function substitute(placeholder)
		-- Strip the leading "${" and trailing "}" to obtain the key.
		local key = placeholder:sub(3, -2)
		return tab[key] or placeholder
	end
	local formatted = formatStr:gsub('($%b{})', substitute)
	return formatted
end

-- Per-function argument schemas, keyed by function name and then by parameter
-- name. Each parameter entry gives its `type` and, where set, whether it is
-- `required` and what its `default` is.
-- NOTE(review): the consumer of this table is not visible in this file;
-- presumably a documentation/validation module reads it — confirm.
p.Schemas = {
	split = {
		str = {
			type = "string",
			required = true,
		},
		pattern = {
			type = "string",
			-- Same separator that h.gsplit falls back to when no pattern is given.
			default = mw.dumpObject("%s*,%s*"),
		},
		plain = {
			type = "boolean",
		},
	},
	sub = {
		str = {
			type = "string",
			required = true,
		},
		startIndex = {
			type = "number",
			required = true,
		},
		endIndex = {
			type = "number",
			-- Stored as display text, not evaluated as a Lua expression here.
			default = "#str",
		},
	},
	trim = {
		pattern = {
			type = "string",
		},
		str = {
			type = "string",
			required = true,
		},
	},
	-- NOTE(review): no `format` function is defined in this file and
	-- p.Documentation has no `format` entry; this schema looks stale — confirm
	-- before removing.
	format = {
		formatStr = {
			type = "string",
			required = true,
		},
		["..."] = {
			type = "array",
			items = {
				type = "string",
			},
			required = true,
		},
	},
	interpolate = {
		formatStr = {
			type = "string",
			required = true,
		},
		args = {
			type = "map",
			required = true,
			keys = { type = "string" },
			values = { type = "string" },
		},
	},
}

-- Documentation and example cases for each exported function, keyed by
-- function name. Per entry:
--   params  - parameter names of the procedural form, in order
--   _params - parameter groups of the curried (underscore-prefixed) form
--   desc    - optional wikitext description
--   returns - wikitext description of the result
--   cases   - example calls (`args`) with their expected result (`expect`)
-- NOTE(review): the module that renders/executes these entries is not visible
-- in this file; the meaning of `outputOnly` is presumably defined there.
p.Documentation = {
	isEmpty = {
		params = {"str"},
		returns = '<code>true</code> if and only if the value is <code>nil</code> or <code>""</code>',
		cases = {
			{
				args = {nil},
				expect = true,
			},
			{
				args = {""},
				expect = true,
			},
			{
				args = {" "},
				expect = false,
			},
		},
	},
	isBlank = {
		params = {"str"},
		returns = "<code>true</code> if and only if <code>str</code> is nil, blank, or whitespace.",
		cases = {
			{
				args = {"  "},
				expect = true,
			},
			{
				args = {"\n\n\n"},
				expect = true,
			},
			{
				args = {nil},
				expect = true,
			},
			{
				args = {"foo"},
				expect = false,
			},
		},
	},
	notEmpty = {
		params = {"str"},
		returns = "<code>true</code> if and only if <code>str</code> is neither nil nor an empty string.",
		cases = {
			{
				args = {" "},
				expect = true,
			},
			{
				args = {""},
				expect = false,
			},
			{
				args = {nil},
				expect = false,
			},
		}
	},
	nilIfEmpty = {
		params = {"str"},
		returns = "<code>nil</code> if value is nil or empty string, otherwise returns the given value.",
		cases = {
			outputOnly = true,
			{
				args = {""},
				expect = nil,
			},
			{
				args = {nil},
				expect = nil,
			},
			{
				args = {" "},
				expect = " ",
			},
		},
	},
	startsWith = {
		params = {"str", "pattern"},
		_params = {{"pattern"}, {"str"}},
		returns = "<code>true</code> if <code>str</code> starts with <code>pattern</code>, else <code>false</code>.",
		cases = {
			{
				args = {"Fooloo Limpah", "Foo"},
				expect = true,
			},
			{
				args = {"Fooloo Limpah", "foo"},
				expect = false,
			},
			{
				args = {"Fooloo Limpah", ""},
				expect = true,
			},
			{
				args = {"[[foo]]", "[["},
				expect = true,
			},
		},
	},
	startsWithRegex = {
		params = {"str", "pattern"},
		_params = {{"pattern"}, {"str"}},
		returns = "<code>true</code> if <code>str</code> starts with regular expression <code>pattern</code>, else <code>false</code>.",
		cases = {
			{
				args = {"foo", "[af]"},
				expect = true,
			},
			{
				args = {"aoo", "[af]"},
				expect = true,
			},
			{
				args = {"boo", "[af]"},
				expect = false,
			},
		}
	},
	endsWith = {
		params = {"str", "pattern"},
		_params = {{"pattern"}, {"str"}},
		returns = "<code>true</code> if <code>str</code> ends with <code>pattern</code>, else <code>false</code>.",
		cases = {
			{
				args = {"Fooloo Limpah", "Limpah"},
				expect = true,
			},
			{
				args = {"Fooloo Limpah", "limpah"},
				expect = false,
			},
			{
				args = {"Fooloo Limpah", ""},
				expect = true,
			},
			{
				-- endsWith takes exactly (str, pattern) and always matches
				-- literally, so no third "plain" argument is passed.
				args = {"Wood (Character)", ")"},
				expect = true,
			}
		},
	},
	endsWithRegex = {
		params = {"str", "pattern"},
		_params = {{"pattern"}, {"str"}},
		returns = "<code>true</code> if <code>str</code> ends with regular expression <code>pattern</code>, else <code>false</code>.",
		cases = {
			{
				args = {"Wood (Character)", "%([^)]*%)"},
				expect = true,
			},
			{
				args = {"Wood", "%([^)]*%)"},
				expect = false,
			},
		},
	},
	trim = {
		desc = "A [[gphelp:Extension:Scribunto#mw.text.trim is slow|performant alternative]] to {{Scribunto Manual|lib=mw.text.trim}}.",
		params = {"str", "pattern"},
		_params = {{"pattern"}, {"str"}},
		returns = "The trimmed string.",
		cases = {
			outputOnly = true,
			{
				args = {"  foo"},
				expect = "foo",
			},
			{
				args = {":Category:Link", ":"},
				expect = "Category:Link",
			},
		},
	},
	split = {
		desc = "A [[gphelp:Extension:Scribunto#mw.text.split is very slow|performant alternative]] to  {{Scribunto Manual|lib=mw.text.split}}.",
		params = {"str", "pattern", "plain"},
		_params = {{"pattern", "plain"}, {"str"}},
		returns = "A <code>table</code> of the split strings.",
		cases = {
			{
				args = {" foo,    bar,baz "},
				expect = {" foo", "bar", "baz "},
			},
			{
				args = {"foo bar baz", " "},
				expect = {"foo", "bar", "baz"},
			},
			{
				desc = "Limited support for Unicode strings",
				args = {"アイウエオ", ""},
				expect = {"ア","イ","ウ","エ","オ"},
			},
		},
	},
	sub = {
		desc = "Equivalent to <code>string.sub</code>.",
		params = {"str", "startIndex", "endIndex"},
		_params = {{"startIndex", "endIndex"}, {"str"}},
		-- Describes the final result of both call forms, like the other
		-- curried entries (e.g. trim) do.
		returns = "A substring of <code>str</code> from <code>startIndex</code> to <code>endIndex</code> (inclusive).",
		cases = {
			outputOnly = true,
			{
				args = {"Fooloo Limpah", 8},
				expect = "Limpah",
			},
			{
				args = {"Fooloo Limpah", 1, 6},
				expect = "Fooloo",
			},
			{
				args = {"Fooloo Limpah", 20},
				expect = "",
			},
			{
				args = {"Fooloo Limpah", -20},
				expect = "Fooloo Limpah",
			},
			{
				args = {"Fooloo Limpah", 8, 20},
				expect = "Limpah",
			},
		},
	},
	interpolate = {
		desc = "Approximation of [http://lua-users.org/wiki/StringInterpolation string interpolation]",
		params = {"formatStr", "args"},
		returns = "The formatted string.",
		cases = {
			outputOnly = true,
			{
				args = {"${wiki} is a ${franchise} encyclopedia that anyone can edit.", {
					wiki = "Zelda Wiki",
					franchise = "''Zelda''",
				}},
				expect = "Zelda Wiki is a ''Zelda'' encyclopedia that anyone can edit."
			}
		}
	},
}

-- Export the public table; the helpers stored on `h` are not returned.
return p