-- Page: 模組:沙盒/Artoria2e5/unicode-other

-- Diagnostic tool for invisible characters ([[Special:滥用过滤器/180|不可见字符]], abuse filter 180).
-- {{see|Template:uw-unicode-other}}
-- Usage: {{#invoke:沙盒/Artoria2e5/unicode-other|main|}}
-- Released under CC0.

--- Wrap a function that expects an argument table so that it can be called
-- either with the current Scribunto frame or with the arguments directly.
--
-- @param func  function taking one value (the arguments)
-- @return a function of one parameter: when that parameter is the object
--         returned by mw.getCurrentFrame(), `func` receives its `.args`
--         field; any other value is handed to `func` unchanged.
local function __unframe(func)
	local function adapter(maybe_frame)
		local args = maybe_frame
		if maybe_frame == mw.getCurrentFrame() then
			-- Called with the current frame: unwrap its argument table.
			args = maybe_frame.args
		end
		return func(args)
	end
	return adapter
end

--- Like mw.ustring.gmatch, but each step also yields the match position.
--
-- @param us       string to search
-- @param pattern  mw.ustring pattern
-- @return an iterator function; each call returns
--         `matched, start_idx, end_idx` for the next match (indices as
--         reported by mw.ustring.find), or nothing once no match is left.
--
-- The search position is kept in locals captured by the returned closure, so
-- several iterators can be active at once without disturbing each other (the
-- previous version stored it in the globals `i` and `j`).
-- NOTE(review): a pattern that can match the empty string makes the next
-- search start at the same position again, so the iterator would not advance.
local function gmatch_with_idx(us, pattern)
	local i, j = 1, 0
	return function()
		-- Resume right after the end of the previous match.
		i, j = mw.ustring.find(us, pattern, j + 1)
		if i ~= nil then
			return mw.ustring.sub(us, i, j), i, j
		end
	end
end

-- Character-set patterns (mw.ustring pattern syntax) for the Unicode "Other"
-- characters this tool reports. Each key is the label printed by main();
-- each value is a single bracket set that matches one character.
--
-- Fixes relative to the previous table:
--   * Cc: "%z-" followed by U+001F is not a range (a "%z" class cannot be a
--     range endpoint), so the set matched NUL, a literal "-" and U+001F only.
--     The range now starts at an explicit U+0001.
--   * Cp: the supplementary private-use ranges ended at U+F00FD / U+1000FD;
--     they now run to U+FFFFD / U+10FFFD.
--   * The table is local instead of an accidental global.
local uni_gc_others
do
	local char = mw.ustring.char

	-- Pattern fragment "<lo>-<hi>" for an inclusive code point range.
	local function range(lo, hi)
		return char(lo) .. '-' .. char(hi)
	end

	-- U+nFFFE and U+nFFFF for every plane n = 0 .. 16.
	local plane_noncharacters = {}
	for plane = 0, 0x10 do
		local base = plane * 0x10000
		plane_noncharacters[#plane_noncharacters + 1] =
			char(base + 0xFFFE, base + 0xFFFF)
	end

	uni_gc_others = {
		-- Control characters: U+0000-U+001F and U+007F-U+009F.
		["Cc"] = '[%z' ..
			range(0x0001, 0x001F) ..
			range(0x007F, 0x009F) ..
		']',
		-- Surrogates.
		["Cs"] = '[' ..
			range(0xD800, 0xDFFF) ..
		']',
		-- Private use.
		-- NOTE(review): Unicode abbreviates this category as "Co"; the key is
		-- kept as "Cp" because it is part of the text main() emits.
		["Cp"] = '[' ..
			range(0xE000, 0xF8FF) ..
			-- supplementary private use areas (planes 15 and 16)
			range(0xF0000, 0xFFFFD) ..
			range(0x100000, 0x10FFFD) ..
		']',
		-- Noncharacters: U+FDD0-U+FDEF plus the last two code points of
		-- every plane.
		["Cn"] = '[' ..
			range(0xFDD0, 0xFDEF) ..
			table.concat(plane_noncharacters) ..
		']',
		-- Format characters.
		["Cf"] = '[' ..
			char(0x00AD, 0x070F, 0x17B4, 0x17B5) ..
			range(0x200B, 0x200F) ..
			range(0x202A, 0x202E) ..
			range(0x2060, 0x2064) ..
			range(0x206A, 0x206F) ..
			char(0xFEFF) ..
			-- U+0600-U+0603 and U+06DD: five Cf characters that are visible
			range(0x0600, 0x0603) ..
			char(0x06DD) ..
			char(0x110BD) ..
			range(0x1D173, 0x1D17A) ..
			char(0xE0001) ..
			-- NOTE(review): the tag characters end at U+E007F; the upper
			-- bound U+E0096 is kept from the original - confirm intent.
			range(0xE0020, 0xE0096) ..
		']'
	}
end

--- Scan a text for characters matched by the `uni_gc_others` patterns and
-- report where each one occurs.
--
-- @param args  argument table whose first positional entry (`args[1]`) is the
--              text to inspect; a bare string is accepted as well. A missing
--              text is treated as empty.
--              NOTE(review): the previous code iterated over an undefined
--              variable `line` and therefore always failed; reading the first
--              positional argument is assumed to be the intent (see the
--              #invoke usage in the file header) - confirm.
-- @return wikitext: one bullet line per reported character in the form
--         "* '''<category>''': <tt>U+XXXX</tt> at line L, char C.", joined
--         with newlines; the empty string when nothing is found.
--
-- Newline and tab are never reported. Lines and characters are counted from
-- 1; a newline starts the next line and resets the character counter.
local function main(args)
	local text = args
	if type(args) == "table" then
		text = args[1]
	end
	text = tostring(text or "")

	local report = {}
	local lineno, charno = 1, 1
	for chr in mw.ustring.gmatch(text, ".") do
		if chr == "\n" then
			lineno = lineno + 1
			charno = 1
		else
			if chr ~= "\t" then
				for cat, patt in pairs(uni_gc_others) do
					if mw.ustring.find(chr, patt) then
						report[#report + 1] = string.format(
							"* '''%s''': <tt>U+%04X</tt> at line %d, char %d.",
							cat, mw.ustring.codepoint(chr), lineno, charno)
						-- Report each character once, under the first
						-- category whose pattern matches.
						break
					end
				end
			end
			charno = charno + 1
		end
	end
	return table.concat(report, "\n")
end

-- Module exports: `main` is wrapped by __unframe so it can be called with
-- the current frame or with an argument table directly.
local exports = {}
exports.main = __unframe(main)
return exports