-- [[Special:滥用过滤器/180|不可见字符]]诊断工具
-- {{see|Template:uw-unicode-other}}
-- {{#invoke:沙盒/Artoria2e5/unicode-other|main|}}
-- Released under CC0.
-- Adapts `func` (which expects an argument table) so it can be handed either
-- the current Scribunto frame or the arguments directly.
-- When the value passed in is the frame returned by mw.getCurrentFrame(),
-- `func` receives that frame's `.args`; any other value is forwarded as-is.
-- ret: <function(maybe_frame) => whatever func returns>
local function __unframe(func)
    return function(maybe_frame)
        local payload = maybe_frame
        if maybe_frame == mw.getCurrentFrame() then
            -- Invoked with the current frame: unwrap to the argument table.
            payload = maybe_frame.args
        end
        return func(payload)
    end
end
-- Like mw.ustring.gmatch, but each step also yields where the match was found.
-- us: string to search; pattern: mw.ustring pattern.
-- ret: <function():iter => <matched: ustr, start_idx: num, end_idx: num>@i,j>
-- The iterator returns nothing once no further match exists, and keeps
-- returning nothing if called again afterwards.
-- Caution: the next search starts at end_idx + 1, so a pattern that can match
-- the empty string would be found at the same position again on every call.
local function gmatch_with_idx(us, pattern)
    -- Cursor is local to this iterator so that several iterators can be
    -- advanced independently (it must not live in a global).
    local last_end = 0
    local finished = false
    return function()
        if finished then
            return nil
        end
        local first, last = mw.ustring.find(us, pattern, last_end + 1)
        if first == nil then
            finished = true
            return nil
        end
        last_end = last
        return mw.ustring.sub(us, first, last), first, last
    end
end
-- Character-set patterns (mw.ustring pattern syntax) for the Unicode "Other"
-- general categories. Keys are the category labels printed in the report;
-- each value is a single bracket set that matches one character.
local uni_gc_others
do
    local uchar = mw.ustring.char

    -- Builds the "a-b" range fragment of a bracket set from two code points.
    local function range(first, last)
        return uchar(first) .. '-' .. uchar(last)
    end

    -- Noncharacters: U+FDD0..U+FDEF plus the last two code points of each of
    -- the 17 planes (U+nFFFE, U+nFFFF).
    local nonchars = { range(0xFDD0, 0xFDEF) }
    for plane = 0, 16 do
        local base = plane * 0x10000
        nonchars[#nonchars + 1] = uchar(base + 0xFFFE, base + 0xFFFF)
    end

    uni_gc_others = {
        -- Control characters. NUL is written as %z; a %-class cannot be the
        -- start of a range, so U+0001..U+001F is spelled as its own range
        -- (writing '%z-' would add a literal '-' to the set instead).
        Cc = '[%z' .. range(0x0001, 0x001F) .. range(0x007F, 0x009F) .. ']',
        -- Surrogates.
        -- NOTE(review): surrogate code points are not representable in
        -- well-formed UTF-8; confirm mw.ustring.char / mw.ustring.find accept
        -- this set in the deployed Scribunto engine.
        Cs = '[' .. range(0xD800, 0xDFFF) .. ']',
        -- Private use (Unicode's own abbreviation is "Co"; the "Cp" key is
        -- kept because it is emitted in the report text).
        Cp = '[' ..
            range(0xE000, 0xF8FF) ..
            -- sua planes (15, 16): each runs up to xFFFD, the last two code
            -- points of the plane being noncharacters (listed under Cn).
            range(0xF0000, 0xFFFFD) ..
            range(0x100000, 0x10FFFD) ..
            ']',
        Cn = '[' .. table.concat(nonchars) .. ']',
        -- Format characters.
        Cf = '[' ..
            uchar(0x00AD, 0x070F, 0x17B4, 0x17B5) ..
            range(0x200B, 0x200F) ..
            range(0x202A, 0x202E) ..
            range(0x2060, 0x2064) ..
            range(0x206A, 0x206F) ..
            uchar(0xFEFF) ..
            range(0x0600, 0x0603) ..
            uchar(0x06DD) .. -- << ^^ five visible Cf chars
            uchar(0x110BD) ..
            range(0x1D173, 0x1D17A) ..
            uchar(0xE0001) ..
            -- NOTE(review): Unicode tag characters end at U+E007F; the upper
            -- bound 0xE0096 is kept from the original — confirm it is intended.
            range(0xE0020, 0xE0096) ..
            ']'
    }
end
-- Scans a text for characters matched by the uni_gc_others category patterns
-- and reports each hit as one wikitext bullet with its code point, line and
-- character position (both 1-based; a newline starts the next line).
-- Newline and tab are never reported. Only the first matching category is
-- reported for a character.
-- args: table whose [1] is the text to scan (e.g. frame.args); a plain string
--       is also accepted. A missing text yields an empty report.
-- ret: wikitext<ustr>
local function main(args)
    local text = args
    if type(args) == "table" then
        text = args[1]
    end
    if text == nil then
        return ''
    end
    text = tostring(text)

    local report = {}
    local lineno, charno = 1, 1
    for chr in mw.ustring.gmatch(text, ".") do
        if chr == "\n" then
            lineno = lineno + 1
            charno = 1
        else
            if chr ~= "\t" then
                for cat, patt in pairs(uni_gc_others) do
                    if mw.ustring.find(chr, patt) then
                        report[#report + 1] =
                            "* '''" .. cat .. "''': <tt>U+" ..
                            ("%04X"):format(mw.ustring.codepoint(chr)) ..
                            '</tt> at line ' .. lineno .. ', char ' .. charno .. '.'
                        -- One report per character is enough.
                        break
                    end
                end
            end
            charno = charno + 1
        end
    end
    return table.concat(report, '\n')
end
-- Module export table: `main` is exposed through __unframe so that it
-- receives an argument table whether it is given the current frame or the
-- arguments themselves.
local exports = {}
exports.main = __unframe(main)
return exports