- 欢迎来到偶像大师中文维基,本站仍在筹备阶段,如有兴趣参与建设可加群申请账号:542540342
Module:Furigana
跳转到导航
跳转到搜索
此模块的文档可以在Module:Furigana/doc创建
-- Export table; returned at the end of the module.
local p = {}
-- Cache frequently used globals and library functions as upvalues for faster access.
local type = type
local ipairs = ipairs
local concat = table.concat
local u_gsub = mw.ustring.gsub
local u_sub = mw.ustring.sub
-- Inclusive Unicode code point ranges, each written as { first, last }, whose
-- characters are treated as kanji-like when looking for the base text of a reading.
local KANJI_LIKE_UNICODE_RANGES = {
{ 0x2E80, 0x2EFF }, -- CJK Radicals Supplement
{ 0x3005, 0x3007 }, -- "々", "〆", "〇"
{ 0x31C0, 0x31EF }, -- CJK Strokes
{ 0x3400, 0x4DBF }, -- CJK Unified Ideographs Extension A
{ 0x4E00, 0x9FFF }, -- CJK Unified Ideographs
{ 0xF900, 0xFAFF }, -- CJK Compatibility Ideographs
{ 0x20000, 0x2A6DF }, -- CJK Unified Ideographs Extension B
{ 0x2A700, 0x2EE5F }, -- CJK Unified Ideographs Extension C-I
{ 0x2F800, 0x2FA1F }, -- CJK Compatibility Ideographs Supplement
{ 0x30000, 0x323AF }, -- CJK Unified Ideographs Extension G-H
}
--- Character class (for mw.ustring patterns) that matches a single kanji-like
--- character. Built once, at load time, from KANJI_LIKE_UNICODE_RANGES.
local KANJI_LIKE_PATTERN = (function ()
    local to_char = mw.ustring.char
    local pieces = {}
    for index, entry in ipairs(KANJI_LIKE_UNICODE_RANGES) do
        if type(entry) == 'table' then
            -- A { first, last } pair becomes a "first-last" range.
            pieces[index] = to_char(entry[1])..'-'..to_char(entry[2])
        else
            -- A bare code point becomes a single literal character.
            pieces[index] = to_char(entry)
        end
    end
    return '['..concat(pieces)..']'
end)()
--[[
Splits `text` into tag tokens (`<tag ...>` and `</tag>`) and the text between them.
Empty stretches of text between two adjacent tags produce no token.
```lua
extract_tags('A<span title="(title)">B</span>C')
-->
{
    { type = 'unknown', content = 'A' },
    { type = 'tag', content = '<span title="(title)">' },
    { type = 'unknown', content = 'B' },
    { type = 'tag', content = '</span>' },
    { type = 'unknown', content = 'C' },
}
```
]]
---@param text string
---@return {type: 'tag' | 'unknown', content: string}[]
local function extract_tags(text)
    -- "<" or "</", a name-start character, optional name characters, then
    -- anything that is not "<" or ">" up to the closing ">". The hyphen is
    -- escaped so that it is a literal member of the set instead of forming a
    -- (reversed, hence empty) range between "_" and ".".
    local tag_pattern = '</?[:%a_][:%w_.%-]*[^<>]*>'
    local result = {}
    local pos = 1
    -- Walk from tag to tag with string.find; each character is scanned once,
    -- unlike a gsub whose pattern starts with a lazy "(.-)" prefix.
    while true do
        local tag_start, tag_end = text:find(tag_pattern, pos)
        if not tag_start then
            break
        end
        if tag_start > pos then
            result[#result+1] = { type = 'unknown', content = text:sub(pos, tag_start - 1) }
        end
        result[#result+1] = { type = 'tag', content = text:sub(tag_start, tag_end) }
        pos = tag_end + 1
    end
    -- Whatever follows the last tag (or the whole text if there was no tag).
    if pos <= #text then
        result[#result+1] = { type = 'unknown', content = text:sub(pos) }
    end
    return result
end
-- Captures: (1) the shortest possible prefix, (2) a run of kanji-like
-- characters, (3) a balanced "(...)" group that directly follows the run.
local EXTRACT_RB_RT_PATTERN = '(.-)('..KANJI_LIKE_PATTERN..'+)(%b())'
--- Scans `text` chunk by chunk (a line plus its trailing newlines) for
--- "kanji(reading)" pairs and appends the resulting `text` and `rb-rt` tokens
--- to `into_tokens`.
---@param text string
---@param into_tokens ({type: 'ruby-part' | 'tag' | 'text', content: string} | {type: 'rb-rt', rb: string, rt: string})[]
local function extract_rb_rt(text, into_tokens)
    local function push(token)
        into_tokens[#into_tokens+1] = token
    end
    -- Invoked for every match: records the tokens, then removes the matched
    -- span from the chunk by returning an empty replacement.
    local function on_match(before, rb, rt)
        if before ~= '' then
            push { type = 'text', content = before }
        end
        -- The third capture still carries its parentheses; strip them.
        push { type = 'rb-rt', rb = rb, rt = u_sub(rt, 2, -2) }
        return ''
    end
    for line_with_lf in text:gmatch('[^\n]*\n*') do
        -- What the substitution leaves behind is the tail after the last match.
        local tail = u_gsub(line_with_lf, EXTRACT_RB_RT_PATTERN, on_match)
        if tail ~= '' then
            push { type = 'text', content = tail }
        end
    end
end
--- Classifies the tokens produced by `extract_tags` and appends the result to
--- `into_tokens`. Everything from an opening `<ruby` tag through the next
--- `</ruby` tag is emitted as `ruby-part`; other tags are appended unchanged;
--- all remaining text is handed to `extract_rb_rt`.
--- See the tests in [[Module:Furigana/tests]] for details.
---@param tokens {type: 'tag' | 'unknown', content: string}[]
---@param into_tokens ({type: 'ruby-part' | 'tag' | 'text', content: string} | {type: 'rb-rt', rb: string, rt: string})[]
local function extract_others(tokens, into_tokens)
    local inside_ruby = false
    for _, token in ipairs(tokens) do
        local content = token.content
        local is_tag = token.type == 'tag'
        if inside_ruby then
            -- Inside <ruby>...</ruby> every token is kept verbatim as a ruby-part.
            into_tokens[#into_tokens+1] = { type = 'ruby-part', content = content }
            if is_tag and content:match('^</[Rr][Uu][Bb][Yy]') then
                inside_ruby = false
            end
        elseif not is_tag then
            -- Plain text outside of <ruby>: look for "kanji(reading)" pairs.
            extract_rb_rt(content, into_tokens)
        elseif content:match('^<[Rr][Uu][Bb][Yy]') then
            into_tokens[#into_tokens+1] = { type = 'ruby-part', content = content }
            inside_ruby = true
        else
            -- Any other tag is passed through as the very same token object.
            ---@diagnostic disable-next-line: assign-type-mismatch
            into_tokens[#into_tokens+1] = token
        end
    end
end
--- Tokenizes every string in `texts` and returns all tokens in one flat list.
--- See the tests in [[Module:Furigana/tests]] for details.
---@param texts string[]
---@return ({type: 'ruby-part' | 'tag' | 'text', content: string} | {type: 'rb-rt', rb: string, rt: string})[]
local function parse(texts)
    local all_tokens = {}
    -- Iterate with ipairs rather than `#`: p.main hands its argument table
    -- straight through, and that may be a Scribunto frame.args table.
    for _, source_text in ipairs(texts) do
        local tagged = extract_tags(source_text)
        extract_others(tagged, all_tokens)
    end
    return all_tokens
end
p.parse = parse
--- Renders every `rb-rt` token through `render_ruby`, keeps the content of all
--- other tokens as it is, and joins the pieces into a single string.
---@param tokens ({type: 'ruby-part' | 'tag' | 'text', content: string} | {type: 'rb-rt', rb: string, rt: string})[]
---@param render_ruby fun(rb: string, rt: string): string
---@return string
local function render(tokens, render_ruby)
    local pieces = {}
    for index, token in ipairs(tokens) do
        local piece
        if token.type == 'rb-rt' then
            -- Only the first return value is used; a falsy result falls back
            -- to token.content.
            piece = render_ruby(token.rb, token.rt) or token.content
        else
            piece = token.content
        end
        pieces[index] = piece
    end
    return concat(pieces)
end
p.render = render
--- Replaces every furigana written as "kanji(reading)" in `texts` with the
--- value returned by `render_ruby` and returns the joined result.
--- A single string is treated as a one-element list.
---@param texts string | string[]
---@param render_ruby fun(rb: string, rt: string): string
---@return string
function p.process(texts, render_ruby)
    local text_list = texts
    if type(text_list) == 'string' then
        text_list = { text_list }
    end
    return render(parse(text_list), render_ruby)
end
--- Entry point for `{{#invoke:Furigana|main}}`.
---
--- Positional arguments are the texts to process. Named arguments:
---   * `template`   - template used to render each pair; defaults to `Ruby`
---                    when missing or empty.
---   * `compatible` - any non-empty value disables the fast path and expands
---                    the template once per pair.
---
---@param frame table Scribunto frame object.
---@param args table|nil Argument table. When omitted, the arguments of the
---  parent frame are used if the parent is Template:Furigana or
---  Template:Sandbox; otherwise the arguments of `frame` itself are used.
---@return string
function p.main(frame, args)
    if not args then
        local parent = frame:getParent()
        local templates = { ['Template:Furigana'] = true, ['Template:Sandbox'] = true }
        if parent and templates[parent:getTitle()] then
            frame = parent
        end
        args = frame.args
    end
    -- An explicitly empty `template=` is treated like a missing one, the same
    -- way an empty `compatible=` is treated below; otherwise an empty title
    -- would be handed to frame:expandTemplate.
    local template = args.template
    if not template or template == '' then
        template = 'Ruby'
    end
    if mw.isSubsting() then
        -- When substituting, emit template calls as wikitext instead of
        -- expanding them.
        local prefix = '{{'..template..'|'
        return p.process(args, function (rb, rt)
            return prefix..rb..'|'..rt..'}}'
        end)
    end
    local is_compatible_mode = args.compatible and args.compatible ~= ''
    if not is_compatible_mode then
        -- Fast mode: expand the template once with placeholder arguments and
        -- fill it in by string replacement, avoiding one frame:expandTemplate
        -- call per pair.
        -- "$" is only magic at the very end of a Lua pattern, so it is literal here.
        local placeholder_pattern = '!@$r[bt]$@!'
        local rb_placeholder = '!@$rb$@!'
        local rt_placeholder = '!@$rt$@!'
        local template_string = frame:expandTemplate {
            title = template,
            args = { rb_placeholder, rt_placeholder },
        }
        -- Only usable if at least one placeholder survived the expansion;
        -- otherwise fall through to the per-pair expansion below.
        if template_string:find(placeholder_pattern) then
            return p.process(args, function (rb, rt)
                local replacements = { [rb_placeholder] = rb, [rt_placeholder] = rt }
                -- Parentheses drop gsub's second return value (the match count).
                return (template_string:gsub(placeholder_pattern, replacements))
            end)
        end
    end
    return p.process(args, function (rb, rt)
        return frame:expandTemplate {
            title = template,
            args = { rb, rt },
        }
    end)
end
return p