Module:Furigana:修订间差异
外观
创建页面,内容为“local p = {} local getArgs = require('module:arguments').getArgs function main(args) local rb = args[1] local rt = args[2] local rt2 = args[3] local newrb = '' local newrt = '' if rt then local lrb = mw.ustring.len(rb) local lrt = mw.ustring.len(rt) local diff = lrt * 0.75 - lrb local space = '' local important = rt2 and '!important' or '' if diff > 0 then space = tostring(math.floor(diff / (lrb + 1) * 1000) / 1000)..'em' newrb = '<span…” |
无编辑摘要 |
||
| 第1行: | 第1行: | ||
local p = {} | local p = {} | ||
function | -- 转化为upvalue,提升速度 | ||
local | local type = type | ||
local ipairs = ipairs | |||
local | local concat = table.concat | ||
local u_gsub = mw.ustring.gsub | |||
local u_sub = mw.ustring.sub | |||
if | |||
-- Code point ranges treated as "kanji-like". Each entry is either an
-- inclusive { first, last } pair or a single code point.
local KANJI_LIKE_UNICODE_RANGES = {
    { 0x2E80, 0x2EFF }, -- CJK Radicals Supplement
    { 0x3005, 0x3007 }, -- "々", "〆", "〇"
    { 0x31C0, 0x31EF }, -- CJK Strokes
    { 0x3400, 0x4DBF }, -- CJK Unified Ideographs Extension A
    { 0x4E00, 0x9FFF }, -- CJK Unified Ideographs
    { 0xF900, 0xFAFF }, -- CJK Compatibility Ideographs
    { 0x20000, 0x2A6DF }, -- CJK Unified Ideographs Extension B
    { 0x2A700, 0x2EE5F }, -- CJK Unified Ideographs Extensions C-I
    { 0x2F800, 0x2FA1F }, -- CJK Compatibility Ideographs Supplement
    { 0x30000, 0x323AF }, -- CJK Unified Ideographs Extensions G-H
}

-- Bracketed character class (for the mw.ustring pattern functions) that
-- matches a single kanji-like character. Built once, when the module loads,
-- from KANJI_LIKE_UNICODE_RANGES.
local KANJI_LIKE_PATTERN
do
    local to_char = mw.ustring.char
    local pieces = {}
    for index, entry in ipairs(KANJI_LIKE_UNICODE_RANGES) do
        if type(entry) == 'table' then
            -- { first, last } becomes the class range "first-last"
            pieces[index] = to_char(entry[1])..'-'..to_char(entry[2])
        else
            -- a bare code point becomes a single class member
            pieces[index] = to_char(entry)
        end
    end
    KANJI_LIKE_PATTERN = '['..concat(pieces)..']'
end
--[[
Splits `text` into HTML-like tags (`<tag ...>` / `</tag>`) and the text around them.
Empty stretches of text between tags produce no token.

```lua
extract_tags('A<span title="(title)">B</span>C')
-->
{
    { type = 'unknown', content = 'A' },
    { type = 'tag', content = '<span title="(title)">' },
    { type = 'unknown', content = 'B' },
    { type = 'tag', content = '</span>' },
    { type = 'unknown', content = 'C' },
}
```
]]
---@param text string
---@return {type: 'tag' | 'unknown', content: string}[]
local function extract_tags(text)
    local result = {}
    -- Each match (leading text + tag) is replaced by '', so the string gsub
    -- hands back is whatever follows the last tag.
    local rest = text:gsub(
        -- Tag: '<' or '</', a name start character, further name characters,
        -- then anything except '<' / '>' up to the closing '>'.
        -- The '-' is escaped: an unescaped `_-.` inside the set is a reversed
        -- (hence empty) range, so none of '_', '-', '.' would be matched by it.
        '(.-)(</?[:%a_][:%w_.%-]*[^<>]*>)',
        function (non_tag, tag)
            if non_tag ~= '' then
                result[#result+1] = { type = 'unknown', content = non_tag }
            end
            result[#result+1] = { type = 'tag', content = tag }
            return ''
        end
    )
    if rest ~= '' then
        result[#result+1] = { type = 'unknown', content = rest }
    end
    return result
end
-- Captures: (1) any text before the pair, (2) a run of kanji-like characters,
-- (3) the balanced "(...)" group that immediately follows the run.
local EXTRACT_RB_RT_PATTERN = '(.-)('..KANJI_LIKE_PATTERN..'+)(%b())'

---Finds "kanji(reading)" pairs in `text` and appends tokens to `into_tokens`:
---an `rb-rt` token per pair and a `text` token for every non-empty stretch
---in between.
---@param text string
---@param into_tokens ({type: 'ruby-part' | 'tag' | 'text', content: string} | {type: 'rb-rt', rb: string, rt: string})[]
local function extract_rb_rt(text, into_tokens)
    -- Appends a plain text token unless the content is empty.
    local function emit_text(content)
        if content ~= '' then
            into_tokens[#into_tokens+1] = { type = 'text', content = content }
        end
    end

    -- gsub callback: record the text before the pair, then the pair itself.
    local function on_pair(before, rb, rt)
        emit_text(before)
        into_tokens[#into_tokens+1] = {
            type = 'rb-rt',
            rb = rb,
            rt = u_sub(rt, 2, -2), -- drop the surrounding parentheses
        }
        return ''
    end

    -- The text is handled one line (plus its trailing newlines) at a time.
    -- NOTE(review): presumably this keeps the lazy `(.-)` scan from running
    -- across the whole text — confirm.
    for chunk in text:gmatch('[^\n]*\n*') do
        -- Matches were replaced by '', so only the tail after the last pair is left.
        local leftover = u_gsub(chunk, EXTRACT_RB_RT_PATTERN, on_pair)
        emit_text(leftover)
    end
end
---Converts the output of the tag splitter into final tokens, appended to `into_tokens`.
---Everything from a `<ruby...` tag through the next `</ruby...` tag becomes
---`ruby-part` tokens, other tags are passed through as they are, and plain text
---is scanned for "kanji(reading)" pairs.
---See the tests in [[Module:Furigana/tests]] for details.
---@param tokens {type: 'tag' | 'unknown', content: string}[]
---@param into_tokens ({type: 'ruby-part' | 'tag' | 'text', content: string} | {type: 'rb-rt', rb: string, rt: string})[]
local function extract_others(tokens, into_tokens)
    local inside_ruby = false
    for _, token in ipairs(tokens) do
        local content = token.content
        local is_tag = token.type == 'tag'
        if inside_ruby then
            -- Inside <ruby>...</ruby> every token is copied verbatim as a ruby-part.
            into_tokens[#into_tokens+1] = { type = 'ruby-part', content = content }
            if is_tag and content:match('^</[Rr][Uu][Bb][Yy]') then
                inside_ruby = false
            end
        elseif not is_tag then
            -- Plain text (type 'unknown'): look for rb/rt pairs.
            extract_rb_rt(content, into_tokens)
        elseif content:match('^<[Rr][Uu][Bb][Yy]') then
            into_tokens[#into_tokens+1] = { type = 'ruby-part', content = content }
            inside_ruby = true
        else
            -- Any other tag is forwarded unchanged (the same table is reused).
            ---@diagnostic disable-next-line: assign-type-mismatch
            into_tokens[#into_tokens+1] = token
        end
    end
end
---Parses every string in `texts` into one flat token list.
---See the tests in [[Module:Furigana/tests]] for details.
---@param texts string[]
---@return ({type: 'ruby-part' | 'tag' | 'text', content: string} | {type: 'rb-rt', rb: string, rt: string})[]
local function parse(texts)
    local collected = {}
    for _, piece in ipairs(texts) do
        -- First separate tags from text, then classify and append to the shared list.
        local split = extract_tags(piece)
        extract_others(split, collected)
    end
    return collected
end
p.parse = parse | |||
---Renders `tokens` into a single string: each `rb-rt` token is replaced by the
---result of `render_ruby(rb, rt)`, every other token contributes its `content`.
---Only the first return value of `render_ruby` is used; a nil/false result
---contributes an empty string.
---@param tokens ({type: 'ruby-part' | 'tag' | 'text', content: string} | {type: 'rb-rt', rb: string, rt: string})[]
---@param render_ruby fun(rb: string, rt: string): string
---@return string
local function render(tokens, render_ruby)
    local result = {}
    for i, token in ipairs(tokens) do
        local piece
        if token.type == 'rb-rt' then
            -- Plain assignment keeps just the first return value of the callback.
            piece = render_ruby(token.rb, token.rt)
        else
            piece = token.content
        end
        -- Storing nil would leave a hole in `result` and make concat fail or
        -- stop early, so a missing piece is rendered as nothing.
        result[i] = piece or ''
    end
    return concat(result)
end
p.render = render | |||
---Replaces every furigana written as "kanji(reading)" in `texts` with the
---value returned by `render_ruby` and returns the joined result.
---A single string is accepted as shorthand for a one-element list.
---@param texts string | string[]
---@param render_ruby fun(rb: string, rt: string): string
---@return string
function p.process(texts, render_ruby)
    local text_list = type(texts) == 'string' and { texts } or texts
    return render(parse(text_list), render_ruby)
end
---Module entry point: renders the positional arguments, turning each
---"kanji(reading)" pair into a call of the template named by `args.template`
---(default `Ruby`).
---
---When `args` is not given, arguments are read from the parent frame if the
---parent is `Template:Furigana` or `Template:Sandbox`, otherwise from `frame`.
---A non-empty `args.compatible` forces one `frame:expandTemplate` call per pair.
---@param frame table Scribunto frame object
---@param args table? arguments to use instead of the frame's own
---@return string
function p.main(frame, args)
    if not args then
        local parent = frame:getParent()
        local templates = { ['Template:Furigana'] = true, ['Template:Sandbox'] = true }
        if parent and templates[parent:getTitle()] then
            frame = parent
        end
        args = frame.args
    end

    -- An empty `template=` is treated like a missing one (the same way an
    -- empty `compatible=` is treated below); '' is not a usable template title.
    local template = args.template
    if template == nil or template == '' then
        template = 'Ruby'
    end

    if mw.isSubsting() then
        -- When substituted, emit the template calls as wikitext instead of expanding them.
        local prefix = '{{'..template..'|'
        return p.process(args, function (rb, rt)
            return prefix..rb..'|'..rt..'}}'
        end)
    end

    local is_compatible_mode = args.compatible and args.compatible ~= ''
    if not is_compatible_mode then
        -- Fast mode: expand the template once with placeholders, then do plain
        -- string replacement per pair instead of calling frame:expandTemplate each time.
        -- ('$' is an anchor only at the very end of a Lua pattern, so it is literal here.)
        local placeholder_pattern = '!@$r[bt]$@!'
        local rb_placeholder = '!@$rb$@!'
        local rt_placeholder = '!@$rt$@!'
        local template_string = frame:expandTemplate {
            title = template,
            args = { rb_placeholder, rt_placeholder },
        }
        -- Use the fast path only if a placeholder survived expansion verbatim;
        -- otherwise fall through to the per-pair expansion below.
        if template_string:find(placeholder_pattern) then
            return p.process(args, function (rb, rt)
                return template_string:gsub(placeholder_pattern, { [rb_placeholder] = rb, [rt_placeholder] = rt })
            end)
        end
    end

    -- Compatible mode (or fast-mode fallback): expand the template for every pair.
    return p.process(args, function (rb, rt)
        return frame:expandTemplate {
            title = template,
            args = { rb, rt },
        }
    end)
end
return p | return p | ||