module Unicode::DisplayWidth::IndexBuilder
Constants
- EAST_ASIAN_WIDTH_DATA_FILENAME
- EAST_ASIAN_WIDTH_DATA_URL
- IGNORE_CATEGORIES
- SPECIAL_WIDTHS
- ZERO_WIDTH_CATEGORIES
- ZERO_WIDTH_CODEPOINTS
Public Class Methods
build!()
click to toggle source
# File lib/unicode/display_width/index_builder.rb, line 34
#
# Builds the codepoint => width index from the East Asian Width data file
# and serializes it with Marshal into INDEX_FILENAME.
#
# Each data line of the form "<codepoint or range>;<width> # <category> ..."
# contributes one index entry per codepoint. Lines whose category is listed
# in IGNORE_CATEGORIES are skipped. A codepoint is stored with width 0 when
# is_zero_width? says so, otherwise with the width field as a Symbol.
# SPECIAL_WIDTHS is merged in last, so it overrides entries from the file.
#
# Creates DATA_DIRECTORY if it does not exist yet.
#
# Returns whatever the final File.open block returns.
def self.build!
  index = {}

  # File.foreach closes the data file as soon as iteration finishes.
  File.foreach(EAST_ASIAN_WIDTH_DATA_FILENAME) do |line|
    entry = /^(\S+?);(\S+)\s+#\s(\S+).*$/.match(line)
    next unless entry # comment lines, blank lines, anything not a data row

    cps, width, category = entry.captures
    next if IGNORE_CATEGORIES.include?(category)

    # "XXXX" is a single codepoint, "XXXX..YYYY" is an inclusive range.
    first, last = cps.split('..').map { |cp| cp.to_i(16) }
    (first..(last || first)).each do |cp|
      index[cp] = is_zero_width?(category, cp) ? 0 : width.to_sym
    end
  end

  index.merge!(SPECIAL_WIDTHS)

  # Dir.exist? rather than the removed Dir.exists? alias (gone in Ruby 3.2).
  Dir.mkdir(DATA_DIRECTORY) unless Dir.exist?(DATA_DIRECTORY)
  File.open(INDEX_FILENAME, 'wb') { |f| Marshal.dump(index, f) }
end
fetch!()
click to toggle source
# File lib/unicode/display_width/index_builder.rb, line 27
#
# Downloads EAST_ASIAN_WIDTH_DATA_URL and writes the response body to
# EAST_ASIAN_WIDTH_DATA_FILENAME, replacing any existing file.
def self.fetch!
  # Loaded lazily: only needed when the data file is being refreshed.
  require 'open-uri'

  # URI.open instead of Kernel#open: since Ruby 3.0 open-uri no longer
  # hooks Kernel#open, so a bare open(url) would look for a local file.
  URI.open(EAST_ASIAN_WIDTH_DATA_URL) do |remote|
    File.write(EAST_ASIAN_WIDTH_DATA_FILENAME, remote.read)
  end
end
is_zero_width?(category, cp)
click to toggle source
# File lib/unicode/display_width/index_builder.rb, line 61
#
# Tells whether codepoint +cp+ with general category +category+ is indexed
# with width 0.
#
# True when the category is in ZERO_WIDTH_CATEGORIES and the character does
# not match the "Cf character in Arabic script" pattern, or when the
# codepoint is explicitly listed in ZERO_WIDTH_CODEPOINTS.
def self.is_zero_width?(category, cp)
  if ZERO_WIDTH_CATEGORIES.include?(category)
    char = [cp].pack('U')
    # Arabic format characters are excluded from the category-based rule.
    return true unless char =~ /\p{Cf}(?<=\p{Arabic})/
  end

  ZERO_WIDTH_CODEPOINTS.include?(cp)
end