class String

Public Instance Methods

ascii() click to toggle source
# File lib/sup/util.rb, line 429
# Builds a copy of the string in which every byte with the high bit set is
# written out as a literal "\xNN" escape (lowercase hex), while 7-bit bytes
# are copied through unchanged. The result is run through fix_encoding!
# and returned.
def ascii
  escaped = each_byte.each_with_object("") do |byte, acc|
    acc << (byte < 128 ? byte.chr : "\\x#{byte.to_s 16}")
  end

  # only ascii-compatible characters are left at this point, so
  # fix_encoding! just settles the final encoding of the result.
  escaped.fix_encoding!
end
camel_to_hyphy() click to toggle source
# File lib/sup/util.rb, line 254
# Converts camelCase to hyphen-separated lowercase: a hyphen is inserted
# between a lowercase letter and a following uppercase letter or digit,
# then the whole result is downcased.
def camel_to_hyphy
  hyphenated = gsub(/([a-z])([A-Z0-9])/, '\1-\2')
  hyphenated.downcase
end
check() click to toggle source
# File lib/sup/util.rb, line 420
# Sanity-checks the string's encoding.
#
# Raises CheckError when the encoding is neither UTF-8 nor ASCII, or when
# the bytes are not valid for the current encoding. Each test is skipped
# if the string does not respond to the corresponding method. Returns nil
# when the string passes.
def check
  if respond_to?(:encoding) && encoding != Encoding::UTF_8 && encoding != Encoding::ASCII
    fail "unexpected encoding #{encoding}"
  end

  fail "invalid encoding" if respond_to?(:valid_encoding?) && !valid_encoding?
rescue => err
  # re-raise any failure (including the two above) as a CheckError
  raise CheckError.new(err.message)
end
display_length() click to toggle source
# File lib/sup/util.rb, line 242
# Returns the display width of the string as reported by
# Unicode::DisplayWidth.of.
#
# The width is cached on the string, keyed on the string's current #hash,
# so that mutating the string in place (<<, gsub!, ...) invalidates the
# cached value instead of returning a stale width. Frozen strings cannot
# take instance variables, so they are measured directly without caching.
def display_length
  return Unicode::DisplayWidth.of(self) if frozen?

  content_key = hash
  unless @display_length && @display_length_key == content_key
    @display_length = Unicode::DisplayWidth.of(self)
    @display_length_key = content_key
  end
  @display_length
end
each(&b) click to toggle source
# File lib/sup/util.rb, line 407
# Line-wise iteration: forwards the given block to each_line.
def each(&block)
  each_line(&block)
end
find_all_positions(x) click to toggle source
# File lib/sup/util.rb, line 258
# Returns an array with the start index of every occurrence of x (anything
# String#index accepts) in the string. The search resumes one character
# after each hit, so overlapping occurrences are all reported.
def find_all_positions x
  positions = []
  cursor = 0
  until cursor >= length
    hit = index(x, cursor)
    break unless hit
    positions << hit
    cursor = hit + 1
  end
  positions
end
fix_encoding!() click to toggle source

Fix the damn string! Make sure it is valid UTF-8, then convert it to the user's encoding ($encoding). The string is modified in place.

# File lib/sup/util.rb, line 353
# Repairs the string's encoding in place and returns self.
#
# The string is first converted to UTF-8 with invalid/undefined sequences
# replaced, then converted to the encoding named by the global $encoding.
# Raises RuntimeError if the string is still invalid after either stage.
def fix_encoding!
  replace_bad = { :invalid => :replace, :undef => :replace }

  # stage 1: whatever the current encoding is -> UTF-8
  encode!('UTF-8', **replace_bad)

  # a string already tagged UTF-8 is not touched by the call above, so
  # round-trip through UTF-16 (which can represent all of UTF-8) to force
  # invalid chars to be replaced.
  [['UTF-16', 'UTF-8'], ['UTF-8', 'UTF-16']].each do |target, source|
    encode!(target, source, **replace_bad)
  end

  if !valid_encoding?
    fail "Could not create valid UTF-8 string out of: '#{self.to_s}'."
  end

  # stage 2: UTF-8 -> $encoding
  encode!($encoding, **replace_bad)

  if !valid_encoding?
    fail "Could not create valid #{$encoding.inspect} string out of: '#{self.to_s}'."
  end

  self
end
normalize_whitespace() click to toggle source
# File lib/sup/util.rb, line 395
# Repairs the receiver's encoding in place (fix_encoding!), then returns a
# new string with each tab expanded to four spaces and every carriage
# return removed.
def normalize_whitespace
  fix_encoding!
  detabbed = gsub(/\t/, "    ")
  detabbed.gsub(/\r/, "")
end
ord() click to toggle source
# File lib/sup/util.rb, line 401
# Returns the integer code of the first character, or nil for an empty
# string.
#
# On Ruby 1.8 String#[] with an integer index already yields the byte
# value. On 1.9 and later it yields a one-character String, which must be
# converted to its codepoint so that callers always get an integer.
def ord
  first = self[0]
  first.is_a?(String) ? first.codepoints.first : first
end
slice_by_display_length(len) click to toggle source
# File lib/sup/util.rb, line 246
# Returns the longest prefix of the string whose characters' display
# widths (per Unicode::DisplayWidth.of, measured one character at a time)
# add up to at most len.
def slice_by_display_length len
  remaining = len
  prefix = ""
  each_char do |ch|
    remaining -= Unicode::DisplayWidth.of(ch)
    break if remaining < 0 # this character would overflow the budget
    prefix << ch
  end
  prefix
end
split_on_commas() click to toggle source

A very complicated regex, found on the internet, that splits on commas unless they occur within double quotes.

# File lib/sup/util.rb, line 272
# Splits the whitespace-normalized string on commas (plus any whitespace
# following them), ignoring commas that sit inside a pair of double
# quotes. Returns the array of pieces.
def split_on_commas
  cleaned = normalize_whitespace()
  # the lookahead only accepts a comma when the quotes after it pair up
  cleaned.split(/,\s*(?=(?:[^"]*"[^"]*")*(?![^"]*"))/)
end
split_on_commas_with_remainder() click to toggle source

OK, here we do it the hard way: we need to have a remainder (the trailing, still-unterminated quoted part) for the purposes of tab-completing full email addresses.

# File lib/sup/util.rb, line 278
# Splits the string on commas that are outside double quotes, using a
# small state machine that also understands backslash escapes.
#
# Returns a two-element array [parts, remainder]:
# * parts     - the comma-separated pieces, each with leading and trailing
#               whitespace removed.
# * remainder - when the string ends inside an unterminated double-quoted
#               section, the text from the start of the last piece onward
#               (leading whitespace removed); nil otherwise.
#
# States: :outstring / :instring track whether we are inside double
# quotes; the :escaped_* variants mean the previous character was a
# backslash, so the current character must be taken literally.
def split_on_commas_with_remainder
  ret = []
  state = :outstring
  pos = 0
  region_start = 0
  while pos <= length
    # right after a backslash, look at the very next character; otherwise
    # jump ahead to the next character that can change the state.
    newpos = case state
      when :escaped_instring, :escaped_outstring then pos
      else index(/[,"\\]/, pos)
    end

    if newpos
      char = self[newpos]
    else
      # nothing interesting left: treat end-of-string like a final comma
      char = nil
      newpos = length
    end

    case char
    when ?"
      state = case state
        when :outstring then :instring
        when :instring then :outstring
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
      end
    when ?,, nil
      state = case state
        when :outstring, :escaped_outstring then
          ret << self[region_start ... newpos].gsub(/^\s+|\s+$/, "")
          region_start = newpos + 1
          :outstring
        when :instring then :instring
        when :escaped_instring then :instring
      end
    when ?\\
      state = case state
        when :instring then :escaped_instring
        when :outstring then :escaped_outstring
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
      end
    else
      # an ordinary character is only ever examined right after a
      # backslash. It ends the escape; without this the state stayed
      # "escaped" and a later closing quote was wrongly taken as escaped.
      state = case state
        when :escaped_instring then :instring
        when :escaped_outstring then :outstring
        else state
      end
    end
    pos = newpos + 1
  end

  remainder = case state
    when :instring
      self[region_start .. -1].gsub(/^\s+/, "")
    else
      nil
    end

  [ret, remainder]
end
to_set_of_symbols(split_on=nil) click to toggle source

Takes a list of words and returns a set of symbols. Typically used in Sup for translating Xapian's representation of a list of labels (a string) to a set of label symbols.

split_on will be passed to String#split, so you can leave it nil to split on whitespace.

# File lib/sup/util.rb, line 417
# Splits the string with String#split(split_on), strips each piece and
# interns it, and returns the resulting symbols as a Set.
def to_set_of_symbols split_on=nil
  symbols = split(split_on).map { |word| word.strip.intern }
  Set.new symbols
end
transcode(to_encoding, from_encoding) click to toggle source

Transcode the string when its original encoding is known; fix the encoding if the result is broken.

# File lib/sup/util.rb, line 375
# Converts the string in place from from_encoding to to_encoding,
# replacing invalid or undefined sequences, and returns self.
#
# If no converter exists for the requested pair, a debug message is
# logged and the string is repaired with fix_encoding! instead. Raises
# RuntimeError when the result is still not validly encoded.
def transcode to_encoding, from_encoding
  replace_bad = { :invalid => :replace, :undef => :replace }

  begin
    encode!(to_encoding, from_encoding, **replace_bad)

    if !valid_encoding?
      # still broken: retry the conversion by way of UTF-16
      encode!('UTF-16', from_encoding, **replace_bad)
      encode!(to_encoding, 'UTF-16', **replace_bad)
    end
  rescue Encoding::ConverterNotFoundError
    debug "Encoding converter not found for #{from_encoding.inspect} or #{to_encoding.inspect}, fixing string: '#{self.to_s}', but expect weird characters."
    fix_encoding!
  end

  fail "Could not create valid #{to_encoding.inspect} string out of: '#{self.to_s}'." unless valid_encoding?

  self
end
wrap(len) click to toggle source
# File lib/sup/util.rb, line 334
# Breaks the string into an array of lines whose display length is at
# most len.
#
# Each line is cut at the last whitespace character that fits within len
# columns (the whitespace itself is dropped); when the window contains no
# whitespace the line is cut hard at the window boundary.
#
# If not even a single character fits (len is zero or negative, or the
# next character is wider than len), one character is emitted on its own
# line anyway so that the loop always makes progress instead of spinning
# forever on an empty slice.
def wrap len
  ret = []
  s = self
  while s.display_length > len
    slice = s.slice_by_display_length(len)
    cut = slice.rindex(/\s/)
    if cut
      ret << s[0 ... cut]
      s = s[(cut + 1) .. -1]
    else
      # nothing fits in the window: force a single character through
      forced = slice.empty?
      slice = s[0, 1] if forced
      ret << slice
      s = s[slice.length .. -1]
      # a forced cut that consumed the rest of the string is the end
      return ret if forced && s.empty?
    end
  end
  ret << s
end