Import Upstream version 2.7.1upstream/2.7.1

author: Unit 193 <unit193@ubuntu.com> 2017-06-12 03:37:11 -0400
committer: Unit 193 <unit193@ubuntu.com> 2017-06-12 03:37:11 -0400
commit: 8280a21a23d44aa90177e2bc041d0b8dc8556f4b (patch)
tree: dadef7ee085c0e990a5070bd41b6a5b98c97f4fd /lib
39 files changed, 3577 insertions, 0 deletions
diff --git a/lib/roo.rb b/lib/roo.rb
new file mode 100644
index 0000000..ec8eb42
--- /dev/null
+++ b/lib/roo.rb
@@ -0,0 +1,33 @@
+require 'roo/constants'
+require 'roo/errors'
+require 'roo/spreadsheet'
+require 'roo/base'
+
+module Roo
+  autoload :OpenOffice,   'roo/open_office'
+  autoload :LibreOffice,  'roo/libre_office'
+  autoload :Excelx,       'roo/excelx'
+  autoload :CSV,          'roo/csv'
+
+  TEMP_PREFIX = 'roo_'.freeze
+
+  CLASS_FOR_EXTENSION = {
+    ods: Roo::OpenOffice,
+    xlsx: Roo::Excelx,
+    xlsm: Roo::Excelx,
+    csv: Roo::CSV
+  }
+
+  def self.const_missing(const_name)
+    case const_name
+    when :Excel
+      raise ROO_EXCEL_NOTICE
+    when :Excel2003XML
+      raise ROO_EXCELML_NOTICE
+    when :Google
+      raise ROO_GOOGLE_NOTICE
+    else
+      super
+    end
+  end
+end
diff --git a/lib/roo/base.rb b/lib/roo/base.rb
new file mode 100644
index 0000000..53e4075
--- /dev/null
+++ b/lib/roo/base.rb
@@ -0,0 +1,598 @@
+# encoding: utf-8
+
+require 'tmpdir'
+require 'stringio'
+require 'nokogiri'
+require 'roo/utils'
+require "roo/formatters/base"
+require "roo/formatters/csv"
+require "roo/formatters/matrix"
+require "roo/formatters/xml"
+require "roo/formatters/yaml"
+
+# Base class for all other types of spreadsheets
+class Roo::Base
+  include Enumerable
+  include Roo::Formatters::Base
+  include Roo::Formatters::CSV
+  include Roo::Formatters::Matrix
+  include Roo::Formatters::XML
+  include Roo::Formatters::YAML
+
+  MAX_ROW_COL = 999_999.freeze
+  MIN_ROW_COL = 0.freeze
+
+  attr_reader :headers
+
+  # sets the line with attribute names (default: 1)
+  attr_accessor :header_line
+
+  def self.TEMP_PREFIX
+    warn '[DEPRECATION] please access TEMP_PREFIX via Roo::TEMP_PREFIX'
+    Roo::TEMP_PREFIX
+  end
+
+  def self.finalize(object_id)
+    proc { finalize_tempdirs(object_id) }
+  end
+
+  def initialize(filename, options = {}, _file_warning = :error, _tmpdir = nil)
+    @filename = filename
+    @options = options
+
+    @cell = {}
+    @cell_type = {}
+    @cells_read = {}
+
+    @first_row = {}
+    @last_row = {}
+    @first_column = {}
+    @last_column = {}
+
+    @header_line = 1
+  end
+
+  def close
+    if self.class.respond_to?(:finalize_tempdirs)
+      self.class.finalize_tempdirs(object_id)
+    end
+    nil
+  end
+
+  def default_sheet
+    @default_sheet ||= sheets.first
+  end
+
+  # sets the working sheet in the document
+  # 'sheet' can be a number (1 = first sheet) or the name of a sheet.
+  def default_sheet=(sheet)
+    validate_sheet!(sheet)
+    @default_sheet = sheet
+    @first_row[sheet] = @last_row[sheet] = @first_column[sheet] = @last_column[sheet] = nil
+    @cells_read[sheet] = false
+  end
+
+  # first non-empty column as a letter
+  def first_column_as_letter(sheet = default_sheet)
+    ::Roo::Utils.number_to_letter(first_column(sheet))
+  end
+
+  # last non-empty column as a letter
+  def last_column_as_letter(sheet = default_sheet)
+    ::Roo::Utils.number_to_letter(last_column(sheet))
+  end
+
+  # Set first/last row/column for sheet
+  def first_last_row_col_for_sheet(sheet)
+    @first_last_row_cols ||= {}
+    @first_last_row_cols[sheet] ||= begin
+      result = collect_last_row_col_for_sheet(sheet)
+      {
+        first_row: result[:first_row] == MAX_ROW_COL ? nil : result[:first_row],
+        first_column: result[:first_column] == MAX_ROW_COL ? nil : result[:first_column],
+        last_row: result[:last_row] == MIN_ROW_COL ? nil : result[:last_row],
+        last_column: result[:last_column] == MIN_ROW_COL ? nil : result[:last_column]
+      }
+    end
+  end
+
+  # Collect first/last row/column from sheet
+  def collect_last_row_col_for_sheet(sheet)
+    first_row = first_column = MAX_ROW_COL
+    last_row = last_column = MIN_ROW_COL
+    @cell[sheet].each_pair do|key, value|
+      next unless value
+      first_row = [first_row, key.first.to_i].min
+      last_row = [last_row, key.first.to_i].max
+      first_column = [first_column, key.last.to_i].min
+      last_column = [last_column, key.last.to_i].max
+    end if @cell[sheet]
+    { first_row: first_row, first_column: first_column, last_row: last_row, last_column: last_column }
+  end
+
+  %w(first_row last_row first_column last_column).each do |key|
+    class_eval <<-EOS, __FILE__, __LINE__ + 1
+      def #{key}(sheet = default_sheet)                                   # def first_row(sheet = default_sheet)
+        read_cells(sheet)                                                 #   read_cells(sheet)
+        @#{key}[sheet] ||= first_last_row_col_for_sheet(sheet)[:#{key}]   #   @first_row[sheet] ||= first_last_row_col_for_sheet(sheet)[:first_row]
+      end                                                                 # end
+    EOS
+  end
+
+  def inspect
+    "<##{self.class}:#{object_id.to_s(8)} #{instance_variables.join(' ')}>"
+  end
+
+  # find a row either by row number or a condition
+  # Caution: this works only within the default sheet -> set default_sheet before you call this method
+  # (experimental. see examples in the test_roo.rb file)
+  def find(*args) # :nodoc
+    options = (args.last.is_a?(Hash) ? args.pop : {})
+
+    case args[0]
+    when Integer
+      find_by_row(args[0])
+    when :all
+      find_by_conditions(options)
+    else
+      fail ArgumentError, "unexpected arg #{args[0].inspect}, pass a row index or :all"
+    end
+  end
+
+  # returns all values in this row as an array
+  # row numbers are 1,2,3,... like in the spreadsheet
+  def row(row_number, sheet = default_sheet)
+    read_cells(sheet)
+    first_column(sheet).upto(last_column(sheet)).map do |col|
+      cell(row_number, col, sheet)
+    end
+  end
+
+  # returns all values in this column as an array
+  # column numbers are 1,2,3,... like in the spreadsheet
+  def column(column_number, sheet = default_sheet)
+    if column_number.is_a?(::String)
+      column_number = ::Roo::Utils.letter_to_number(column_number)
+    end
+    read_cells(sheet)
+    first_row(sheet).upto(last_row(sheet)).map do |row|
+      cell(row, column_number, sheet)
+    end
+  end
+
+  # set a cell to a certain value
+  # (this will not be saved back to the spreadsheet file!)
+  def set(row, col, value, sheet = default_sheet) #:nodoc:
+    read_cells(sheet)
+    row, col = normalize(row, col)
+    cell_type = cell_type_by_value(value)
+    set_value(row, col, value, sheet)
+    set_type(row, col, cell_type, sheet)
+  end
+
+  def cell_type_by_value(value)
+    case value
+    when Integer then :float
+    when String, Float then :string
+    else
+      fail ArgumentError, "Type for #{value} not set"
+    end
+  end
+
+  # reopens and read a spreadsheet document
+  def reload
+    ds = default_sheet
+    reinitialize
+    self.default_sheet = ds
+  end
+
+  # true if cell is empty
+  def empty?(row, col, sheet = default_sheet)
+    read_cells(sheet)
+    row, col = normalize(row, col)
+    contents = cell(row, col, sheet)
+    !contents || (celltype(row, col, sheet) == :string && contents.empty?) \
+      || (row < first_row(sheet) || row > last_row(sheet) || col < first_column(sheet) || col > last_column(sheet))
+  end
+
+  # returns information of the spreadsheet document and all sheets within
+  # this document.
+  def info
+    without_changing_default_sheet do
+      result = "File: #{File.basename(@filename)}\n"\
+        "Number of sheets: #{sheets.size}\n"\
+        "Sheets: #{sheets.join(', ')}\n"
+      n = 1
+      sheets.each do|sheet|
+        self.default_sheet = sheet
+        result << 'Sheet ' + n.to_s + ":\n"
+        if first_row
+          result << "  First row: #{first_row}\n"
+          result << "  Last row: #{last_row}\n"
+          result << "  First column: #{::Roo::Utils.number_to_letter(first_column)}\n"
+          result << "  Last column: #{::Roo::Utils.number_to_letter(last_column)}"
+        else
+          result << '  - empty -'
+        end
+        result << "\n" if sheet != sheets.last
+        n += 1
+      end
+      result
+    end
+  end
+
+  # when a method like spreadsheet.a42 is called
+  # convert it to a call of spreadsheet.cell('a',42)
+  def method_missing(m, *args)
+    # #aa42 => #cell('aa',42)
+    # #aa42('Sheet1')  => #cell('aa',42,'Sheet1')
+    if m =~ /^([a-z]+)(\d+)$/
+      col = ::Roo::Utils.letter_to_number(Regexp.last_match[1])
+      row = Regexp.last_match[2].to_i
+      if args.empty?
+        cell(row, col)
+      else
+        cell(row, col, args.first)
+      end
+    else
+      super
+    end
+  end
+
+  # access different worksheets by calling spreadsheet.sheet(1)
+  # or spreadsheet.sheet('SHEETNAME')
+  def sheet(index, name = false)
+    self.default_sheet = index.is_a?(::String) ? index : sheets[index]
+    name ? [default_sheet, self] : self
+  end
+
+  # iterate through all worksheets of a document
+  def each_with_pagename
+    sheets.each do |s|
+      yield sheet(s, true)
+    end
+  end
+
+  # by passing in headers as options, this method returns
+  # specific columns from your header assignment
+  # for example:
+  # xls.sheet('New Prices').parse(:upc => 'UPC', :price => 'Price') would return:
+  # [{:upc => 123456789012, :price => 35.42},..]
+
+  # the queries are matched with regex, so regex options can be passed in
+  # such as :price => '^(Cost|Price)'
+  # case insensitive by default
+
+  # by using the :header_search option, you can query for headers
+  # and return a hash of every row with the keys set to the header result
+  # for example:
+  # xls.sheet('New Prices').parse(:header_search => ['UPC*SKU','^Price*\sCost\s'])
+
+  # that example searches for a column titled either UPC or SKU and another
+  # column titled either Price or Cost (regex characters allowed)
+  # * is the wildcard character
+
+  # you can also pass in a :clean => true option to strip the sheet of
+  # control characters and white spaces around columns
+
+  def each(options = {})
+    return to_enum(:each, options) unless block_given?
+
+    if options.empty?
+      1.upto(last_row) do |line|
+        yield row(line)
+      end
+    else
+      clean_sheet_if_need(options)
+      search_or_set_header(options)
+      headers = @headers ||
+                Hash[(first_column..last_column).map do |col|
+                  [cell(@header_line, col), col]
+                end]
+
+      @header_line.upto(last_row) do |line|
+        yield(Hash[headers.map { |k, v| [k, cell(line, v)] }])
+      end
+    end
+  end
+
+  def parse(options = {})
+    results = each(options).map do |row|
+      block_given? ? yield(row) : row
+    end
+
+    options[:headers] == true ? results : results.drop(1)
+  end
+
+  def row_with(query, return_headers = false)
+    line_no = 0
+    each do |row|
+      line_no += 1
+      headers = query.map { |q| row.grep(q)[0] }.compact
+
+      if headers.length == query.length
+        @header_line = line_no
+        return return_headers ? headers : line_no
+      elsif line_no > 100
+        raise Roo::HeaderRowNotFoundError
+      end
+    end
+    raise Roo::HeaderRowNotFoundError
+  end
+
+  protected
+
+  def file_type_check(filename, exts, name, warning_level, packed = nil)
+    if packed == :zip
+      # spreadsheet.ods.zip => spreadsheet.ods
+      # Decompression is not performed here, only the 'zip' extension
+      # is removed from the file.
+      filename = File.basename(filename, File.extname(filename))
+    end
+
+    if uri?(filename) && (qs_begin = filename.rindex('?'))
+      filename = filename[0..qs_begin - 1]
+    end
+    exts = Array(exts)
+
+    return if exts.include?(File.extname(filename).downcase)
+
+    case warning_level
+    when :error
+      warn file_type_warning_message(filename, exts)
+      fail TypeError, "#{filename} is not #{name} file"
+    when :warning
+      warn "are you sure, this is #{name} spreadsheet file?"
+      warn file_type_warning_message(filename, exts)
+    when :ignore
+      # ignore
+    else
+      fail "#{warning_level} illegal state of file_warning"
+    end
+  end
+
+  # konvertiert einen Key in der Form "12,45" (=row,column) in
+  # ein Array mit numerischen Werten ([12,45])
+  # Diese Methode ist eine temp. Loesung, um zu erforschen, ob der
+  # Zugriff mit numerischen Keys schneller ist.
+  def key_to_num(str)
+    r, c = str.split(',')
+    [r.to_i, c.to_i]
+  end
+
+  # see: key_to_num
+  def key_to_string(arr)
+    "#{arr[0]},#{arr[1]}"
+  end
+
+  def is_stream?(filename_or_stream)
+    filename_or_stream.respond_to?(:seek)
+  end
+
+  private
+
+  def clean_sheet_if_need(options)
+    return unless options[:clean]
+    options.delete(:clean)
+    @cleaned ||= {}
+    clean_sheet(default_sheet) unless @cleaned[default_sheet]
+  end
+
+  def search_or_set_header(options)
+    if options[:header_search]
+      @headers = nil
+      @header_line = row_with(options[:header_search])
+    elsif [:first_row, true].include?(options[:headers])
+      @headers = []
+      row(first_row).each_with_index { |x, i| @headers << [x, i + 1] }
+    else
+      set_headers(options)
+    end
+  end
+
+  def local_filename(filename, tmpdir, packed)
+    return if is_stream?(filename)
+    filename = download_uri(filename, tmpdir) if uri?(filename)
+    filename = unzip(filename, tmpdir) if packed == :zip
+
+    fail IOError, "file #{filename} does not exist" unless File.file?(filename)
+
+    filename
+  end
+
+  def file_type_warning_message(filename, exts)
+    *rest, last_ext = exts
+    ext_list = rest.any? ? "#{rest.join(', ')} or #{last_ext}" : last_ext
+    "use #{Roo::CLASS_FOR_EXTENSION.fetch(last_ext.sub('.', '').to_sym)}.new to handle #{ext_list} spreadsheet files. This has #{File.extname(filename).downcase}"
+  rescue KeyError
+    raise "unknown file types: #{ext_list}"
+  end
+
+  def find_by_row(row_index)
+    row_index += (header_line - 1) if @header_line
+
+    row(row_index).size.times.map do |cell_index|
+      cell(row_index, cell_index + 1)
+    end
+  end
+
+  def find_by_conditions(options)
+    rows = first_row.upto(last_row)
+    header_for = Hash[1.upto(last_column).map do |col|
+      [col, cell(@header_line, col)]
+    end]
+
+    # are all conditions met?
+    conditions = options[:conditions]
+    if conditions && !conditions.empty?
+      column_with = header_for.invert
+      rows = rows.select do |i|
+        conditions.all? { |key, val| cell(i, column_with[key]) == val }
+      end
+    end
+
+    if options[:array]
+      rows.map { |i| row(i) }
+    else
+      rows.map do |i|
+        Hash[1.upto(row(i).size).map do |j|
+          [header_for.fetch(j), cell(i, j)]
+        end]
+      end
+    end
+  end
+
+  def without_changing_default_sheet
+    original_default_sheet = default_sheet
+    yield
+  ensure
+    self.default_sheet = original_default_sheet
+  end
+
+  def reinitialize
+    initialize(@filename)
+  end
+
+  def find_basename(filename)
+    if uri?(filename)
+      require 'uri'
+      uri = URI.parse filename
+      File.basename(uri.path)
+    elsif !is_stream?(filename)
+      File.basename(filename)
+    end
+  end
+
+  def make_tmpdir(prefix = nil, root = nil, &block)
+    warn '[DEPRECATION] extend Roo::Tempdir and use its .make_tempdir instead'
+    prefix = "#{Roo::TEMP_PREFIX}#{prefix}"
+    root ||= ENV['ROO_TMP']
+
+    if block_given?
+      # folder is deleted at end of block
+      ::Dir.mktmpdir(prefix, root, &block)
+    else
+      self.class.make_tempdir(self, prefix, root)
+    end
+  end
+
+  def clean_sheet(sheet)
+    read_cells(sheet)
+    @cell[sheet].each_pair do |coord, value|
+      @cell[sheet][coord] = sanitize_value(value) if value.is_a?(::String)
+    end
+    @cleaned[sheet] = true
+  end
+
+  def sanitize_value(v)
+    v.gsub(/[[:cntrl:]]|^[\p{Space}]+|[\p{Space}]+$/, '')
+  end
+
+  def set_headers(hash = {})
+    # try to find header row with all values or give an error
+    # then create new hash by indexing strings and keeping integers for header array
+    @headers = row_with(hash.values, true)
+    @headers = Hash[hash.keys.zip(@headers.map { |x| header_index(x) })]
+  end
+
+  def header_index(query)
+    row(@header_line).index(query) + first_column
+  end
+
+  def set_value(row, col, value, sheet = default_sheet)
+    @cell[sheet][[row, col]] = value
+  end
+
+  def set_type(row, col, type, sheet = default_sheet)
+    @cell_type[sheet][[row, col]] = type
+  end
+
+  # converts cell coordinate to numeric values of row,col
+  def normalize(row, col)
+    if row.is_a?(::String)
+      if col.is_a?(::Integer)
+        # ('A',1):
+        # ('B', 5) -> (5, 2)
+        row, col = col, row
+      else
+        fail ArgumentError
+      end
+    end
+
+    col = ::Roo::Utils.letter_to_number(col) if col.is_a?(::String)
+
+    [row, col]
+  end
+
+  def uri?(filename)
+    filename.start_with?('http://', 'https://', 'ftp://')
+  rescue
+    false
+  end
+
+  def download_uri(uri, tmpdir)
+    require 'open-uri'
+    tempfilename = File.join(tmpdir, find_basename(uri))
+    begin
+      File.open(tempfilename, 'wb') do |file|
+        open(uri, 'User-Agent' => "Ruby/#{RUBY_VERSION}") do |net|
+          file.write(net.read)
+        end
+      end
+    rescue OpenURI::HTTPError
+      raise "could not open #{uri}"
+    end
+    tempfilename
+  end
+
+  def open_from_stream(stream, tmpdir)
+    tempfilename = File.join(tmpdir, 'spreadsheet')
+    File.open(tempfilename, 'wb') do |file|
+      file.write(stream[7..-1])
+    end
+    File.join(tmpdir, 'spreadsheet')
+  end
+
+  def unzip(filename, tmpdir)
+    require 'zip/filesystem'
+
+    Zip::File.open(filename) do |zip|
+      process_zipfile_packed(zip, tmpdir)
+    end
+  end
+
+  # check if default_sheet was set and exists in sheets-array
+  def validate_sheet!(sheet)
+    case sheet
+    when nil
+      fail ArgumentError, "Error: sheet 'nil' not valid"
+    when Integer
+      sheets.fetch(sheet - 1) do
+        fail RangeError, "sheet index #{sheet} not found"
+      end
+    when String
+      unless sheets.include?(sheet)
+        fail RangeError, "sheet '#{sheet}' not found"
+      end
+    else
+      fail TypeError, "not a valid sheet type: #{sheet.inspect}"
+    end
+  end
+
+  def process_zipfile_packed(zip, tmpdir, path = '')
+    if zip.file.file? path
+      # extract and return filename
+      File.open(File.join(tmpdir, path), 'wb') do |file|
+        file.write(zip.read(path))
+      end
+      File.join(tmpdir, path)
+    else
+      ret = nil
+      path += '/' unless path.empty?
+      zip.dir.foreach(path) do |filename|
+        ret = process_zipfile_packed(zip, tmpdir, path + filename)
+      end
+      ret
+    end
+  end
+end
diff --git a/lib/roo/constants.rb b/lib/roo/constants.rb
new file mode 100644
index 0000000..90d54ee
--- /dev/null
+++ b/lib/roo/constants.rb
@@ -0,0 +1,5 @@
+module Roo
+  ROO_EXCEL_NOTICE   = "Excel support has been extracted to roo-xls due to its dependency on the GPL'd spreadsheet gem. Install roo-xls to use Roo::Excel.".freeze
+  ROO_EXCELML_NOTICE = "Excel SpreadsheetML support has been extracted to roo-xls. Install roo-xls to use Roo::Excel2003XML.".freeze
+  ROO_GOOGLE_NOTICE = "Google support has been extracted to roo-google. Install roo-google to use Roo::Google.".freeze
+end
+\ No newline at end of file
diff --git a/lib/roo/csv.rb b/lib/roo/csv.rb
new file mode 100644
index 0000000..c161c64
--- /dev/null
+++ b/lib/roo/csv.rb
@@ -0,0 +1,127 @@
+require "csv"
+require "time"
+
+# The CSV class can read csv files (must be separated with commas) which then
+# can be handled like spreadsheets. This means you can access cells like A5
+# within these files.
+# The CSV class provides only string objects. If you want conversions to other
+# types you have to do it yourself.
+#
+# You can pass options to the underlying CSV parse operation, via the
+# :csv_options option.
+module Roo
+  class CSV < Roo::Base
+    attr_reader :filename
+
+    # Returns an array with the names of the sheets. In CSV class there is only
+    # one dummy sheet, because a csv file cannot have more than one sheet.
+    def sheets
+      ["default"]
+    end
+
+    def cell(row, col, sheet = nil)
+      sheet ||= default_sheet
+      read_cells(sheet)
+      @cell[normalize(row, col)]
+    end
+
+    def celltype(row, col, sheet = nil)
+      sheet ||= default_sheet
+      read_cells(sheet)
+      @cell_type[normalize(row, col)]
+    end
+
+    def cell_postprocessing(_row, _col, value)
+      value
+    end
+
+    def csv_options
+      @options[:csv_options] || {}
+    end
+
+    def set_value(row, col, value, _sheet)
+      @cell[[row, col]] = value
+    end
+
+    def set_type(row, col, type, _sheet)
+      @cell_type[[row, col]] = type
+    end
+
+    private
+
+    TYPE_MAP = {
+      String => :string,
+      Float => :float,
+      Date => :date,
+      DateTime => :datetime,
+    }
+
+    def celltype_class(value)
+      TYPE_MAP[value.class]
+    end
+
+    def read_cells(sheet = default_sheet)
+      sheet ||= default_sheet
+      return if @cells_read[sheet]
+      set_row_count(sheet)
+      set_column_count(sheet)
+      row_num = 1
+
+      each_row csv_options do |row|
+        row.each_with_index do |elem, col_num|
+          coordinate = [row_num, col_num + 1]
+          @cell[coordinate] = elem
+          @cell_type[coordinate] = celltype_class(elem)
+        end
+        row_num += 1
+      end
+
+      @cells_read[sheet] = true
+    end
+
+    def each_row(options, &block)
+      if uri?(filename)
+        each_row_using_temp_dir(filename)
+      elsif is_stream?(filename_or_stream)
+        ::CSV.new(filename_or_stream, options).each(&block)
+      else
+        ::CSV.foreach(filename, options, &block)
+      end
+    end
+
+    def each_row_using_tempdir
+      ::Dir.mktmpdir(Roo::TEMP_PREFIX, ENV["ROO_TMP"]) do |tmpdir|
+        tmp_filename = download_uri(filename, tmpdir)
+        ::CSV.foreach(tmp_filename, options, &block)
+      end
+    end
+
+    def set_row_count(sheet)
+      @first_row[sheet] = 1
+      @last_row[sheet] = ::CSV.readlines(@filename, csv_options).size
+      @last_row[sheet] = @first_row[sheet] if @last_row[sheet].zero?
+
+      nil
+    end
+
+    def set_column_count(sheet)
+      @first_column[sheet] = 1
+      @last_column[sheet] = (::CSV.readlines(@filename, csv_options).first || []).size
+      @last_column[sheet] = @first_column[sheet] if @last_column[sheet].zero?
+
+      nil
+    end
+
+    def clean_sheet(sheet)
+      read_cells(sheet)
+
+      @cell.each_pair do |coord, value|
+        @cell[coord] = sanitize_value(value) if value.is_a?(::String)
+      end
+
+      @cleaned[sheet] = true
+    end
+
+    alias_method :filename_or_stream, :filename
+  end
+end
diff --git a/lib/roo/errors.rb b/lib/roo/errors.rb
new file mode 100644
index 0000000..9a736b8
--- /dev/null
+++ b/lib/roo/errors.rb
@@ -0,0 +1,11 @@
+module Roo
+  # A base error class for Roo. Most errors thrown by Roo should inherit from
+  # this class.
+  class Error < StandardError; end
+
+  # Raised when Roo cannot find a header row that matches the given column
+  # name(s).
+  class HeaderRowNotFoundError < Error; end
+
+  class FileNotFound < Error; end
+end
diff --git a/lib/roo/excelx.rb b/lib/roo/excelx.rb
new file mode 100644
index 0000000..82c1431
--- /dev/null
+++ b/lib/roo/excelx.rb
@@ -0,0 +1,448 @@
+require 'nokogiri'
+require 'zip/filesystem'
+require 'roo/link'
+require 'roo/tempdir'
+require 'roo/utils'
+require 'forwardable'
+require 'set'
+
+module Roo
+  class Excelx < Roo::Base
+    extend Roo::Tempdir
+    extend Forwardable
+
+    ERROR_VALUES = %w(#N/A #REF! #NAME? #DIV/0! #NULL! #VALUE! #NUM!).to_set
+
+    require 'roo/excelx/shared'
+    require 'roo/excelx/workbook'
+    require 'roo/excelx/shared_strings'
+    require 'roo/excelx/styles'
+    require 'roo/excelx/cell'
+    require 'roo/excelx/sheet'
+    require 'roo/excelx/relationships'
+    require 'roo/excelx/comments'
+    require 'roo/excelx/sheet_doc'
+    require 'roo/excelx/coordinate'
+    require 'roo/excelx/format'
+
+    delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
+    ExceedsMaxError = Class.new(StandardError)
+
+    # initialization and opening of a spreadsheet file
+    # values for packed: :zip
+    # optional cell_max (int) parameter for early aborting attempts to parse
+    # enormous documents.
+    def initialize(filename_or_stream, options = {})
+      packed = options[:packed]
+      file_warning = options.fetch(:file_warning, :error)
+      cell_max = options.delete(:cell_max)
+      sheet_options = {}
+      sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
+      sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false)
+
+      unless is_stream?(filename_or_stream)
+        file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
+        basename = find_basename(filename_or_stream)
+      end
+
+      # NOTE: Create temp directory and allow Ruby to cleanup the temp directory
+      #       when the object is garbage collected. Initially, the finalizer was
+      #       created in the Roo::Tempdir module, but that led to a segfault
+      #       when testing in Ruby 2.4.0.
+      @tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
+      ObjectSpace.define_finalizer(self, self.class.finalize(object_id))
+
+      @shared = Shared.new(@tmpdir)
+      @filename = local_filename(filename_or_stream, @tmpdir, packed)
+      process_zipfile(@filename || filename_or_stream)
+
+      @sheet_names = workbook.sheets.map do |sheet|
+        unless options[:only_visible_sheets] && sheet['state'] == 'hidden'
+          sheet['name']
+        end
+      end.compact
+      @sheets = []
+      @sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n|
+        @sheets[n] = Sheet.new(sheet_name, @shared, n, sheet_options)
+        [sheet_name, @sheets[n]]
+      end]
+
+      if cell_max
+        cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions)
+        raise ExceedsMaxError.new("Excel file exceeds cell maximum: #{cell_count} > #{cell_max}") if cell_count > cell_max
+      end
+
+      super
+    rescue
+      self.class.finalize_tempdirs(object_id)
+      raise
+    end
+
+    def method_missing(method, *args)
+      if (label = workbook.defined_names[method.to_s])
+        safe_send(sheet_for(label.sheet).cells[label.key], :value)
+      else
+        # call super for methods like #a1
+        super
+      end
+    end
+
+    def sheets
+      @sheet_names
+    end
+
+    def sheet_for(sheet)
+      sheet ||= default_sheet
+      validate_sheet!(sheet)
+      @sheets_by_name[sheet]
+    end
+
+    # Returns the content of a spreadsheet-cell.
+    # (1,1) is the upper left corner.
+    # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
+    # cell at the first line and first row.
+    def cell(row, col, sheet = nil)
+      key = normalize(row, col)
+      safe_send(sheet_for(sheet).cells[key], :value)
+    end
+
+    def row(rownumber, sheet = nil)
+      sheet_for(sheet).row(rownumber)
+    end
+
+    # returns all values in this column as an array
+    # column numbers are 1,2,3,... like in the spreadsheet
+    def column(column_number, sheet = nil)
+      if column_number.is_a?(::String)
+        column_number = ::Roo::Utils.letter_to_number(column_number)
+      end
+      sheet_for(sheet).column(column_number)
+    end
+
+    # returns the number of the first non-empty row
+    def first_row(sheet = nil)
+      sheet_for(sheet).first_row
+    end
+
+    # returns the number of the last non-empty row
+    def last_row(sheet = nil)
+      sheet_for(sheet).last_row
+    end
+
+    # returns the number of the first non-empty column
+    def first_column(sheet = nil)
+      sheet_for(sheet).first_column
+    end
+
+    # returns the number of the last non-empty column
+    def last_column(sheet = nil)
+      sheet_for(sheet).last_column
+    end
+
+    # set a cell to a certain value
+    # (this will not be saved back to the spreadsheet file!)
+    def set(row, col, value, sheet = nil) #:nodoc:
+      key = normalize(row, col)
+      cell_type = cell_type_by_value(value)
+      sheet_for(sheet).cells[key] = Cell.new(value, cell_type, nil, cell_type, value, nil, nil, nil, Coordinate.new(row, col))
+    end
+
+    # Returns the formula at (row,col).
+    # Returns nil if there is no formula.
+    # The method #formula? checks if there is a formula.
+    def formula(row, col, sheet = nil)
+      key = normalize(row, col)
+      safe_send(sheet_for(sheet).cells[key], :formula)
+    end
+
+    # Predicate methods really should return a boolean
+    # value. Hopefully no one was relying on the fact that this
+    # previously returned either nil/formula
+    def formula?(*args)
+      !!formula(*args)
+    end
+
+    # returns each formula in the selected sheet as an array of tuples in following format
+    # [[row, col, formula], [row, col, formula],...]
+    def formulas(sheet = nil)
+      sheet_for(sheet).cells.select { |_, cell| cell.formula }.map do |(x, y), cell|
+        [x, y, cell.formula]
+      end
+    end
+
+    # Given a cell, return the cell's style
+    def font(row, col, sheet = nil)
+      key = normalize(row, col)
+      definition_index = safe_send(sheet_for(sheet).cells[key], :style)
+      styles.definitions[definition_index] if definition_index
+    end
+
+    # returns the type of a cell:
+    # * :float
+    # * :string,
+    # * :date
+    # * :percentage
+    # * :formula
+    # * :time
+    # * :datetime
+    def celltype(row, col, sheet = nil)
+      key = normalize(row, col)
+      safe_send(sheet_for(sheet).cells[key], :type)
+    end
+
+    # returns the internal type of an excel cell
+    # * :numeric_or_formula
+    # * :string
+    # Note: this is only available within the Excelx class
+    def excelx_type(row, col, sheet = nil)
+      key = normalize(row, col)
+      safe_send(sheet_for(sheet).cells[key], :cell_type)
+    end
+
+    # returns the internal value of an excelx cell
+    # Note: this is only available within the Excelx class
+    def excelx_value(row, col, sheet = nil)
+      key = normalize(row, col)
+      safe_send(sheet_for(sheet).cells[key], :cell_value)
+    end
+
+    # returns the internal value of an excelx cell
+    # Note: this is only available within the Excelx class
+    def formatted_value(row, col, sheet = nil)
+      key = normalize(row, col)
+      safe_send(sheet_for(sheet).cells[key], :formatted_value)
+    end
+
+    # returns the internal format of an excel cell
+    def excelx_format(row, col, sheet = nil)
+      key = normalize(row, col)
+      sheet_for(sheet).excelx_format(key)
+    end
+
+    def empty?(row, col, sheet = nil)
+      sheet = sheet_for(sheet)
+      key = normalize(row, col)
+      cell = sheet.cells[key]
+      !cell || cell.empty? ||
+        (row < sheet.first_row || row > sheet.last_row || col < sheet.first_column || col > sheet.last_column)
+    end
+
+    # shows the internal representation of all cells
+    # for debugging purposes
+    def to_s(sheet = nil)
+      sheet_for(sheet).cells.inspect
+    end
+
+    # returns the row,col values of the labelled cell
+    # (nil,nil) if label is not defined
+    def label(name)
+      labels = workbook.defined_names
+      return [nil, nil, nil] if labels.empty? || !labels.key?(name)
+
+      [labels[name].row, labels[name].col, labels[name].sheet]
+    end
+
+    # Returns an array which all labels. Each element is an array with
+    # [labelname, [row,col,sheetname]]
+    def labels
+      @labels ||= workbook.defined_names.map do |name, label|
+        [
+          name,
+          [label.row, label.col, label.sheet]
+        ]
+      end
+    end
+
+    def hyperlink?(row, col, sheet = nil)
+      !!hyperlink(row, col, sheet)
+    end
+
+    # returns the hyperlink at (row/col)
+    # nil if there is no hyperlink
+    def hyperlink(row, col, sheet = nil)
+      key = normalize(row, col)
+      sheet_for(sheet).hyperlinks[key]
+    end
+
+    # returns the comment at (row/col)
+    # nil if there is no comment
+    def comment(row, col, sheet = nil)
+      key = normalize(row, col)
+      sheet_for(sheet).comments[key]
+    end
+
+    # true, if there is a comment
+    def comment?(row, col, sheet = nil)
+      !!comment(row, col, sheet)
+    end
+
+    def comments(sheet = nil)
+      sheet_for(sheet).comments.map do |(x, y), comment|
+        [x, y, comment]
+      end
+    end
+
+    # Yield an array of Excelx::Cell
+    # Takes options for sheet, pad_cells, and max_rows
+    def each_row_streaming(options = {})
+      sheet = sheet_for(options.delete(:sheet))
+      if block_given?
+        sheet.each_row(options) { |row| yield row }
+      else
+        sheet.to_enum(:each_row, options)
+      end
+    end
+
+    private
+
+    def clean_sheet(sheet)
+      @sheets_by_name[sheet].cells.each_pair do |coord, value|
+        next unless value.value.is_a?(::String)
+
+        @sheets_by_name[sheet].cells[coord].value = sanitize_value(value.value)
+      end
+
+      @cleaned[sheet] = true
+    end
+
+    # Internal: extracts the worksheet_ids from the workbook.xml file. xlsx
+    #           documents require a workbook.xml file, so a if the file is missing
+    #           it is not a valid xlsx file. In these cases, an ArgumentError is
+    #           raised.
+    #
+    # wb - a Zip::Entry for the workbook.xml file.
+    # path - A String for Zip::Entry's destination path.
+    #
+    # Examples
+    #
+    #   extract_worksheet_ids(<Zip::Entry>, 'tmpdir/roo_workbook.xml')
+    #   # => ["rId1", "rId2", "rId3"]
+    #
+    # Returns an Array of Strings.
+    def extract_worksheet_ids(entries, path)
+      wb = entries.find { |e| e.name[/workbook.xml$/] }
+      fail ArgumentError 'missing required workbook file' if wb.nil?
+
+      wb.extract(path)
+      workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
+      workbook_doc.xpath('//sheet').map { |s| s.attributes['id'].value }
+    end
+
+    # Internal
+    #
+    # wb_rels - A Zip::Entry for the workbook.xml.rels file.
+    # path - A String for the Zip::Entry's destination path.
+    #
+    # Examples
+    #
+    #   extract_worksheets(<Zip::Entry>, 'tmpdir/roo_workbook.xml.rels')
+    #   # => {
+    #         "rId1"=>"worksheets/sheet1.xml",
+    #         "rId2"=>"worksheets/sheet2.xml",
+    #         "rId3"=>"worksheets/sheet3.xml"
+    #        }
+    #
+    # Returns a Hash.
+    def extract_worksheet_rels(entries, path)
+      wb_rels = entries.find { |e| e.name[/workbook.xml.rels$/] }
+      fail ArgumentError 'missing required workbook file' if wb_rels.nil?
+
+      wb_rels.extract(path)
+      rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
+      worksheet_type = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'
+
+      relationships = rels_doc.xpath('//Relationship').select do |relationship|
+        relationship.attributes['Type'].value == worksheet_type
+      end
+
+      relationships.inject({}) do |hash, relationship|
+        attributes = relationship.attributes
+        id = attributes['Id']
+        hash[id.value] = attributes['Target'].value
+        hash
+      end
+    end
+
+    # Extracts the sheets in order, but it will ignore sheets that are not
+    # worksheets.
+    def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
+      (sheet_ids & sheets.keys).each_with_index do |id, i|
+        name = sheets[id]
+        entry = entries.find { |e| "/#{e.name}" =~ /#{name}$/ }
+        path = "#{tmpdir}/roo_sheet#{i + 1}"
+        sheet_files << path
+        @sheet_files << path
+        entry.extract(path)
+      end
+    end
+
+    # Extracts all needed files from the zip file
+    def process_zipfile(zipfilename_or_stream)
+      @sheet_files = []
+
+      unless is_stream?(zipfilename_or_stream)
+        zip_file = Zip::File.open(zipfilename_or_stream)
+      else
+        zip_file = Zip::CentralDirectory.new
+        zip_file.read_from_stream zipfilename_or_stream
+      end
+
+      process_zipfile_entries zip_file.to_a.sort_by(&:name)
+    end
+
+    def process_zipfile_entries(entries)
+      # NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames
+      #       are not in order. With Numbers 3.1, the first sheet is always
+      #       sheet.xml, not sheet1.xml. With Google, the order of the worksheets is
+      #       independent of a worksheet's filename (i.e. sheet6.xml can be the
+      #       first worksheet).
+      #
+      #       workbook.xml lists the correct order of worksheets and
+      #       workbook.xml.rels lists the filenames for those worksheets.
+      #
+      #       workbook.xml:
+      #         <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/>
+      #         <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/>
+      #       workbook.xml.rel:
+      #         <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
+      #         <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
+      sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
+      sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
+      extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)
+
+      entries.each do |entry|
+        path =
+        case entry.name.downcase
+        when /sharedstrings.xml$/
+          "#{@tmpdir}/roo_sharedStrings.xml"
+        when /styles.xml$/
+          "#{@tmpdir}/roo_styles.xml"
+        when /comments([0-9]+).xml$/
+          # FIXME: Most of the time, The order of the comment files are the same
+          #       the sheet order, i.e. sheet1.xml's comments are in comments1.xml.
+          #       In some situations, this isn't true. The true location of a
+          #       sheet's comment file is in the sheet1.xml.rels file. SEE
+          #       ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
+          nr = Regexp.last_match[1].to_i
+          comments_files[nr - 1] = "#{@tmpdir}/roo_comments#{nr}"
+        when %r{chartsheets/_rels/sheet([0-9]+).xml.rels$}
+          # NOTE: Chart sheet relationship files were interfering with
+          #       worksheets.
+          nil
+        when /sheet([0-9]+).xml.rels$/
+          # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
+          #        it also stores the location for sharedStrings, comments,
+          #        drawings, etc.
+          nr = Regexp.last_match[1].to_i
+          rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}"
+        end
+
+        entry.extract(path) if path
+      end
+    end
+
+    def safe_send(object, method, *args)
+      object.send(method, *args) if object && object.respond_to?(method)
+    end
+  end
+end
diff --git a/lib/roo/excelx/cell.rb b/lib/roo/excelx/cell.rb
new file mode 100644
index 0000000..2fc78e1
--- /dev/null
+++ b/lib/roo/excelx/cell.rb
@@ -0,0 +1,106 @@
+require 'date'
+require 'roo/excelx/cell/base'
+require 'roo/excelx/cell/boolean'
+require 'roo/excelx/cell/datetime'
+require 'roo/excelx/cell/date'
+require 'roo/excelx/cell/empty'
+require 'roo/excelx/cell/number'
+require 'roo/excelx/cell/string'
+require 'roo/excelx/cell/time'
+
+module Roo
+  class Excelx
+    class Cell
+      attr_reader :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
+      attr_writer :value
+
+      # DEPRECATED: Please use Cell.create_cell instead.
+      def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate)
+        warn '[DEPRECATION] `Cell.new` is deprecated.  Please use `Cell.create_cell` instead.'
+        @type = type
+        @formula = formula
+        @base_date = base_date if [:date, :datetime].include?(@type)
+        @excelx_type = excelx_type
+        @excelx_value = excelx_value
+        @style = style
+        @value = type_cast_value(value)
+        @value = Roo::Link.new(hyperlink, @value.to_s) if hyperlink
+        @coordinate = coordinate
+      end
+
+      def type
+        case
+        when @formula
+          :formula
+        when @value.is_a?(Roo::Link)
+          :link
+        else
+          @type
+        end
+      end
+
+      def self.create_cell(type, *values)
+        case type
+        when :string
+          Cell::String.new(*values)
+        when :boolean
+          Cell::Boolean.new(*values)
+        when :number
+          Cell::Number.new(*values)
+        when :date
+          Cell::Date.new(*values)
+        when :datetime
+          Cell::DateTime.new(*values)
+        when :time
+          Cell::Time.new(*values)
+        end
+      end
+
+      # Deprecated: use Roo::Excelx::Coordinate instead.
+      class Coordinate
+        attr_accessor :row, :column
+
+        def initialize(row, column)
+          warn '[DEPRECATION] `Roo::Excel::Cell::Coordinate` is deprecated.  Please use `Roo::Excelx::Coordinate` instead.'
+          @row, @column = row, column
+        end
+      end
+
+      private
+
+      def type_cast_value(value)
+        case @type
+        when :float, :percentage
+          value.to_f
+        when :date
+          create_date(@base_date + value.to_i)
+        when :datetime
+          create_datetime(@base_date + value.to_f.round(6))
+        when :time
+          value.to_f * 86_400
+        else
+          value
+        end
+      end
+
+      def create_date(date)
+        yyyy, mm, dd = date.strftime('%Y-%m-%d').split('-')
+
+        ::Date.new(yyyy.to_i, mm.to_i, dd.to_i)
+      end
+
+      def create_datetime(date)
+        datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
+        t = round_datetime(datetime_string)
+
+        ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
+      end
+
+      def round_datetime(datetime_string)
+        /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+.\d+)/ =~ datetime_string
+
+        ::Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0)
+      end
+    end
+  end
+end
diff --git a/lib/roo/excelx/cell/base.rb b/lib/roo/excelx/cell/base.rb
new file mode 100644
index 0000000..aea8808
--- /dev/null
+++ b/lib/roo/excelx/cell/base.rb
@@ -0,0 +1,94 @@
+module Roo
+  class Excelx
+    class Cell
+      class Base
+        attr_reader :cell_type, :cell_value, :value
+
+        # FIXME: I think style should be deprecated. Having a style attribute
+        #        for a cell doesn't really accomplish much. It seems to be used
+        #        when you want to export to excelx.
+        attr_reader :style
+
+
+        # FIXME: Updating a cell's value should be able tochange the cell's type,
+        #        but that isn't currently possible. This will cause weird bugs
+        #        when one changes the value of a Number cell to a String. e.g.
+        #
+        #           cell = Cell::Number(*args)
+        #           cell.value = 'Hello'
+        #           cell.formatted_value # => Some unexpected value
+        #
+        #        Here are two possible solutions to such issues:
+        #        1. Don't allow a cell's value to be updated. Use a method like
+        #          `Sheet.update_cell` instead. The simple solution.
+        #        2. When `cell.value = ` is called, use injection to try and
+        #           change the type of cell on the fly. But deciding what type
+        #           of value to pass to `cell.value=`. isn't always obvious. e.g.
+        #           `cell.value = Time.now` should convert a cell to a DateTime,
+        #           not a Time cell. Time cells would be hard to recognize because
+        #           they are integers. This approach would require a significant
+        #           change to the code as written. The complex solution.
+        #
+        #        If the first solution is used, then this method should be
+        #        deprecated.
+        attr_writer :value
+
+        def initialize(value, formula, excelx_type, style, link, coordinate)
+          @link = !!link
+          @cell_value = value
+          @cell_type = excelx_type
+          @formula = formula
+          @style = style
+          @coordinate = coordinate
+          @type = :base
+          @value = link? ? Roo::Link.new(link, value) : value
+        end
+
+        def type
+          if formula?
+            :formula
+          elsif link?
+            :link
+          else
+            @type
+          end
+        end
+
+        def formula?
+          !!@formula
+        end
+
+        def link?
+          !!@link
+        end
+
+        alias_method :formatted_value, :value
+
+        def to_s
+          formatted_value
+        end
+
+        # DEPRECATED: Please use link instead.
+        def hyperlink
+          warn '[DEPRECATION] `hyperlink` is deprecated.  Please use `link` instead.'
+        end
+
+        # DEPRECATED: Please use cell_value instead.
+        def excelx_value
+          warn '[DEPRECATION] `excelx_value` is deprecated.  Please use `cell_value` instead.'
+          cell_value
+        end
+
+        # DEPRECATED: Please use cell_type instead.
+        def excelx_type
+          warn '[DEPRECATION] `excelx_type` is deprecated.  Please use `cell_type` instead.'
+          cell_type
+        end
+
+        def empty?
+          false
+        end
+      end
+    end
+  end
+end
diff --git a/lib/roo/excelx/cell/boolean.rb b/lib/roo/excelx/cell/boolean.rb
new file mode 100644
index 0000000..fe1f691
--- /dev/null
+++ b/lib/roo/excelx/cell/boolean.rb
@@ -0,0 +1,27 @@
+module Roo
+  class Excelx
+    class Cell
+      class Boolean < Cell::Base
+        attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
+
+        def initialize(value, formula, style, link, coordinate)
+          super(value, formula, nil, style, link, coordinate)
+          @type = @cell_type = :boolean
+          @value = link? ? Roo::Link.new(link, value) : create_boolean(value)
+        end
+
+        def formatted_value
+          value ? 'TRUE'.freeze : 'FALSE'.freeze
+        end
+
+        private
+
+        def create_boolean(value)
+          # FIXME: Using a boolean will cause methods like Base#to_csv to fail.
+          #       Roo is using some method to ignore false/nil values.
+          value.to_i == 1 ? true : false
+        end
+      end
+    end
+  end
+end
diff --git a/lib/roo/excelx/cell/date.rb b/lib/roo/excelx/cell/date.rb
new file mode 100644
index 0000000..8e2c6cb
--- /dev/null
+++ b/lib/roo/excelx/cell/date.rb
@@ -0,0 +1,28 @@
+require 'date'
+
+module Roo
+  class Excelx
+    class Cell
+      class Date < Roo::Excelx::Cell::DateTime
+        attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
+
+        def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
+          # NOTE: Pass all arguments to the parent class, DateTime.
+          super
+          @type = :date
+          @format = excelx_type.last
+          @value = link? ? Roo::Link.new(link, value) : create_date(base_date, value)
+        end
+
+        private
+
+        def create_date(base_date, value)
+          date = base_date + value.to_i
+          yyyy, mm, dd = date.strftime('%Y-%m-%d').split('-')
+
+          ::Date.new(yyyy.to_i, mm.to_i, dd.to_i)
+        end
+      end
+    end
+  end
+end
diff --git a/lib/roo/excelx/cell/datetime.rb b/lib/roo/excelx/cell/datetime.rb
new file mode 100644
index 0000000..35d93ac
--- /dev/null
+++ b/lib/roo/excelx/cell/datetime.rb
@@ -0,0 +1,111 @@
+require 'date'
+
+module Roo
+  class Excelx
+    class Cell
+      class DateTime < Cell::Base
+        attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
+
+        def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
+          super(value, formula, excelx_type, style, link, coordinate)
+          @type = :datetime
+          @format = excelx_type.last
+          @value = link? ? Roo::Link.new(link, value) : create_datetime(base_date, value)
+        end
+
+        # Public: Returns formatted value for a datetime. Format's can be an
+        #         standard excel format, or a custom format.
+        #
+        #         Standard formats follow certain conventions. Date fields for
+        #         days, months, and years are separated with hyhens or
+        #         slashes ("-", /") (e.g. 01-JAN, 1/13/15). Time fields for
+        #         hours, minutes, and seconds are separated with a colon (e.g.
+        #         12:45:01).
+        #
+        #         If a custom format follows those conventions, then the custom
+        #         format will be used for the a cell's formatted value.
+        #         Otherwise, the formatted value will be in the following
+        #         format: 'YYYY-mm-dd HH:MM:SS' (e.g. "2015-07-10 20:33:15").
+        #
+        # Examples
+        #    formatted_value #=> '01-JAN'
+        #
+        # Returns a String representation of a cell's value.
+        def formatted_value
+          formatter = @format.downcase.split(' ').map do |part|
+            if (parsed_format = parse_date_or_time_format(part))
+              parsed_format
+            else
+              warn 'Unable to parse custom format. Using "YYYY-mm-dd HH:MM:SS" format.'
+              return @value.strftime('%F %T')
+            end
+          end.join(' ')
+
+          @value.strftime(formatter)
+        end
+
+        private
+
+        def parse_date_or_time_format(part)
+          date_regex = /(?<date>[dmy]+[\-\/][dmy]+([\-\/][dmy]+)?)/
+          time_regex = /(?<time>(\[?[h]\]?+:)?[m]+(:?ss|:?s)?)/
+
+          if part[date_regex] == part
+            formats = DATE_FORMATS
+          elsif part[time_regex]
+            formats = TIME_FORMATS
+          else
+            return false
+          end
+
+          part.gsub(/#{formats.keys.join('|')}/, formats)
+        end
+
+        DATE_FORMATS = {
+          'yyyy' => '%Y',  # Year: 2000
+          'yy' => '%y',    # Year: 00
+          # mmmmm => J-D
+          'mmmm' => '%B',  # Month: January
+          'mmm' => '%^b',   # Month: JAN
+          'mm' => '%m',    # Month: 01
+          'm' => '%-m',    # Month: 1
+          'dddd' => '%A',  # Day of the Week: Sunday
+          'ddd' => '%^a',   # Day of the Week: SUN
+          'dd' => '%d',    # Day of the Month: 01
+          'd' => '%-d'    # Day of the Month: 1
+          # '\\\\'.freeze => ''.freeze,  # NOTE: Fixes a custom format's output.
+        }
+
+        TIME_FORMATS = {
+          'hh' => '%H',    # Hour (24): 01
+          'h' => '%-k'.freeze,    # Hour (24): 1
+          # 'hh'.freeze => '%I'.freeze,    # Hour (12): 08
+          # 'h'.freeze => '%-l'.freeze,    # Hour (12): 8
+          'mm' => '%M',    # Minute: 01
+          # FIXME: is this used? Seems like 'm' is used for month, not minute.
+          'm' => '%-M',    # Minute: 1
+          'ss' => '%S',    # Seconds: 01
+          's' => '%-S',    # Seconds: 1
+          'am/pm' => '%p', # Meridian: AM
+          '000' => '%3N',  # Fractional Seconds: thousandth.
+          '00' => '%2N',   # Fractional Seconds: hundredth.
+          '0' => '%1N'    # Fractional Seconds: tenths.
+        }
+
+        def create_datetime(base_date, value)
+          date = base_date + value.to_f.round(6)
+          datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
+          t = round_datetime(datetime_string)
+
+          ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
+        end
+
+        def round_datetime(datetime_string)
+          /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+.\d+)/ =~ datetime_string
+
+          ::Time.new(yyyy, mm, dd, hh, mi, ss.to_r).round(0)
+        end
+      end
+    end
+  end
+end
diff --git a/lib/roo/excelx/cell/empty.rb b/lib/roo/excelx/cell/empty.rb
new file mode 100644
index 0000000..49a20e7
--- /dev/null
+++ b/lib/roo/excelx/cell/empty.rb
@@ -0,0 +1,19 @@
+
+module Roo
+  class Excelx
+    class Cell
+      class Empty < Cell::Base
+        attr_reader :value, :formula, :format, :cell_type, :cell_value, :hyperlink, :coordinate
+
+        def initialize(coordinate)
+          @value = @formula = @format = @cell_type = @cell_value = @hyperlink = nil
+          @coordinate = coordinate
+        end
+
+        def empty?
+          true
+        end
+      end
+    end
+  end
+end
diff --git a/lib/roo/excelx/cell/number.rb b/lib/roo/excelx/cell/number.rb
new file mode 100644
index 0000000..2015562
--- /dev/null
+++ b/lib/roo/excelx/cell/number.rb
@@ -0,0 +1,87 @@
+module Roo
+  class Excelx
+    class Cell
+      class Number < Cell::Base
+        attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
+
+        def initialize(value, formula, excelx_type, style, link, coordinate)
+          super
+          # FIXME: change @type to number. This will break brittle tests.
+          # FIXME: Excelx_type is an array, but the first value isn't used.
+          @type = :float
+          @format = excelx_type.last
+          @value = link? ? Roo::Link.new(link, value) : create_numeric(value)
+        end
+
+        def create_numeric(number)
+          return number if Excelx::ERROR_VALUES.include?(number)
+          case @format
+          when /%/
+            Float(number)
+          when /\.0/
+            Float(number)
+          else
+            (number.include?('.') || (/\A[-+]?\d+E[-+]\d+\z/i =~ number)) ? Float(number) : Integer(number)
+          end
+        end
+
+        def formatted_value
+          return @cell_value if Excelx::ERROR_VALUES.include?(@cell_value)
+
+          formatter = formats[@format]
+          if formatter.is_a? Proc
+            formatter.call(@cell_value)
+          elsif zero_padded_number?
+            "%0#{@format.size}d" % @cell_value
+          else
+            Kernel.format(formatter, @cell_value)
+          end
+        end
+
+        def formats
+          # FIXME: numbers can be other colors besides red:
+          # [BLACK], [BLUE], [CYAN], [GREEN], [MAGENTA], [RED], [WHITE], [YELLOW], [COLOR n]
+          {
+            'General' => '%.0f',
+            '0' => '%.0f',
+            '0.00' => '%.2f',
+            '0.000000' => '%.6f',
+            '#,##0' => number_format('%.0f'),
+            '#,##0.00' => number_format('%.2f'),
+            '0%' =>  proc do |number|
+              Kernel.format('%d%', number.to_f * 100)
+            end,
+            '0.00%' => proc do |number|
+              Kernel.format('%.2f%', number.to_f * 100)
+            end,
+            '0.00E+00' => '%.2E',
+            '#,##0 ;(#,##0)' => number_format('%.0f', '(%.0f)'),
+            '#,##0 ;[Red](#,##0)' => number_format('%.0f', '[Red](%.0f)'),
+            '#,##0.00;(#,##0.00)' => number_format('%.2f', '(%.2f)'),
+            '#,##0.00;[Red](#,##0.00)' => number_format('%.2f', '[Red](%.2f)'),
+            # FIXME: not quite sure what the format should look like in this case.
+            '##0.0E+0' => '%.1E',
+            '@' => proc { |number| number }
+          }
+        end
+
+        private
+
+        def number_format(formatter, negative_formatter = nil)
+          proc do |number|
+            if negative_formatter
+              formatter = number.to_i > 0 ? formatter : negative_formatter
+              number = number.to_f.abs
+            end
+
+            Kernel.format(formatter, number).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
+          end
+        end
+
+        def zero_padded_number?
+          @format[/0+/] == @format
+        end
+      end
+    end
+  end
+end
diff --git a/lib/roo/excelx/cell/string.rb b/lib/roo/excelx/cell/string.rb
new file mode 100644
index 0000000..7967806
--- /dev/null
+++ b/lib/roo/excelx/cell/string.rb
@@ -0,0 +1,19 @@
+module Roo
+  class Excelx
+    class Cell
+      class String < Cell::Base
+        attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
+
+        def initialize(value, formula, style, link, coordinate)
+          super(value, formula, nil, style, link, coordinate)
+          @type = @cell_type = :string
+          @value = link? ? Roo::Link.new(link, value) : value
+        end
+
+        def empty?
+          value.empty?
+        end
+      end
+    end
+  end
+end
diff --git a/lib/roo/excelx/cell/time.rb b/lib/roo/excelx/cell/time.rb
new file mode 100644
index 0000000..d661ab8
--- /dev/null
+++ b/lib/roo/excelx/cell/time.rb
@@ -0,0 +1,43 @@
+require 'date'
+
+module Roo
+  class Excelx
+    class Cell
+      class Time < Roo::Excelx::Cell::DateTime
+        attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
+
+        def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
+          # NOTE: Pass all arguments to DateTime super class.
+          super
+          @type = :time
+          @format = excelx_type.last
+          @datetime = create_datetime(base_date, value)
+          @value = link? ? Roo::Link.new(link, value) : (value.to_f * 86_400).to_i
+        end
+
+        def formatted_value
+          formatter = @format.gsub(/#{TIME_FORMATS.keys.join('|')}/, TIME_FORMATS)
+          @datetime.strftime(formatter)
+        end
+
+        alias_method :to_s, :formatted_value
+
+        private
+
+        # def create_datetime(base_date, value)
+        #   date = base_date + value.to_f.round(6)
+        #   datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
+        #   t = round_datetime(datetime_string)
+        #
+        #   ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
+        # end
+
+        # def round_datetime(datetime_string)
+        #   /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+.\d+)/ =~ datetime_string
+        #
+        #   ::Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0)
+        # end
+      end
+    end
+  end
+end
diff --git a/lib/roo/excelx/comments.rb b/lib/roo/excelx/comments.rb
new file mode 100644
index 0000000..1a89908
--- /dev/null
+++ b/lib/roo/excelx/comments.rb
@@ -0,0 +1,55 @@
+require 'roo/excelx/extractor'
+
+module Roo
+  class Excelx
+    class Comments < Excelx::Extractor
+      def comments
+        @comments ||= extract_comments
+      end
+
+      private
+
+      def extract_comments
+        return {} unless doc_exists?
+
+        Hash[doc.xpath('//comments/commentList/comment').map do |comment|
+          value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text
+          [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
+        end]
+      end
+    end
+  end
+end
+# xl/comments1.xml
+#   <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
+#   <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
+#     <authors>
+#       <author />
+#     </authors>
+#     <commentList>
+#       <comment ref="B4" authorId="0">
+#         <text>
+#           <r>
+#             <rPr>
+#               <sz val="10" />
+#               <rFont val="Arial" />
+#               <family val="2" />
+#             </rPr>
+#             <t>Comment for B4</t>
+#           </r>
+#         </text>
+#       </comment>
+#       <comment ref="B5" authorId="0">
+#         <text>
+#           <r>
+#             <rPr>
+#             <sz val="10" />
+#             <rFont val="Arial" />
+#             <family val="2" />
+#           </rPr>
+#             <t>Comment for B5</t>
+#           </r>
+#         </text>
+#       </comment>
+#     </commentList>
+#   </comments>
diff --git a/lib/roo/excelx/coordinate.rb b/lib/roo/excelx/coordinate.rb
new file mode 100644
index 0000000..53b24ba
--- /dev/null
+++ b/lib/roo/excelx/coordinate.rb
@@ -0,0 +1,12 @@
+module Roo
+  class Excelx
+    class Coordinate
+      attr_accessor :row, :column
+
+      def initialize(row, column)
+        @row = row
+        @column = column
+      end
+    end
+  end
+end
diff --git a/lib/roo/excelx/extractor.rb b/lib/roo/excelx/extractor.rb
new file mode 100644
index 0000000..1cdd13b
--- /dev/null
+++ b/lib/roo/excelx/extractor.rb
@@ -0,0 +1,21 @@
+module Roo
+  class Excelx
+    class Extractor
+      def initialize(path)
+        @path = path
+      end
+
+      private
+
+      def doc
+        raise FileNotFound, "#{@path} file not found" unless doc_exists?
+
+        ::Roo::Utils.load_xml(@path).remove_namespaces!
+      end
+
+      def doc_exists?
+        @path && File.exist?(@path)
+      end
+    end
+  end
+end
diff --git a/lib/roo/excelx/format.rb b/lib/roo/excelx/format.rb
new file mode 100644
index 0000000..72b36d9
--- /dev/null
+++ b/lib/roo/excelx/format.rb
@@ -0,0 +1,64 @@
+module Roo
+  class Excelx
+    module Format
+      EXCEPTIONAL_FORMATS = {
+        'h:mm am/pm' => :date,
+        'h:mm:ss am/pm' => :date
+      }
+
+      STANDARD_FORMATS = {
+        0 => 'General'.freeze,
+        1 => '0'.freeze,
+        2 => '0.00'.freeze,
+        3 => '#,##0'.freeze,
+        4 => '#,##0.00'.freeze,
+        9 => '0%'.freeze,
+        10 => '0.00%'.freeze,
+        11 => '0.00E+00'.freeze,
+        12 => '# ?/?'.freeze,
+        13 => '# ??/??'.freeze,
+        14 => 'mm-dd-yy'.freeze,
+        15 => 'd-mmm-yy'.freeze,
+        16 => 'd-mmm'.freeze,
+        17 => 'mmm-yy'.freeze,
+        18 => 'h:mm AM/PM'.freeze,
+        19 => 'h:mm:ss AM/PM'.freeze,
+        20 => 'h:mm'.freeze,
+        21 => 'h:mm:ss'.freeze,
+        22 => 'm/d/yy h:mm'.freeze,
+        37 => '#,##0 ;(#,##0)'.freeze,
+        38 => '#,##0 ;[Red](#,##0)'.freeze,
+        39 => '#,##0.00;(#,##0.00)'.freeze,
+        40 => '#,##0.00;[Red](#,##0.00)'.freeze,
+        45 => 'mm:ss'.freeze,
+        46 => '[h]:mm:ss'.freeze,
+        47 => 'mmss.0'.freeze,
+        48 => '##0.0E+0'.freeze,
+        49 => '@'.freeze
+      }
+
+      def to_type(format)
+        format = format.to_s.downcase
+        if (type = EXCEPTIONAL_FORMATS[format])
+          type
+        elsif format.include?('#')
+          :float
+        elsif !format.match(/d+(?![\]])/).nil? || format.include?('y')
+          if format.include?('h') || format.include?('s')
+            :datetime
+          else
+            :date
+          end
+        elsif format.include?('h') || format.include?('s')
+          :time
+        elsif format.include?('%')
+          :percentage
+        else
+          :float
+        end
+      end
+
+      module_function :to_type
+    end
+  end 
+end
diff --git a/lib/roo/excelx/relationships.rb b/lib/roo/excelx/relationships.rb
new file mode 100644
index 0000000..8a0ed97
--- /dev/null
+++ b/lib/roo/excelx/relationships.rb
@@ -0,0 +1,25 @@
+require 'roo/excelx/extractor'
+
+module Roo
+  class Excelx
+    class Relationships < Excelx::Extractor
+      def [](index)
+        to_a[index]
+      end
+
+      def to_a
+        @relationships ||= extract_relationships
+      end
+
+      private
+
+      def extract_relationships
+        return [] unless doc_exists?
+
+        Hash[doc.xpath('/Relationships/Relationship').map do |rel|
+          [rel.attribute('Id').text, rel]
+        end]
+      end
+    end
+  end
+end
diff --git a/lib/roo/excelx/shared.rb b/lib/roo/excelx/shared.rb
new file mode 100644
index 0000000..3677fa2
--- /dev/null
+++ b/lib/roo/excelx/shared.rb
@@ -0,0 +1,32 @@
+module Roo
+  class Excelx
+    # Public:  Shared class for allowing sheets to share data. This should
+    #          reduce memory usage and reduce the number of objects being passed
+    #          to various inititializers.
+    class Shared
+      attr_accessor :comments_files, :sheet_files, :rels_files
+      def initialize(dir)
+        @dir = dir
+        @comments_files = []
+        @sheet_files = []
+        @rels_files = []
+      end
+
+      def styles
+        @styles ||= Styles.new(File.join(@dir, 'roo_styles.xml'))
+      end
+
+      def shared_strings
+        @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'))
+      end
+
+      def workbook
+        @workbook ||= Workbook.new(File.join(@dir, 'roo_workbook.xml'))
+      end
+
+      def base_date
+        workbook.base_date
+      end
+    end
+  end
+end
diff --git a/lib/roo/excelx/shared_strings.rb b/lib/roo/excelx/shared_strings.rb
new file mode 100755
index 0000000..f7caf7c
--- /dev/null
+++ b/lib/roo/excelx/shared_strings.rb
@@ -0,0 +1,157 @@
+require 'roo/excelx/extractor'
+
+module Roo
+  class Excelx
+    class SharedStrings < Excelx::Extractor
+
+      COMMON_STRINGS = {
+        t: "t",
+        r: "r",
+        html_tag_open: "<html>",
+        html_tag_closed: "</html>"
+      }
+
+      def [](index)
+        to_a[index]
+      end
+
+      def to_a
+        @array ||= extract_shared_strings
+      end
+
+      def to_html
+        @html ||= extract_html
+      end
+
+      # Use to_html or to_a for html returns
+      # See what is happening with commit???
+      def use_html?(index)
+        to_html[index][/<([biu]|sup|sub)>/]
+      end
+
+      private
+
+      def fix_invalid_shared_strings(doc)
+        invalid = { '_x000D_'  => "\n" }
+        xml = doc.to_s
+        return doc unless xml[/#{invalid.keys.join('|')}/]
+
+        ::Nokogiri::XML(xml.gsub(/#{invalid.keys.join('|')}/, invalid))
+      end
+
+      def extract_shared_strings
+        return [] unless doc_exists?
+
+        document = fix_invalid_shared_strings(doc)
+        # read the shared strings xml document
+        document.xpath('/sst/si').map do |si|
+          shared_string = ''
+          si.children.each do |elem|
+            case elem.name
+            when 'r'
+              elem.children.each do |r_elem|
+                shared_string << r_elem.content if r_elem.name == 't'
+              end
+            when 't'
+              shared_string = elem.content
+            end
+          end
+          shared_string
+        end
+      end
+
+      def extract_html
+        return [] unless doc_exists?
+        fix_invalid_shared_strings(doc)
+        # read the shared strings xml document
+        doc.xpath('/sst/si').map do |si|
+          html_string = '<html>'
+          si.children.each do |elem|
+            case elem.name
+            when 'r'
+              html_string << extract_html_r(elem)
+            when 't'
+              html_string << elem.content
+            end # case elem.name
+          end # si.children.each do |elem|
+          html_string << '</html>'
+        end # doc.xpath('/sst/si').map do |si|
+      end # def extract_html
+
+      # The goal of this function is to take the following XML code snippet and create a html tag
+      # r_elem ::: XML Element that is in sharedStrings.xml of excel_book.xlsx
+      # {code:xml}
+      # <r>
+      #   <rPr>
+      #      <i/>
+      #      <b/>
+      #      <u/>
+      #      <vertAlign val="subscript"/>
+      #      <vertAlign val="superscript"/>
+      #   </rPr>
+      #   <t>TEXT</t>
+      # </r>
+      # {code}
+      #
+      # Expected Output ::: "<html><sub|sup><b><i><u>TEXT</u></i></b></sub|/sup></html>"
+      def extract_html_r(r_elem)
+        str = ''
+        xml_elems = {
+          sub: false,
+          sup: false,
+          b:   false,
+          i:   false,
+          u:   false
+        }
+        b, i, u, sub, sup = false, false, false, false, false
+        r_elem.children.each do |elem|
+          case elem.name
+          when 'rPr'
+            elem.children.each do |rPr_elem|
+              case rPr_elem.name
+              when 'b'
+                # set formatting for Bold to true
+                xml_elems[:b] = true
+              when 'i'
+                # set formatting for Italics to true
+                xml_elems[:i] = true
+              when 'u'
+                # set formatting for Underline to true
+                xml_elems[:u] = true
+              when 'vertAlign'
+                # See if the Vertical Alignment is subscript or superscript
+                case rPr_elem.xpath('@val').first.value
+                when 'subscript'
+                  # set formatting for Subscript to true and Superscript to false ... Can't have both
+                  xml_elems[:sub] = true
+                  xml_elems[:sup] = false
+                when 'superscript'
+                  # set formatting for Superscript to true and Subscript to false ... Can't have both
+                  xml_elems[:sup] = true
+                  xml_elems[:sub] = false
+                end
+              end
+            end
+          when 't'
+            str << create_html(elem.content, xml_elems)
+          end
+        end
+        str
+      end # extract_html_r
+
+      # This will return an html string
+      def create_html(text, formatting)
+        tmp_str = ''
+        formatting.each do |elem, val|
+          tmp_str << "<#{elem}>" if val
+        end
+        tmp_str << text
+        reverse_format = Hash[formatting.to_a.reverse]
+        reverse_format.each do |elem, val|
+          tmp_str << "</#{elem}>" if val
+        end
+        tmp_str
+      end
+    end # class SharedStrings < Excelx::Extractor
+  end # class Excelx
+end # module Roo
diff --git a/lib/roo/excelx/sheet.rb b/lib/roo/excelx/sheet.rb
new file mode 100644
index 0000000..add92f0
--- /dev/null
+++ b/lib/roo/excelx/sheet.rb
@@ -0,0 +1,112 @@
+require 'forwardable'
+module Roo
+  class Excelx
+    class Sheet
+      extend Forwardable
+
+      delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
+
+      def initialize(name, shared, sheet_index, options = {})
+        @name = name
+        @shared = shared
+        @rels = Relationships.new(rels_files[sheet_index])
+        @comments = Comments.new(comments_files[sheet_index])
+        @sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
+      end
+
+      def cells
+        @cells ||= @sheet.cells(@rels)
+      end
+
+      def present_cells
+        @present_cells ||= cells.select { |_, cell| cell && !cell.empty? }
+      end
+
+      # Yield each row as array of Excelx::Cell objects
+      # accepts options max_rows (int) (offset by 1 for header),
+      # pad_cells (boolean) and offset (int)
+      def each_row(options = {}, &block)
+        row_count = 0
+        options[:offset] ||= 0
+        @sheet.each_row_streaming do |row|
+          break if options[:max_rows] && row_count == options[:max_rows] + options[:offset] + 1
+          if block_given? && !(options[:offset] && row_count < options[:offset])
+            block.call(cells_for_row_element(row, options))
+          end
+          row_count += 1
+        end
+      end
+
+      def row(row_number)
+        first_column.upto(last_column).map do |col|
+          cells[[row_number, col]]
+        end.map { |cell| cell && cell.value }
+      end
+
+      def column(col_number)
+        first_row.upto(last_row).map do |row|
+          cells[[row, col_number]]
+        end.map { |cell| cell && cell.value }
+      end
+
+      # returns the number of the first non-empty row
+      def first_row
+        @first_row ||= present_cells.keys.map { |row, _| row }.min
+      end
+
+      def last_row
+        @last_row ||= present_cells.keys.map { |row, _| row }.max
+      end
+
+      # returns the number of the first non-empty column
+      def first_column
+        @first_column ||= present_cells.keys.map { |_, col| col }.min
+      end
+
+      # returns the number of the last non-empty column
+      def last_column
+        @last_column ||= present_cells.keys.map { |_, col| col }.max
+      end
+
+      def excelx_format(key)
+        cell = cells[key]
+        styles.style_format(cell.style).to_s if cell
+      end
+
+      def hyperlinks
+        @hyperlinks ||= @sheet.hyperlinks(@rels)
+      end
+
+      def comments
+        @comments.comments
+      end
+
+      def dimensions
+        @sheet.dimensions
+      end
+
+      private
+
+      # Take an xml row and return an array of Excelx::Cell objects
+      # optionally pad array to header width(assumed 1st row).
+      # takes option pad_cells (boolean) defaults false
+      def cells_for_row_element(row_element, options = {})
+        return [] unless row_element
+        cell_col = 0
+        cells = []
+        @sheet.each_cell(row_element) do |cell|
+          cells.concat(pad_cells(cell, cell_col)) if options[:pad_cells]
+          cells << cell
+          cell_col = cell.coordinate.column
+        end
+        cells
+      end
+
+      def pad_cells(cell, last_column)
+        pad = []
+        (cell.coordinate.column - 1 - last_column).times { pad << nil }
+        pad
+      end
+    end
+  end
+end
diff --git a/lib/roo/excelx/sheet_doc.rb b/lib/roo/excelx/sheet_doc.rb
new file mode 100755
index 0000000..a705958
--- /dev/null
+++ b/lib/roo/excelx/sheet_doc.rb
@@ -0,0 +1,211 @@
+require 'forwardable'
+require 'roo/excelx/extractor'
+
+module Roo
+  class Excelx
+    class SheetDoc < Excelx::Extractor
+      extend Forwardable
+      delegate [:styles, :workbook, :shared_strings, :base_date] => :@shared
+
+      def initialize(path, relationships, shared, options = {})
+        super(path)
+        @shared = shared
+        @options = options
+        @relationships = relationships
+      end
+
+      def cells(relationships)
+        @cells ||= extract_cells(relationships)
+      end
+
+      def hyperlinks(relationships)
+        @hyperlinks ||= extract_hyperlinks(relationships)
+      end
+
+      # Get the dimensions for the sheet.
+      # This is the upper bound of cells that might
+      # be parsed. (the document may be sparse so cell count is only upper bound)
+      def dimensions
+        @dimensions ||= extract_dimensions
+      end
+
+      # Yield each row xml element to caller
+      def each_row_streaming(&block)
+        Roo::Utils.each_element(@path, 'row', &block)
+      end
+
+      # Yield each cell as Excelx::Cell to caller for given
+      # row xml
+      def each_cell(row_xml)
+        return [] unless row_xml
+        row_xml.children.each do |cell_element|
+          # If you're sure you're not going to need this hyperlinks you can discard it
+          hyperlinks = unless @options[:no_hyperlinks]
+                         key = ::Roo::Utils.ref_to_key(cell_element['r'])
+                         hyperlinks(@relationships)[key]
+                       end
+
+          yield cell_from_xml(cell_element, hyperlinks)
+        end
+      end
+
+      private
+
+      def cell_value_type(type, format)
+        case type
+        when 's'.freeze
+          :shared
+        when 'b'.freeze
+          :boolean
+        when 'str'.freeze
+          :string
+        when 'inlineStr'.freeze
+          :inlinestr
+        else
+          Excelx::Format.to_type(format)
+        end
+      end
+
+      # Internal: Creates a cell based on an XML clell..
+      #
+      # cell_xml - a Nokogiri::XML::Element. e.g.
+      #             <c r="A5" s="2">
+      #               <v>22606</v>
+      #             </c>
+      # hyperlink - a String for the hyperlink for the cell or nil when no
+      #             hyperlink is present.
+      #
+      # Examples
+      #
+      #    cells_from_xml(<Nokogiri::XML::Element>, nil)
+      #    # => <Excelx::Cell::String>
+      #
+      # Returns a type of <Excelx::Cell>.
+      def cell_from_xml(cell_xml, hyperlink)
+        coordinate = extract_coordinate(cell_xml['r'])
+        return Excelx::Cell::Empty.new(coordinate) if cell_xml.children.empty?
+
+        # NOTE: This is error prone, to_i will silently turn a nil into a 0.
+        #       This works by coincidence because Format[0] is General.
+        style = cell_xml['s'].to_i
+        format = styles.style_format(style)
+        value_type = cell_value_type(cell_xml['t'], format)
+        formula = nil
+
+        cell_xml.children.each do |cell|
+          case cell.name
+          when 'is'
+            content_arr = cell.search('t').map(&:content)
+            unless content_arr.empty?
+              return Excelx::Cell.create_cell(:string, content_arr.join(''), formula, style, hyperlink, coordinate)
+            end
+          when 'f'
+            formula = cell.content
+          when 'v'
+            return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
+          end
+        end
+
+        Excelx::Cell::Empty.new(coordinate)
+      end
+
+      def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
+        # NOTE: format.to_s can replace excelx_type as an argument for
+        #       Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but
+        #       it will break some brittle tests.
+        excelx_type = [:numeric_or_formula, format.to_s]
+
+        # NOTE: There are only a few situations where value != cell.content
+        #       1. when a sharedString is used. value = sharedString;
+        #          cell.content = id of sharedString
+        #       2. boolean cells: value = 'TRUE' | 'FALSE'; cell.content = '0' | '1';
+        #          But a boolean cell should use TRUE|FALSE as the formatted value
+        #          and use a Boolean for it's value. Using a Boolean value breaks
+        #          Roo::Base#to_csv.
+        #       3. formula
+        case value_type
+        when :shared
+          value = shared_strings.use_html?(cell.content.to_i) ? shared_strings.to_html[cell.content.to_i] : shared_strings[cell.content.to_i]
+          Excelx::Cell.create_cell(:string, value, formula, style, hyperlink, coordinate)
+        when :boolean, :string
+          value = cell.content
+          Excelx::Cell.create_cell(value_type, value, formula, style, hyperlink, coordinate)
+        when :time, :datetime
+          cell_content = cell.content.to_f
+          # NOTE: A date will be a whole number. A time will have be > 1. And
+          #      in general, a datetime will have decimals. But if the cell is
+          #      using a custom format, it's possible to be interpreted incorrectly.
+          #      cell_content.to_i == cell_content && standard_style?=> :date
+          #
+          #      Should check to see if the format is standard or not. If it's a
+          #      standard format, than it's a date, otherwise, it is a datetime.
+          #      @styles.standard_style?(style_id)
+          #      STANDARD_STYLES.keys.include?(style_id.to_i)
+          cell_type = if cell_content < 1.0
+                        :time
+                      elsif (cell_content - cell_content.floor).abs > 0.000001
+                        :datetime
+                      else
+                        :date
+                      end
+          Excelx::Cell.create_cell(cell_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
+        when :date
+          Excelx::Cell.create_cell(value_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
+        else
+          Excelx::Cell.create_cell(:number, cell.content, formula, excelx_type, style, hyperlink, coordinate)
+        end
+      end
+
+      def extract_coordinate(coordinate)
+        row, column = ::Roo::Utils.split_coordinate(coordinate)
+
+        Excelx::Coordinate.new(row, column)
+      end
+
+      def extract_hyperlinks(relationships)
+        return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink'))
+
+        Hash[hyperlinks.map do |hyperlink|
+          if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text])
+            [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
+          end
+        end.compact]
+      end
+
+      def expand_merged_ranges(cells)
+        # Extract merged ranges from xml
+        merges = {}
+        doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
+          tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
+          for row in tl[0]..br[0] do
+            for col in tl[1]..br[1] do
+              next if row == tl[0] && col == tl[1]
+              merges[[row, col]] = tl
+            end
+          end
+        end
+        # Duplicate value into all cells in merged range
+        merges.each do |dst, src|
+          cells[dst] = cells[src]
+        end
+      end
+
+      def extract_cells(relationships)
+        extracted_cells = Hash[doc.xpath('/worksheet/sheetData/row/c').map do |cell_xml|
+          key = ::Roo::Utils.ref_to_key(cell_xml['r'])
+          [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
+        end]
+
+        expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges]
+
+        extracted_cells
+      end
+
+      def extract_dimensions
+        Roo::Utils.each_element(@path, 'dimension') do |dimension|
+          return dimension.attributes['ref'].value
+        end
+      end
+    end
+  end
+end
diff --git a/lib/roo/excelx/styles.rb b/lib/roo/excelx/styles.rb
new file mode 100644
index 0000000..87f1713
--- /dev/null
+++ b/lib/roo/excelx/styles.rb
@@ -0,0 +1,64 @@
+require 'roo/font'
+require 'roo/excelx/extractor'
+
+module Roo
+  class Excelx
+    class Styles < Excelx::Extractor
+      # convert internal excelx attribute to a format
+      def style_format(style)
+        id = num_fmt_ids[style.to_i]
+        num_fmts[id] || Excelx::Format::STANDARD_FORMATS[id.to_i]
+      end
+
+      def definitions
+        @definitions ||= extract_definitions
+      end
+
+      private
+
+      def num_fmt_ids
+        @num_fmt_ids ||= extract_num_fmt_ids
+      end
+
+      def num_fmts
+        @num_fmts ||= extract_num_fmts
+      end
+
+      def fonts
+        @fonts ||= extract_fonts
+      end
+
+      def extract_definitions
+        doc.xpath('//cellXfs').flat_map do |xfs|
+          xfs.children.map do |xf|
+            fonts[xf['fontId'].to_i]
+          end
+        end
+      end
+
+      def extract_fonts
+        doc.xpath('//fonts/font').map do |font_el|
+          Font.new.tap do |font|
+            font.bold = !font_el.xpath('./b').empty?
+            font.italic = !font_el.xpath('./i').empty?
+            font.underline = !font_el.xpath('./u').empty?
+          end
+        end
+      end
+
+      def extract_num_fmt_ids
+        doc.xpath('//cellXfs').flat_map do |xfs|
+          xfs.children.map do |xf|
+            xf['numFmtId']
+          end
+        end.compact
+      end
+
+      def extract_num_fmts
+        Hash[doc.xpath('//numFmt').map do |num_fmt|
+          [num_fmt['numFmtId'], num_fmt['formatCode']]
+        end]
+      end
+    end
+  end
+end
diff --git a/lib/roo/excelx/workbook.rb b/lib/roo/excelx/workbook.rb
new file mode 100644
index 0000000..7ef841f
--- /dev/null
+++ b/lib/roo/excelx/workbook.rb
@@ -0,0 +1,59 @@
+require 'roo/excelx/extractor'
+
+module Roo
+  class Excelx
+    class Workbook < Excelx::Extractor
+      class Label
+        attr_reader :sheet, :row, :col, :name
+
+        def initialize(name, sheet, row, col)
+          @name = name
+          @sheet = sheet
+          @row = row.to_i
+          @col = ::Roo::Utils.letter_to_number(col)
+        end
+
+        def key
+          [@row, @col]
+        end
+      end
+
+      def initialize(path)
+        super
+        fail ArgumentError, 'missing required workbook file' unless doc_exists?
+      end
+
+      def sheets
+        doc.xpath('//sheet')
+      end
+
+      # aka labels
+      def defined_names
+        Hash[doc.xpath('//definedName').map do |defined_name|
+          # "Sheet1!$C$5"
+          sheet, coordinates = defined_name.text.split('!$', 2)
+          col, row = coordinates.split('$')
+          name = defined_name['name']
+          [name, Label.new(name, sheet, row, col)]
+        end]
+      end
+
+      def base_date
+        @base_date ||=
+        begin
+          # Default to 1900 (minus one day due to excel quirk) but use 1904 if
+          # it's set in the Workbook's workbookPr
+          # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
+          result = Date.new(1899, 12, 30) # default
+          doc.css('workbookPr[date1904]').each do |workbookPr|
+            if workbookPr['date1904'] =~ /true|1/i
+              result = Date.new(1904, 01, 01)
+              break
+            end
+          end
+          result
+        end
+      end
+    end
+  end
+end
diff --git a/lib/roo/font.rb b/lib/roo/font.rb
new file mode 100644
index 0000000..40e9c55
--- /dev/null
+++ b/lib/roo/font.rb
@@ -0,0 +1,17 @@
+module Roo
+  class Font
+    attr_accessor :bold, :italic, :underline
+
+    def bold?
+      @bold
+    end
+
+    def italic?
+      @italic
+    end
+
+    def underline?
+      @underline
+    end
+  end
+end
diff --git a/lib/roo/formatters/base.rb b/lib/roo/formatters/base.rb
new file mode 100644
index 0000000..2d32d6d
--- /dev/null
+++ b/lib/roo/formatters/base.rb
@@ -0,0 +1,15 @@
+module Roo
+  module Formatters
+    module Base
+      # converts an integer value to a time string like '02:05:06'
+      def integer_to_timestring(content)
+        h = (content / 3600.0).floor
+        content -= h * 3600
+        m = (content / 60.0).floor
+        content -= m * 60
+        s = content
+        Kernel.format("%02d:%02d:%02d", h, m, s)
+      end
+    end
+  end
+end
diff --git a/lib/roo/formatters/csv.rb b/lib/roo/formatters/csv.rb
new file mode 100644
index 0000000..3181f09
--- /dev/null
+++ b/lib/roo/formatters/csv.rb
@@ -0,0 +1,84 @@
+module Roo
+  module Formatters
+    module CSV
+      def to_csv(filename = nil, separator = ",", sheet = default_sheet)
+        if filename
+          File.open(filename, "w") do |file|
+            write_csv_content(file, sheet, separator)
+          end
+          true
+        else
+          sio = ::StringIO.new
+          write_csv_content(sio, sheet, separator)
+          sio.rewind
+          sio.read
+        end
+      end
+
+      private
+
+      # Write all cells to the csv file. File can be a filename or nil. If the
+      # file argument is nil the output goes to STDOUT
+      def write_csv_content(file = nil, sheet = nil, separator = ",")
+        file ||= STDOUT
+        return unless first_row(sheet) # The sheet is empty
+
+        1.upto(last_row(sheet)) do |row|
+          1.upto(last_column(sheet)) do |col|
+            # TODO: use CSV.generate_line
+            file.print(separator) if col > 1
+            file.print cell_to_csv(row, col, sheet)
+          end
+          file.print("\n")
+        end
+      end
+
+      # The content of a cell in the csv output
+      def cell_to_csv(row, col, sheet)
+        return "" if empty?(row, col, sheet)
+
+        onecell = cell(row, col, sheet)
+
+        case celltype(row, col, sheet)
+        when :string
+          %("#{onecell.gsub('"', '""')}") unless onecell.empty?
+        when :boolean
+          # TODO: this only works for excelx
+          onecell = self.sheet_for(sheet).cells[[row, col]].formatted_value
+          %("#{onecell.gsub('"', '""').downcase}")
+        when :float, :percentage
+          if onecell == onecell.to_i
+            onecell.to_i.to_s
+          else
+            onecell.to_s
+          end
+        when :formula
+          case onecell
+          when String
+            %("#{onecell.gsub('"', '""')}") unless onecell.empty?
+          when Integer
+            onecell.to_s
+          when Float
+            if onecell == onecell.to_i
+              onecell.to_i.to_s
+            else
+              onecell.to_s
+            end
+          when Date, DateTime, TrueClass, FalseClass
+            onecell.to_s
+          else
+            fail "unhandled onecell-class #{onecell.class}"
+          end
+        when :date, :datetime
+          onecell.to_s
+        when :time
+          integer_to_timestring(onecell)
+        when :link
+          %("#{onecell.url.gsub('"', '""')}")
+        else
+          fail "unhandled celltype #{celltype(row, col, sheet)}"
+        end || ""
+      end
+    end
+  end
+end
diff --git a/lib/roo/formatters/matrix.rb b/lib/roo/formatters/matrix.rb
new file mode 100644
index 0000000..39c7381
--- /dev/null
+++ b/lib/roo/formatters/matrix.rb
@@ -0,0 +1,23 @@
+module Roo
+  module Formatters
+    module Matrix
+      # returns a matrix object from the whole sheet or a rectangular area of a sheet
+      def to_matrix(from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet)
+        require 'matrix'
+
+        return ::Matrix.empty unless first_row
+
+        from_row ||= first_row(sheet)
+        to_row ||= last_row(sheet)
+        from_column ||= first_column(sheet)
+        to_column ||= last_column(sheet)
+
+        ::Matrix.rows(from_row.upto(to_row).map do |row|
+          from_column.upto(to_column).map do |col|
+            cell(row, col, sheet)
+          end
+        end)
+      end
+    end
+  end
+end
diff --git a/lib/roo/formatters/xml.rb b/lib/roo/formatters/xml.rb
new file mode 100644
index 0000000..ebb8cde
--- /dev/null
+++ b/lib/roo/formatters/xml.rb
@@ -0,0 +1,31 @@
+# returns an XML representation of all sheets of a spreadsheet file
+module Roo
+  module Formatters
+    module XML
+      def to_xml
+        Nokogiri::XML::Builder.new do |xml|
+          xml.spreadsheet do
+            sheets.each do |sheet|
+              self.default_sheet = sheet
+              xml.sheet(name: sheet) do |x|
+                if first_row && last_row && first_column && last_column
+                  # sonst gibt es Fehler bei leeren Blaettern
+                  first_row.upto(last_row) do |row|
+                    first_column.upto(last_column) do |col|
+                      next if empty?(row, col)
+
+                      x.cell(cell(row, col),
+                      row: row,
+                      column: col,
+                      type: celltype(row, col))
+                    end
+                  end
+                end
+              end
+            end
+          end
+        end.to_xml
+      end
+    end
+  end
+end
diff --git a/lib/roo/formatters/yaml.rb b/lib/roo/formatters/yaml.rb
new file mode 100644
index 0000000..69ef3ab
--- /dev/null
+++ b/lib/roo/formatters/yaml.rb
@@ -0,0 +1,40 @@
+module Roo
+  module Formatters
+    module YAML
+      # returns a rectangular area (default: all cells) as yaml-output
+      # you can add additional attributes with the prefix parameter like:
+      # oo.to_yaml({"file"=>"flightdata_2007-06-26", "sheet" => "1"})
+      def to_yaml(prefix = {}, from_row = nil, from_column = nil, to_row = nil, to_column = nil, sheet = default_sheet)
+        # return an empty string if there is no first_row, i.e. the sheet is empty
+        return "" unless first_row
+
+        from_row ||= first_row(sheet)
+        to_row ||= last_row(sheet)
+        from_column ||= first_column(sheet)
+        to_column ||= last_column(sheet)
+
+        result = "--- \n"
+        from_row.upto(to_row) do |row|
+          from_column.upto(to_column) do |col|
+            next if empty?(row, col, sheet)
+
+            result << "cell_#{row}_#{col}: \n"
+            prefix.each do|k, v|
+              result << "  #{k}: #{v} \n"
+            end
+            result << "  row: #{row} \n"
+            result << "  col: #{col} \n"
+            result << "  celltype: #{celltype(row, col, sheet)} \n"
+            value = cell(row, col, sheet)
+            if celltype(row, col, sheet) == :time
+              value = integer_to_timestring(value)
+            end
+            result << "  value: #{value} \n"
+          end
+        end
+
+        result
+      end
+    end
+  end
+end
diff --git a/lib/roo/libre_office.rb b/lib/roo/libre_office.rb
new file mode 100644
index 0000000..6aa2332
--- /dev/null
+++ b/lib/roo/libre_office.rb
@@ -0,0 +1,4 @@
+require 'roo/open_office'
+
+# LibreOffice is just an alias for Roo::OpenOffice class
+Roo::LibreOffice = Roo::OpenOffice
diff --git a/lib/roo/link.rb b/lib/roo/link.rb
new file mode 100644
index 0000000..72dc143
--- /dev/null
+++ b/lib/roo/link.rb
@@ -0,0 +1,34 @@
+require 'uri'
+
+module Roo
+  class Link < String
+    # FIXME: Roo::Link inherits from String. A link cell is_a?(Roo::Link). **It is
+    #        the only situation where a cells `value` is always a String**. Link
+    #        cells have a nifty `to_uri` method, but this method isn't easily
+    #        reached. (e.g. `sheet.sheet_for(nil).cells[[row,column]]).value.to_uri`;
+    #        `sheet.hyperlink(row, column)` doesn't use `to_uri`).
+    #
+    #        1. Add different types of links (String, Numeric, Date, DateTime, etc.)
+    #        2. Remove Roo::Link.
+    #        3. Don't inherit the string and pass the cell's value.
+    #
+    #        I don't know the historical reasons for the Roo::Link, but right now
+    #        it seems uneccessary. I'm in favor of keeping it just in case.
+    #
+    #        I'm also in favor of passing the cell's value to Roo::Link. The
+    #        cell.value's class would still be Roo::Link, but the value itself
+    #        would depend on what type of cell it is (Numeric, Date, etc.).
+    #
+    attr_reader :href
+    alias_method :url, :href
+
+    def initialize(href = '', text = href)
+      super(text)
+      @href = href
+    end
+
+    def to_uri
+      URI.parse href
+    end
+  end
+end
diff --git a/lib/roo/open_office.rb b/lib/roo/open_office.rb
new file mode 100644
index 0000000..6ccbe85
--- /dev/null
+++ b/lib/roo/open_office.rb
@@ -0,0 +1,626 @@
+require 'date'
+require 'nokogiri'
+require 'cgi'
+require 'zip/filesystem'
+require 'roo/font'
+require 'roo/tempdir'
+require 'base64'
+require 'openssl'
+
+module Roo
+  class OpenOffice < Roo::Base
+    extend Roo::Tempdir
+
+    ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'.freeze
+    XPATH_FIND_TABLE_STYLES   = "//*[local-name()='automatic-styles']".freeze
+    XPATH_LOCAL_NAME_TABLE    = "//*[local-name()='table']".freeze
+
+    # initialization and opening of a spreadsheet file
+    # values for packed: :zip
+    def initialize(filename, options = {})
+      packed       = options[:packed]
+      file_warning = options[:file_warning] || :error
+
+      @only_visible_sheets = options[:only_visible_sheets]
+      file_type_check(filename, '.ods', 'an Roo::OpenOffice', file_warning, packed)
+      # NOTE: Create temp directory and allow Ruby to cleanup the temp directory
+      #       when the object is garbage collected. Initially, the finalizer was
+      #       created in the Roo::Tempdir module, but that led to a segfault
+      #       when testing in Ruby 2.4.0.
+      @tmpdir = self.class.make_tempdir(self, find_basename(filename), options[:tmpdir_root])
+      ObjectSpace.define_finalizer(self, self.class.finalize(object_id))
+      @filename = local_filename(filename, @tmpdir, packed)
+      # TODO: @cells_read[:default] = false
+      open_oo_file(options)
+      super(filename, options)
+      initialize_default_variables
+
+      unless @table_display.any?
+        doc.xpath(XPATH_FIND_TABLE_STYLES).each do |style|
+          read_table_styles(style)
+        end
+      end
+
+      @sheet_names = doc.xpath(XPATH_LOCAL_NAME_TABLE).map do |sheet|
+        if !@only_visible_sheets || @table_display[attribute(sheet, 'style-name')]
+          sheet.attributes['name'].value
+        end
+      end.compact
+    rescue
+      self.class.finalize_tempdirs(object_id)
+      raise
+    end
+
+    def open_oo_file(options)
+      Zip::File.open(@filename) do |zip_file|
+        content_entry = zip_file.glob('content.xml').first
+        fail ArgumentError, ERROR_MISSING_CONTENT_XML unless content_entry
+
+        roo_content_xml_path = ::File.join(@tmpdir, 'roo_content.xml')
+        content_entry.extract(roo_content_xml_path)
+        decrypt_if_necessary(zip_file, content_entry, roo_content_xml_path, options)
+      end
+    end
+
+    def initialize_default_variables
+      @formula                = {}
+      @style                  = {}
+      @style_defaults         = Hash.new { |h, k| h[k] = [] }
+      @table_display          = Hash.new { |h, k| h[k] = true }
+      @font_style_definitions = {}
+      @comment                = {}
+      @comments_read          = {}
+    end
+
+    def method_missing(m, *args)
+      read_labels
+      # is method name a label name
+      if @label.key?(m.to_s)
+        row, col = label(m.to_s)
+        cell(row, col)
+      else
+        # call super for methods like #a1
+        super
+      end
+    end
+
+    # Returns the content of a spreadsheet-cell.
+    # (1,1) is the upper left corner.
+    # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
+    # cell at the first line and first row.
+    def cell(row, col, sheet = nil)
+      sheet ||= default_sheet
+      read_cells(sheet)
+      row, col = normalize(row, col)
+      if celltype(row, col, sheet) == :date
+        yyyy, mm, dd = @cell[sheet][[row, col]].to_s.split('-')
+        return Date.new(yyyy.to_i, mm.to_i, dd.to_i)
+      end
+
+      @cell[sheet][[row, col]]
+    end
+
+    # Returns the formula at (row,col).
+    # Returns nil if there is no formula.
+    # The method #formula? checks if there is a formula.
+    def formula(row, col, sheet = nil)
+      sheet ||= default_sheet
+      read_cells(sheet)
+      row, col = normalize(row, col)
+      @formula[sheet][[row, col]]
+    end
+
+    # Predicate methods really should return a boolean
+    # value. Hopefully no one was relying on the fact that this
+    # previously returned either nil/formula
+    def formula?(*args)
+      !!formula(*args)
+    end
+
+    # returns each formula in the selected sheet as an array of elements
+    # [row, col, formula]
+    def formulas(sheet = nil)
+      sheet ||= default_sheet
+      read_cells(sheet)
+      return [] unless @formula[sheet]
+      @formula[sheet].each.collect do |elem|
+        [elem[0][0], elem[0][1], elem[1]]
+      end
+    end
+
+    # Given a cell, return the cell's style
+    def font(row, col, sheet = nil)
+      sheet ||= default_sheet
+      read_cells(sheet)
+      row, col   = normalize(row, col)
+      style_name = @style[sheet][[row, col]] || @style_defaults[sheet][col - 1] || 'Default'
+      @font_style_definitions[style_name]
+    end
+
+    # returns the type of a cell:
+    # * :float
+    # * :string
+    # * :date
+    # * :percentage
+    # * :formula
+    # * :time
+    # * :datetime
+    def celltype(row, col, sheet = nil)
+      sheet ||= default_sheet
+      read_cells(sheet)
+      row, col = normalize(row, col)
+      @formula[sheet][[row, col]] ? :formula : @cell_type[sheet][[row, col]]
+    end
+
+    def sheets
+      @sheet_names
+    end
+
+    # version of the Roo::OpenOffice document
+    # at 2007 this is always "1.0"
+    def officeversion
+      oo_version
+      @officeversion
+    end
+
+    # shows the internal representation of all cells
+    # mainly for debugging purposes
+    def to_s(sheet = nil)
+      sheet ||= default_sheet
+      read_cells(sheet)
+      @cell[sheet].inspect
+    end
+
+    # returns the row,col values of the labelled cell
+    # (nil,nil) if label is not defined
+    def label(labelname)
+      read_labels
+      return [nil, nil, nil] if @label.size < 1 || !@label.key?(labelname)
+      [
+        @label[labelname][1].to_i,
+        ::Roo::Utils.letter_to_number(@label[labelname][2]),
+        @label[labelname][0]
+      ]
+    end
+
+    # Returns an array which all labels. Each element is an array with
+    # [labelname, [row,col,sheetname]]
+    def labels(_sheet = nil)
+      read_labels
+      @label.map do |label|
+        [label[0], # name
+         [label[1][1].to_i, # row
+          ::Roo::Utils.letter_to_number(label[1][2]), # column
+          label[1][0], # sheet
+         ]]
+      end
+    end
+
+    # returns the comment at (row/col)
+    # nil if there is no comment
+    def comment(row, col, sheet = nil)
+      sheet ||= default_sheet
+      read_cells(sheet)
+      row, col = normalize(row, col)
+      return nil unless @comment[sheet]
+      @comment[sheet][[row, col]]
+    end
+
+    # returns each comment in the selected sheet as an array of elements
+    # [row, col, comment]
+    def comments(sheet = nil)
+      sheet ||= default_sheet
+      read_comments(sheet) unless @comments_read[sheet]
+      return [] unless @comment[sheet]
+      @comment[sheet].each.collect do |elem|
+        [elem[0][0], elem[0][1], elem[1]]
+      end
+    end
+
+    private
+
+    # If the ODS file has an encryption-data element, then try to decrypt.
+    # If successful, the temporary content.xml will be overwritten with
+    # decrypted contents.
+    def decrypt_if_necessary(
+      zip_file,
+      content_entry,
+      roo_content_xml_path, options
+    )
+      # Check if content.xml is encrypted by extracting manifest.xml
+      # and searching for a manifest:encryption-data element
+
+      if (manifest_entry = zip_file.glob('META-INF/manifest.xml').first)
+        roo_manifest_xml_path = File.join(@tmpdir, 'roo_manifest.xml')
+        manifest_entry.extract(roo_manifest_xml_path)
+        manifest        = ::Roo::Utils.load_xml(roo_manifest_xml_path)
+
+        # XPath search for manifest:encryption-data only for the content.xml
+        # file
+
+        encryption_data = manifest.xpath(
+          "//manifest:file-entry[@manifest:full-path='content.xml']"\
+        "/manifest:encryption-data"
+        ).first
+
+        # If XPath returns a node, then we know content.xml is encrypted
+
+        unless encryption_data.nil?
+
+          # Since we know it's encrypted, we check for the password option
+          # and if it doesn't exist, raise an argument error
+
+          password = options[:password]
+          if !password.nil?
+            perform_decryption(
+              encryption_data,
+              password,
+              content_entry,
+              roo_content_xml_path
+            )
+          else
+            fail ArgumentError, 'file is encrypted but password was not supplied'
+          end
+        end
+      else
+        fail ArgumentError, 'file missing required META-INF/manifest.xml'
+      end
+    end
+
+    # Process the ODS encryption manifest and perform the decryption
+    def perform_decryption(
+      encryption_data,
+      password,
+      content_entry,
+      roo_content_xml_path
+    )
+      # Extract various expected attributes from the manifest that
+      # describe the encryption
+
+      algorithm_node            = encryption_data.xpath('manifest:algorithm').first
+      key_derivation_node       =
+        encryption_data.xpath('manifest:key-derivation').first
+      start_key_generation_node =
+        encryption_data.xpath('manifest:start-key-generation').first
+
+      # If we have all the expected elements, then we can perform
+      # the decryption.
+
+      if !algorithm_node.nil? && !key_derivation_node.nil? &&
+         !start_key_generation_node.nil?
+
+        # The algorithm is a URI describing the algorithm used
+        algorithm           = algorithm_node['manifest:algorithm-name']
+
+        # The initialization vector is base-64 encoded
+        iv                  = Base64.decode64(
+          algorithm_node['manifest:initialisation-vector']
+        )
+        key_derivation_name = key_derivation_node['manifest:key-derivation-name']
+        iteration_count     = key_derivation_node['manifest:iteration-count'].to_i
+        salt                = Base64.decode64(key_derivation_node['manifest:salt'])
+
+        # The key is hashed with an algorithm represented by this URI
+        key_generation_name =
+          start_key_generation_node[
+            'manifest:start-key-generation-name'
+          ]
+
+        hashed_password = password
+
+        if key_generation_name == 'http://www.w3.org/2000/09/xmldsig#sha256'
+
+          hashed_password = Digest::SHA256.digest(password)
+        else
+          fail ArgumentError, "Unknown key generation algorithm #{key_generation_name}"
+        end
+
+        cipher = find_cipher(
+          algorithm,
+          key_derivation_name,
+          hashed_password,
+          salt,
+          iteration_count,
+          iv
+        )
+
+        begin
+          decrypted = decrypt(content_entry, cipher)
+
+          # Finally, inflate the decrypted stream and overwrite
+          # content.xml
+          IO.binwrite(
+            roo_content_xml_path,
+            Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(decrypted)
+          )
+        rescue StandardError => error
+          raise ArgumentError, "Invalid password or other data error: #{error}"
+        end
+      else
+        fail ArgumentError, 'manifest.xml missing encryption-data elements'
+      end
+    end
+
+    # Create a cipher based on an ODS algorithm URI from manifest.xml
+    # params: algorithm, key_derivation_name, hashed_password, salt, iteration_count, iv
+    def find_cipher(*args)
+      fail ArgumentError, 'Unknown algorithm ' + algorithm unless args[0] == 'http://www.w3.org/2001/04/xmlenc#aes256-cbc'
+
+      cipher = ::OpenSSL::Cipher.new('AES-256-CBC')
+      cipher.decrypt
+      cipher.padding = 0
+      cipher.key     = find_cipher_key(cipher, *args[1..4])
+      cipher.iv      = args[5]
+
+      cipher
+    end
+
+    # Create a cipher key based on an ODS algorithm string from manifest.xml
+    def find_cipher_key(*args)
+      fail ArgumentError, 'Unknown key derivation name ', args[1] unless args[1] == 'PBKDF2'
+
+      ::OpenSSL::PKCS5.pbkdf2_hmac_sha1(args[2], args[3], args[4], args[0].key_len)
+    end
+
+    # Block decrypt raw bytes from the zip file based on the cipher
+    def decrypt(content_entry, cipher)
+      # Zip::Entry.extract writes a 0-length file when trying
+      # to extract an encrypted stream, so we read the
+      # raw bytes based on the offset and lengths
+      decrypted = ''
+      File.open(@filename, 'rb') do |zipfile|
+        zipfile.seek(
+          content_entry.local_header_offset +
+            content_entry.calculate_local_header_size
+        )
+        total_to_read = content_entry.compressed_size
+
+        block_size = 4096
+        block_size = total_to_read if block_size > total_to_read
+
+        while (buffer = zipfile.read(block_size))
+          decrypted     += cipher.update(buffer)
+          total_to_read -= buffer.length
+
+          break if total_to_read == 0
+
+          block_size = total_to_read if block_size > total_to_read
+        end
+      end
+
+      decrypted + cipher.final
+    end
+
+    def doc
+      @doc ||= ::Roo::Utils.load_xml(File.join(@tmpdir, 'roo_content.xml'))
+    end
+
+    # read the version of the OO-Version
+    def oo_version
+      doc.xpath("//*[local-name()='document-content']").each do |office|
+        @officeversion = attribute(office, 'version')
+      end
+    end
+
+    # helper function to set the internal representation of cells
+    def set_cell_values(sheet, x, y, i, v, value_type, formula, table_cell, str_v, style_name)
+      key = [y, x + i]
+      @cell_type[sheet] ||= {}
+      @cell_type[sheet][key] = value_type.to_sym if value_type
+      @formula[sheet] ||= {}
+      if formula
+        ['of:', 'oooc:'].each do |prefix|
+          if formula[0, prefix.length] == prefix
+            formula = formula[prefix.length..-1]
+          end
+        end
+        @formula[sheet][key] = formula
+      end
+      @cell[sheet] ||= {}
+      @style[sheet] ||= {}
+      @style[sheet][key] = style_name
+      case @cell_type[sheet][key]
+      when :float
+        @cell[sheet][key] = (table_cell.attributes['value'].to_s.include?(".") || table_cell.children.first.text.include?(".")) ? v.to_f : v.to_i
+      when :percentage
+        @cell[sheet][key] = v.to_f
+      when :string
+        @cell[sheet][key] = str_v
+      when :date
+        # TODO: if table_cell.attributes['date-value'].size != "XXXX-XX-XX".size
+        if attribute(table_cell, 'date-value').size != 'XXXX-XX-XX'.size
+          #-- dann ist noch eine Uhrzeit vorhanden
+          #-- "1961-11-21T12:17:18"
+          @cell[sheet][key]      = DateTime.parse(attribute(table_cell, 'date-value').to_s)
+          @cell_type[sheet][key] = :datetime
+        else
+          @cell[sheet][key] = table_cell.attributes['date-value']
+        end
+      when :time
+        hms = v.split(':')
+        @cell[sheet][key] = hms[0].to_i * 3600 + hms[1].to_i * 60 + hms[2].to_i
+      else
+        @cell[sheet][key] = v
+      end
+    end
+
+    # read all cells in the selected sheet
+    #--
+    # the following construct means '4 blanks'
+    # some content <text:s text:c="3"/>
+    #++
+    def read_cells(sheet = default_sheet)
+      validate_sheet!(sheet)
+      return if @cells_read[sheet]
+
+      sheet_found = false
+      doc.xpath("//*[local-name()='table']").each do |ws|
+        next unless sheet == attribute(ws, 'name')
+
+        sheet_found = true
+        col         = 1
+        row         = 1
+        ws.children.each do |table_element|
+          case table_element.name
+          when 'table-column'
+            @style_defaults[sheet] << table_element.attributes['default-cell-style-name']
+          when 'table-row'
+            if table_element.attributes['number-rows-repeated']
+              skip_row = attribute(table_element, 'number-rows-repeated').to_s.to_i
+              row      = row + skip_row - 1
+            end
+            table_element.children.each do |cell|
+              skip_col   = attribute(cell, 'number-columns-repeated')
+              formula    = attribute(cell, 'formula')
+              value_type = attribute(cell, 'value-type')
+              v          = attribute(cell, 'value')
+              style_name = attribute(cell, 'style-name')
+              case value_type
+              when 'string'
+                str_v      = ''
+                # insert \n if there is more than one paragraph
+                para_count = 0
+                cell.children.each do |str|
+                  # begin comments
+                  #=begin
+                  #- <table:table-cell office:value-type="string">
+                  #  - <office:annotation office:display="true" draw:style-name="gr1" draw:text-style-name="P1" svg:width="1.1413in" svg:height="0.3902in" svg:x="2.0142in" svg:y="0in" draw:caption-point-x="-0.2402in" draw:caption-point-y="0.5661in">
+                  #      <dc:date>2011-09-20T00:00:00</dc:date>
+                  #      <text:p text:style-name="P1">Kommentar fuer B4</text:p>
+                  #    </office:annotation>
+                  #    <text:p>B4 (mit Kommentar)</text:p>
+                  #  </table:table-cell>
+                  #=end
+                  if str.name == 'annotation'
+                    str.children.each do |annotation|
+                      next unless annotation.name == 'p'
+                      # @comment ist ein Hash mit Sheet als Key (wie bei @cell)
+                      # innerhalb eines Elements besteht ein Eintrag aus einem
+                      # weiteren Hash mit Key [row,col] und dem eigentlichen
+                      # Kommentartext als Inhalt
+                      @comment[sheet]      = Hash.new unless @comment[sheet]
+                      key                  = [row, col]
+                      @comment[sheet][key] = annotation.text
+                    end
+                  end
+                  # end comments
+                  if str.name == 'p'
+                    v          = str.content
+                    str_v      += "\n" if para_count > 0
+                    para_count += 1
+                    if str.children.size > 1
+                      str_v += children_to_string(str.children)
+                    else
+                      str.children.each do |child|
+                        str_v += child.content #.text
+                      end
+                    end
+                    str_v.gsub!(/&apos;/, "'") # special case not supported by unescapeHTML
+                    str_v = CGI.unescapeHTML(str_v)
+                  end # == 'p'
+                end
+              when 'time'
+                cell.children.each do |str|
+                  v = str.content if str.name == 'p'
+                end
+              when '', nil, 'date', 'percentage', 'float'
+                #
+              when 'boolean'
+                v = attribute(cell, 'boolean-value').to_s
+              end
+              if skip_col
+                if !v.nil? || cell.attributes['date-value']
+                  0.upto(skip_col.to_i - 1) do |i|
+                    set_cell_values(sheet, col, row, i, v, value_type, formula, cell, str_v, style_name)
+                  end
+                end
+                col += (skip_col.to_i - 1)
+              end # if skip
+              set_cell_values(sheet, col, row, 0, v, value_type, formula, cell, str_v, style_name)
+              col += 1
+            end
+            row += 1
+            col = 1
+          end
+        end
+      end
+      doc.xpath("//*[local-name()='automatic-styles']").each do |style|
+        read_styles(style)
+      end
+
+      fail RangeError unless sheet_found
+
+      @cells_read[sheet]    = true
+      @comments_read[sheet] = true
+    end
+
+    # Only calls read_cells because Roo::Base calls read_comments
+    # whereas the reading of comments is done in read_cells for Roo::OpenOffice-objects
+    def read_comments(sheet = nil)
+      read_cells(sheet)
+    end
+
+    def read_labels
+      @label ||= Hash[doc.xpath('//table:named-range').map do |ne|
+        #-
+        # $Sheet1.$C$5
+        #+
+        name              = attribute(ne, 'name').to_s
+        sheetname, coords = attribute(ne, 'cell-range-address').to_s.split('.$')
+        col, row          = coords.split('$')
+        sheetname         = sheetname[1..-1] if sheetname[0, 1] == '$'
+        [name, [sheetname, row, col]]
+      end]
+    end
+
+    def read_styles(style_elements)
+      @font_style_definitions['Default'] = Roo::Font.new
+      style_elements.each do |style|
+        next unless style.name == 'style'
+        style_name = attribute(style, 'name')
+        style.each do |properties|
+          font                                = Roo::OpenOffice::Font.new
+          font.bold                           = attribute(properties, 'font-weight')
+          font.italic                         = attribute(properties, 'font-style')
+          font.underline                      = attribute(properties, 'text-underline-style')
+          @font_style_definitions[style_name] = font
+        end
+      end
+    end
+
+    def read_table_styles(styles)
+      styles.children.each do |style|
+        next unless style.name == 'style'
+        style_name = attribute(style, 'name')
+        style.children.each do |properties|
+          display = attribute(properties, 'display')
+          next unless display
+          @table_display[style_name] = (display == 'true')
+        end
+      end
+    end
+
+    # helper method to convert compressed spaces and other elements within
+    # an text into a string
+    # FIXME: add a test for compressed_spaces == 0. It's not currently tested.
+    def children_to_string(children)
+      children.map do |child|
+        if child.text?
+          child.content
+        else
+          if child.name == 's'
+            compressed_spaces = child.attributes['c'].to_s.to_i
+            # no explicit number means a count of 1:
+            compressed_spaces == 0 ? ' ' : ' ' * compressed_spaces
+          else
+            child.content
+          end
+        end
+      end.join
+    end
+
+    def attribute(node, attr_name)
+      node.attributes[attr_name].value if node.attributes[attr_name]
+    end
+  end
+end
diff --git a/lib/roo/spreadsheet.rb b/lib/roo/spreadsheet.rb
new file mode 100644
index 0000000..1eef58d
--- /dev/null
+++ b/lib/roo/spreadsheet.rb
@@ -0,0 +1,33 @@
+require 'uri'
+
+module Roo
+  class Spreadsheet
+    class << self
+      def open(path, options = {})
+        path      = path.respond_to?(:path) ? path.path : path
+        extension = extension_for(path, options)
+
+        begin
+          Roo::CLASS_FOR_EXTENSION.fetch(extension).new(path, options)
+        rescue KeyError
+          raise ArgumentError,
+                "Can't detect the type of #{path} - please use the :extension option to declare its type."
+        end
+      end
+
+      def extension_for(path, options)
+        case (extension = options.delete(:extension))
+        when ::Symbol
+          options[:file_warning] = :ignore
+          extension
+        when ::String
+          options[:file_warning] = :ignore
+          extension.tr('.', '').downcase.to_sym
+        else
+          res = ::File.extname((path =~ /\A#{::URI.regexp}\z/) ? ::URI.parse(::URI.encode(path)).path : path)
+          res.tr('.', '').downcase.to_sym
+        end
+      end
+    end
+  end
+end
diff --git a/lib/roo/tempdir.rb b/lib/roo/tempdir.rb
new file mode 100644
index 0000000..4be755b
--- /dev/null
+++ b/lib/roo/tempdir.rb
@@ -0,0 +1,21 @@
+module Roo
+  module Tempdir
+    def finalize_tempdirs(object_id)
+      if @tempdirs && (dirs_to_remove = @tempdirs[object_id])
+        @tempdirs.delete(object_id)
+        dirs_to_remove.each do |dir|
+          ::FileUtils.remove_entry(dir)
+        end
+      end
+    end
+
+    def make_tempdir(object, prefix, root)
+      root ||= ENV["ROO_TMP"]
+      # NOTE: This folder is cleaned up by finalize_tempdirs.
+      ::Dir.mktmpdir("#{Roo::TEMP_PREFIX}#{prefix}", root).tap do |tmpdir|
+        @tempdirs ||= Hash.new { |h, k| h[k] = [] }
+        @tempdirs[object.object_id] << tmpdir
+      end
+    end
+  end
+end
diff --git a/lib/roo/utils.rb b/lib/roo/utils.rb
new file mode 100644
index 0000000..17b672d
--- /dev/null
+++ b/lib/roo/utils.rb
@@ -0,0 +1,78 @@
+module Roo
+  module Utils
+    extend self
+
+    LETTERS = ('A'..'Z').to_a
+
+    def split_coordinate(str)
+      @split_coordinate ||= {}
+
+      @split_coordinate[str] ||= begin
+        letter, number = split_coord(str)
+        x = letter_to_number(letter)
+        y = number
+        [y, x]
+      end
+    end
+
+    alias_method :ref_to_key, :split_coordinate
+
+    def split_coord(s)
+      if s =~ /([a-zA-Z]+)([0-9]+)/
+        letter = Regexp.last_match[1]
+        number = Regexp.last_match[2].to_i
+      else
+        fail ArgumentError
+      end
+      [letter, number]
+    end
+
+    # convert a number to something like 'AB' (1 => 'A', 2 => 'B', ...)
+    def number_to_letter(num)
+      result = ""
+
+      until num.zero?
+        num, index = (num - 1).divmod(26)
+        result.prepend(LETTERS[index])
+      end
+
+      result
+    end
+
+    def letter_to_number(letters)
+      @letter_to_number ||= {}
+      @letter_to_number[letters] ||= begin
+         result = 0
+
+         # :bytes method returns an enumerator in 1.9.3 and an array in 2.0+
+         letters.bytes.to_a.map{|b| b > 96 ? b - 96 : b - 64 }.reverse.each_with_index{ |num, i| result += num * 26 ** i }
+
+         result
+      end
+    end
+
+    # Compute upper bound for cells in a given cell range.
+    def num_cells_in_range(str)
+      cells = str.split(':')
+      return 1 if cells.count == 1
+      raise ArgumentError.new("invalid range string: #{str}. Supported range format 'A1:B2'") if cells.count != 2
+      x1, y1 = split_coordinate(cells[0])
+      x2, y2 = split_coordinate(cells[1])
+      (x2 - (x1 - 1)) * (y2 - (y1 - 1))
+    end
+
+    def load_xml(path)
+      ::File.open(path, 'rb') do |file|
+        ::Nokogiri::XML(file)
+      end
+    end
+
+    # Yield each element of a given type ('row', 'c', etc.) to caller
+    def each_element(path, elements)
+      Nokogiri::XML::Reader(::File.open(path, 'rb'), nil, nil, Nokogiri::XML::ParseOptions::NOBLANKS).each do |node|
+        next unless node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT && Array(elements).include?(node.name)
+        yield Nokogiri::XML(node.outer_xml).root if block_given?
+      end
+    end
+  end
+end
diff --git a/lib/roo/version.rb b/lib/roo/version.rb
new file mode 100644
index 0000000..bdd16dc
--- /dev/null
+++ b/lib/roo/version.rb
@@ -0,0 +1,3 @@
+module Roo
+  VERSION = "2.7.1"
+end
author	Unit 193 <unit193@ubuntu.com>	2017-06-12 03:37:11 -0400
committer	Unit 193 <unit193@ubuntu.com>	2017-06-12 03:37:11 -0400
commit	8280a21a23d44aa90177e2bc041d0b8dc8556f4b (patch)
tree	dadef7ee085c0e990a5070bd41b6a5b98c97f4fd /lib