aboutsummaryrefslogtreecommitdiffstats
path: root/lib/roo/excelx
diff options
context:
space:
mode:
Diffstat (limited to 'lib/roo/excelx')
-rw-r--r--lib/roo/excelx/cell.rb106
-rw-r--r--lib/roo/excelx/cell/base.rb94
-rw-r--r--lib/roo/excelx/cell/boolean.rb27
-rw-r--r--lib/roo/excelx/cell/date.rb28
-rw-r--r--lib/roo/excelx/cell/datetime.rb111
-rw-r--r--lib/roo/excelx/cell/empty.rb19
-rw-r--r--lib/roo/excelx/cell/number.rb87
-rw-r--r--lib/roo/excelx/cell/string.rb19
-rw-r--r--lib/roo/excelx/cell/time.rb43
-rw-r--r--lib/roo/excelx/comments.rb55
-rw-r--r--lib/roo/excelx/coordinate.rb12
-rw-r--r--lib/roo/excelx/extractor.rb21
-rw-r--r--lib/roo/excelx/format.rb64
-rw-r--r--lib/roo/excelx/relationships.rb25
-rw-r--r--lib/roo/excelx/shared.rb32
-rwxr-xr-xlib/roo/excelx/shared_strings.rb157
-rw-r--r--lib/roo/excelx/sheet.rb112
-rwxr-xr-xlib/roo/excelx/sheet_doc.rb211
-rw-r--r--lib/roo/excelx/styles.rb64
-rw-r--r--lib/roo/excelx/workbook.rb59
20 files changed, 1346 insertions, 0 deletions
diff --git a/lib/roo/excelx/cell.rb b/lib/roo/excelx/cell.rb
new file mode 100644
index 0000000..2fc78e1
--- /dev/null
+++ b/lib/roo/excelx/cell.rb
@@ -0,0 +1,106 @@
+require 'date'
+require 'roo/excelx/cell/base'
+require 'roo/excelx/cell/boolean'
+require 'roo/excelx/cell/datetime'
+require 'roo/excelx/cell/date'
+require 'roo/excelx/cell/empty'
+require 'roo/excelx/cell/number'
+require 'roo/excelx/cell/string'
+require 'roo/excelx/cell/time'
+
+module Roo
+ class Excelx
+ class Cell
+ attr_reader :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
+ attr_writer :value
+
+ # DEPRECATED: Please use Cell.create_cell instead.
+ def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate)
+ warn '[DEPRECATION] `Cell.new` is deprecated. Please use `Cell.create_cell` instead.'
+ @type = type
+ @formula = formula
+ @base_date = base_date if [:date, :datetime].include?(@type)
+ @excelx_type = excelx_type
+ @excelx_value = excelx_value
+ @style = style
+ @value = type_cast_value(value)
+ @value = Roo::Link.new(hyperlink, @value.to_s) if hyperlink
+ @coordinate = coordinate
+ end
+
+ def type
+ case
+ when @formula
+ :formula
+ when @value.is_a?(Roo::Link)
+ :link
+ else
+ @type
+ end
+ end
+
+ def self.create_cell(type, *values)
+ case type
+ when :string
+ Cell::String.new(*values)
+ when :boolean
+ Cell::Boolean.new(*values)
+ when :number
+ Cell::Number.new(*values)
+ when :date
+ Cell::Date.new(*values)
+ when :datetime
+ Cell::DateTime.new(*values)
+ when :time
+ Cell::Time.new(*values)
+ end
+ end
+
+ # Deprecated: use Roo::Excelx::Coordinate instead.
+ class Coordinate
+ attr_accessor :row, :column
+
+ def initialize(row, column)
+ warn '[DEPRECATION] `Roo::Excel::Cell::Coordinate` is deprecated. Please use `Roo::Excelx::Coordinate` instead.'
+ @row, @column = row, column
+ end
+ end
+
+ private
+
+ def type_cast_value(value)
+ case @type
+ when :float, :percentage
+ value.to_f
+ when :date
+ create_date(@base_date + value.to_i)
+ when :datetime
+ create_datetime(@base_date + value.to_f.round(6))
+ when :time
+ value.to_f * 86_400
+ else
+ value
+ end
+ end
+
+ def create_date(date)
+ yyyy, mm, dd = date.strftime('%Y-%m-%d').split('-')
+
+ ::Date.new(yyyy.to_i, mm.to_i, dd.to_i)
+ end
+
+ def create_datetime(date)
+ datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
+ t = round_datetime(datetime_string)
+
+ ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
+ end
+
+ def round_datetime(datetime_string)
+ /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+.\d+)/ =~ datetime_string
+
+ ::Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0)
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/cell/base.rb b/lib/roo/excelx/cell/base.rb
new file mode 100644
index 0000000..aea8808
--- /dev/null
+++ b/lib/roo/excelx/cell/base.rb
@@ -0,0 +1,94 @@
+module Roo
+ class Excelx
+ class Cell
+ class Base
+ attr_reader :cell_type, :cell_value, :value
+
+ # FIXME: I think style should be deprecated. Having a style attribute
+ # for a cell doesn't really accomplish much. It seems to be used
+ # when you want to export to excelx.
+ attr_reader :style
+
+
+ # FIXME: Updating a cell's value should be able tochange the cell's type,
+ # but that isn't currently possible. This will cause weird bugs
+ # when one changes the value of a Number cell to a String. e.g.
+ #
+ # cell = Cell::Number(*args)
+ # cell.value = 'Hello'
+ # cell.formatted_value # => Some unexpected value
+ #
+ # Here are two possible solutions to such issues:
+ # 1. Don't allow a cell's value to be updated. Use a method like
+ # `Sheet.update_cell` instead. The simple solution.
+ # 2. When `cell.value = ` is called, use injection to try and
+ # change the type of cell on the fly. But deciding what type
+ # of value to pass to `cell.value=`. isn't always obvious. e.g.
+ # `cell.value = Time.now` should convert a cell to a DateTime,
+ # not a Time cell. Time cells would be hard to recognize because
+ # they are integers. This approach would require a significant
+ # change to the code as written. The complex solution.
+ #
+ # If the first solution is used, then this method should be
+ # deprecated.
+ attr_writer :value
+
+ def initialize(value, formula, excelx_type, style, link, coordinate)
+ @link = !!link
+ @cell_value = value
+ @cell_type = excelx_type
+ @formula = formula
+ @style = style
+ @coordinate = coordinate
+ @type = :base
+ @value = link? ? Roo::Link.new(link, value) : value
+ end
+
+ def type
+ if formula?
+ :formula
+ elsif link?
+ :link
+ else
+ @type
+ end
+ end
+
+ def formula?
+ !!@formula
+ end
+
+ def link?
+ !!@link
+ end
+
+ alias_method :formatted_value, :value
+
+ def to_s
+ formatted_value
+ end
+
+ # DEPRECATED: Please use link instead.
+ def hyperlink
+ warn '[DEPRECATION] `hyperlink` is deprecated. Please use `link` instead.'
+ end
+
+ # DEPRECATED: Please use cell_value instead.
+ def excelx_value
+ warn '[DEPRECATION] `excelx_value` is deprecated. Please use `cell_value` instead.'
+ cell_value
+ end
+
+ # DEPRECATED: Please use cell_type instead.
+ def excelx_type
+ warn '[DEPRECATION] `excelx_type` is deprecated. Please use `cell_type` instead.'
+ cell_type
+ end
+
+ def empty?
+ false
+ end
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/cell/boolean.rb b/lib/roo/excelx/cell/boolean.rb
new file mode 100644
index 0000000..fe1f691
--- /dev/null
+++ b/lib/roo/excelx/cell/boolean.rb
@@ -0,0 +1,27 @@
+module Roo
+ class Excelx
+ class Cell
+ class Boolean < Cell::Base
+ attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
+
+ def initialize(value, formula, style, link, coordinate)
+ super(value, formula, nil, style, link, coordinate)
+ @type = @cell_type = :boolean
+ @value = link? ? Roo::Link.new(link, value) : create_boolean(value)
+ end
+
+ def formatted_value
+ value ? 'TRUE'.freeze : 'FALSE'.freeze
+ end
+
+ private
+
+ def create_boolean(value)
+ # FIXME: Using a boolean will cause methods like Base#to_csv to fail.
+ # Roo is using some method to ignore false/nil values.
+ value.to_i == 1 ? true : false
+ end
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/cell/date.rb b/lib/roo/excelx/cell/date.rb
new file mode 100644
index 0000000..8e2c6cb
--- /dev/null
+++ b/lib/roo/excelx/cell/date.rb
@@ -0,0 +1,28 @@
+require 'date'
+
+module Roo
+ class Excelx
+ class Cell
+ class Date < Roo::Excelx::Cell::DateTime
+ attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
+
+ def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
+ # NOTE: Pass all arguments to the parent class, DateTime.
+ super
+ @type = :date
+ @format = excelx_type.last
+ @value = link? ? Roo::Link.new(link, value) : create_date(base_date, value)
+ end
+
+ private
+
+ def create_date(base_date, value)
+ date = base_date + value.to_i
+ yyyy, mm, dd = date.strftime('%Y-%m-%d').split('-')
+
+ ::Date.new(yyyy.to_i, mm.to_i, dd.to_i)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/cell/datetime.rb b/lib/roo/excelx/cell/datetime.rb
new file mode 100644
index 0000000..35d93ac
--- /dev/null
+++ b/lib/roo/excelx/cell/datetime.rb
@@ -0,0 +1,111 @@
+require 'date'
+
+module Roo
+ class Excelx
+ class Cell
+ class DateTime < Cell::Base
+ attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
+
+ def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
+ super(value, formula, excelx_type, style, link, coordinate)
+ @type = :datetime
+ @format = excelx_type.last
+ @value = link? ? Roo::Link.new(link, value) : create_datetime(base_date, value)
+ end
+
+ # Public: Returns formatted value for a datetime. Format's can be an
+ # standard excel format, or a custom format.
+ #
+ # Standard formats follow certain conventions. Date fields for
+ # days, months, and years are separated with hyhens or
+ # slashes ("-", /") (e.g. 01-JAN, 1/13/15). Time fields for
+ # hours, minutes, and seconds are separated with a colon (e.g.
+ # 12:45:01).
+ #
+ # If a custom format follows those conventions, then the custom
+ # format will be used for the a cell's formatted value.
+ # Otherwise, the formatted value will be in the following
+ # format: 'YYYY-mm-dd HH:MM:SS' (e.g. "2015-07-10 20:33:15").
+ #
+ # Examples
+ # formatted_value #=> '01-JAN'
+ #
+ # Returns a String representation of a cell's value.
+ def formatted_value
+ formatter = @format.downcase.split(' ').map do |part|
+ if (parsed_format = parse_date_or_time_format(part))
+ parsed_format
+ else
+ warn 'Unable to parse custom format. Using "YYYY-mm-dd HH:MM:SS" format.'
+ return @value.strftime('%F %T')
+ end
+ end.join(' ')
+
+ @value.strftime(formatter)
+ end
+
+ private
+
+ def parse_date_or_time_format(part)
+ date_regex = /(?<date>[dmy]+[\-\/][dmy]+([\-\/][dmy]+)?)/
+ time_regex = /(?<time>(\[?[h]\]?+:)?[m]+(:?ss|:?s)?)/
+
+ if part[date_regex] == part
+ formats = DATE_FORMATS
+ elsif part[time_regex]
+ formats = TIME_FORMATS
+ else
+ return false
+ end
+
+ part.gsub(/#{formats.keys.join('|')}/, formats)
+ end
+
+ DATE_FORMATS = {
+ 'yyyy' => '%Y', # Year: 2000
+ 'yy' => '%y', # Year: 00
+ # mmmmm => J-D
+ 'mmmm' => '%B', # Month: January
+ 'mmm' => '%^b', # Month: JAN
+ 'mm' => '%m', # Month: 01
+ 'm' => '%-m', # Month: 1
+ 'dddd' => '%A', # Day of the Week: Sunday
+ 'ddd' => '%^a', # Day of the Week: SUN
+ 'dd' => '%d', # Day of the Month: 01
+ 'd' => '%-d' # Day of the Month: 1
+ # '\\\\'.freeze => ''.freeze, # NOTE: Fixes a custom format's output.
+ }
+
+ TIME_FORMATS = {
+ 'hh' => '%H', # Hour (24): 01
+ 'h' => '%-k'.freeze, # Hour (24): 1
+ # 'hh'.freeze => '%I'.freeze, # Hour (12): 08
+ # 'h'.freeze => '%-l'.freeze, # Hour (12): 8
+ 'mm' => '%M', # Minute: 01
+ # FIXME: is this used? Seems like 'm' is used for month, not minute.
+ 'm' => '%-M', # Minute: 1
+ 'ss' => '%S', # Seconds: 01
+ 's' => '%-S', # Seconds: 1
+ 'am/pm' => '%p', # Meridian: AM
+ '000' => '%3N', # Fractional Seconds: thousandth.
+ '00' => '%2N', # Fractional Seconds: hundredth.
+ '0' => '%1N' # Fractional Seconds: tenths.
+ }
+
+ def create_datetime(base_date, value)
+ date = base_date + value.to_f.round(6)
+ datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
+ t = round_datetime(datetime_string)
+
+ ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
+ end
+
+ def round_datetime(datetime_string)
+ /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+.\d+)/ =~ datetime_string
+
+ ::Time.new(yyyy, mm, dd, hh, mi, ss.to_r).round(0)
+ end
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/cell/empty.rb b/lib/roo/excelx/cell/empty.rb
new file mode 100644
index 0000000..49a20e7
--- /dev/null
+++ b/lib/roo/excelx/cell/empty.rb
@@ -0,0 +1,19 @@
+
+module Roo
+ class Excelx
+ class Cell
+ class Empty < Cell::Base
+ attr_reader :value, :formula, :format, :cell_type, :cell_value, :hyperlink, :coordinate
+
+ def initialize(coordinate)
+ @value = @formula = @format = @cell_type = @cell_value = @hyperlink = nil
+ @coordinate = coordinate
+ end
+
+ def empty?
+ true
+ end
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/cell/number.rb b/lib/roo/excelx/cell/number.rb
new file mode 100644
index 0000000..2015562
--- /dev/null
+++ b/lib/roo/excelx/cell/number.rb
@@ -0,0 +1,87 @@
+module Roo
+ class Excelx
+ class Cell
+ class Number < Cell::Base
+ attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
+
+ def initialize(value, formula, excelx_type, style, link, coordinate)
+ super
+ # FIXME: change @type to number. This will break brittle tests.
+ # FIXME: Excelx_type is an array, but the first value isn't used.
+ @type = :float
+ @format = excelx_type.last
+ @value = link? ? Roo::Link.new(link, value) : create_numeric(value)
+ end
+
+ def create_numeric(number)
+ return number if Excelx::ERROR_VALUES.include?(number)
+ case @format
+ when /%/
+ Float(number)
+ when /\.0/
+ Float(number)
+ else
+ (number.include?('.') || (/\A[-+]?\d+E[-+]\d+\z/i =~ number)) ? Float(number) : Integer(number)
+ end
+ end
+
+ def formatted_value
+ return @cell_value if Excelx::ERROR_VALUES.include?(@cell_value)
+
+ formatter = formats[@format]
+ if formatter.is_a? Proc
+ formatter.call(@cell_value)
+ elsif zero_padded_number?
+ "%0#{@format.size}d" % @cell_value
+ else
+ Kernel.format(formatter, @cell_value)
+ end
+ end
+
+ def formats
+ # FIXME: numbers can be other colors besides red:
+ # [BLACK], [BLUE], [CYAN], [GREEN], [MAGENTA], [RED], [WHITE], [YELLOW], [COLOR n]
+ {
+ 'General' => '%.0f',
+ '0' => '%.0f',
+ '0.00' => '%.2f',
+ '0.000000' => '%.6f',
+ '#,##0' => number_format('%.0f'),
+ '#,##0.00' => number_format('%.2f'),
+ '0%' => proc do |number|
+ Kernel.format('%d%', number.to_f * 100)
+ end,
+ '0.00%' => proc do |number|
+ Kernel.format('%.2f%', number.to_f * 100)
+ end,
+ '0.00E+00' => '%.2E',
+ '#,##0 ;(#,##0)' => number_format('%.0f', '(%.0f)'),
+ '#,##0 ;[Red](#,##0)' => number_format('%.0f', '[Red](%.0f)'),
+ '#,##0.00;(#,##0.00)' => number_format('%.2f', '(%.2f)'),
+ '#,##0.00;[Red](#,##0.00)' => number_format('%.2f', '[Red](%.2f)'),
+ # FIXME: not quite sure what the format should look like in this case.
+ '##0.0E+0' => '%.1E',
+ '@' => proc { |number| number }
+ }
+ end
+
+ private
+
+ def number_format(formatter, negative_formatter = nil)
+ proc do |number|
+ if negative_formatter
+ formatter = number.to_i > 0 ? formatter : negative_formatter
+ number = number.to_f.abs
+ end
+
+ Kernel.format(formatter, number).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
+ end
+ end
+
+ def zero_padded_number?
+ @format[/0+/] == @format
+ end
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/cell/string.rb b/lib/roo/excelx/cell/string.rb
new file mode 100644
index 0000000..7967806
--- /dev/null
+++ b/lib/roo/excelx/cell/string.rb
@@ -0,0 +1,19 @@
+module Roo
+ class Excelx
+ class Cell
+ class String < Cell::Base
+ attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
+
+ def initialize(value, formula, style, link, coordinate)
+ super(value, formula, nil, style, link, coordinate)
+ @type = @cell_type = :string
+ @value = link? ? Roo::Link.new(link, value) : value
+ end
+
+ def empty?
+ value.empty?
+ end
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/cell/time.rb b/lib/roo/excelx/cell/time.rb
new file mode 100644
index 0000000..d661ab8
--- /dev/null
+++ b/lib/roo/excelx/cell/time.rb
@@ -0,0 +1,43 @@
+require 'date'
+
+module Roo
+ class Excelx
+ class Cell
+ class Time < Roo::Excelx::Cell::DateTime
+ attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
+
+ def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
+ # NOTE: Pass all arguments to DateTime super class.
+ super
+ @type = :time
+ @format = excelx_type.last
+ @datetime = create_datetime(base_date, value)
+ @value = link? ? Roo::Link.new(link, value) : (value.to_f * 86_400).to_i
+ end
+
+ def formatted_value
+ formatter = @format.gsub(/#{TIME_FORMATS.keys.join('|')}/, TIME_FORMATS)
+ @datetime.strftime(formatter)
+ end
+
+ alias_method :to_s, :formatted_value
+
+ private
+
+ # def create_datetime(base_date, value)
+ # date = base_date + value.to_f.round(6)
+ # datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
+ # t = round_datetime(datetime_string)
+ #
+ # ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
+ # end
+
+ # def round_datetime(datetime_string)
+ # /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+.\d+)/ =~ datetime_string
+ #
+ # ::Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0)
+ # end
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/comments.rb b/lib/roo/excelx/comments.rb
new file mode 100644
index 0000000..1a89908
--- /dev/null
+++ b/lib/roo/excelx/comments.rb
@@ -0,0 +1,55 @@
+require 'roo/excelx/extractor'
+
+module Roo
+ class Excelx
+ class Comments < Excelx::Extractor
+ def comments
+ @comments ||= extract_comments
+ end
+
+ private
+
+ def extract_comments
+ return {} unless doc_exists?
+
+ Hash[doc.xpath('//comments/commentList/comment').map do |comment|
+ value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text
+ [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
+ end]
+ end
+ end
+ end
+end
+# xl/comments1.xml
+# <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
+# <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
+# <authors>
+# <author />
+# </authors>
+# <commentList>
+# <comment ref="B4" authorId="0">
+# <text>
+# <r>
+# <rPr>
+# <sz val="10" />
+# <rFont val="Arial" />
+# <family val="2" />
+# </rPr>
+# <t>Comment for B4</t>
+# </r>
+# </text>
+# </comment>
+# <comment ref="B5" authorId="0">
+# <text>
+# <r>
+# <rPr>
+# <sz val="10" />
+# <rFont val="Arial" />
+# <family val="2" />
+# </rPr>
+# <t>Comment for B5</t>
+# </r>
+# </text>
+# </comment>
+# </commentList>
+# </comments>
diff --git a/lib/roo/excelx/coordinate.rb b/lib/roo/excelx/coordinate.rb
new file mode 100644
index 0000000..53b24ba
--- /dev/null
+++ b/lib/roo/excelx/coordinate.rb
@@ -0,0 +1,12 @@
+module Roo
+ class Excelx
+ class Coordinate
+ attr_accessor :row, :column
+
+ def initialize(row, column)
+ @row = row
+ @column = column
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/extractor.rb b/lib/roo/excelx/extractor.rb
new file mode 100644
index 0000000..1cdd13b
--- /dev/null
+++ b/lib/roo/excelx/extractor.rb
@@ -0,0 +1,21 @@
+module Roo
+ class Excelx
+ class Extractor
+ def initialize(path)
+ @path = path
+ end
+
+ private
+
+ def doc
+ raise FileNotFound, "#{@path} file not found" unless doc_exists?
+
+ ::Roo::Utils.load_xml(@path).remove_namespaces!
+ end
+
+ def doc_exists?
+ @path && File.exist?(@path)
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/format.rb b/lib/roo/excelx/format.rb
new file mode 100644
index 0000000..72b36d9
--- /dev/null
+++ b/lib/roo/excelx/format.rb
@@ -0,0 +1,64 @@
+module Roo
+ class Excelx
+ module Format
+ EXCEPTIONAL_FORMATS = {
+ 'h:mm am/pm' => :date,
+ 'h:mm:ss am/pm' => :date
+ }
+
+ STANDARD_FORMATS = {
+ 0 => 'General'.freeze,
+ 1 => '0'.freeze,
+ 2 => '0.00'.freeze,
+ 3 => '#,##0'.freeze,
+ 4 => '#,##0.00'.freeze,
+ 9 => '0%'.freeze,
+ 10 => '0.00%'.freeze,
+ 11 => '0.00E+00'.freeze,
+ 12 => '# ?/?'.freeze,
+ 13 => '# ??/??'.freeze,
+ 14 => 'mm-dd-yy'.freeze,
+ 15 => 'd-mmm-yy'.freeze,
+ 16 => 'd-mmm'.freeze,
+ 17 => 'mmm-yy'.freeze,
+ 18 => 'h:mm AM/PM'.freeze,
+ 19 => 'h:mm:ss AM/PM'.freeze,
+ 20 => 'h:mm'.freeze,
+ 21 => 'h:mm:ss'.freeze,
+ 22 => 'm/d/yy h:mm'.freeze,
+ 37 => '#,##0 ;(#,##0)'.freeze,
+ 38 => '#,##0 ;[Red](#,##0)'.freeze,
+ 39 => '#,##0.00;(#,##0.00)'.freeze,
+ 40 => '#,##0.00;[Red](#,##0.00)'.freeze,
+ 45 => 'mm:ss'.freeze,
+ 46 => '[h]:mm:ss'.freeze,
+ 47 => 'mmss.0'.freeze,
+ 48 => '##0.0E+0'.freeze,
+ 49 => '@'.freeze
+ }
+
+ def to_type(format)
+ format = format.to_s.downcase
+ if (type = EXCEPTIONAL_FORMATS[format])
+ type
+ elsif format.include?('#')
+ :float
+ elsif !format.match(/d+(?![\]])/).nil? || format.include?('y')
+ if format.include?('h') || format.include?('s')
+ :datetime
+ else
+ :date
+ end
+ elsif format.include?('h') || format.include?('s')
+ :time
+ elsif format.include?('%')
+ :percentage
+ else
+ :float
+ end
+ end
+
+ module_function :to_type
+ end
+ end
+end
diff --git a/lib/roo/excelx/relationships.rb b/lib/roo/excelx/relationships.rb
new file mode 100644
index 0000000..8a0ed97
--- /dev/null
+++ b/lib/roo/excelx/relationships.rb
@@ -0,0 +1,25 @@
+require 'roo/excelx/extractor'
+
+module Roo
+ class Excelx
+ class Relationships < Excelx::Extractor
+ def [](index)
+ to_a[index]
+ end
+
+ def to_a
+ @relationships ||= extract_relationships
+ end
+
+ private
+
+ def extract_relationships
+ return [] unless doc_exists?
+
+ Hash[doc.xpath('/Relationships/Relationship').map do |rel|
+ [rel.attribute('Id').text, rel]
+ end]
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/shared.rb b/lib/roo/excelx/shared.rb
new file mode 100644
index 0000000..3677fa2
--- /dev/null
+++ b/lib/roo/excelx/shared.rb
@@ -0,0 +1,32 @@
+module Roo
+ class Excelx
+ # Public: Shared class for allowing sheets to share data. This should
+ # reduce memory usage and reduce the number of objects being passed
+ # to various inititializers.
+ class Shared
+ attr_accessor :comments_files, :sheet_files, :rels_files
+ def initialize(dir)
+ @dir = dir
+ @comments_files = []
+ @sheet_files = []
+ @rels_files = []
+ end
+
+ def styles
+ @styles ||= Styles.new(File.join(@dir, 'roo_styles.xml'))
+ end
+
+ def shared_strings
+ @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'))
+ end
+
+ def workbook
+ @workbook ||= Workbook.new(File.join(@dir, 'roo_workbook.xml'))
+ end
+
+ def base_date
+ workbook.base_date
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/shared_strings.rb b/lib/roo/excelx/shared_strings.rb
new file mode 100755
index 0000000..f7caf7c
--- /dev/null
+++ b/lib/roo/excelx/shared_strings.rb
@@ -0,0 +1,157 @@
+require 'roo/excelx/extractor'
+
+module Roo
+ class Excelx
+ class SharedStrings < Excelx::Extractor
+
+ COMMON_STRINGS = {
+ t: "t",
+ r: "r",
+ html_tag_open: "<html>",
+ html_tag_closed: "</html>"
+ }
+
+ def [](index)
+ to_a[index]
+ end
+
+ def to_a
+ @array ||= extract_shared_strings
+ end
+
+ def to_html
+ @html ||= extract_html
+ end
+
+ # Use to_html or to_a for html returns
+ # See what is happening with commit???
+ def use_html?(index)
+ to_html[index][/<([biu]|sup|sub)>/]
+ end
+
+ private
+
+ def fix_invalid_shared_strings(doc)
+ invalid = { '_x000D_' => "\n" }
+ xml = doc.to_s
+ return doc unless xml[/#{invalid.keys.join('|')}/]
+
+ ::Nokogiri::XML(xml.gsub(/#{invalid.keys.join('|')}/, invalid))
+ end
+
+ def extract_shared_strings
+ return [] unless doc_exists?
+
+ document = fix_invalid_shared_strings(doc)
+ # read the shared strings xml document
+ document.xpath('/sst/si').map do |si|
+ shared_string = ''
+ si.children.each do |elem|
+ case elem.name
+ when 'r'
+ elem.children.each do |r_elem|
+ shared_string << r_elem.content if r_elem.name == 't'
+ end
+ when 't'
+ shared_string = elem.content
+ end
+ end
+ shared_string
+ end
+ end
+
+ def extract_html
+ return [] unless doc_exists?
+ fix_invalid_shared_strings(doc)
+ # read the shared strings xml document
+ doc.xpath('/sst/si').map do |si|
+ html_string = '<html>'
+ si.children.each do |elem|
+ case elem.name
+ when 'r'
+ html_string << extract_html_r(elem)
+ when 't'
+ html_string << elem.content
+ end # case elem.name
+ end # si.children.each do |elem|
+ html_string << '</html>'
+ end # doc.xpath('/sst/si').map do |si|
+ end # def extract_html
+
+ # The goal of this function is to take the following XML code snippet and create a html tag
+ # r_elem ::: XML Element that is in sharedStrings.xml of excel_book.xlsx
+ # {code:xml}
+ # <r>
+ # <rPr>
+ # <i/>
+ # <b/>
+ # <u/>
+ # <vertAlign val="subscript"/>
+ # <vertAlign val="superscript"/>
+ # </rPr>
+ # <t>TEXT</t>
+ # </r>
+ # {code}
+ #
+ # Expected Output ::: "<html><sub|sup><b><i><u>TEXT</u></i></b></sub|/sup></html>"
+ def extract_html_r(r_elem)
+ str = ''
+ xml_elems = {
+ sub: false,
+ sup: false,
+ b: false,
+ i: false,
+ u: false
+ }
+ b, i, u, sub, sup = false, false, false, false, false
+ r_elem.children.each do |elem|
+ case elem.name
+ when 'rPr'
+ elem.children.each do |rPr_elem|
+ case rPr_elem.name
+ when 'b'
+ # set formatting for Bold to true
+ xml_elems[:b] = true
+ when 'i'
+ # set formatting for Italics to true
+ xml_elems[:i] = true
+ when 'u'
+ # set formatting for Underline to true
+ xml_elems[:u] = true
+ when 'vertAlign'
+ # See if the Vertical Alignment is subscript or superscript
+ case rPr_elem.xpath('@val').first.value
+ when 'subscript'
+ # set formatting for Subscript to true and Superscript to false ... Can't have both
+ xml_elems[:sub] = true
+ xml_elems[:sup] = false
+ when 'superscript'
+ # set formatting for Superscript to true and Subscript to false ... Can't have both
+ xml_elems[:sup] = true
+ xml_elems[:sub] = false
+ end
+ end
+ end
+ when 't'
+ str << create_html(elem.content, xml_elems)
+ end
+ end
+ str
+ end # extract_html_r
+
+ # This will return an html string
+ def create_html(text, formatting)
+ tmp_str = ''
+ formatting.each do |elem, val|
+ tmp_str << "<#{elem}>" if val
+ end
+ tmp_str << text
+ reverse_format = Hash[formatting.to_a.reverse]
+ reverse_format.each do |elem, val|
+ tmp_str << "</#{elem}>" if val
+ end
+ tmp_str
+ end
+ end # class SharedStrings < Excelx::Extractor
+ end # class Excelx
+end # module Roo
diff --git a/lib/roo/excelx/sheet.rb b/lib/roo/excelx/sheet.rb
new file mode 100644
index 0000000..add92f0
--- /dev/null
+++ b/lib/roo/excelx/sheet.rb
@@ -0,0 +1,112 @@
+require 'forwardable'
+module Roo
+ class Excelx
+ class Sheet
+ extend Forwardable
+
+ delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
+
+ def initialize(name, shared, sheet_index, options = {})
+ @name = name
+ @shared = shared
+ @rels = Relationships.new(rels_files[sheet_index])
+ @comments = Comments.new(comments_files[sheet_index])
+ @sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
+ end
+
+ def cells
+ @cells ||= @sheet.cells(@rels)
+ end
+
+ def present_cells
+ @present_cells ||= cells.select { |_, cell| cell && !cell.empty? }
+ end
+
+ # Yield each row as array of Excelx::Cell objects
+ # accepts options max_rows (int) (offset by 1 for header),
+ # pad_cells (boolean) and offset (int)
+ def each_row(options = {}, &block)
+ row_count = 0
+ options[:offset] ||= 0
+ @sheet.each_row_streaming do |row|
+ break if options[:max_rows] && row_count == options[:max_rows] + options[:offset] + 1
+ if block_given? && !(options[:offset] && row_count < options[:offset])
+ block.call(cells_for_row_element(row, options))
+ end
+ row_count += 1
+ end
+ end
+
+ def row(row_number)
+ first_column.upto(last_column).map do |col|
+ cells[[row_number, col]]
+ end.map { |cell| cell && cell.value }
+ end
+
+ def column(col_number)
+ first_row.upto(last_row).map do |row|
+ cells[[row, col_number]]
+ end.map { |cell| cell && cell.value }
+ end
+
+ # returns the number of the first non-empty row
+ def first_row
+ @first_row ||= present_cells.keys.map { |row, _| row }.min
+ end
+
+ def last_row
+ @last_row ||= present_cells.keys.map { |row, _| row }.max
+ end
+
+ # returns the number of the first non-empty column
+ def first_column
+ @first_column ||= present_cells.keys.map { |_, col| col }.min
+ end
+
+ # returns the number of the last non-empty column
+ def last_column
+ @last_column ||= present_cells.keys.map { |_, col| col }.max
+ end
+
+ def excelx_format(key)
+ cell = cells[key]
+ styles.style_format(cell.style).to_s if cell
+ end
+
+ def hyperlinks
+ @hyperlinks ||= @sheet.hyperlinks(@rels)
+ end
+
+ def comments
+ @comments.comments
+ end
+
+ def dimensions
+ @sheet.dimensions
+ end
+
+ private
+
+ # Take an xml row and return an array of Excelx::Cell objects
+ # optionally pad array to header width(assumed 1st row).
+ # takes option pad_cells (boolean) defaults false
+ def cells_for_row_element(row_element, options = {})
+ return [] unless row_element
+ cell_col = 0
+ cells = []
+ @sheet.each_cell(row_element) do |cell|
+ cells.concat(pad_cells(cell, cell_col)) if options[:pad_cells]
+ cells << cell
+ cell_col = cell.coordinate.column
+ end
+ cells
+ end
+
+ def pad_cells(cell, last_column)
+ pad = []
+ (cell.coordinate.column - 1 - last_column).times { pad << nil }
+ pad
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/sheet_doc.rb b/lib/roo/excelx/sheet_doc.rb
new file mode 100755
index 0000000..a705958
--- /dev/null
+++ b/lib/roo/excelx/sheet_doc.rb
@@ -0,0 +1,211 @@
+require 'forwardable'
+require 'roo/excelx/extractor'
+
+module Roo
+ class Excelx
+ class SheetDoc < Excelx::Extractor
+ extend Forwardable
+ delegate [:styles, :workbook, :shared_strings, :base_date] => :@shared
+
+ def initialize(path, relationships, shared, options = {})
+ super(path)
+ @shared = shared
+ @options = options
+ @relationships = relationships
+ end
+
+ def cells(relationships)
+ @cells ||= extract_cells(relationships)
+ end
+
+ def hyperlinks(relationships)
+ @hyperlinks ||= extract_hyperlinks(relationships)
+ end
+
+ # Get the dimensions for the sheet.
+ # This is the upper bound of cells that might
+ # be parsed. (the document may be sparse so cell count is only upper bound)
+ def dimensions
+ @dimensions ||= extract_dimensions
+ end
+
+ # Yield each row xml element to caller
+ def each_row_streaming(&block)
+ Roo::Utils.each_element(@path, 'row', &block)
+ end
+
+ # Yield each cell as Excelx::Cell to caller for given
+ # row xml
+ def each_cell(row_xml)
+ return [] unless row_xml
+ row_xml.children.each do |cell_element|
+ # If you're sure you're not going to need this hyperlinks you can discard it
+ hyperlinks = unless @options[:no_hyperlinks]
+ key = ::Roo::Utils.ref_to_key(cell_element['r'])
+ hyperlinks(@relationships)[key]
+ end
+
+ yield cell_from_xml(cell_element, hyperlinks)
+ end
+ end
+
+ private
+
+ def cell_value_type(type, format)
+ case type
+ when 's'.freeze
+ :shared
+ when 'b'.freeze
+ :boolean
+ when 'str'.freeze
+ :string
+ when 'inlineStr'.freeze
+ :inlinestr
+ else
+ Excelx::Format.to_type(format)
+ end
+ end
+
+ # Internal: Creates a cell based on an XML clell..
+ #
+ # cell_xml - a Nokogiri::XML::Element. e.g.
+ # <c r="A5" s="2">
+ # <v>22606</v>
+ # </c>
+ # hyperlink - a String for the hyperlink for the cell or nil when no
+ # hyperlink is present.
+ #
+ # Examples
+ #
+ # cells_from_xml(<Nokogiri::XML::Element>, nil)
+ # # => <Excelx::Cell::String>
+ #
+ # Returns a type of <Excelx::Cell>.
+ def cell_from_xml(cell_xml, hyperlink)
+ coordinate = extract_coordinate(cell_xml['r'])
+ return Excelx::Cell::Empty.new(coordinate) if cell_xml.children.empty?
+
+ # NOTE: This is error prone, to_i will silently turn a nil into a 0.
+ # This works by coincidence because Format[0] is General.
+ style = cell_xml['s'].to_i
+ format = styles.style_format(style)
+ value_type = cell_value_type(cell_xml['t'], format)
+ formula = nil
+
+ cell_xml.children.each do |cell|
+ case cell.name
+ when 'is'
+ content_arr = cell.search('t').map(&:content)
+ unless content_arr.empty?
+ return Excelx::Cell.create_cell(:string, content_arr.join(''), formula, style, hyperlink, coordinate)
+ end
+ when 'f'
+ formula = cell.content
+ when 'v'
+ return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
+ end
+ end
+
+ Excelx::Cell::Empty.new(coordinate)
+ end
+
+ def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
+ # NOTE: format.to_s can replace excelx_type as an argument for
+ # Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but
+ # it will break some brittle tests.
+ excelx_type = [:numeric_or_formula, format.to_s]
+
+ # NOTE: There are only a few situations where value != cell.content
+ # 1. when a sharedString is used. value = sharedString;
+ # cell.content = id of sharedString
+ # 2. boolean cells: value = 'TRUE' | 'FALSE'; cell.content = '0' | '1';
+ # But a boolean cell should use TRUE|FALSE as the formatted value
+ # and use a Boolean for it's value. Using a Boolean value breaks
+ # Roo::Base#to_csv.
+ # 3. formula
+ case value_type
+ when :shared
+ value = shared_strings.use_html?(cell.content.to_i) ? shared_strings.to_html[cell.content.to_i] : shared_strings[cell.content.to_i]
+ Excelx::Cell.create_cell(:string, value, formula, style, hyperlink, coordinate)
+ when :boolean, :string
+ value = cell.content
+ Excelx::Cell.create_cell(value_type, value, formula, style, hyperlink, coordinate)
+ when :time, :datetime
+ cell_content = cell.content.to_f
+ # NOTE: A date will be a whole number. A time will have be > 1. And
+ # in general, a datetime will have decimals. But if the cell is
+ # using a custom format, it's possible to be interpreted incorrectly.
+ # cell_content.to_i == cell_content && standard_style?=> :date
+ #
+ # Should check to see if the format is standard or not. If it's a
+ # standard format, than it's a date, otherwise, it is a datetime.
+ # @styles.standard_style?(style_id)
+ # STANDARD_STYLES.keys.include?(style_id.to_i)
+ cell_type = if cell_content < 1.0
+ :time
+ elsif (cell_content - cell_content.floor).abs > 0.000001
+ :datetime
+ else
+ :date
+ end
+ Excelx::Cell.create_cell(cell_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
+ when :date
+ Excelx::Cell.create_cell(value_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
+ else
+ Excelx::Cell.create_cell(:number, cell.content, formula, excelx_type, style, hyperlink, coordinate)
+ end
+ end
+
+ def extract_coordinate(coordinate)
+ row, column = ::Roo::Utils.split_coordinate(coordinate)
+
+ Excelx::Coordinate.new(row, column)
+ end
+
+ def extract_hyperlinks(relationships)
+ return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink'))
+
+ Hash[hyperlinks.map do |hyperlink|
+ if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text])
+ [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
+ end
+ end.compact]
+ end
+
+ def expand_merged_ranges(cells)
+ # Extract merged ranges from xml
+ merges = {}
+ doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
+ tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
+ for row in tl[0]..br[0] do
+ for col in tl[1]..br[1] do
+ next if row == tl[0] && col == tl[1]
+ merges[[row, col]] = tl
+ end
+ end
+ end
+ # Duplicate value into all cells in merged range
+ merges.each do |dst, src|
+ cells[dst] = cells[src]
+ end
+ end
+
+ def extract_cells(relationships)
+ extracted_cells = Hash[doc.xpath('/worksheet/sheetData/row/c').map do |cell_xml|
+ key = ::Roo::Utils.ref_to_key(cell_xml['r'])
+ [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
+ end]
+
+ expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges]
+
+ extracted_cells
+ end
+
+ def extract_dimensions
+ Roo::Utils.each_element(@path, 'dimension') do |dimension|
+ return dimension.attributes['ref'].value
+ end
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/styles.rb b/lib/roo/excelx/styles.rb
new file mode 100644
index 0000000..87f1713
--- /dev/null
+++ b/lib/roo/excelx/styles.rb
@@ -0,0 +1,64 @@
+require 'roo/font'
+require 'roo/excelx/extractor'
+
+module Roo
+ class Excelx
+ class Styles < Excelx::Extractor
+ # convert internal excelx attribute to a format
+ def style_format(style)
+ id = num_fmt_ids[style.to_i]
+ num_fmts[id] || Excelx::Format::STANDARD_FORMATS[id.to_i]
+ end
+
+ def definitions
+ @definitions ||= extract_definitions
+ end
+
+ private
+
+ def num_fmt_ids
+ @num_fmt_ids ||= extract_num_fmt_ids
+ end
+
+ def num_fmts
+ @num_fmts ||= extract_num_fmts
+ end
+
+ def fonts
+ @fonts ||= extract_fonts
+ end
+
+ def extract_definitions
+ doc.xpath('//cellXfs').flat_map do |xfs|
+ xfs.children.map do |xf|
+ fonts[xf['fontId'].to_i]
+ end
+ end
+ end
+
+ def extract_fonts
+ doc.xpath('//fonts/font').map do |font_el|
+ Font.new.tap do |font|
+ font.bold = !font_el.xpath('./b').empty?
+ font.italic = !font_el.xpath('./i').empty?
+ font.underline = !font_el.xpath('./u').empty?
+ end
+ end
+ end
+
+ def extract_num_fmt_ids
+ doc.xpath('//cellXfs').flat_map do |xfs|
+ xfs.children.map do |xf|
+ xf['numFmtId']
+ end
+ end.compact
+ end
+
+ def extract_num_fmts
+ Hash[doc.xpath('//numFmt').map do |num_fmt|
+ [num_fmt['numFmtId'], num_fmt['formatCode']]
+ end]
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/workbook.rb b/lib/roo/excelx/workbook.rb
new file mode 100644
index 0000000..7ef841f
--- /dev/null
+++ b/lib/roo/excelx/workbook.rb
@@ -0,0 +1,59 @@
+require 'roo/excelx/extractor'
+
+module Roo
+ class Excelx
+ class Workbook < Excelx::Extractor
+ class Label
+ attr_reader :sheet, :row, :col, :name
+
+ def initialize(name, sheet, row, col)
+ @name = name
+ @sheet = sheet
+ @row = row.to_i
+ @col = ::Roo::Utils.letter_to_number(col)
+ end
+
+ def key
+ [@row, @col]
+ end
+ end
+
+ def initialize(path)
+ super
+ fail ArgumentError, 'missing required workbook file' unless doc_exists?
+ end
+
+ def sheets
+ doc.xpath('//sheet')
+ end
+
+ # aka labels
+ def defined_names
+ Hash[doc.xpath('//definedName').map do |defined_name|
+ # "Sheet1!$C$5"
+ sheet, coordinates = defined_name.text.split('!$', 2)
+ col, row = coordinates.split('$')
+ name = defined_name['name']
+ [name, Label.new(name, sheet, row, col)]
+ end]
+ end
+
+ def base_date
+ @base_date ||=
+ begin
+ # Default to 1900 (minus one day due to excel quirk) but use 1904 if
+ # it's set in the Workbook's workbookPr
+ # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
+ result = Date.new(1899, 12, 30) # default
+ doc.css('workbookPr[date1904]').each do |workbookPr|
+ if workbookPr['date1904'] =~ /true|1/i
+ result = Date.new(1904, 01, 01)
+ break
+ end
+ end
+ result
+ end
+ end
+ end
+ end
+end