aboutsummaryrefslogtreecommitdiffstats
path: root/lib/roo/excelx
diff options
context:
space:
mode:
Diffstat (limited to 'lib/roo/excelx')
-rw-r--r--lib/roo/excelx/cell.rb16
-rw-r--r--lib/roo/excelx/cell/base.rb38
-rw-r--r--lib/roo/excelx/cell/boolean.rb15
-rw-r--r--lib/roo/excelx/cell/date.rb14
-rw-r--r--lib/roo/excelx/cell/datetime.rb32
-rw-r--r--lib/roo/excelx/cell/empty.rb5
-rw-r--r--lib/roo/excelx/cell/number.rb69
-rw-r--r--lib/roo/excelx/cell/string.rb6
-rw-r--r--lib/roo/excelx/cell/time.rb7
-rw-r--r--lib/roo/excelx/comments.rb6
-rw-r--r--lib/roo/excelx/coordinate.rb15
-rwxr-xr-x[-rw-r--r--]lib/roo/excelx/extractor.rb24
-rw-r--r--lib/roo/excelx/format.rb69
-rw-r--r--lib/roo/excelx/images.rb26
-rw-r--r--lib/roo/excelx/relationships.rb6
-rwxr-xr-x[-rw-r--r--]lib/roo/excelx/shared.rb13
-rwxr-xr-xlib/roo/excelx/shared_strings.rb24
-rw-r--r--lib/roo/excelx/sheet.rb59
-rwxr-xr-xlib/roo/excelx/sheet_doc.rb134
-rw-r--r--lib/roo/excelx/styles.rb6
-rw-r--r--lib/roo/excelx/workbook.rb10
21 files changed, 377 insertions, 217 deletions
diff --git a/lib/roo/excelx/cell.rb b/lib/roo/excelx/cell.rb
index 2fc78e1..7abd074 100644
--- a/lib/roo/excelx/cell.rb
+++ b/lib/roo/excelx/cell.rb
@@ -40,19 +40,23 @@ module Roo
end
def self.create_cell(type, *values)
+ cell_class(type)&.new(*values)
+ end
+
+ def self.cell_class(type)
case type
when :string
- Cell::String.new(*values)
+ Cell::String
when :boolean
- Cell::Boolean.new(*values)
+ Cell::Boolean
when :number
- Cell::Number.new(*values)
+ Cell::Number
when :date
- Cell::Date.new(*values)
+ Cell::Date
when :datetime
- Cell::DateTime.new(*values)
+ Cell::DateTime
when :time
- Cell::Time.new(*values)
+ Cell::Time
end
end
diff --git a/lib/roo/excelx/cell/base.rb b/lib/roo/excelx/cell/base.rb
index aea8808..51fc75f 100644
--- a/lib/roo/excelx/cell/base.rb
+++ b/lib/roo/excelx/cell/base.rb
@@ -1,13 +1,18 @@
+# frozen_string_literal: true
+
+require "roo/helpers/default_attr_reader"
+
module Roo
class Excelx
class Cell
class Base
+ extend Roo::Helpers::DefaultAttrReader
attr_reader :cell_type, :cell_value, :value
# FIXME: I think style should be deprecated. Having a style attribute
# for a cell doesn't really accomplish much. It seems to be used
# when you want to export to excelx.
- attr_reader :style
+ attr_reader_with_default default_type: :base, style: 1
# FIXME: Updating a cell's value should be able tochange the cell's type,
@@ -34,14 +39,12 @@ module Roo
attr_writer :value
def initialize(value, formula, excelx_type, style, link, coordinate)
- @link = !!link
@cell_value = value
- @cell_type = excelx_type
- @formula = formula
- @style = style
+ @cell_type = excelx_type if excelx_type
+ @formula = formula if formula
+ @style = style unless style == 1
@coordinate = coordinate
- @type = :base
- @value = link? ? Roo::Link.new(link, value) : value
+ @value = link ? Roo::Link.new(link, value) : value
end
def type
@@ -50,16 +53,16 @@ module Roo
elsif link?
:link
else
- @type
+ default_type
end
end
def formula?
- !!@formula
+ !!(defined?(@formula) && @formula)
end
def link?
- !!@link
+ Roo::Link === @value
end
alias_method :formatted_value, :value
@@ -68,9 +71,16 @@ module Roo
formatted_value
end
- # DEPRECATED: Please use link instead.
+ # DEPRECATED: Please use link? instead.
def hyperlink
- warn '[DEPRECATION] `hyperlink` is deprecated. Please use `link` instead.'
+ warn '[DEPRECATION] `hyperlink` is deprecated. Please use `link?` instead.'
+ link?
+ end
+
+ # DEPRECATED: Please use link? instead.
+ def link
+ warn '[DEPRECATION] `link` is deprecated. Please use `link?` instead.'
+ link?
end
# DEPRECATED: Please use cell_value instead.
@@ -88,6 +98,10 @@ module Roo
def empty?
false
end
+
+ def presence
+ empty? ? nil : self
+ end
end
end
end
diff --git a/lib/roo/excelx/cell/boolean.rb b/lib/roo/excelx/cell/boolean.rb
index fe1f691..2cdfc22 100644
--- a/lib/roo/excelx/cell/boolean.rb
+++ b/lib/roo/excelx/cell/boolean.rb
@@ -1,17 +1,20 @@
+# frozen_string_literal: true
+
module Roo
class Excelx
class Cell
class Boolean < Cell::Base
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
+
+ attr_reader_with_default default_type: :boolean, cell_type: :boolean
def initialize(value, formula, style, link, coordinate)
- super(value, formula, nil, style, link, coordinate)
- @type = @cell_type = :boolean
- @value = link? ? Roo::Link.new(link, value) : create_boolean(value)
+ super(value, formula, nil, style, nil, coordinate)
+ @value = link ? Roo::Link.new(link, value) : create_boolean(value)
end
def formatted_value
- value ? 'TRUE'.freeze : 'FALSE'.freeze
+ value ? 'TRUE' : 'FALSE'
end
private
@@ -19,7 +22,7 @@ module Roo
def create_boolean(value)
# FIXME: Using a boolean will cause methods like Base#to_csv to fail.
# Roo is using some method to ignore false/nil values.
- value.to_i == 1 ? true : false
+ value.to_i == 1
end
end
end
diff --git a/lib/roo/excelx/cell/date.rb b/lib/roo/excelx/cell/date.rb
index 8e2c6cb..8627bc5 100644
--- a/lib/roo/excelx/cell/date.rb
+++ b/lib/roo/excelx/cell/date.rb
@@ -4,23 +4,23 @@ module Roo
class Excelx
class Cell
class Date < Roo::Excelx::Cell::DateTime
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
+ attr_reader :value, :formula, :format, :cell_type, :cell_value, :coordinate
+
+ attr_reader_with_default default_type: :date
def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
# NOTE: Pass all arguments to the parent class, DateTime.
super
- @type = :date
@format = excelx_type.last
- @value = link? ? Roo::Link.new(link, value) : create_date(base_date, value)
+ @value = link ? Roo::Link.new(link, value) : create_date(base_date, value)
end
private
- def create_date(base_date, value)
- date = base_date + value.to_i
- yyyy, mm, dd = date.strftime('%Y-%m-%d').split('-')
+ def create_datetime(_,_); end
- ::Date.new(yyyy.to_i, mm.to_i, dd.to_i)
+ def create_date(base_date, value)
+ base_date + value.to_i
end
end
end
diff --git a/lib/roo/excelx/cell/datetime.rb b/lib/roo/excelx/cell/datetime.rb
index 35d93ac..63f3260 100644
--- a/lib/roo/excelx/cell/datetime.rb
+++ b/lib/roo/excelx/cell/datetime.rb
@@ -1,16 +1,21 @@
+# frozen_string_literal: true
+
require 'date'
module Roo
class Excelx
class Cell
class DateTime < Cell::Base
- attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
+ SECONDS_IN_DAY = 60 * 60 * 24
+
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
- def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
- super(value, formula, excelx_type, style, link, coordinate)
- @type = :datetime
+ attr_reader_with_default default_type: :datetime
+
+ def initialize(value, formula, excelx_type, style, link, base_timestamp, coordinate)
+ super(value, formula, excelx_type, style, nil, coordinate)
@format = excelx_type.last
- @value = link? ? Roo::Link.new(link, value) : create_datetime(base_date, value)
+ @value = link ? Roo::Link.new(link, value) : create_datetime(base_timestamp, value)
end
# Public: Returns formatted value for a datetime. Format's can be an
@@ -78,7 +83,7 @@ module Roo
TIME_FORMATS = {
'hh' => '%H', # Hour (24): 01
- 'h' => '%-k'.freeze, # Hour (24): 1
+ 'h' => '%-k', # Hour (24): 1
# 'hh'.freeze => '%I'.freeze, # Hour (12): 08
# 'h'.freeze => '%-l'.freeze, # Hour (12): 8
'mm' => '%M', # Minute: 01
@@ -92,18 +97,9 @@ module Roo
'0' => '%1N' # Fractional Seconds: tenths.
}
- def create_datetime(base_date, value)
- date = base_date + value.to_f.round(6)
- datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
- t = round_datetime(datetime_string)
-
- ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
- end
-
- def round_datetime(datetime_string)
- /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+.\d+)/ =~ datetime_string
-
- ::Time.new(yyyy, mm, dd, hh, mi, ss.to_r).round(0)
+ def create_datetime(base_timestamp, value)
+ timestamp = (base_timestamp + (value.to_f.round(6) * SECONDS_IN_DAY)).round(0)
+ ::Time.at(timestamp).utc.to_datetime
end
end
end
diff --git a/lib/roo/excelx/cell/empty.rb b/lib/roo/excelx/cell/empty.rb
index 49a20e7..f0c683c 100644
--- a/lib/roo/excelx/cell/empty.rb
+++ b/lib/roo/excelx/cell/empty.rb
@@ -3,10 +3,11 @@ module Roo
class Excelx
class Cell
class Empty < Cell::Base
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :hyperlink, :coordinate
+ attr_reader :value, :formula, :format, :cell_type, :cell_value, :coordinate
+
+ attr_reader_with_default default_type: nil, style: nil
def initialize(coordinate)
- @value = @formula = @format = @cell_type = @cell_value = @hyperlink = nil
@coordinate = coordinate
end
diff --git a/lib/roo/excelx/cell/number.rb b/lib/roo/excelx/cell/number.rb
index 2015562..9f23c4f 100644
--- a/lib/roo/excelx/cell/number.rb
+++ b/lib/roo/excelx/cell/number.rb
@@ -1,16 +1,19 @@
+# frozen_string_literal: true
+
module Roo
class Excelx
class Cell
class Number < Cell::Base
- attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
+
+ # FIXME: change default_type to number. This will break brittle tests.
+ attr_reader_with_default default_type: :float
def initialize(value, formula, excelx_type, style, link, coordinate)
super
- # FIXME: change @type to number. This will break brittle tests.
# FIXME: Excelx_type is an array, but the first value isn't used.
- @type = :float
@format = excelx_type.last
- @value = link? ? Roo::Link.new(link, value) : create_numeric(value)
+ @value = link ? Roo::Link.new(link, value) : create_numeric(value)
end
def create_numeric(number)
@@ -21,48 +24,50 @@ module Roo
when /\.0/
Float(number)
else
- (number.include?('.') || (/\A[-+]?\d+E[-+]\d+\z/i =~ number)) ? Float(number) : Integer(number)
+ (number.include?('.') || (/\A[-+]?\d+E[-+]?\d+\z/i =~ number)) ? Float(number) : Integer(number, 10)
end
end
def formatted_value
return @cell_value if Excelx::ERROR_VALUES.include?(@cell_value)
- formatter = formats[@format]
+ formatter = generate_formatter(@format)
if formatter.is_a? Proc
formatter.call(@cell_value)
- elsif zero_padded_number?
- "%0#{@format.size}d" % @cell_value
else
Kernel.format(formatter, @cell_value)
end
end
- def formats
+ def generate_formatter(format)
# FIXME: numbers can be other colors besides red:
# [BLACK], [BLUE], [CYAN], [GREEN], [MAGENTA], [RED], [WHITE], [YELLOW], [COLOR n]
- {
- 'General' => '%.0f',
- '0' => '%.0f',
- '0.00' => '%.2f',
- '0.000000' => '%.6f',
- '#,##0' => number_format('%.0f'),
- '#,##0.00' => number_format('%.2f'),
- '0%' => proc do |number|
- Kernel.format('%d%', number.to_f * 100)
- end,
- '0.00%' => proc do |number|
- Kernel.format('%.2f%', number.to_f * 100)
- end,
- '0.00E+00' => '%.2E',
- '#,##0 ;(#,##0)' => number_format('%.0f', '(%.0f)'),
- '#,##0 ;[Red](#,##0)' => number_format('%.0f', '[Red](%.0f)'),
- '#,##0.00;(#,##0.00)' => number_format('%.2f', '(%.2f)'),
- '#,##0.00;[Red](#,##0.00)' => number_format('%.2f', '[Red](%.2f)'),
+ case format
+ when /^General$/i then '%.0f'
+ when '0' then '%.0f'
+ when /^(0+)$/ then "%0#{$1.size}d"
+ when /^0\.(0+)$/ then "%.#{$1.size}f"
+ when '#,##0' then number_format('%.0f')
+ when '#,##0.00' then number_format('%.2f')
+ when '0%'
+ proc do |number|
+ Kernel.format('%d%%', number.to_f * 100)
+ end
+ when '0.00%'
+ proc do |number|
+ Kernel.format('%.2f%%', number.to_f * 100)
+ end
+ when '0.00E+00' then '%.2E'
+ when '#,##0 ;(#,##0)' then number_format('%.0f', '(%.0f)')
+ when '#,##0 ;[Red](#,##0)' then number_format('%.0f', '[Red](%.0f)')
+ when '#,##0.00;(#,##0.00)' then number_format('%.2f', '(%.2f)')
+ when '#,##0.00;[Red](#,##0.00)' then number_format('%.2f', '[Red](%.2f)')
# FIXME: not quite sure what the format should look like in this case.
- '##0.0E+0' => '%.1E',
- '@' => proc { |number| number }
- }
+ when '##0.0E+0' then '%.1E'
+ when '@' then proc { |number| number }
+ else
+ raise "Unknown format: #{format.inspect}"
+ end
end
private
@@ -77,10 +82,6 @@ module Roo
Kernel.format(formatter, number).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
end
end
-
- def zero_padded_number?
- @format[/0+/] == @format
- end
end
end
end
diff --git a/lib/roo/excelx/cell/string.rb b/lib/roo/excelx/cell/string.rb
index 7967806..dacf0b6 100644
--- a/lib/roo/excelx/cell/string.rb
+++ b/lib/roo/excelx/cell/string.rb
@@ -2,12 +2,12 @@ module Roo
class Excelx
class Cell
class String < Cell::Base
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
+
+ attr_reader_with_default default_type: :string, cell_type: :string
def initialize(value, formula, style, link, coordinate)
super(value, formula, nil, style, link, coordinate)
- @type = @cell_type = :string
- @value = link? ? Roo::Link.new(link, value) : value
end
def empty?
diff --git a/lib/roo/excelx/cell/time.rb b/lib/roo/excelx/cell/time.rb
index d661ab8..a1f0864 100644
--- a/lib/roo/excelx/cell/time.rb
+++ b/lib/roo/excelx/cell/time.rb
@@ -4,15 +4,16 @@ module Roo
class Excelx
class Cell
class Time < Roo::Excelx::Cell::DateTime
- attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
+
+ attr_reader_with_default default_type: :time
def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
# NOTE: Pass all arguments to DateTime super class.
super
- @type = :time
@format = excelx_type.last
@datetime = create_datetime(base_date, value)
- @value = link? ? Roo::Link.new(link, value) : (value.to_f * 86_400).to_i
+ @value = link ? Roo::Link.new(link, value) : (value.to_f * 86_400).to_i
end
def formatted_value
diff --git a/lib/roo/excelx/comments.rb b/lib/roo/excelx/comments.rb
index 1a89908..65044a9 100644
--- a/lib/roo/excelx/comments.rb
+++ b/lib/roo/excelx/comments.rb
@@ -12,10 +12,10 @@ module Roo
def extract_comments
return {} unless doc_exists?
- Hash[doc.xpath('//comments/commentList/comment').map do |comment|
+ doc.xpath('//comments/commentList/comment').each_with_object({}) do |comment, hash|
value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text
- [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
- end]
+ hash[::Roo::Utils.ref_to_key(comment['ref'].to_s)] = value
+ end
end
end
end
diff --git a/lib/roo/excelx/coordinate.rb b/lib/roo/excelx/coordinate.rb
index 53b24ba..4f61b17 100644
--- a/lib/roo/excelx/coordinate.rb
+++ b/lib/roo/excelx/coordinate.rb
@@ -1,11 +1,18 @@
module Roo
class Excelx
- class Coordinate
- attr_accessor :row, :column
+ class Coordinate < ::Array
def initialize(row, column)
- @row = row
- @column = column
+ super() << row << column
+ freeze
+ end
+
+ def row
+ self[0]
+ end
+
+ def column
+ self[1]
end
end
end
diff --git a/lib/roo/excelx/extractor.rb b/lib/roo/excelx/extractor.rb
index 1cdd13b..b87a84e 100644..100755
--- a/lib/roo/excelx/extractor.rb
+++ b/lib/roo/excelx/extractor.rb
@@ -1,16 +1,34 @@
+# frozen_string_literal: true
+
+require "roo/helpers/weak_instance_cache"
+
module Roo
class Excelx
class Extractor
- def initialize(path)
+ include Roo::Helpers::WeakInstanceCache
+
+ COMMON_STRINGS = {
+ t: "t",
+ r: "r",
+ s: "s",
+ ref: "ref",
+ html_tag_open: "<html>",
+ html_tag_closed: "</html>"
+ }
+
+ def initialize(path, options = {})
@path = path
+ @options = options
end
private
def doc
- raise FileNotFound, "#{@path} file not found" unless doc_exists?
+ instance_cache(:@doc) do
+ raise FileNotFound, "#{@path} file not found" unless doc_exists?
- ::Roo::Utils.load_xml(@path).remove_namespaces!
+ ::Roo::Utils.load_xml(@path).remove_namespaces!
+ end
end
def doc_exists?
diff --git a/lib/roo/excelx/format.rb b/lib/roo/excelx/format.rb
index 72b36d9..1c1968a 100644
--- a/lib/roo/excelx/format.rb
+++ b/lib/roo/excelx/format.rb
@@ -1,49 +1,57 @@
+# frozen_string_literal: true
+
module Roo
class Excelx
module Format
+ extend self
EXCEPTIONAL_FORMATS = {
'h:mm am/pm' => :date,
'h:mm:ss am/pm' => :date
}
STANDARD_FORMATS = {
- 0 => 'General'.freeze,
- 1 => '0'.freeze,
- 2 => '0.00'.freeze,
- 3 => '#,##0'.freeze,
- 4 => '#,##0.00'.freeze,
- 9 => '0%'.freeze,
- 10 => '0.00%'.freeze,
- 11 => '0.00E+00'.freeze,
- 12 => '# ?/?'.freeze,
- 13 => '# ??/??'.freeze,
- 14 => 'mm-dd-yy'.freeze,
- 15 => 'd-mmm-yy'.freeze,
- 16 => 'd-mmm'.freeze,
- 17 => 'mmm-yy'.freeze,
- 18 => 'h:mm AM/PM'.freeze,
- 19 => 'h:mm:ss AM/PM'.freeze,
- 20 => 'h:mm'.freeze,
- 21 => 'h:mm:ss'.freeze,
- 22 => 'm/d/yy h:mm'.freeze,
- 37 => '#,##0 ;(#,##0)'.freeze,
- 38 => '#,##0 ;[Red](#,##0)'.freeze,
- 39 => '#,##0.00;(#,##0.00)'.freeze,
- 40 => '#,##0.00;[Red](#,##0.00)'.freeze,
- 45 => 'mm:ss'.freeze,
- 46 => '[h]:mm:ss'.freeze,
- 47 => 'mmss.0'.freeze,
- 48 => '##0.0E+0'.freeze,
- 49 => '@'.freeze
+ 0 => 'General',
+ 1 => '0',
+ 2 => '0.00',
+ 3 => '#,##0',
+ 4 => '#,##0.00',
+ 9 => '0%',
+ 10 => '0.00%',
+ 11 => '0.00E+00',
+ 12 => '# ?/?',
+ 13 => '# ??/??',
+ 14 => 'mm-dd-yy',
+ 15 => 'd-mmm-yy',
+ 16 => 'd-mmm',
+ 17 => 'mmm-yy',
+ 18 => 'h:mm AM/PM',
+ 19 => 'h:mm:ss AM/PM',
+ 20 => 'h:mm',
+ 21 => 'h:mm:ss',
+ 22 => 'm/d/yy h:mm',
+ 37 => '#,##0 ;(#,##0)',
+ 38 => '#,##0 ;[Red](#,##0)',
+ 39 => '#,##0.00;(#,##0.00)',
+ 40 => '#,##0.00;[Red](#,##0.00)',
+ 45 => 'mm:ss',
+ 46 => '[h]:mm:ss',
+ 47 => 'mmss.0',
+ 48 => '##0.0E+0',
+ 49 => '@'
}
def to_type(format)
+ @to_type ||= {}
+ @to_type[format] ||= _to_type(format)
+ end
+
+ def _to_type(format)
format = format.to_s.downcase
if (type = EXCEPTIONAL_FORMATS[format])
type
elsif format.include?('#')
:float
- elsif !format.match(/d+(?![\]])/).nil? || format.include?('y')
+ elsif format.include?('y') || !format.match(/d+(?![\]])/).nil?
if format.include?('h') || format.include?('s')
:datetime
else
@@ -58,7 +66,6 @@ module Roo
end
end
- module_function :to_type
end
- end
+ end
end
diff --git a/lib/roo/excelx/images.rb b/lib/roo/excelx/images.rb
new file mode 100644
index 0000000..cf0221f
--- /dev/null
+++ b/lib/roo/excelx/images.rb
@@ -0,0 +1,26 @@
+require 'roo/excelx/extractor'
+
+module Roo
+ class Excelx
+ class Images < Excelx::Extractor
+
+ # Returns: Hash { id1: extracted_file_name1 },
+ # Example: { "rId1"=>"roo_media_image1.png",
+ # "rId2"=>"roo_media_image2.png",
+ # "rId3"=>"roo_media_image3.png" }
+ def list
+ @images ||= extract_images_names
+ end
+
+ private
+
+ def extract_images_names
+ return {} unless doc_exists?
+
+ doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, hash|
+ hash[rel['Id']] = "roo" + rel['Target'].gsub(/\.\.\/|\//, '_')
+ end
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/relationships.rb b/lib/roo/excelx/relationships.rb
index 8a0ed97..19f9919 100644
--- a/lib/roo/excelx/relationships.rb
+++ b/lib/roo/excelx/relationships.rb
@@ -16,9 +16,9 @@ module Roo
def extract_relationships
return [] unless doc_exists?
- Hash[doc.xpath('/Relationships/Relationship').map do |rel|
- [rel.attribute('Id').text, rel]
- end]
+ doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, hash|
+ hash[rel['Id']] = rel
+ end
end
end
end
diff --git a/lib/roo/excelx/shared.rb b/lib/roo/excelx/shared.rb
index 3677fa2..bcd2c08 100644..100755
--- a/lib/roo/excelx/shared.rb
+++ b/lib/roo/excelx/shared.rb
@@ -4,12 +4,15 @@ module Roo
# reduce memory usage and reduce the number of objects being passed
# to various inititializers.
class Shared
- attr_accessor :comments_files, :sheet_files, :rels_files
- def initialize(dir)
+ attr_accessor :comments_files, :sheet_files, :rels_files, :image_rels, :image_files
+ def initialize(dir, options = {})
@dir = dir
@comments_files = []
@sheet_files = []
@rels_files = []
+ @options = options
+ @image_rels = []
+ @image_files = []
end
def styles
@@ -17,7 +20,7 @@ module Roo
end
def shared_strings
- @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'))
+ @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'), @options)
end
def workbook
@@ -27,6 +30,10 @@ module Roo
def base_date
workbook.base_date
end
+
+ def base_timestamp
+ workbook.base_timestamp
+ end
end
end
end
diff --git a/lib/roo/excelx/shared_strings.rb b/lib/roo/excelx/shared_strings.rb
index f7caf7c..e70b623 100755
--- a/lib/roo/excelx/shared_strings.rb
+++ b/lib/roo/excelx/shared_strings.rb
@@ -1,16 +1,10 @@
+# frozen_string_literal: true
+
require 'roo/excelx/extractor'
module Roo
class Excelx
class SharedStrings < Excelx::Extractor
-
- COMMON_STRINGS = {
- t: "t",
- r: "r",
- html_tag_open: "<html>",
- html_tag_closed: "</html>"
- }
-
def [](index)
to_a[index]
end
@@ -26,6 +20,7 @@ module Roo
# Use to_html or to_a for html returns
# See what is happening with commit???
def use_html?(index)
+ return false if @options[:disable_html_wrapper]
to_html[index][/<([biu]|sup|sub)>/]
end
@@ -45,7 +40,7 @@ module Roo
document = fix_invalid_shared_strings(doc)
# read the shared strings xml document
document.xpath('/sst/si').map do |si|
- shared_string = ''
+ shared_string = +""
si.children.each do |elem|
case elem.name
when 'r'
@@ -65,7 +60,7 @@ module Roo
fix_invalid_shared_strings(doc)
# read the shared strings xml document
doc.xpath('/sst/si').map do |si|
- html_string = '<html>'
+ html_string = '<html>'.dup
si.children.each do |elem|
case elem.name
when 'r'
@@ -95,7 +90,7 @@ module Roo
#
# Expected Output ::: "<html><sub|sup><b><i><u>TEXT</u></i></b></sub|/sup></html>"
def extract_html_r(r_elem)
- str = ''
+ str = +""
xml_elems = {
sub: false,
sup: false,
@@ -103,7 +98,6 @@ module Roo
i: false,
u: false
}
- b, i, u, sub, sup = false, false, false, false, false
r_elem.children.each do |elem|
case elem.name
when 'rPr'
@@ -141,13 +135,13 @@ module Roo
# This will return an html string
def create_html(text, formatting)
- tmp_str = ''
+ tmp_str = +""
formatting.each do |elem, val|
tmp_str << "<#{elem}>" if val
end
tmp_str << text
- reverse_format = Hash[formatting.to_a.reverse]
- reverse_format.each do |elem, val|
+
+ formatting.reverse_each do |elem, val|
tmp_str << "</#{elem}>" if val
end
tmp_str
diff --git a/lib/roo/excelx/sheet.rb b/lib/roo/excelx/sheet.rb
index add92f0..840a053 100644
--- a/lib/roo/excelx/sheet.rb
+++ b/lib/roo/excelx/sheet.rb
@@ -4,11 +4,15 @@ module Roo
class Sheet
extend Forwardable
- delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
+ delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels] => :@shared
+
+ attr_reader :images
def initialize(name, shared, sheet_index, options = {})
@name = name
@shared = shared
+ @sheet_index = sheet_index
+ @images = Images.new(image_rels[sheet_index]).list
@rels = Relationships.new(rels_files[sheet_index])
@comments = Comments.new(comments_files[sheet_index])
@sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
@@ -19,7 +23,14 @@ module Roo
end
def present_cells
- @present_cells ||= cells.select { |_, cell| cell && !cell.empty? }
+ @present_cells ||= begin
+ warn %{
+[DEPRECATION] present_cells is deprecated. Alternate:
+ with activesupport => cells[key].presence
+ without activesupport => cells[key]&.presence
+ }
+ cells.select { |_, cell| cell&.presence }
+ end
end
# Yield each row as array of Excelx::Cell objects
@@ -39,33 +50,33 @@ module Roo
def row(row_number)
first_column.upto(last_column).map do |col|
- cells[[row_number, col]]
- end.map { |cell| cell && cell.value }
+ cells[[row_number, col]]&.value
+ end
end
def column(col_number)
first_row.upto(last_row).map do |row|
- cells[[row, col_number]]
- end.map { |cell| cell && cell.value }
+ cells[[row, col_number]]&.value
+ end
end
# returns the number of the first non-empty row
def first_row
- @first_row ||= present_cells.keys.map { |row, _| row }.min
+ @first_row ||= first_last_row_col[:first_row]
end
def last_row
- @last_row ||= present_cells.keys.map { |row, _| row }.max
+ @last_row ||= first_last_row_col[:last_row]
end
# returns the number of the first non-empty column
def first_column
- @first_column ||= present_cells.keys.map { |_, col| col }.min
+ @first_column ||= first_last_row_col[:first_column]
end
# returns the number of the last non-empty column
def last_column
- @last_column ||= present_cells.keys.map { |_, col| col }.max
+ @last_column ||= first_last_row_col[:last_column]
end
def excelx_format(key)
@@ -107,6 +118,34 @@ module Roo
(cell.coordinate.column - 1 - last_column).times { pad << nil }
pad
end
+
+ def first_last_row_col
+ @first_last_row_col ||= begin
+ first_row = last_row = first_col = last_col = nil
+
+ cells.each do |(row, col), cell|
+ next unless cell&.presence
+ first_row ||= row
+ last_row ||= row
+ first_col ||= col
+ last_col ||= col
+
+ if row > last_row
+ last_row = row
+ elsif row < first_row
+ first_row = row
+ end
+
+ if col > last_col
+ last_col = col
+ elsif col < first_col
+ first_col = col
+ end
+ end
+
+ {first_row: first_row, last_row: last_row, first_column: first_col, last_column: last_col}
+ end
+ end
end
end
end
diff --git a/lib/roo/excelx/sheet_doc.rb b/lib/roo/excelx/sheet_doc.rb
index a705958..8b0c686 100755
--- a/lib/roo/excelx/sheet_doc.rb
+++ b/lib/roo/excelx/sheet_doc.rb
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
require 'forwardable'
require 'roo/excelx/extractor'
@@ -5,7 +7,7 @@ module Roo
class Excelx
class SheetDoc < Excelx::Extractor
extend Forwardable
- delegate [:styles, :workbook, :shared_strings, :base_date] => :@shared
+ delegate [:workbook] => :@shared
def initialize(path, relationships, shared, options = {})
super(path)
@@ -19,7 +21,12 @@ module Roo
end
def hyperlinks(relationships)
- @hyperlinks ||= extract_hyperlinks(relationships)
+ # If you're sure you're not going to need this hyperlinks you can discard it
+ @hyperlinks ||= if @options[:no_hyperlinks]
+ {}
+ else
+ extract_hyperlinks(relationships)
+ end
end
# Get the dimensions for the sheet.
@@ -39,13 +46,10 @@ module Roo
def each_cell(row_xml)
return [] unless row_xml
row_xml.children.each do |cell_element|
- # If you're sure you're not going to need this hyperlinks you can discard it
- hyperlinks = unless @options[:no_hyperlinks]
- key = ::Roo::Utils.ref_to_key(cell_element['r'])
- hyperlinks(@relationships)[key]
- end
+ coordinate = ::Roo::Utils.extract_coordinate(cell_element["r"])
+ hyperlinks = hyperlinks(@relationships)[coordinate]
- yield cell_from_xml(cell_element, hyperlinks)
+ yield cell_from_xml(cell_element, hyperlinks, coordinate)
end
end
@@ -53,13 +57,13 @@ module Roo
def cell_value_type(type, format)
case type
- when 's'.freeze
+ when 's'
:shared
- when 'b'.freeze
+ when 'b'
:boolean
- when 'str'.freeze
+ when 'str'
:string
- when 'inlineStr'.freeze
+ when 'inlineStr'
:inlinestr
else
Excelx::Format.to_type(format)
@@ -74,42 +78,58 @@ module Roo
# </c>
# hyperlink - a String for the hyperlink for the cell or nil when no
# hyperlink is present.
+ # coordinate - a Roo::Excelx::Coordinate for the coordinate for the cell
+ # or nil to extract coordinate from cell_xml.
+ # empty_cell - an Optional Boolean value.
#
# Examples
#
- # cells_from_xml(<Nokogiri::XML::Element>, nil)
+ # cells_from_xml(<Nokogiri::XML::Element>, nil, nil)
# # => <Excelx::Cell::String>
#
# Returns a type of <Excelx::Cell>.
- def cell_from_xml(cell_xml, hyperlink)
- coordinate = extract_coordinate(cell_xml['r'])
- return Excelx::Cell::Empty.new(coordinate) if cell_xml.children.empty?
+ def cell_from_xml(cell_xml, hyperlink, coordinate, empty_cell=true)
+ coordinate ||= ::Roo::Utils.extract_coordinate(cell_xml["r"])
+ cell_xml_children = cell_xml.children
+ return create_empty_cell(coordinate, empty_cell) if cell_xml_children.empty?
# NOTE: This is error prone, to_i will silently turn a nil into a 0.
# This works by coincidence because Format[0] is General.
- style = cell_xml['s'].to_i
- format = styles.style_format(style)
- value_type = cell_value_type(cell_xml['t'], format)
+ style = cell_xml["s"].to_i
formula = nil
- cell_xml.children.each do |cell|
+ cell_xml_children.each do |cell|
case cell.name
when 'is'
- content_arr = cell.search('t').map(&:content)
- unless content_arr.empty?
- return Excelx::Cell.create_cell(:string, content_arr.join(''), formula, style, hyperlink, coordinate)
+ content = +""
+ cell.children.each do |inline_str|
+ if inline_str.name == 't'
+ content << inline_str.content
+ end
+ end
+ unless content.empty?
+ return Excelx::Cell.cell_class(:string).new(content, formula, style, hyperlink, coordinate)
end
when 'f'
formula = cell.content
when 'v'
- return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
+ format = style_format(style)
+ value_type = cell_value_type(cell_xml["t"], format)
+
+ return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
end
end
- Excelx::Cell::Empty.new(coordinate)
+ create_empty_cell(coordinate)
end
- def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
+ def create_empty_cell(coordinate, empty_cell)
+ if empty_cell
+ Excelx::Cell::Empty.new(coordinate)
+ end
+ end
+
+ def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
# NOTE: format.to_s can replace excelx_type as an argument for
# Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but
# it will break some brittle tests.
@@ -125,11 +145,12 @@ module Roo
# 3. formula
case value_type
when :shared
- value = shared_strings.use_html?(cell.content.to_i) ? shared_strings.to_html[cell.content.to_i] : shared_strings[cell.content.to_i]
- Excelx::Cell.create_cell(:string, value, formula, style, hyperlink, coordinate)
+ cell_content = cell.content.to_i
+ value = shared_strings.use_html?(cell_content) ? shared_strings.to_html[cell_content] : shared_strings[cell_content]
+ Excelx::Cell.cell_class(:string).new(value, formula, style, hyperlink, coordinate)
when :boolean, :string
value = cell.content
- Excelx::Cell.create_cell(value_type, value, formula, style, hyperlink, coordinate)
+ Excelx::Cell.cell_class(value_type).new(value, formula, style, hyperlink, coordinate)
when :time, :datetime
cell_content = cell.content.to_f
# NOTE: A date will be a whole number. A time will have be > 1. And
@@ -148,35 +169,32 @@ module Roo
else
:date
end
- Excelx::Cell.create_cell(cell_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
+ base_value = cell_type == :date ? base_date : base_timestamp
+ Excelx::Cell.cell_class(cell_type).new(cell_content, formula, excelx_type, style, hyperlink, base_value, coordinate)
when :date
- Excelx::Cell.create_cell(value_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
+ Excelx::Cell.cell_class(:date).new(cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
else
- Excelx::Cell.create_cell(:number, cell.content, formula, excelx_type, style, hyperlink, coordinate)
+ Excelx::Cell.cell_class(:number).new(cell.content, formula, excelx_type, style, hyperlink, coordinate)
end
end
- def extract_coordinate(coordinate)
- row, column = ::Roo::Utils.split_coordinate(coordinate)
-
- Excelx::Coordinate.new(row, column)
- end
-
def extract_hyperlinks(relationships)
return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink'))
- Hash[hyperlinks.map do |hyperlink|
- if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text])
- [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
+ hyperlinks.each_with_object({}) do |hyperlink, hash|
+ if relationship = relationships[hyperlink['id']]
+ target_link = relationship['Target']
+ target_link += "##{hyperlink['location']}" if hyperlink['location']
+ hash[::Roo::Utils.ref_to_key(hyperlink["ref"].to_s)] = target_link
end
- end.compact]
+ end
end
def expand_merged_ranges(cells)
# Extract merged ranges from xml
merges = {}
doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
- tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
+ tl, br = mergecell_xml["ref"].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
for row in tl[0]..br[0] do
for col in tl[1]..br[1] do
next if row == tl[0] && col == tl[1]
@@ -191,10 +209,14 @@ module Roo
end
def extract_cells(relationships)
- extracted_cells = Hash[doc.xpath('/worksheet/sheetData/row/c').map do |cell_xml|
- key = ::Roo::Utils.ref_to_key(cell_xml['r'])
- [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
- end]
+ extracted_cells = {}
+ empty_cell = @options[:empty_cell]
+
+ doc.xpath('/worksheet/sheetData/row/c').each do |cell_xml|
+ coordinate = ::Roo::Utils.extract_coordinate(cell_xml["r"])
+ cell = cell_from_xml(cell_xml, hyperlinks(relationships)[coordinate], coordinate, empty_cell)
+ extracted_cells[coordinate] = cell if cell
+ end
expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges]
@@ -203,9 +225,25 @@ module Roo
def extract_dimensions
Roo::Utils.each_element(@path, 'dimension') do |dimension|
- return dimension.attributes['ref'].value
+ return dimension["ref"]
end
end
+
+ def style_format(style)
+ @shared.styles.style_format(style)
+ end
+
+ def base_date
+ @shared.base_date
+ end
+
+ def base_timestamp
+ @shared.base_timestamp
+ end
+
+ def shared_strings
+ @shared.shared_strings
+ end
end
end
end
diff --git a/lib/roo/excelx/styles.rb b/lib/roo/excelx/styles.rb
index 87f1713..25f0bf0 100644
--- a/lib/roo/excelx/styles.rb
+++ b/lib/roo/excelx/styles.rb
@@ -55,9 +55,9 @@ module Roo
end
def extract_num_fmts
- Hash[doc.xpath('//numFmt').map do |num_fmt|
- [num_fmt['numFmtId'], num_fmt['formatCode']]
- end]
+ doc.xpath('//numFmt').each_with_object({}) do |num_fmt, hash|
+ hash[num_fmt['numFmtId']] = num_fmt['formatCode']
+ end
end
end
end
diff --git a/lib/roo/excelx/workbook.rb b/lib/roo/excelx/workbook.rb
index 7ef841f..c21bb1f 100644
--- a/lib/roo/excelx/workbook.rb
+++ b/lib/roo/excelx/workbook.rb
@@ -29,13 +29,17 @@ module Roo
# aka labels
def defined_names
- Hash[doc.xpath('//definedName').map do |defined_name|
+ doc.xpath('//definedName').each_with_object({}) do |defined_name, hash|
# "Sheet1!$C$5"
sheet, coordinates = defined_name.text.split('!$', 2)
col, row = coordinates.split('$')
name = defined_name['name']
- [name, Label.new(name, sheet, row, col)]
- end]
+ hash[name] = Label.new(name, sheet, row, col)
+ end
+ end
+
+ def base_timestamp
+ @base_timestamp ||= base_date.to_datetime.to_time.to_i
end
def base_date