diff options
Diffstat (limited to 'lib/roo/excelx')
| -rw-r--r-- | lib/roo/excelx/cell.rb | 16 | ||||
| -rw-r--r-- | lib/roo/excelx/cell/base.rb | 38 | ||||
| -rw-r--r-- | lib/roo/excelx/cell/boolean.rb | 15 | ||||
| -rw-r--r-- | lib/roo/excelx/cell/date.rb | 14 | ||||
| -rw-r--r-- | lib/roo/excelx/cell/datetime.rb | 32 | ||||
| -rw-r--r-- | lib/roo/excelx/cell/empty.rb | 5 | ||||
| -rw-r--r-- | lib/roo/excelx/cell/number.rb | 69 | ||||
| -rw-r--r-- | lib/roo/excelx/cell/string.rb | 6 | ||||
| -rw-r--r-- | lib/roo/excelx/cell/time.rb | 7 | ||||
| -rw-r--r-- | lib/roo/excelx/comments.rb | 6 | ||||
| -rw-r--r-- | lib/roo/excelx/coordinate.rb | 15 | ||||
| -rwxr-xr-x[-rw-r--r--] | lib/roo/excelx/extractor.rb | 24 | ||||
| -rw-r--r-- | lib/roo/excelx/format.rb | 69 | ||||
| -rw-r--r-- | lib/roo/excelx/images.rb | 26 | ||||
| -rw-r--r-- | lib/roo/excelx/relationships.rb | 6 | ||||
| -rwxr-xr-x[-rw-r--r--] | lib/roo/excelx/shared.rb | 13 | ||||
| -rwxr-xr-x | lib/roo/excelx/shared_strings.rb | 24 | ||||
| -rw-r--r-- | lib/roo/excelx/sheet.rb | 59 | ||||
| -rwxr-xr-x | lib/roo/excelx/sheet_doc.rb | 134 | ||||
| -rw-r--r-- | lib/roo/excelx/styles.rb | 6 | ||||
| -rw-r--r-- | lib/roo/excelx/workbook.rb | 10 |
21 files changed, 377 insertions, 217 deletions
diff --git a/lib/roo/excelx/cell.rb b/lib/roo/excelx/cell.rb index 2fc78e1..7abd074 100644 --- a/lib/roo/excelx/cell.rb +++ b/lib/roo/excelx/cell.rb @@ -40,19 +40,23 @@ module Roo end def self.create_cell(type, *values) + cell_class(type)&.new(*values) + end + + def self.cell_class(type) case type when :string - Cell::String.new(*values) + Cell::String when :boolean - Cell::Boolean.new(*values) + Cell::Boolean when :number - Cell::Number.new(*values) + Cell::Number when :date - Cell::Date.new(*values) + Cell::Date when :datetime - Cell::DateTime.new(*values) + Cell::DateTime when :time - Cell::Time.new(*values) + Cell::Time end end diff --git a/lib/roo/excelx/cell/base.rb b/lib/roo/excelx/cell/base.rb index aea8808..51fc75f 100644 --- a/lib/roo/excelx/cell/base.rb +++ b/lib/roo/excelx/cell/base.rb @@ -1,13 +1,18 @@ +# frozen_string_literal: true + +require "roo/helpers/default_attr_reader" + module Roo class Excelx class Cell class Base + extend Roo::Helpers::DefaultAttrReader attr_reader :cell_type, :cell_value, :value # FIXME: I think style should be deprecated. Having a style attribute # for a cell doesn't really accomplish much. It seems to be used # when you want to export to excelx. - attr_reader :style + attr_reader_with_default default_type: :base, style: 1 # FIXME: Updating a cell's value should be able tochange the cell's type, @@ -34,14 +39,12 @@ module Roo attr_writer :value def initialize(value, formula, excelx_type, style, link, coordinate) - @link = !!link @cell_value = value - @cell_type = excelx_type - @formula = formula - @style = style + @cell_type = excelx_type if excelx_type + @formula = formula if formula + @style = style unless style == 1 @coordinate = coordinate - @type = :base - @value = link? ? Roo::Link.new(link, value) : value + @value = link ? Roo::Link.new(link, value) : value end def type @@ -50,16 +53,16 @@ module Roo elsif link? :link else - @type + default_type end end def formula? - !!@formula + !!(defined?(@formula) && @formula) end def link? - !!@link + Roo::Link === @value end alias_method :formatted_value, :value @@ -68,9 +71,16 @@ module Roo formatted_value end - # DEPRECATED: Please use link instead. + # DEPRECATED: Please use link? instead. def hyperlink - warn '[DEPRECATION] `hyperlink` is deprecated. Please use `link` instead.' + warn '[DEPRECATION] `hyperlink` is deprecated. Please use `link?` instead.' + link? + end + + # DEPRECATED: Please use link? instead. + def link + warn '[DEPRECATION] `link` is deprecated. Please use `link?` instead.' + link? end # DEPRECATED: Please use cell_value instead. @@ -88,6 +98,10 @@ module Roo def empty? false end + + def presence + empty? ? nil : self + end end end end diff --git a/lib/roo/excelx/cell/boolean.rb b/lib/roo/excelx/cell/boolean.rb index fe1f691..2cdfc22 100644 --- a/lib/roo/excelx/cell/boolean.rb +++ b/lib/roo/excelx/cell/boolean.rb @@ -1,17 +1,20 @@ +# frozen_string_literal: true + module Roo class Excelx class Cell class Boolean < Cell::Base - attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate + attr_reader :value, :formula, :format, :cell_value, :coordinate + + attr_reader_with_default default_type: :boolean, cell_type: :boolean def initialize(value, formula, style, link, coordinate) - super(value, formula, nil, style, link, coordinate) - @type = @cell_type = :boolean - @value = link? ? Roo::Link.new(link, value) : create_boolean(value) + super(value, formula, nil, style, nil, coordinate) + @value = link ? Roo::Link.new(link, value) : create_boolean(value) end def formatted_value - value ? 'TRUE'.freeze : 'FALSE'.freeze + value ? 'TRUE' : 'FALSE' end private @@ -19,7 +22,7 @@ module Roo def create_boolean(value) # FIXME: Using a boolean will cause methods like Base#to_csv to fail. # Roo is using some method to ignore false/nil values. - value.to_i == 1 ? true : false + value.to_i == 1 end end end diff --git a/lib/roo/excelx/cell/date.rb b/lib/roo/excelx/cell/date.rb index 8e2c6cb..8627bc5 100644 --- a/lib/roo/excelx/cell/date.rb +++ b/lib/roo/excelx/cell/date.rb @@ -4,23 +4,23 @@ module Roo class Excelx class Cell class Date < Roo::Excelx::Cell::DateTime - attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate + attr_reader :value, :formula, :format, :cell_type, :cell_value, :coordinate + + attr_reader_with_default default_type: :date def initialize(value, formula, excelx_type, style, link, base_date, coordinate) # NOTE: Pass all arguments to the parent class, DateTime. super - @type = :date @format = excelx_type.last - @value = link? ? Roo::Link.new(link, value) : create_date(base_date, value) + @value = link ? Roo::Link.new(link, value) : create_date(base_date, value) end private - def create_date(base_date, value) - date = base_date + value.to_i - yyyy, mm, dd = date.strftime('%Y-%m-%d').split('-') + def create_datetime(_,_); end - ::Date.new(yyyy.to_i, mm.to_i, dd.to_i) + def create_date(base_date, value) + base_date + value.to_i end end end diff --git a/lib/roo/excelx/cell/datetime.rb b/lib/roo/excelx/cell/datetime.rb index 35d93ac..63f3260 100644 --- a/lib/roo/excelx/cell/datetime.rb +++ b/lib/roo/excelx/cell/datetime.rb @@ -1,16 +1,21 @@ +# frozen_string_literal: true + require 'date' module Roo class Excelx class Cell class DateTime < Cell::Base - attr_reader :value, :formula, :format, :cell_value, :link, :coordinate + SECONDS_IN_DAY = 60 * 60 * 24 + + attr_reader :value, :formula, :format, :cell_value, :coordinate - def initialize(value, formula, excelx_type, style, link, base_date, coordinate) - super(value, formula, excelx_type, style, link, coordinate) - @type = :datetime + attr_reader_with_default default_type: :datetime + + def initialize(value, formula, excelx_type, style, link, base_timestamp, coordinate) + super(value, formula, excelx_type, style, nil, coordinate) @format = excelx_type.last - @value = link? ? Roo::Link.new(link, value) : create_datetime(base_date, value) + @value = link ? Roo::Link.new(link, value) : create_datetime(base_timestamp, value) end # Public: Returns formatted value for a datetime. Format's can be an @@ -78,7 +83,7 @@ module Roo TIME_FORMATS = { 'hh' => '%H', # Hour (24): 01 - 'h' => '%-k'.freeze, # Hour (24): 1 + 'h' => '%-k', # Hour (24): 1 # 'hh'.freeze => '%I'.freeze, # Hour (12): 08 # 'h'.freeze => '%-l'.freeze, # Hour (12): 8 'mm' => '%M', # Minute: 01 @@ -92,18 +97,9 @@ module Roo '0' => '%1N' # Fractional Seconds: tenths. } - def create_datetime(base_date, value) - date = base_date + value.to_f.round(6) - datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N') - t = round_datetime(datetime_string) - - ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec) - end - - def round_datetime(datetime_string) - /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+.\d+)/ =~ datetime_string - - ::Time.new(yyyy, mm, dd, hh, mi, ss.to_r).round(0) + def create_datetime(base_timestamp, value) + timestamp = (base_timestamp + (value.to_f.round(6) * SECONDS_IN_DAY)).round(0) + ::Time.at(timestamp).utc.to_datetime end end end diff --git a/lib/roo/excelx/cell/empty.rb b/lib/roo/excelx/cell/empty.rb index 49a20e7..f0c683c 100644 --- a/lib/roo/excelx/cell/empty.rb +++ b/lib/roo/excelx/cell/empty.rb @@ -3,10 +3,11 @@ module Roo class Excelx class Cell class Empty < Cell::Base - attr_reader :value, :formula, :format, :cell_type, :cell_value, :hyperlink, :coordinate + attr_reader :value, :formula, :format, :cell_type, :cell_value, :coordinate + + attr_reader_with_default default_type: nil, style: nil def initialize(coordinate) - @value = @formula = @format = @cell_type = @cell_value = @hyperlink = nil @coordinate = coordinate end diff --git a/lib/roo/excelx/cell/number.rb b/lib/roo/excelx/cell/number.rb index 2015562..9f23c4f 100644 --- a/lib/roo/excelx/cell/number.rb +++ b/lib/roo/excelx/cell/number.rb @@ -1,16 +1,19 @@ +# frozen_string_literal: true + module Roo class Excelx class Cell class Number < Cell::Base - attr_reader :value, :formula, :format, :cell_value, :link, :coordinate + attr_reader :value, :formula, :format, :cell_value, :coordinate + + # FIXME: change default_type to number. This will break brittle tests. + attr_reader_with_default default_type: :float def initialize(value, formula, excelx_type, style, link, coordinate) super - # FIXME: change @type to number. This will break brittle tests. # FIXME: Excelx_type is an array, but the first value isn't used. - @type = :float @format = excelx_type.last - @value = link? ? Roo::Link.new(link, value) : create_numeric(value) + @value = link ? Roo::Link.new(link, value) : create_numeric(value) end def create_numeric(number) @@ -21,48 +24,50 @@ module Roo when /\.0/ Float(number) else - (number.include?('.') || (/\A[-+]?\d+E[-+]\d+\z/i =~ number)) ? Float(number) : Integer(number) + (number.include?('.') || (/\A[-+]?\d+E[-+]?\d+\z/i =~ number)) ? Float(number) : Integer(number, 10) end end def formatted_value return @cell_value if Excelx::ERROR_VALUES.include?(@cell_value) - formatter = formats[@format] + formatter = generate_formatter(@format) if formatter.is_a? Proc formatter.call(@cell_value) - elsif zero_padded_number? - "%0#{@format.size}d" % @cell_value else Kernel.format(formatter, @cell_value) end end - def formats + def generate_formatter(format) # FIXME: numbers can be other colors besides red: # [BLACK], [BLUE], [CYAN], [GREEN], [MAGENTA], [RED], [WHITE], [YELLOW], [COLOR n] - { - 'General' => '%.0f', - '0' => '%.0f', - '0.00' => '%.2f', - '0.000000' => '%.6f', - '#,##0' => number_format('%.0f'), - '#,##0.00' => number_format('%.2f'), - '0%' => proc do |number| - Kernel.format('%d%', number.to_f * 100) - end, - '0.00%' => proc do |number| - Kernel.format('%.2f%', number.to_f * 100) - end, - '0.00E+00' => '%.2E', - '#,##0 ;(#,##0)' => number_format('%.0f', '(%.0f)'), - '#,##0 ;[Red](#,##0)' => number_format('%.0f', '[Red](%.0f)'), - '#,##0.00;(#,##0.00)' => number_format('%.2f', '(%.2f)'), - '#,##0.00;[Red](#,##0.00)' => number_format('%.2f', '[Red](%.2f)'), + case format + when /^General$/i then '%.0f' + when '0' then '%.0f' + when /^(0+)$/ then "%0#{$1.size}d" + when /^0\.(0+)$/ then "%.#{$1.size}f" + when '#,##0' then number_format('%.0f') + when '#,##0.00' then number_format('%.2f') + when '0%' + proc do |number| + Kernel.format('%d%%', number.to_f * 100) + end + when '0.00%' + proc do |number| + Kernel.format('%.2f%%', number.to_f * 100) + end + when '0.00E+00' then '%.2E' + when '#,##0 ;(#,##0)' then number_format('%.0f', '(%.0f)') + when '#,##0 ;[Red](#,##0)' then number_format('%.0f', '[Red](%.0f)') + when '#,##0.00;(#,##0.00)' then number_format('%.2f', '(%.2f)') + when '#,##0.00;[Red](#,##0.00)' then number_format('%.2f', '[Red](%.2f)') # FIXME: not quite sure what the format should look like in this case. - '##0.0E+0' => '%.1E', - '@' => proc { |number| number } - } + when '##0.0E+0' then '%.1E' + when '@' then proc { |number| number } + else + raise "Unknown format: #{format.inspect}" + end end private @@ -77,10 +82,6 @@ module Roo Kernel.format(formatter, number).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse end end - - def zero_padded_number? - @format[/0+/] == @format - end end end end diff --git a/lib/roo/excelx/cell/string.rb b/lib/roo/excelx/cell/string.rb index 7967806..dacf0b6 100644 --- a/lib/roo/excelx/cell/string.rb +++ b/lib/roo/excelx/cell/string.rb @@ -2,12 +2,12 @@ module Roo class Excelx class Cell class String < Cell::Base - attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate + attr_reader :value, :formula, :format, :cell_value, :coordinate + + attr_reader_with_default default_type: :string, cell_type: :string def initialize(value, formula, style, link, coordinate) super(value, formula, nil, style, link, coordinate) - @type = @cell_type = :string - @value = link? ? Roo::Link.new(link, value) : value end def empty? diff --git a/lib/roo/excelx/cell/time.rb b/lib/roo/excelx/cell/time.rb index d661ab8..a1f0864 100644 --- a/lib/roo/excelx/cell/time.rb +++ b/lib/roo/excelx/cell/time.rb @@ -4,15 +4,16 @@ module Roo class Excelx class Cell class Time < Roo::Excelx::Cell::DateTime - attr_reader :value, :formula, :format, :cell_value, :link, :coordinate + attr_reader :value, :formula, :format, :cell_value, :coordinate + + attr_reader_with_default default_type: :time def initialize(value, formula, excelx_type, style, link, base_date, coordinate) # NOTE: Pass all arguments to DateTime super class. super - @type = :time @format = excelx_type.last @datetime = create_datetime(base_date, value) - @value = link? ? Roo::Link.new(link, value) : (value.to_f * 86_400).to_i + @value = link ? Roo::Link.new(link, value) : (value.to_f * 86_400).to_i end def formatted_value diff --git a/lib/roo/excelx/comments.rb b/lib/roo/excelx/comments.rb index 1a89908..65044a9 100644 --- a/lib/roo/excelx/comments.rb +++ b/lib/roo/excelx/comments.rb @@ -12,10 +12,10 @@ module Roo def extract_comments return {} unless doc_exists? - Hash[doc.xpath('//comments/commentList/comment').map do |comment| + doc.xpath('//comments/commentList/comment').each_with_object({}) do |comment, hash| value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text - [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value] - end] + hash[::Roo::Utils.ref_to_key(comment['ref'].to_s)] = value + end end end end diff --git a/lib/roo/excelx/coordinate.rb b/lib/roo/excelx/coordinate.rb index 53b24ba..4f61b17 100644 --- a/lib/roo/excelx/coordinate.rb +++ b/lib/roo/excelx/coordinate.rb @@ -1,11 +1,18 @@ module Roo class Excelx - class Coordinate - attr_accessor :row, :column + class Coordinate < ::Array def initialize(row, column) - @row = row - @column = column + super() << row << column + freeze + end + + def row + self[0] + end + + def column + self[1] end end end diff --git a/lib/roo/excelx/extractor.rb b/lib/roo/excelx/extractor.rb index 1cdd13b..b87a84e 100644..100755 --- a/lib/roo/excelx/extractor.rb +++ b/lib/roo/excelx/extractor.rb @@ -1,16 +1,34 @@ +# frozen_string_literal: true + +require "roo/helpers/weak_instance_cache" + module Roo class Excelx class Extractor - def initialize(path) + include Roo::Helpers::WeakInstanceCache + + COMMON_STRINGS = { + t: "t", + r: "r", + s: "s", + ref: "ref", + html_tag_open: "<html>", + html_tag_closed: "</html>" + } + + def initialize(path, options = {}) @path = path + @options = options end private def doc - raise FileNotFound, "#{@path} file not found" unless doc_exists? + instance_cache(:@doc) do + raise FileNotFound, "#{@path} file not found" unless doc_exists? - ::Roo::Utils.load_xml(@path).remove_namespaces! + ::Roo::Utils.load_xml(@path).remove_namespaces! + end end def doc_exists? diff --git a/lib/roo/excelx/format.rb b/lib/roo/excelx/format.rb index 72b36d9..1c1968a 100644 --- a/lib/roo/excelx/format.rb +++ b/lib/roo/excelx/format.rb @@ -1,49 +1,57 @@ +# frozen_string_literal: true + module Roo class Excelx module Format + extend self EXCEPTIONAL_FORMATS = { 'h:mm am/pm' => :date, 'h:mm:ss am/pm' => :date } STANDARD_FORMATS = { - 0 => 'General'.freeze, - 1 => '0'.freeze, - 2 => '0.00'.freeze, - 3 => '#,##0'.freeze, - 4 => '#,##0.00'.freeze, - 9 => '0%'.freeze, - 10 => '0.00%'.freeze, - 11 => '0.00E+00'.freeze, - 12 => '# ?/?'.freeze, - 13 => '# ??/??'.freeze, - 14 => 'mm-dd-yy'.freeze, - 15 => 'd-mmm-yy'.freeze, - 16 => 'd-mmm'.freeze, - 17 => 'mmm-yy'.freeze, - 18 => 'h:mm AM/PM'.freeze, - 19 => 'h:mm:ss AM/PM'.freeze, - 20 => 'h:mm'.freeze, - 21 => 'h:mm:ss'.freeze, - 22 => 'm/d/yy h:mm'.freeze, - 37 => '#,##0 ;(#,##0)'.freeze, - 38 => '#,##0 ;[Red](#,##0)'.freeze, - 39 => '#,##0.00;(#,##0.00)'.freeze, - 40 => '#,##0.00;[Red](#,##0.00)'.freeze, - 45 => 'mm:ss'.freeze, - 46 => '[h]:mm:ss'.freeze, - 47 => 'mmss.0'.freeze, - 48 => '##0.0E+0'.freeze, - 49 => '@'.freeze + 0 => 'General', + 1 => '0', + 2 => '0.00', + 3 => '#,##0', + 4 => '#,##0.00', + 9 => '0%', + 10 => '0.00%', + 11 => '0.00E+00', + 12 => '# ?/?', + 13 => '# ??/??', + 14 => 'mm-dd-yy', + 15 => 'd-mmm-yy', + 16 => 'd-mmm', + 17 => 'mmm-yy', + 18 => 'h:mm AM/PM', + 19 => 'h:mm:ss AM/PM', + 20 => 'h:mm', + 21 => 'h:mm:ss', + 22 => 'm/d/yy h:mm', + 37 => '#,##0 ;(#,##0)', + 38 => '#,##0 ;[Red](#,##0)', + 39 => '#,##0.00;(#,##0.00)', + 40 => '#,##0.00;[Red](#,##0.00)', + 45 => 'mm:ss', + 46 => '[h]:mm:ss', + 47 => 'mmss.0', + 48 => '##0.0E+0', + 49 => '@' } def to_type(format) + @to_type ||= {} + @to_type[format] ||= _to_type(format) + end + + def _to_type(format) format = format.to_s.downcase if (type = EXCEPTIONAL_FORMATS[format]) type elsif format.include?('#') :float - elsif !format.match(/d+(?![\]])/).nil? || format.include?('y') + elsif format.include?('y') || !format.match(/d+(?![\]])/).nil? if format.include?('h') || format.include?('s') :datetime else @@ -58,7 +66,6 @@ module Roo end end - module_function :to_type end - end + end end diff --git a/lib/roo/excelx/images.rb b/lib/roo/excelx/images.rb new file mode 100644 index 0000000..cf0221f --- /dev/null +++ b/lib/roo/excelx/images.rb @@ -0,0 +1,26 @@ +require 'roo/excelx/extractor' + +module Roo + class Excelx + class Images < Excelx::Extractor + + # Returns: Hash { id1: extracted_file_name1 }, + # Example: { "rId1"=>"roo_media_image1.png", + # "rId2"=>"roo_media_image2.png", + # "rId3"=>"roo_media_image3.png" } + def list + @images ||= extract_images_names + end + + private + + def extract_images_names + return {} unless doc_exists? + + doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, hash| + hash[rel['Id']] = "roo" + rel['Target'].gsub(/\.\.\/|\//, '_') + end + end + end + end +end diff --git a/lib/roo/excelx/relationships.rb b/lib/roo/excelx/relationships.rb index 8a0ed97..19f9919 100644 --- a/lib/roo/excelx/relationships.rb +++ b/lib/roo/excelx/relationships.rb @@ -16,9 +16,9 @@ module Roo def extract_relationships return [] unless doc_exists? - Hash[doc.xpath('/Relationships/Relationship').map do |rel| - [rel.attribute('Id').text, rel] - end] + doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, hash| + hash[rel['Id']] = rel + end end end end diff --git a/lib/roo/excelx/shared.rb b/lib/roo/excelx/shared.rb index 3677fa2..bcd2c08 100644..100755 --- a/lib/roo/excelx/shared.rb +++ b/lib/roo/excelx/shared.rb @@ -4,12 +4,15 @@ module Roo # reduce memory usage and reduce the number of objects being passed # to various inititializers. class Shared - attr_accessor :comments_files, :sheet_files, :rels_files - def initialize(dir) + attr_accessor :comments_files, :sheet_files, :rels_files, :image_rels, :image_files + def initialize(dir, options = {}) @dir = dir @comments_files = [] @sheet_files = [] @rels_files = [] + @options = options + @image_rels = [] + @image_files = [] end def styles @@ -17,7 +20,7 @@ module Roo end def shared_strings - @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml')) + @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'), @options) end def workbook @@ -27,6 +30,10 @@ module Roo def base_date workbook.base_date end + + def base_timestamp + workbook.base_timestamp + end end end end diff --git a/lib/roo/excelx/shared_strings.rb b/lib/roo/excelx/shared_strings.rb index f7caf7c..e70b623 100755 --- a/lib/roo/excelx/shared_strings.rb +++ b/lib/roo/excelx/shared_strings.rb @@ -1,16 +1,10 @@ +# frozen_string_literal: true + require 'roo/excelx/extractor' module Roo class Excelx class SharedStrings < Excelx::Extractor - - COMMON_STRINGS = { - t: "t", - r: "r", - html_tag_open: "<html>", - html_tag_closed: "</html>" - } - def [](index) to_a[index] end @@ -26,6 +20,7 @@ module Roo # Use to_html or to_a for html returns # See what is happening with commit??? def use_html?(index) + return false if @options[:disable_html_wrapper] to_html[index][/<([biu]|sup|sub)>/] end @@ -45,7 +40,7 @@ module Roo document = fix_invalid_shared_strings(doc) # read the shared strings xml document document.xpath('/sst/si').map do |si| - shared_string = '' + shared_string = +"" si.children.each do |elem| case elem.name when 'r' @@ -65,7 +60,7 @@ module Roo fix_invalid_shared_strings(doc) # read the shared strings xml document doc.xpath('/sst/si').map do |si| - html_string = '<html>' + html_string = '<html>'.dup si.children.each do |elem| case elem.name when 'r' @@ -95,7 +90,7 @@ module Roo # # Expected Output ::: "<html><sub|sup><b><i><u>TEXT</u></i></b></sub|/sup></html>" def extract_html_r(r_elem) - str = '' + str = +"" xml_elems = { sub: false, sup: false, @@ -103,7 +98,6 @@ module Roo i: false, u: false } - b, i, u, sub, sup = false, false, false, false, false r_elem.children.each do |elem| case elem.name when 'rPr' @@ -141,13 +135,13 @@ module Roo # This will return an html string def create_html(text, formatting) - tmp_str = '' + tmp_str = +"" formatting.each do |elem, val| tmp_str << "<#{elem}>" if val end tmp_str << text - reverse_format = Hash[formatting.to_a.reverse] - reverse_format.each do |elem, val| + + formatting.reverse_each do |elem, val| tmp_str << "</#{elem}>" if val end tmp_str diff --git a/lib/roo/excelx/sheet.rb b/lib/roo/excelx/sheet.rb index add92f0..840a053 100644 --- a/lib/roo/excelx/sheet.rb +++ b/lib/roo/excelx/sheet.rb @@ -4,11 +4,15 @@ module Roo class Sheet extend Forwardable - delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared + delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels] => :@shared + + attr_reader :images def initialize(name, shared, sheet_index, options = {}) @name = name @shared = shared + @sheet_index = sheet_index + @images = Images.new(image_rels[sheet_index]).list @rels = Relationships.new(rels_files[sheet_index]) @comments = Comments.new(comments_files[sheet_index]) @sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options) @@ -19,7 +23,14 @@ module Roo end def present_cells - @present_cells ||= cells.select { |_, cell| cell && !cell.empty? } + @present_cells ||= begin + warn %{ +[DEPRECATION] present_cells is deprecated. Alternate: + with activesupport => cells[key].presence + without activesupport => cells[key]&.presence + } + cells.select { |_, cell| cell&.presence } + end end # Yield each row as array of Excelx::Cell objects @@ -39,33 +50,33 @@ module Roo def row(row_number) first_column.upto(last_column).map do |col| - cells[[row_number, col]] - end.map { |cell| cell && cell.value } + cells[[row_number, col]]&.value + end end def column(col_number) first_row.upto(last_row).map do |row| - cells[[row, col_number]] - end.map { |cell| cell && cell.value } + cells[[row, col_number]]&.value + end end # returns the number of the first non-empty row def first_row - @first_row ||= present_cells.keys.map { |row, _| row }.min + @first_row ||= first_last_row_col[:first_row] end def last_row - @last_row ||= present_cells.keys.map { |row, _| row }.max + @last_row ||= first_last_row_col[:last_row] end # returns the number of the first non-empty column def first_column - @first_column ||= present_cells.keys.map { |_, col| col }.min + @first_column ||= first_last_row_col[:first_column] end # returns the number of the last non-empty column def last_column - @last_column ||= present_cells.keys.map { |_, col| col }.max + @last_column ||= first_last_row_col[:last_column] end def excelx_format(key) @@ -107,6 +118,34 @@ module Roo (cell.coordinate.column - 1 - last_column).times { pad << nil } pad end + + def first_last_row_col + @first_last_row_col ||= begin + first_row = last_row = first_col = last_col = nil + + cells.each do |(row, col), cell| + next unless cell&.presence + first_row ||= row + last_row ||= row + first_col ||= col + last_col ||= col + + if row > last_row + last_row = row + elsif row < first_row + first_row = row + end + + if col > last_col + last_col = col + elsif col < first_col + first_col = col + end + end + + {first_row: first_row, last_row: last_row, first_column: first_col, last_column: last_col} + end + end end end end diff --git a/lib/roo/excelx/sheet_doc.rb b/lib/roo/excelx/sheet_doc.rb index a705958..8b0c686 100755 --- a/lib/roo/excelx/sheet_doc.rb +++ b/lib/roo/excelx/sheet_doc.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'forwardable' require 'roo/excelx/extractor' @@ -5,7 +7,7 @@ module Roo class Excelx class SheetDoc < Excelx::Extractor extend Forwardable - delegate [:styles, :workbook, :shared_strings, :base_date] => :@shared + delegate [:workbook] => :@shared def initialize(path, relationships, shared, options = {}) super(path) @@ -19,7 +21,12 @@ module Roo end def hyperlinks(relationships) - @hyperlinks ||= extract_hyperlinks(relationships) + # If you're sure you're not going to need this hyperlinks you can discard it + @hyperlinks ||= if @options[:no_hyperlinks] + {} + else + extract_hyperlinks(relationships) + end end # Get the dimensions for the sheet. @@ -39,13 +46,10 @@ module Roo def each_cell(row_xml) return [] unless row_xml row_xml.children.each do |cell_element| - # If you're sure you're not going to need this hyperlinks you can discard it - hyperlinks = unless @options[:no_hyperlinks] - key = ::Roo::Utils.ref_to_key(cell_element['r']) - hyperlinks(@relationships)[key] - end + coordinate = ::Roo::Utils.extract_coordinate(cell_element["r"]) + hyperlinks = hyperlinks(@relationships)[coordinate] - yield cell_from_xml(cell_element, hyperlinks) + yield cell_from_xml(cell_element, hyperlinks, coordinate) end end @@ -53,13 +57,13 @@ module Roo def cell_value_type(type, format) case type - when 's'.freeze + when 's' :shared - when 'b'.freeze + when 'b' :boolean - when 'str'.freeze + when 'str' :string - when 'inlineStr'.freeze + when 'inlineStr' :inlinestr else Excelx::Format.to_type(format) @@ -74,42 +78,58 @@ module Roo # </c> # hyperlink - a String for the hyperlink for the cell or nil when no # hyperlink is present. + # coordinate - a Roo::Excelx::Coordinate for the coordinate for the cell + # or nil to extract coordinate from cell_xml. + # empty_cell - an Optional Boolean value. # # Examples # - # cells_from_xml(<Nokogiri::XML::Element>, nil) + # cells_from_xml(<Nokogiri::XML::Element>, nil, nil) # # => <Excelx::Cell::String> # # Returns a type of <Excelx::Cell>. - def cell_from_xml(cell_xml, hyperlink) - coordinate = extract_coordinate(cell_xml['r']) - return Excelx::Cell::Empty.new(coordinate) if cell_xml.children.empty? + def cell_from_xml(cell_xml, hyperlink, coordinate, empty_cell=true) + coordinate ||= ::Roo::Utils.extract_coordinate(cell_xml["r"]) + cell_xml_children = cell_xml.children + return create_empty_cell(coordinate, empty_cell) if cell_xml_children.empty? # NOTE: This is error prone, to_i will silently turn a nil into a 0. # This works by coincidence because Format[0] is General. - style = cell_xml['s'].to_i - format = styles.style_format(style) - value_type = cell_value_type(cell_xml['t'], format) + style = cell_xml["s"].to_i formula = nil - cell_xml.children.each do |cell| + cell_xml_children.each do |cell| case cell.name when 'is' - content_arr = cell.search('t').map(&:content) - unless content_arr.empty? - return Excelx::Cell.create_cell(:string, content_arr.join(''), formula, style, hyperlink, coordinate) + content = +"" + cell.children.each do |inline_str| + if inline_str.name == 't' + content << inline_str.content + end + end + unless content.empty? + return Excelx::Cell.cell_class(:string).new(content, formula, style, hyperlink, coordinate) end when 'f' formula = cell.content when 'v' - return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate) + format = style_format(style) + value_type = cell_value_type(cell_xml["t"], format) + + return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate) end end - Excelx::Cell::Empty.new(coordinate) + create_empty_cell(coordinate) end - def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate) + def create_empty_cell(coordinate, empty_cell) + if empty_cell + Excelx::Cell::Empty.new(coordinate) + end + end + + def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate) # NOTE: format.to_s can replace excelx_type as an argument for # Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but # it will break some brittle tests. @@ -125,11 +145,12 @@ module Roo # 3. formula case value_type when :shared - value = shared_strings.use_html?(cell.content.to_i) ? shared_strings.to_html[cell.content.to_i] : shared_strings[cell.content.to_i] - Excelx::Cell.create_cell(:string, value, formula, style, hyperlink, coordinate) + cell_content = cell.content.to_i + value = shared_strings.use_html?(cell_content) ? shared_strings.to_html[cell_content] : shared_strings[cell_content] + Excelx::Cell.cell_class(:string).new(value, formula, style, hyperlink, coordinate) when :boolean, :string value = cell.content - Excelx::Cell.create_cell(value_type, value, formula, style, hyperlink, coordinate) + Excelx::Cell.cell_class(value_type).new(value, formula, style, hyperlink, coordinate) when :time, :datetime cell_content = cell.content.to_f # NOTE: A date will be a whole number. A time will have be > 1. And @@ -148,35 +169,32 @@ module Roo else :date end - Excelx::Cell.create_cell(cell_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate) + base_value = cell_type == :date ? base_date : base_timestamp + Excelx::Cell.cell_class(cell_type).new(cell_content, formula, excelx_type, style, hyperlink, base_value, coordinate) when :date - Excelx::Cell.create_cell(value_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate) + Excelx::Cell.cell_class(:date).new(cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate) else - Excelx::Cell.create_cell(:number, cell.content, formula, excelx_type, style, hyperlink, coordinate) + Excelx::Cell.cell_class(:number).new(cell.content, formula, excelx_type, style, hyperlink, coordinate) end end - def extract_coordinate(coordinate) - row, column = ::Roo::Utils.split_coordinate(coordinate) - - Excelx::Coordinate.new(row, column) - end - def extract_hyperlinks(relationships) return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink')) - Hash[hyperlinks.map do |hyperlink| - if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text]) - [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text] + hyperlinks.each_with_object({}) do |hyperlink, hash| + if relationship = relationships[hyperlink['id']] + target_link = relationship['Target'] + target_link += "##{hyperlink['location']}" if hyperlink['location'] + hash[::Roo::Utils.ref_to_key(hyperlink["ref"].to_s)] = target_link end - end.compact] + end end def expand_merged_ranges(cells) # Extract merged ranges from xml merges = {} doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml| - tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) } + tl, br = mergecell_xml["ref"].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) } for row in tl[0]..br[0] do for col in tl[1]..br[1] do next if row == tl[0] && col == tl[1] @@ -191,10 +209,14 @@ module Roo end def extract_cells(relationships) - extracted_cells = Hash[doc.xpath('/worksheet/sheetData/row/c').map do |cell_xml| - key = ::Roo::Utils.ref_to_key(cell_xml['r']) - [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])] - end] + extracted_cells = {} + empty_cell = @options[:empty_cell] + + doc.xpath('/worksheet/sheetData/row/c').each do |cell_xml| + coordinate = ::Roo::Utils.extract_coordinate(cell_xml["r"]) + cell = cell_from_xml(cell_xml, hyperlinks(relationships)[coordinate], coordinate, empty_cell) + extracted_cells[coordinate] = cell if cell + end expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges] @@ -203,9 +225,25 @@ module Roo def extract_dimensions Roo::Utils.each_element(@path, 'dimension') do |dimension| - return dimension.attributes['ref'].value + return dimension["ref"] end end + + def style_format(style) + @shared.styles.style_format(style) + end + + def base_date + @shared.base_date + end + + def base_timestamp + @shared.base_timestamp + end + + def shared_strings + @shared.shared_strings + end end end end diff --git a/lib/roo/excelx/styles.rb b/lib/roo/excelx/styles.rb index 87f1713..25f0bf0 100644 --- a/lib/roo/excelx/styles.rb +++ b/lib/roo/excelx/styles.rb @@ -55,9 +55,9 @@ module Roo end def extract_num_fmts - Hash[doc.xpath('//numFmt').map do |num_fmt| - [num_fmt['numFmtId'], num_fmt['formatCode']] - end] + doc.xpath('//numFmt').each_with_object({}) do |num_fmt, hash| + hash[num_fmt['numFmtId']] = num_fmt['formatCode'] + end end end end diff --git a/lib/roo/excelx/workbook.rb b/lib/roo/excelx/workbook.rb index 7ef841f..c21bb1f 100644 --- a/lib/roo/excelx/workbook.rb +++ b/lib/roo/excelx/workbook.rb @@ -29,13 +29,17 @@ module Roo # aka labels def defined_names - Hash[doc.xpath('//definedName').map do |defined_name| + doc.xpath('//definedName').each_with_object({}) do |defined_name, hash| # "Sheet1!$C$5" sheet, coordinates = defined_name.text.split('!$', 2) col, row = coordinates.split('$') name = defined_name['name'] - [name, Label.new(name, sheet, row, col)] - end] + hash[name] = Label.new(name, sheet, row, col) + end + end + + def base_timestamp + @base_timestamp ||= base_date.to_datetime.to_time.to_i end def base_date |
