From dddfa903d2b856146f05ffb4415c31d6127bb5bf Mon Sep 17 00:00:00 2001 From: Unit 193 Date: Mon, 14 Jan 2019 01:40:56 -0500 Subject: New upstream version 2.8.0 --- lib/roo.rb | 5 +- lib/roo/base.rb | 121 +++++++++++++++-------------- lib/roo/constants.rb | 8 +- lib/roo/csv.rb | 32 +++++--- lib/roo/excelx.rb | 58 ++++++++++---- lib/roo/excelx/cell.rb | 16 ++-- lib/roo/excelx/cell/base.rb | 38 +++++++--- lib/roo/excelx/cell/boolean.rb | 15 ++-- lib/roo/excelx/cell/date.rb | 14 ++-- lib/roo/excelx/cell/datetime.rb | 32 ++++---- lib/roo/excelx/cell/empty.rb | 5 +- lib/roo/excelx/cell/number.rb | 69 ++++++++--------- lib/roo/excelx/cell/string.rb | 6 +- lib/roo/excelx/cell/time.rb | 7 +- lib/roo/excelx/comments.rb | 6 +- lib/roo/excelx/coordinate.rb | 15 +++- lib/roo/excelx/extractor.rb | 24 +++++- lib/roo/excelx/format.rb | 69 +++++++++-------- lib/roo/excelx/images.rb | 26 +++++++ lib/roo/excelx/relationships.rb | 6 +- lib/roo/excelx/shared.rb | 13 +++- lib/roo/excelx/shared_strings.rb | 24 +++--- lib/roo/excelx/sheet.rb | 59 ++++++++++++--- lib/roo/excelx/sheet_doc.rb | 134 +++++++++++++++++++++------------ lib/roo/excelx/styles.rb | 6 +- lib/roo/excelx/workbook.rb | 10 ++- lib/roo/helpers/default_attr_reader.rb | 20 +++++ lib/roo/helpers/weak_instance_cache.rb | 41 ++++++++++ lib/roo/open_office.rb | 14 ++-- lib/roo/spreadsheet.rb | 2 +- lib/roo/utils.rb | 67 ++++++++++++----- lib/roo/version.rb | 2 +- 32 files changed, 632 insertions(+), 332 deletions(-) mode change 100644 => 100755 lib/roo/excelx.rb mode change 100644 => 100755 lib/roo/excelx/extractor.rb create mode 100644 lib/roo/excelx/images.rb mode change 100644 => 100755 lib/roo/excelx/shared.rb create mode 100644 lib/roo/helpers/default_attr_reader.rb create mode 100644 lib/roo/helpers/weak_instance_cache.rb (limited to 'lib') diff --git a/lib/roo.rb b/lib/roo.rb index ec8eb42..add1e4a 100644 --- a/lib/roo.rb +++ b/lib/roo.rb @@ -1,3 +1,6 @@ +# frozen_string_literal: true + +require 'roo/version' require 'roo/constants' require 'roo/errors' require 'roo/spreadsheet' @@ -9,7 +12,7 @@ module Roo autoload :Excelx, 'roo/excelx' autoload :CSV, 'roo/csv' - TEMP_PREFIX = 'roo_'.freeze + TEMP_PREFIX = 'roo_' CLASS_FOR_EXTENSION = { ods: Roo::OpenOffice, diff --git a/lib/roo/base.rb b/lib/roo/base.rb index 53e4075..19eb844 100644 --- a/lib/roo/base.rb +++ b/lib/roo/base.rb @@ -1,9 +1,7 @@ -# encoding: utf-8 - -require 'tmpdir' -require 'stringio' -require 'nokogiri' -require 'roo/utils' +require "tmpdir" +require "stringio" +require "nokogiri" +require "roo/utils" require "roo/formatters/base" require "roo/formatters/csv" require "roo/formatters/matrix" @@ -19,8 +17,8 @@ class Roo::Base include Roo::Formatters::XML include Roo::Formatters::YAML - MAX_ROW_COL = 999_999.freeze - MIN_ROW_COL = 0.freeze + MAX_ROW_COL = 999_999 + MIN_ROW_COL = 0 attr_reader :headers @@ -28,7 +26,7 @@ class Roo::Base attr_accessor :header_line def self.TEMP_PREFIX - warn '[DEPRECATION] please access TEMP_PREFIX via Roo::TEMP_PREFIX' + warn "[DEPRECATION] please access TEMP_PREFIX via Roo::TEMP_PREFIX" Roo::TEMP_PREFIX end @@ -56,6 +54,11 @@ class Roo::Base if self.class.respond_to?(:finalize_tempdirs) self.class.finalize_tempdirs(object_id) end + + instance_variables.each do |instance_variable| + instance_variable_set(instance_variable, nil) + end + nil end @@ -64,10 +67,10 @@ class Roo::Base end # sets the working sheet in the document - # 'sheet' can be a number (1 = first sheet) or the name of a sheet. + # 'sheet' can be a number (0 = first sheet) or the name of a sheet. def default_sheet=(sheet) validate_sheet!(sheet) - @default_sheet = sheet + @default_sheet = sheet.is_a?(String) ? sheet : sheets[sheet] @first_row[sheet] = @last_row[sheet] = @first_column[sheet] = @last_column[sheet] = nil @cells_read[sheet] = false end @@ -100,7 +103,7 @@ class Roo::Base def collect_last_row_col_for_sheet(sheet) first_row = first_column = MAX_ROW_COL last_row = last_column = MIN_ROW_COL - @cell[sheet].each_pair do|key, value| + @cell[sheet].each_pair do |key, value| next unless value first_row = [first_row, key.first.to_i].min last_row = [last_row, key.first.to_i].max @@ -110,13 +113,12 @@ class Roo::Base { first_row: first_row, first_column: first_column, last_row: last_row, last_column: last_column } end - %w(first_row last_row first_column last_column).each do |key| - class_eval <<-EOS, __FILE__, __LINE__ + 1 - def #{key}(sheet = default_sheet) # def first_row(sheet = default_sheet) - read_cells(sheet) # read_cells(sheet) - @#{key}[sheet] ||= first_last_row_col_for_sheet(sheet)[:#{key}] # @first_row[sheet] ||= first_last_row_col_for_sheet(sheet)[:first_row] - end # end - EOS + %i(first_row last_row first_column last_column).each do |key| + ivar = "@#{key}".to_sym + define_method(key) do |sheet = default_sheet| + read_cells(sheet) + instance_variable_get(ivar)[sheet] ||= first_last_row_col_for_sheet(sheet)[key] + end end def inspect @@ -203,16 +205,16 @@ class Roo::Base "Number of sheets: #{sheets.size}\n"\ "Sheets: #{sheets.join(', ')}\n" n = 1 - sheets.each do|sheet| + sheets.each do |sheet| self.default_sheet = sheet - result << 'Sheet ' + n.to_s + ":\n" + result << "Sheet " + n.to_s + ":\n" if first_row result << " First row: #{first_row}\n" result << " Last row: #{last_row}\n" result << " First column: #{::Roo::Utils.number_to_letter(first_column)}\n" result << " Last column: #{::Roo::Utils.number_to_letter(last_column)}" else - result << ' - empty -' + result << " - empty -" end result << "\n" if sheet != sheets.last n += 1 @@ -286,12 +288,12 @@ class Roo::Base clean_sheet_if_need(options) search_or_set_header(options) headers = @headers || - Hash[(first_column..last_column).map do |col| - [cell(@header_line, col), col] - end] + (first_column..last_column).each_with_object({}) do |col, hash| + hash[cell(@header_line, col)] = col + end @header_line.upto(last_row) do |line| - yield(Hash[headers.map { |k, v| [k, cell(line, v)] }]) + yield(headers.each_with_object({}) { |(k, v), hash| hash[k] = cell(line, v) }) end end end @@ -306,18 +308,22 @@ class Roo::Base def row_with(query, return_headers = false) line_no = 0 + closest_mismatched_headers = [] each do |row| line_no += 1 headers = query.map { |q| row.grep(q)[0] }.compact - if headers.length == query.length @header_line = line_no return return_headers ? headers : line_no - elsif line_no > 100 - raise Roo::HeaderRowNotFoundError + else + closest_mismatched_headers = headers if headers.length > closest_mismatched_headers.length + if line_no > 100 + break + end end end - raise Roo::HeaderRowNotFoundError + missing_headers = query.select { |q| closest_mismatched_headers.grep(q).empty? } + raise Roo::HeaderRowNotFoundError, missing_headers end protected @@ -330,7 +336,7 @@ class Roo::Base filename = File.basename(filename, File.extname(filename)) end - if uri?(filename) && (qs_begin = filename.rindex('?')) + if uri?(filename) && (qs_begin = filename.rindex("?")) filename = filename[0..qs_begin - 1] end exts = Array(exts) @@ -356,7 +362,7 @@ class Roo::Base # Diese Methode ist eine temp. Loesung, um zu erforschen, ob der # Zugriff mit numerischen Keys schneller ist. def key_to_num(str) - r, c = str.split(',') + r, c = str.split(",") [r.to_i, c.to_i] end @@ -418,9 +424,9 @@ class Roo::Base def find_by_conditions(options) rows = first_row.upto(last_row) - header_for = Hash[1.upto(last_column).map do |col| - [col, cell(@header_line, col)] - end] + header_for = 1.upto(last_column).each_with_object({}) do |col, hash| + hash[col] = cell(@header_line, col) + end # are all conditions met? conditions = options[:conditions] @@ -435,9 +441,9 @@ class Roo::Base rows.map { |i| row(i) } else rows.map do |i| - Hash[1.upto(row(i).size).map do |j| - [header_for.fetch(j), cell(i, j)] - end] + 1.upto(row(i).size).each_with_object({}) do |j, hash| + hash[header_for.fetch(j)] = cell(i, j) + end end end end @@ -455,7 +461,7 @@ class Roo::Base def find_basename(filename) if uri?(filename) - require 'uri' + require "uri" uri = URI.parse filename File.basename(uri.path) elsif !is_stream?(filename) @@ -464,9 +470,9 @@ class Roo::Base end def make_tmpdir(prefix = nil, root = nil, &block) - warn '[DEPRECATION] extend Roo::Tempdir and use its .make_tempdir instead' + warn "[DEPRECATION] extend Roo::Tempdir and use its .make_tempdir instead" prefix = "#{Roo::TEMP_PREFIX}#{prefix}" - root ||= ENV['ROO_TMP'] + root ||= ENV["ROO_TMP"] if block_given? # folder is deleted at end of block @@ -485,14 +491,17 @@ class Roo::Base end def sanitize_value(v) - v.gsub(/[[:cntrl:]]|^[\p{Space}]+|[\p{Space}]+$/, '') + v.gsub(/[[:cntrl:]]|^[\p{Space}]+|[\p{Space}]+$/, "") end def set_headers(hash = {}) # try to find header row with all values or give an error # then create new hash by indexing strings and keeping integers for header array - @headers = row_with(hash.values, true) - @headers = Hash[hash.keys.zip(@headers.map { |x| header_index(x) })] + header_row = row_with(hash.values, true) + @headers = {} + hash.each_with_index do |(key, _), index| + @headers[key] = header_index(header_row[index]) + end end def header_index(query) @@ -525,17 +534,17 @@ class Roo::Base end def uri?(filename) - filename.start_with?('http://', 'https://', 'ftp://') + filename.start_with?("http://", "https://", "ftp://") rescue false end def download_uri(uri, tmpdir) - require 'open-uri' + require "open-uri" tempfilename = File.join(tmpdir, find_basename(uri)) begin - File.open(tempfilename, 'wb') do |file| - open(uri, 'User-Agent' => "Ruby/#{RUBY_VERSION}") do |net| + File.open(tempfilename, "wb") do |file| + open(uri, "User-Agent" => "Ruby/#{RUBY_VERSION}") do |net| file.write(net.read) end end @@ -546,15 +555,15 @@ class Roo::Base end def open_from_stream(stream, tmpdir) - tempfilename = File.join(tmpdir, 'spreadsheet') - File.open(tempfilename, 'wb') do |file| + tempfilename = File.join(tmpdir, "spreadsheet") + File.open(tempfilename, "wb") do |file| file.write(stream[7..-1]) end - File.join(tmpdir, 'spreadsheet') + File.join(tmpdir, "spreadsheet") end def unzip(filename, tmpdir) - require 'zip/filesystem' + require "zip/filesystem" Zip::File.open(filename) do |zip| process_zipfile_packed(zip, tmpdir) @@ -567,7 +576,7 @@ class Roo::Base when nil fail ArgumentError, "Error: sheet 'nil' not valid" when Integer - sheets.fetch(sheet - 1) do + sheets.fetch(sheet) do fail RangeError, "sheet index #{sheet} not found" end when String @@ -579,16 +588,16 @@ class Roo::Base end end - def process_zipfile_packed(zip, tmpdir, path = '') + def process_zipfile_packed(zip, tmpdir, path = "") if zip.file.file? path # extract and return filename - File.open(File.join(tmpdir, path), 'wb') do |file| + File.open(File.join(tmpdir, path), "wb") do |file| file.write(zip.read(path)) end File.join(tmpdir, path) else ret = nil - path += '/' unless path.empty? + path += "/" unless path.empty? zip.dir.foreach(path) do |filename| ret = process_zipfile_packed(zip, tmpdir, path + filename) end diff --git a/lib/roo/constants.rb b/lib/roo/constants.rb index 90d54ee..1ae5ca7 100644 --- a/lib/roo/constants.rb +++ b/lib/roo/constants.rb @@ -1,5 +1,7 @@ +# frozen_string_literal: true + module Roo - ROO_EXCEL_NOTICE = "Excel support has been extracted to roo-xls due to its dependency on the GPL'd spreadsheet gem. Install roo-xls to use Roo::Excel.".freeze - ROO_EXCELML_NOTICE = "Excel SpreadsheetML support has been extracted to roo-xls. Install roo-xls to use Roo::Excel2003XML.".freeze - ROO_GOOGLE_NOTICE = "Google support has been extracted to roo-google. Install roo-google to use Roo::Google.".freeze + ROO_EXCEL_NOTICE = "Excel support has been extracted to roo-xls due to its dependency on the GPL'd spreadsheet gem. Install roo-xls to use Roo::Excel." + ROO_EXCELML_NOTICE = "Excel SpreadsheetML support has been extracted to roo-xls. Install roo-xls to use Roo::Excel2003XML." + ROO_GOOGLE_NOTICE = "Google support has been extracted to roo-google. Install roo-google to use Roo::Google." end \ No newline at end of file diff --git a/lib/roo/csv.rb b/lib/roo/csv.rb index c161c64..516def6 100644 --- a/lib/roo/csv.rb +++ b/lib/roo/csv.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require "csv" require "time" @@ -63,25 +65,31 @@ module Roo def read_cells(sheet = default_sheet) sheet ||= default_sheet return if @cells_read[sheet] - set_row_count(sheet) - set_column_count(sheet) - row_num = 1 + row_num = 0 + max_col_num = 0 each_row csv_options do |row| - row.each_with_index do |elem, col_num| - coordinate = [row_num, col_num + 1] + row_num += 1 + col_num = 0 + + row.each do |elem| + col_num += 1 + coordinate = [row_num, col_num] @cell[coordinate] = elem @cell_type[coordinate] = celltype_class(elem) end - row_num += 1 + + max_col_num = col_num if col_num > max_col_num end + set_row_count(sheet, row_num) + set_column_count(sheet, max_col_num) @cells_read[sheet] = true end def each_row(options, &block) if uri?(filename) - each_row_using_temp_dir(filename) + each_row_using_tempdir(options, &block) elsif is_stream?(filename_or_stream) ::CSV.new(filename_or_stream, options).each(&block) else @@ -89,24 +97,24 @@ module Roo end end - def each_row_using_tempdir + def each_row_using_tempdir(options, &block) ::Dir.mktmpdir(Roo::TEMP_PREFIX, ENV["ROO_TMP"]) do |tmpdir| tmp_filename = download_uri(filename, tmpdir) ::CSV.foreach(tmp_filename, options, &block) end end - def set_row_count(sheet) + def set_row_count(sheet, last_row) @first_row[sheet] = 1 - @last_row[sheet] = ::CSV.readlines(@filename, csv_options).size + @last_row[sheet] = last_row @last_row[sheet] = @first_row[sheet] if @last_row[sheet].zero? nil end - def set_column_count(sheet) + def set_column_count(sheet, last_col) @first_column[sheet] = 1 - @last_column[sheet] = (::CSV.readlines(@filename, csv_options).first || []).size + @last_column[sheet] = last_col @last_column[sheet] = @first_column[sheet] if @last_column[sheet].zero? nil diff --git a/lib/roo/excelx.rb b/lib/roo/excelx.rb old mode 100644 new mode 100755 index 82c1431..f9f0ee2 --- a/lib/roo/excelx.rb +++ b/lib/roo/excelx.rb @@ -24,8 +24,9 @@ module Roo require 'roo/excelx/sheet_doc' require 'roo/excelx/coordinate' require 'roo/excelx/format' + require 'roo/excelx/images' - delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared + delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels, :image_files] => :@shared ExceedsMaxError = Class.new(StandardError) # initialization and opening of a spreadsheet file @@ -39,7 +40,10 @@ module Roo sheet_options = {} sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false) sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false) + sheet_options[:empty_cell] = (options[:empty_cell] || false) + shared_options = {} + shared_options[:disable_html_wrapper] = (options[:disable_html_wrapper] || false) unless is_stream?(filename_or_stream) file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed) basename = find_basename(filename_or_stream) @@ -52,7 +56,7 @@ module Roo @tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root]) ObjectSpace.define_finalizer(self, self.class.finalize(object_id)) - @shared = Shared.new(@tmpdir) + @shared = Shared.new(@tmpdir, shared_options) @filename = local_filename(filename_or_stream, @tmpdir, packed) process_zipfile(@filename || filename_or_stream) @@ -62,10 +66,10 @@ module Roo end end.compact @sheets = [] - @sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n| - @sheets[n] = Sheet.new(sheet_name, @shared, n, sheet_options) - [sheet_name, @sheets[n]] - end] + @sheets_by_name = {} + @sheet_names.each_with_index do |sheet_name, n| + @sheets_by_name[sheet_name] = @sheets[n] = Sheet.new(sheet_name, @shared, n, sheet_options) + end if cell_max cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions) @@ -94,7 +98,12 @@ module Roo def sheet_for(sheet) sheet ||= default_sheet validate_sheet!(sheet) - @sheets_by_name[sheet] + @sheets_by_name[sheet] || @sheets[sheet] + end + + def images(sheet = nil) + images_names = sheet_for(sheet).images.map(&:last) + images_names.map { |iname| image_files.find { |ifile| ifile[iname] } } end # Returns the content of a spreadsheet-cell. @@ -325,7 +334,7 @@ module Roo wb.extract(path) workbook_doc = Roo::Utils.load_xml(path).remove_namespaces! - workbook_doc.xpath('//sheet').map { |s| s.attributes['id'].value } + workbook_doc.xpath('//sheet').map { |s| s['id'] } end # Internal @@ -349,17 +358,13 @@ module Roo wb_rels.extract(path) rels_doc = Roo::Utils.load_xml(path).remove_namespaces! - worksheet_type = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet' relationships = rels_doc.xpath('//Relationship').select do |relationship| - relationship.attributes['Type'].value == worksheet_type + worksheet_types.include? relationship['Type'] end - relationships.inject({}) do |hash, relationship| - attributes = relationship.attributes - id = attributes['Id'] - hash[id.value] = attributes['Target'].value - hash + relationships.each_with_object({}) do |relationship, hash| + hash[relationship['Id']] = relationship['Target'] end end @@ -376,6 +381,15 @@ module Roo end end + def extract_images(entries, tmpdir) + img_entries = entries.select { |e| e.name[/media\/image([0-9]+)/] } + img_entries.each do |entry| + path = "#{@tmpdir}/roo#{entry.name.gsub(/xl\/|\//, "_")}" + image_files << path + entry.extract(path) + end + end + # Extracts all needed files from the zip file def process_zipfile(zipfilename_or_stream) @sheet_files = [] @@ -409,6 +423,7 @@ module Roo sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml") sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels") extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir) + extract_images(entries, @tmpdir) entries.each do |entry| path = @@ -435,6 +450,10 @@ module Roo # drawings, etc. nr = Regexp.last_match[1].to_i rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}" + when /drawing([0-9]+).xml.rels$/ + # Extracting drawing relationships to make images lists for each sheet + nr = Regexp.last_match[1].to_i + image_rels[nr - 1] = "#{@tmpdir}/roo_image_rels#{nr}" end entry.extract(path) if path @@ -442,7 +461,14 @@ module Roo end def safe_send(object, method, *args) - object.send(method, *args) if object && object.respond_to?(method) + object.send(method, *args) if object&.respond_to?(method) + end + + def worksheet_types + [ + 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet', # OOXML Transitional + 'http://purl.oclc.org/ooxml/officeDocument/relationships/worksheet' # OOXML Strict + ] end end end diff --git a/lib/roo/excelx/cell.rb b/lib/roo/excelx/cell.rb index 2fc78e1..7abd074 100644 --- a/lib/roo/excelx/cell.rb +++ b/lib/roo/excelx/cell.rb @@ -40,19 +40,23 @@ module Roo end def self.create_cell(type, *values) + cell_class(type)&.new(*values) + end + + def self.cell_class(type) case type when :string - Cell::String.new(*values) + Cell::String when :boolean - Cell::Boolean.new(*values) + Cell::Boolean when :number - Cell::Number.new(*values) + Cell::Number when :date - Cell::Date.new(*values) + Cell::Date when :datetime - Cell::DateTime.new(*values) + Cell::DateTime when :time - Cell::Time.new(*values) + Cell::Time end end diff --git a/lib/roo/excelx/cell/base.rb b/lib/roo/excelx/cell/base.rb index aea8808..51fc75f 100644 --- a/lib/roo/excelx/cell/base.rb +++ b/lib/roo/excelx/cell/base.rb @@ -1,13 +1,18 @@ +# frozen_string_literal: true + +require "roo/helpers/default_attr_reader" + module Roo class Excelx class Cell class Base + extend Roo::Helpers::DefaultAttrReader attr_reader :cell_type, :cell_value, :value # FIXME: I think style should be deprecated. Having a style attribute # for a cell doesn't really accomplish much. It seems to be used # when you want to export to excelx. - attr_reader :style + attr_reader_with_default default_type: :base, style: 1 # FIXME: Updating a cell's value should be able tochange the cell's type, @@ -34,14 +39,12 @@ module Roo attr_writer :value def initialize(value, formula, excelx_type, style, link, coordinate) - @link = !!link @cell_value = value - @cell_type = excelx_type - @formula = formula - @style = style + @cell_type = excelx_type if excelx_type + @formula = formula if formula + @style = style unless style == 1 @coordinate = coordinate - @type = :base - @value = link? ? Roo::Link.new(link, value) : value + @value = link ? Roo::Link.new(link, value) : value end def type @@ -50,16 +53,16 @@ module Roo elsif link? :link else - @type + default_type end end def formula? - !!@formula + !!(defined?(@formula) && @formula) end def link? - !!@link + Roo::Link === @value end alias_method :formatted_value, :value @@ -68,9 +71,16 @@ module Roo formatted_value end - # DEPRECATED: Please use link instead. + # DEPRECATED: Please use link? instead. def hyperlink - warn '[DEPRECATION] `hyperlink` is deprecated. Please use `link` instead.' + warn '[DEPRECATION] `hyperlink` is deprecated. Please use `link?` instead.' + link? + end + + # DEPRECATED: Please use link? instead. + def link + warn '[DEPRECATION] `link` is deprecated. Please use `link?` instead.' + link? end # DEPRECATED: Please use cell_value instead. @@ -88,6 +98,10 @@ module Roo def empty? false end + + def presence + empty? ? nil : self + end end end end diff --git a/lib/roo/excelx/cell/boolean.rb b/lib/roo/excelx/cell/boolean.rb index fe1f691..2cdfc22 100644 --- a/lib/roo/excelx/cell/boolean.rb +++ b/lib/roo/excelx/cell/boolean.rb @@ -1,17 +1,20 @@ +# frozen_string_literal: true + module Roo class Excelx class Cell class Boolean < Cell::Base - attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate + attr_reader :value, :formula, :format, :cell_value, :coordinate + + attr_reader_with_default default_type: :boolean, cell_type: :boolean def initialize(value, formula, style, link, coordinate) - super(value, formula, nil, style, link, coordinate) - @type = @cell_type = :boolean - @value = link? ? Roo::Link.new(link, value) : create_boolean(value) + super(value, formula, nil, style, nil, coordinate) + @value = link ? Roo::Link.new(link, value) : create_boolean(value) end def formatted_value - value ? 'TRUE'.freeze : 'FALSE'.freeze + value ? 'TRUE' : 'FALSE' end private @@ -19,7 +22,7 @@ module Roo def create_boolean(value) # FIXME: Using a boolean will cause methods like Base#to_csv to fail. # Roo is using some method to ignore false/nil values. - value.to_i == 1 ? true : false + value.to_i == 1 end end end diff --git a/lib/roo/excelx/cell/date.rb b/lib/roo/excelx/cell/date.rb index 8e2c6cb..8627bc5 100644 --- a/lib/roo/excelx/cell/date.rb +++ b/lib/roo/excelx/cell/date.rb @@ -4,23 +4,23 @@ module Roo class Excelx class Cell class Date < Roo::Excelx::Cell::DateTime - attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate + attr_reader :value, :formula, :format, :cell_type, :cell_value, :coordinate + + attr_reader_with_default default_type: :date def initialize(value, formula, excelx_type, style, link, base_date, coordinate) # NOTE: Pass all arguments to the parent class, DateTime. super - @type = :date @format = excelx_type.last - @value = link? ? Roo::Link.new(link, value) : create_date(base_date, value) + @value = link ? Roo::Link.new(link, value) : create_date(base_date, value) end private - def create_date(base_date, value) - date = base_date + value.to_i - yyyy, mm, dd = date.strftime('%Y-%m-%d').split('-') + def create_datetime(_,_); end - ::Date.new(yyyy.to_i, mm.to_i, dd.to_i) + def create_date(base_date, value) + base_date + value.to_i end end end diff --git a/lib/roo/excelx/cell/datetime.rb b/lib/roo/excelx/cell/datetime.rb index 35d93ac..63f3260 100644 --- a/lib/roo/excelx/cell/datetime.rb +++ b/lib/roo/excelx/cell/datetime.rb @@ -1,16 +1,21 @@ +# frozen_string_literal: true + require 'date' module Roo class Excelx class Cell class DateTime < Cell::Base - attr_reader :value, :formula, :format, :cell_value, :link, :coordinate + SECONDS_IN_DAY = 60 * 60 * 24 + + attr_reader :value, :formula, :format, :cell_value, :coordinate - def initialize(value, formula, excelx_type, style, link, base_date, coordinate) - super(value, formula, excelx_type, style, link, coordinate) - @type = :datetime + attr_reader_with_default default_type: :datetime + + def initialize(value, formula, excelx_type, style, link, base_timestamp, coordinate) + super(value, formula, excelx_type, style, nil, coordinate) @format = excelx_type.last - @value = link? ? Roo::Link.new(link, value) : create_datetime(base_date, value) + @value = link ? Roo::Link.new(link, value) : create_datetime(base_timestamp, value) end # Public: Returns formatted value for a datetime. Format's can be an @@ -78,7 +83,7 @@ module Roo TIME_FORMATS = { 'hh' => '%H', # Hour (24): 01 - 'h' => '%-k'.freeze, # Hour (24): 1 + 'h' => '%-k', # Hour (24): 1 # 'hh'.freeze => '%I'.freeze, # Hour (12): 08 # 'h'.freeze => '%-l'.freeze, # Hour (12): 8 'mm' => '%M', # Minute: 01 @@ -92,18 +97,9 @@ module Roo '0' => '%1N' # Fractional Seconds: tenths. } - def create_datetime(base_date, value) - date = base_date + value.to_f.round(6) - datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N') - t = round_datetime(datetime_string) - - ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec) - end - - def round_datetime(datetime_string) - /(?\d+)-(?\d+)-(?
\d+) (?\d+):(?\d+):(?\d+.\d+)/ =~ datetime_string - - ::Time.new(yyyy, mm, dd, hh, mi, ss.to_r).round(0) + def create_datetime(base_timestamp, value) + timestamp = (base_timestamp + (value.to_f.round(6) * SECONDS_IN_DAY)).round(0) + ::Time.at(timestamp).utc.to_datetime end end end diff --git a/lib/roo/excelx/cell/empty.rb b/lib/roo/excelx/cell/empty.rb index 49a20e7..f0c683c 100644 --- a/lib/roo/excelx/cell/empty.rb +++ b/lib/roo/excelx/cell/empty.rb @@ -3,10 +3,11 @@ module Roo class Excelx class Cell class Empty < Cell::Base - attr_reader :value, :formula, :format, :cell_type, :cell_value, :hyperlink, :coordinate + attr_reader :value, :formula, :format, :cell_type, :cell_value, :coordinate + + attr_reader_with_default default_type: nil, style: nil def initialize(coordinate) - @value = @formula = @format = @cell_type = @cell_value = @hyperlink = nil @coordinate = coordinate end diff --git a/lib/roo/excelx/cell/number.rb b/lib/roo/excelx/cell/number.rb index 2015562..9f23c4f 100644 --- a/lib/roo/excelx/cell/number.rb +++ b/lib/roo/excelx/cell/number.rb @@ -1,16 +1,19 @@ +# frozen_string_literal: true + module Roo class Excelx class Cell class Number < Cell::Base - attr_reader :value, :formula, :format, :cell_value, :link, :coordinate + attr_reader :value, :formula, :format, :cell_value, :coordinate + + # FIXME: change default_type to number. This will break brittle tests. + attr_reader_with_default default_type: :float def initialize(value, formula, excelx_type, style, link, coordinate) super - # FIXME: change @type to number. This will break brittle tests. # FIXME: Excelx_type is an array, but the first value isn't used. - @type = :float @format = excelx_type.last - @value = link? ? Roo::Link.new(link, value) : create_numeric(value) + @value = link ? Roo::Link.new(link, value) : create_numeric(value) end def create_numeric(number) @@ -21,48 +24,50 @@ module Roo when /\.0/ Float(number) else - (number.include?('.') || (/\A[-+]?\d+E[-+]\d+\z/i =~ number)) ? Float(number) : Integer(number) + (number.include?('.') || (/\A[-+]?\d+E[-+]?\d+\z/i =~ number)) ? Float(number) : Integer(number, 10) end end def formatted_value return @cell_value if Excelx::ERROR_VALUES.include?(@cell_value) - formatter = formats[@format] + formatter = generate_formatter(@format) if formatter.is_a? Proc formatter.call(@cell_value) - elsif zero_padded_number? - "%0#{@format.size}d" % @cell_value else Kernel.format(formatter, @cell_value) end end - def formats + def generate_formatter(format) # FIXME: numbers can be other colors besides red: # [BLACK], [BLUE], [CYAN], [GREEN], [MAGENTA], [RED], [WHITE], [YELLOW], [COLOR n] - { - 'General' => '%.0f', - '0' => '%.0f', - '0.00' => '%.2f', - '0.000000' => '%.6f', - '#,##0' => number_format('%.0f'), - '#,##0.00' => number_format('%.2f'), - '0%' => proc do |number| - Kernel.format('%d%', number.to_f * 100) - end, - '0.00%' => proc do |number| - Kernel.format('%.2f%', number.to_f * 100) - end, - '0.00E+00' => '%.2E', - '#,##0 ;(#,##0)' => number_format('%.0f', '(%.0f)'), - '#,##0 ;[Red](#,##0)' => number_format('%.0f', '[Red](%.0f)'), - '#,##0.00;(#,##0.00)' => number_format('%.2f', '(%.2f)'), - '#,##0.00;[Red](#,##0.00)' => number_format('%.2f', '[Red](%.2f)'), + case format + when /^General$/i then '%.0f' + when '0' then '%.0f' + when /^(0+)$/ then "%0#{$1.size}d" + when /^0\.(0+)$/ then "%.#{$1.size}f" + when '#,##0' then number_format('%.0f') + when '#,##0.00' then number_format('%.2f') + when '0%' + proc do |number| + Kernel.format('%d%%', number.to_f * 100) + end + when '0.00%' + proc do |number| + Kernel.format('%.2f%%', number.to_f * 100) + end + when '0.00E+00' then '%.2E' + when '#,##0 ;(#,##0)' then number_format('%.0f', '(%.0f)') + when '#,##0 ;[Red](#,##0)' then number_format('%.0f', '[Red](%.0f)') + when '#,##0.00;(#,##0.00)' then number_format('%.2f', '(%.2f)') + when '#,##0.00;[Red](#,##0.00)' then number_format('%.2f', '[Red](%.2f)') # FIXME: not quite sure what the format should look like in this case. - '##0.0E+0' => '%.1E', - '@' => proc { |number| number } - } + when '##0.0E+0' then '%.1E' + when '@' then proc { |number| number } + else + raise "Unknown format: #{format.inspect}" + end end private @@ -77,10 +82,6 @@ module Roo Kernel.format(formatter, number).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse end end - - def zero_padded_number? - @format[/0+/] == @format - end end end end diff --git a/lib/roo/excelx/cell/string.rb b/lib/roo/excelx/cell/string.rb index 7967806..dacf0b6 100644 --- a/lib/roo/excelx/cell/string.rb +++ b/lib/roo/excelx/cell/string.rb @@ -2,12 +2,12 @@ module Roo class Excelx class Cell class String < Cell::Base - attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate + attr_reader :value, :formula, :format, :cell_value, :coordinate + + attr_reader_with_default default_type: :string, cell_type: :string def initialize(value, formula, style, link, coordinate) super(value, formula, nil, style, link, coordinate) - @type = @cell_type = :string - @value = link? ? Roo::Link.new(link, value) : value end def empty? diff --git a/lib/roo/excelx/cell/time.rb b/lib/roo/excelx/cell/time.rb index d661ab8..a1f0864 100644 --- a/lib/roo/excelx/cell/time.rb +++ b/lib/roo/excelx/cell/time.rb @@ -4,15 +4,16 @@ module Roo class Excelx class Cell class Time < Roo::Excelx::Cell::DateTime - attr_reader :value, :formula, :format, :cell_value, :link, :coordinate + attr_reader :value, :formula, :format, :cell_value, :coordinate + + attr_reader_with_default default_type: :time def initialize(value, formula, excelx_type, style, link, base_date, coordinate) # NOTE: Pass all arguments to DateTime super class. super - @type = :time @format = excelx_type.last @datetime = create_datetime(base_date, value) - @value = link? ? Roo::Link.new(link, value) : (value.to_f * 86_400).to_i + @value = link ? Roo::Link.new(link, value) : (value.to_f * 86_400).to_i end def formatted_value diff --git a/lib/roo/excelx/comments.rb b/lib/roo/excelx/comments.rb index 1a89908..65044a9 100644 --- a/lib/roo/excelx/comments.rb +++ b/lib/roo/excelx/comments.rb @@ -12,10 +12,10 @@ module Roo def extract_comments return {} unless doc_exists? - Hash[doc.xpath('//comments/commentList/comment').map do |comment| + doc.xpath('//comments/commentList/comment').each_with_object({}) do |comment, hash| value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text - [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value] - end] + hash[::Roo::Utils.ref_to_key(comment['ref'].to_s)] = value + end end end end diff --git a/lib/roo/excelx/coordinate.rb b/lib/roo/excelx/coordinate.rb index 53b24ba..4f61b17 100644 --- a/lib/roo/excelx/coordinate.rb +++ b/lib/roo/excelx/coordinate.rb @@ -1,11 +1,18 @@ module Roo class Excelx - class Coordinate - attr_accessor :row, :column + class Coordinate < ::Array def initialize(row, column) - @row = row - @column = column + super() << row << column + freeze + end + + def row + self[0] + end + + def column + self[1] end end end diff --git a/lib/roo/excelx/extractor.rb b/lib/roo/excelx/extractor.rb old mode 100644 new mode 100755 index 1cdd13b..b87a84e --- a/lib/roo/excelx/extractor.rb +++ b/lib/roo/excelx/extractor.rb @@ -1,16 +1,34 @@ +# frozen_string_literal: true + +require "roo/helpers/weak_instance_cache" + module Roo class Excelx class Extractor - def initialize(path) + include Roo::Helpers::WeakInstanceCache + + COMMON_STRINGS = { + t: "t", + r: "r", + s: "s", + ref: "ref", + html_tag_open: "", + html_tag_closed: "" + } + + def initialize(path, options = {}) @path = path + @options = options end private def doc - raise FileNotFound, "#{@path} file not found" unless doc_exists? + instance_cache(:@doc) do + raise FileNotFound, "#{@path} file not found" unless doc_exists? - ::Roo::Utils.load_xml(@path).remove_namespaces! + ::Roo::Utils.load_xml(@path).remove_namespaces! + end end def doc_exists? diff --git a/lib/roo/excelx/format.rb b/lib/roo/excelx/format.rb index 72b36d9..1c1968a 100644 --- a/lib/roo/excelx/format.rb +++ b/lib/roo/excelx/format.rb @@ -1,49 +1,57 @@ +# frozen_string_literal: true + module Roo class Excelx module Format + extend self EXCEPTIONAL_FORMATS = { 'h:mm am/pm' => :date, 'h:mm:ss am/pm' => :date } STANDARD_FORMATS = { - 0 => 'General'.freeze, - 1 => '0'.freeze, - 2 => '0.00'.freeze, - 3 => '#,##0'.freeze, - 4 => '#,##0.00'.freeze, - 9 => '0%'.freeze, - 10 => '0.00%'.freeze, - 11 => '0.00E+00'.freeze, - 12 => '# ?/?'.freeze, - 13 => '# ??/??'.freeze, - 14 => 'mm-dd-yy'.freeze, - 15 => 'd-mmm-yy'.freeze, - 16 => 'd-mmm'.freeze, - 17 => 'mmm-yy'.freeze, - 18 => 'h:mm AM/PM'.freeze, - 19 => 'h:mm:ss AM/PM'.freeze, - 20 => 'h:mm'.freeze, - 21 => 'h:mm:ss'.freeze, - 22 => 'm/d/yy h:mm'.freeze, - 37 => '#,##0 ;(#,##0)'.freeze, - 38 => '#,##0 ;[Red](#,##0)'.freeze, - 39 => '#,##0.00;(#,##0.00)'.freeze, - 40 => '#,##0.00;[Red](#,##0.00)'.freeze, - 45 => 'mm:ss'.freeze, - 46 => '[h]:mm:ss'.freeze, - 47 => 'mmss.0'.freeze, - 48 => '##0.0E+0'.freeze, - 49 => '@'.freeze + 0 => 'General', + 1 => '0', + 2 => '0.00', + 3 => '#,##0', + 4 => '#,##0.00', + 9 => '0%', + 10 => '0.00%', + 11 => '0.00E+00', + 12 => '# ?/?', + 13 => '# ??/??', + 14 => 'mm-dd-yy', + 15 => 'd-mmm-yy', + 16 => 'd-mmm', + 17 => 'mmm-yy', + 18 => 'h:mm AM/PM', + 19 => 'h:mm:ss AM/PM', + 20 => 'h:mm', + 21 => 'h:mm:ss', + 22 => 'm/d/yy h:mm', + 37 => '#,##0 ;(#,##0)', + 38 => '#,##0 ;[Red](#,##0)', + 39 => '#,##0.00;(#,##0.00)', + 40 => '#,##0.00;[Red](#,##0.00)', + 45 => 'mm:ss', + 46 => '[h]:mm:ss', + 47 => 'mmss.0', + 48 => '##0.0E+0', + 49 => '@' } def to_type(format) + @to_type ||= {} + @to_type[format] ||= _to_type(format) + end + + def _to_type(format) format = format.to_s.downcase if (type = EXCEPTIONAL_FORMATS[format]) type elsif format.include?('#') :float - elsif !format.match(/d+(?![\]])/).nil? || format.include?('y') + elsif format.include?('y') || !format.match(/d+(?![\]])/).nil? if format.include?('h') || format.include?('s') :datetime else @@ -58,7 +66,6 @@ module Roo end end - module_function :to_type end - end + end end diff --git a/lib/roo/excelx/images.rb b/lib/roo/excelx/images.rb new file mode 100644 index 0000000..cf0221f --- /dev/null +++ b/lib/roo/excelx/images.rb @@ -0,0 +1,26 @@ +require 'roo/excelx/extractor' + +module Roo + class Excelx + class Images < Excelx::Extractor + + # Returns: Hash { id1: extracted_file_name1 }, + # Example: { "rId1"=>"roo_media_image1.png", + # "rId2"=>"roo_media_image2.png", + # "rId3"=>"roo_media_image3.png" } + def list + @images ||= extract_images_names + end + + private + + def extract_images_names + return {} unless doc_exists? + + doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, hash| + hash[rel['Id']] = "roo" + rel['Target'].gsub(/\.\.\/|\//, '_') + end + end + end + end +end diff --git a/lib/roo/excelx/relationships.rb b/lib/roo/excelx/relationships.rb index 8a0ed97..19f9919 100644 --- a/lib/roo/excelx/relationships.rb +++ b/lib/roo/excelx/relationships.rb @@ -16,9 +16,9 @@ module Roo def extract_relationships return [] unless doc_exists? - Hash[doc.xpath('/Relationships/Relationship').map do |rel| - [rel.attribute('Id').text, rel] - end] + doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, hash| + hash[rel['Id']] = rel + end end end end diff --git a/lib/roo/excelx/shared.rb b/lib/roo/excelx/shared.rb old mode 100644 new mode 100755 index 3677fa2..bcd2c08 --- a/lib/roo/excelx/shared.rb +++ b/lib/roo/excelx/shared.rb @@ -4,12 +4,15 @@ module Roo # reduce memory usage and reduce the number of objects being passed # to various inititializers. class Shared - attr_accessor :comments_files, :sheet_files, :rels_files - def initialize(dir) + attr_accessor :comments_files, :sheet_files, :rels_files, :image_rels, :image_files + def initialize(dir, options = {}) @dir = dir @comments_files = [] @sheet_files = [] @rels_files = [] + @options = options + @image_rels = [] + @image_files = [] end def styles @@ -17,7 +20,7 @@ module Roo end def shared_strings - @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml')) + @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'), @options) end def workbook @@ -27,6 +30,10 @@ module Roo def base_date workbook.base_date end + + def base_timestamp + workbook.base_timestamp + end end end end diff --git a/lib/roo/excelx/shared_strings.rb b/lib/roo/excelx/shared_strings.rb index f7caf7c..e70b623 100755 --- a/lib/roo/excelx/shared_strings.rb +++ b/lib/roo/excelx/shared_strings.rb @@ -1,16 +1,10 @@ +# frozen_string_literal: true + require 'roo/excelx/extractor' module Roo class Excelx class SharedStrings < Excelx::Extractor - - COMMON_STRINGS = { - t: "t", - r: "r", - html_tag_open: "", - html_tag_closed: "" - } - def [](index) to_a[index] end @@ -26,6 +20,7 @@ module Roo # Use to_html or to_a for html returns # See what is happening with commit??? def use_html?(index) + return false if @options[:disable_html_wrapper] to_html[index][/<([biu]|sup|sub)>/] end @@ -45,7 +40,7 @@ module Roo document = fix_invalid_shared_strings(doc) # read the shared strings xml document document.xpath('/sst/si').map do |si| - shared_string = '' + shared_string = +"" si.children.each do |elem| case elem.name when 'r' @@ -65,7 +60,7 @@ module Roo fix_invalid_shared_strings(doc) # read the shared strings xml document doc.xpath('/sst/si').map do |si| - html_string = '' + html_string = ''.dup si.children.each do |elem| case elem.name when 'r' @@ -95,7 +90,7 @@ module Roo # # Expected Output ::: "TEXT" def extract_html_r(r_elem) - str = '' + str = +"" xml_elems = { sub: false, sup: false, @@ -103,7 +98,6 @@ module Roo i: false, u: false } - b, i, u, sub, sup = false, false, false, false, false r_elem.children.each do |elem| case elem.name when 'rPr' @@ -141,13 +135,13 @@ module Roo # This will return an html string def create_html(text, formatting) - tmp_str = '' + tmp_str = +"" formatting.each do |elem, val| tmp_str << "<#{elem}>" if val end tmp_str << text - reverse_format = Hash[formatting.to_a.reverse] - reverse_format.each do |elem, val| + + formatting.reverse_each do |elem, val| tmp_str << "" if val end tmp_str diff --git a/lib/roo/excelx/sheet.rb b/lib/roo/excelx/sheet.rb index add92f0..840a053 100644 --- a/lib/roo/excelx/sheet.rb +++ b/lib/roo/excelx/sheet.rb @@ -4,11 +4,15 @@ module Roo class Sheet extend Forwardable - delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared + delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels] => :@shared + + attr_reader :images def initialize(name, shared, sheet_index, options = {}) @name = name @shared = shared + @sheet_index = sheet_index + @images = Images.new(image_rels[sheet_index]).list @rels = Relationships.new(rels_files[sheet_index]) @comments = Comments.new(comments_files[sheet_index]) @sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options) @@ -19,7 +23,14 @@ module Roo end def present_cells - @present_cells ||= cells.select { |_, cell| cell && !cell.empty? } + @present_cells ||= begin + warn %{ +[DEPRECATION] present_cells is deprecated. Alternate: + with activesupport => cells[key].presence + without activesupport => cells[key]&.presence + } + cells.select { |_, cell| cell&.presence } + end end # Yield each row as array of Excelx::Cell objects @@ -39,33 +50,33 @@ module Roo def row(row_number) first_column.upto(last_column).map do |col| - cells[[row_number, col]] - end.map { |cell| cell && cell.value } + cells[[row_number, col]]&.value + end end def column(col_number) first_row.upto(last_row).map do |row| - cells[[row, col_number]] - end.map { |cell| cell && cell.value } + cells[[row, col_number]]&.value + end end # returns the number of the first non-empty row def first_row - @first_row ||= present_cells.keys.map { |row, _| row }.min + @first_row ||= first_last_row_col[:first_row] end def last_row - @last_row ||= present_cells.keys.map { |row, _| row }.max + @last_row ||= first_last_row_col[:last_row] end # returns the number of the first non-empty column def first_column - @first_column ||= present_cells.keys.map { |_, col| col }.min + @first_column ||= first_last_row_col[:first_column] end # returns the number of the last non-empty column def last_column - @last_column ||= present_cells.keys.map { |_, col| col }.max + @last_column ||= first_last_row_col[:last_column] end def excelx_format(key) @@ -107,6 +118,34 @@ module Roo (cell.coordinate.column - 1 - last_column).times { pad << nil } pad end + + def first_last_row_col + @first_last_row_col ||= begin + first_row = last_row = first_col = last_col = nil + + cells.each do |(row, col), cell| + next unless cell&.presence + first_row ||= row + last_row ||= row + first_col ||= col + last_col ||= col + + if row > last_row + last_row = row + elsif row < first_row + first_row = row + end + + if col > last_col + last_col = col + elsif col < first_col + first_col = col + end + end + + {first_row: first_row, last_row: last_row, first_column: first_col, last_column: last_col} + end + end end end end diff --git a/lib/roo/excelx/sheet_doc.rb b/lib/roo/excelx/sheet_doc.rb index a705958..8b0c686 100755 --- a/lib/roo/excelx/sheet_doc.rb +++ b/lib/roo/excelx/sheet_doc.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'forwardable' require 'roo/excelx/extractor' @@ -5,7 +7,7 @@ module Roo class Excelx class SheetDoc < Excelx::Extractor extend Forwardable - delegate [:styles, :workbook, :shared_strings, :base_date] => :@shared + delegate [:workbook] => :@shared def initialize(path, relationships, shared, options = {}) super(path) @@ -19,7 +21,12 @@ module Roo end def hyperlinks(relationships) - @hyperlinks ||= extract_hyperlinks(relationships) + # If you're sure you're not going to need this hyperlinks you can discard it + @hyperlinks ||= if @options[:no_hyperlinks] + {} + else + extract_hyperlinks(relationships) + end end # Get the dimensions for the sheet. @@ -39,13 +46,10 @@ module Roo def each_cell(row_xml) return [] unless row_xml row_xml.children.each do |cell_element| - # If you're sure you're not going to need this hyperlinks you can discard it - hyperlinks = unless @options[:no_hyperlinks] - key = ::Roo::Utils.ref_to_key(cell_element['r']) - hyperlinks(@relationships)[key] - end + coordinate = ::Roo::Utils.extract_coordinate(cell_element["r"]) + hyperlinks = hyperlinks(@relationships)[coordinate] - yield cell_from_xml(cell_element, hyperlinks) + yield cell_from_xml(cell_element, hyperlinks, coordinate) end end @@ -53,13 +57,13 @@ module Roo def cell_value_type(type, format) case type - when 's'.freeze + when 's' :shared - when 'b'.freeze + when 'b' :boolean - when 'str'.freeze + when 'str' :string - when 'inlineStr'.freeze + when 'inlineStr' :inlinestr else Excelx::Format.to_type(format) @@ -74,42 +78,58 @@ module Roo # # hyperlink - a String for the hyperlink for the cell or nil when no # hyperlink is present. + # coordinate - a Roo::Excelx::Coordinate for the coordinate for the cell + # or nil to extract coordinate from cell_xml. + # empty_cell - an Optional Boolean value. # # Examples # - # cells_from_xml(, nil) + # cells_from_xml(, nil, nil) # # => # # Returns a type of . - def cell_from_xml(cell_xml, hyperlink) - coordinate = extract_coordinate(cell_xml['r']) - return Excelx::Cell::Empty.new(coordinate) if cell_xml.children.empty? + def cell_from_xml(cell_xml, hyperlink, coordinate, empty_cell=true) + coordinate ||= ::Roo::Utils.extract_coordinate(cell_xml["r"]) + cell_xml_children = cell_xml.children + return create_empty_cell(coordinate, empty_cell) if cell_xml_children.empty? # NOTE: This is error prone, to_i will silently turn a nil into a 0. # This works by coincidence because Format[0] is General. - style = cell_xml['s'].to_i - format = styles.style_format(style) - value_type = cell_value_type(cell_xml['t'], format) + style = cell_xml["s"].to_i formula = nil - cell_xml.children.each do |cell| + cell_xml_children.each do |cell| case cell.name when 'is' - content_arr = cell.search('t').map(&:content) - unless content_arr.empty? - return Excelx::Cell.create_cell(:string, content_arr.join(''), formula, style, hyperlink, coordinate) + content = +"" + cell.children.each do |inline_str| + if inline_str.name == 't' + content << inline_str.content + end + end + unless content.empty? + return Excelx::Cell.cell_class(:string).new(content, formula, style, hyperlink, coordinate) end when 'f' formula = cell.content when 'v' - return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate) + format = style_format(style) + value_type = cell_value_type(cell_xml["t"], format) + + return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate) end end - Excelx::Cell::Empty.new(coordinate) + create_empty_cell(coordinate) end - def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate) + def create_empty_cell(coordinate, empty_cell) + if empty_cell + Excelx::Cell::Empty.new(coordinate) + end + end + + def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate) # NOTE: format.to_s can replace excelx_type as an argument for # Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but # it will break some brittle tests. @@ -125,11 +145,12 @@ module Roo # 3. formula case value_type when :shared - value = shared_strings.use_html?(cell.content.to_i) ? shared_strings.to_html[cell.content.to_i] : shared_strings[cell.content.to_i] - Excelx::Cell.create_cell(:string, value, formula, style, hyperlink, coordinate) + cell_content = cell.content.to_i + value = shared_strings.use_html?(cell_content) ? shared_strings.to_html[cell_content] : shared_strings[cell_content] + Excelx::Cell.cell_class(:string).new(value, formula, style, hyperlink, coordinate) when :boolean, :string value = cell.content - Excelx::Cell.create_cell(value_type, value, formula, style, hyperlink, coordinate) + Excelx::Cell.cell_class(value_type).new(value, formula, style, hyperlink, coordinate) when :time, :datetime cell_content = cell.content.to_f # NOTE: A date will be a whole number. A time will have be > 1. And @@ -148,35 +169,32 @@ module Roo else :date end - Excelx::Cell.create_cell(cell_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate) + base_value = cell_type == :date ? base_date : base_timestamp + Excelx::Cell.cell_class(cell_type).new(cell_content, formula, excelx_type, style, hyperlink, base_value, coordinate) when :date - Excelx::Cell.create_cell(value_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate) + Excelx::Cell.cell_class(:date).new(cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate) else - Excelx::Cell.create_cell(:number, cell.content, formula, excelx_type, style, hyperlink, coordinate) + Excelx::Cell.cell_class(:number).new(cell.content, formula, excelx_type, style, hyperlink, coordinate) end end - def extract_coordinate(coordinate) - row, column = ::Roo::Utils.split_coordinate(coordinate) - - Excelx::Coordinate.new(row, column) - end - def extract_hyperlinks(relationships) return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink')) - Hash[hyperlinks.map do |hyperlink| - if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text]) - [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text] + hyperlinks.each_with_object({}) do |hyperlink, hash| + if relationship = relationships[hyperlink['id']] + target_link = relationship['Target'] + target_link += "##{hyperlink['location']}" if hyperlink['location'] + hash[::Roo::Utils.ref_to_key(hyperlink["ref"].to_s)] = target_link end - end.compact] + end end def expand_merged_ranges(cells) # Extract merged ranges from xml merges = {} doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml| - tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) } + tl, br = mergecell_xml["ref"].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) } for row in tl[0]..br[0] do for col in tl[1]..br[1] do next if row == tl[0] && col == tl[1] @@ -191,10 +209,14 @@ module Roo end def extract_cells(relationships) - extracted_cells = Hash[doc.xpath('/worksheet/sheetData/row/c').map do |cell_xml| - key = ::Roo::Utils.ref_to_key(cell_xml['r']) - [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])] - end] + extracted_cells = {} + empty_cell = @options[:empty_cell] + + doc.xpath('/worksheet/sheetData/row/c').each do |cell_xml| + coordinate = ::Roo::Utils.extract_coordinate(cell_xml["r"]) + cell = cell_from_xml(cell_xml, hyperlinks(relationships)[coordinate], coordinate, empty_cell) + extracted_cells[coordinate] = cell if cell + end expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges] @@ -203,9 +225,25 @@ module Roo def extract_dimensions Roo::Utils.each_element(@path, 'dimension') do |dimension| - return dimension.attributes['ref'].value + return dimension["ref"] end end + + def style_format(style) + @shared.styles.style_format(style) + end + + def base_date + @shared.base_date + end + + def base_timestamp + @shared.base_timestamp + end + + def shared_strings + @shared.shared_strings + end end end end diff --git a/lib/roo/excelx/styles.rb b/lib/roo/excelx/styles.rb index 87f1713..25f0bf0 100644 --- a/lib/roo/excelx/styles.rb +++ b/lib/roo/excelx/styles.rb @@ -55,9 +55,9 @@ module Roo end def extract_num_fmts - Hash[doc.xpath('//numFmt').map do |num_fmt| - [num_fmt['numFmtId'], num_fmt['formatCode']] - end] + doc.xpath('//numFmt').each_with_object({}) do |num_fmt, hash| + hash[num_fmt['numFmtId']] = num_fmt['formatCode'] + end end end end diff --git a/lib/roo/excelx/workbook.rb b/lib/roo/excelx/workbook.rb index 7ef841f..c21bb1f 100644 --- a/lib/roo/excelx/workbook.rb +++ b/lib/roo/excelx/workbook.rb @@ -29,13 +29,17 @@ module Roo # aka labels def defined_names - Hash[doc.xpath('//definedName').map do |defined_name| + doc.xpath('//definedName').each_with_object({}) do |defined_name, hash| # "Sheet1!$C$5" sheet, coordinates = defined_name.text.split('!$', 2) col, row = coordinates.split('$') name = defined_name['name'] - [name, Label.new(name, sheet, row, col)] - end] + hash[name] = Label.new(name, sheet, row, col) + end + end + + def base_timestamp + @base_timestamp ||= base_date.to_datetime.to_time.to_i end def base_date diff --git a/lib/roo/helpers/default_attr_reader.rb b/lib/roo/helpers/default_attr_reader.rb new file mode 100644 index 0000000..a02ba84 --- /dev/null +++ b/lib/roo/helpers/default_attr_reader.rb @@ -0,0 +1,20 @@ +# frozen_string_literal: true + +module Roo + module Helpers + module DefaultAttrReader + def attr_reader_with_default(attr_hash) + attr_hash.each do |attr_name, default_value| + instance_variable = :"@#{attr_name}" + define_method attr_name do + if instance_variable_defined? instance_variable + instance_variable_get instance_variable + else + default_value + end + end + end + end + end + end +end diff --git a/lib/roo/helpers/weak_instance_cache.rb b/lib/roo/helpers/weak_instance_cache.rb new file mode 100644 index 0000000..db26de1 --- /dev/null +++ b/lib/roo/helpers/weak_instance_cache.rb @@ -0,0 +1,41 @@ +# frozen_string_literal: true + +require "weakref" + +module Roo + module Helpers + module WeakInstanceCache + private + + def instance_cache(key) + object = nil + + if instance_variable_defined?(key) && (ref = instance_variable_get(key)) && ref.weakref_alive? + begin + object = ref.__getobj__ + rescue => e + unless (defined?(::WeakRef::RefError) && e.is_a?(::WeakRef::RefError)) || (defined?(RefError) && e.is_a?(RefError)) + raise e + end + end + end + + unless object + object = yield + ObjectSpace.define_finalizer(object, instance_cache_finalizer(key)) + instance_variable_set(key, WeakRef.new(object)) + end + + object + end + + def instance_cache_finalizer(key) + proc do |object_id| + if instance_variable_defined?(key) && (ref = instance_variable_get(key)) && (!ref.weakref_alive? || ref.__getobj__.object_id == object_id) + remove_instance_variable(key) + end + end + end + end + end +end diff --git a/lib/roo/open_office.rb b/lib/roo/open_office.rb index 6ccbe85..f172363 100644 --- a/lib/roo/open_office.rb +++ b/lib/roo/open_office.rb @@ -1,3 +1,5 @@ +# frozen_string_literal: true + require 'date' require 'nokogiri' require 'cgi' @@ -11,9 +13,9 @@ module Roo class OpenOffice < Roo::Base extend Roo::Tempdir - ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'.freeze - XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']".freeze - XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']".freeze + ERROR_MISSING_CONTENT_XML = 'file missing required content.xml' + XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']" + XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']" # initialization and opening of a spreadsheet file # values for packed: :zip @@ -561,7 +563,7 @@ module Roo end def read_labels - @label ||= Hash[doc.xpath('//table:named-range').map do |ne| + @label ||= doc.xpath('//table:named-range').each_with_object({}) do |ne, hash| #- # $Sheet1.$C$5 #+ @@ -569,8 +571,8 @@ module Roo sheetname, coords = attribute(ne, 'cell-range-address').to_s.split('.$') col, row = coords.split('$') sheetname = sheetname[1..-1] if sheetname[0, 1] == '$' - [name, [sheetname, row, col]] - end] + hash[name] = [sheetname, row, col] + end end def read_styles(style_elements) diff --git a/lib/roo/spreadsheet.rb b/lib/roo/spreadsheet.rb index 1eef58d..cdc93f0 100644 --- a/lib/roo/spreadsheet.rb +++ b/lib/roo/spreadsheet.rb @@ -24,7 +24,7 @@ module Roo options[:file_warning] = :ignore extension.tr('.', '').downcase.to_sym else - res = ::File.extname((path =~ /\A#{::URI.regexp}\z/) ? ::URI.parse(::URI.encode(path)).path : path) + res = ::File.extname((path =~ /\A#{::URI::DEFAULT_PARSER.make_regexp}\z/) ? ::URI.parse(::URI.encode(path)).path : path) res.tr('.', '').downcase.to_sym end end diff --git a/lib/roo/utils.rb b/lib/roo/utils.rb index 17b672d..dcaedde 100644 --- a/lib/roo/utils.rb +++ b/lib/roo/utils.rb @@ -4,27 +4,39 @@ module Roo LETTERS = ('A'..'Z').to_a - def split_coordinate(str) - @split_coordinate ||= {} + def extract_coordinate(s) + num = letter_num = 0 + num_only = false - @split_coordinate[str] ||= begin - letter, number = split_coord(str) - x = letter_to_number(letter) - y = number - [y, x] + s.each_byte do |b| + if !num_only && (index = char_index(b)) + letter_num *= 26 + letter_num += index + elsif index = num_index(b) + num_only = true + num *= 10 + num += index + else + fail ArgumentError + end end + fail ArgumentError if letter_num == 0 || !num_only + + Excelx::Coordinate.new(num, letter_num) end - alias_method :ref_to_key, :split_coordinate + alias_method :ref_to_key, :extract_coordinate - def split_coord(s) - if s =~ /([a-zA-Z]+)([0-9]+)/ - letter = Regexp.last_match[1] - number = Regexp.last_match[2].to_i - else - fail ArgumentError - end - [letter, number] + def split_coordinate(str) + warn "[DEPRECATION] `Roo::Utils.split_coordinate` is deprecated. Please use `Roo::Utils.extract_coordinate` instead." + extract_coordinate(str) + end + + + + def split_coord(str) + coord = extract_coordinate(str) + [number_to_letter(coord.column), coord.row] end # convert a number to something like 'AB' (1 => 'A', 2 => 'B', ...) @@ -56,8 +68,8 @@ module Roo cells = str.split(':') return 1 if cells.count == 1 raise ArgumentError.new("invalid range string: #{str}. Supported range format 'A1:B2'") if cells.count != 2 - x1, y1 = split_coordinate(cells[0]) - x2, y2 = split_coordinate(cells[1]) + x1, y1 = extract_coordinate(cells[0]) + x2, y2 = extract_coordinate(cells[1]) (x2 - (x1 - 1)) * (y2 - (y1 - 1)) end @@ -69,10 +81,27 @@ module Roo # Yield each element of a given type ('row', 'c', etc.) to caller def each_element(path, elements) + elements = Array(elements) Nokogiri::XML::Reader(::File.open(path, 'rb'), nil, nil, Nokogiri::XML::ParseOptions::NOBLANKS).each do |node| - next unless node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT && Array(elements).include?(node.name) + next unless node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT && elements.include?(node.name) yield Nokogiri::XML(node.outer_xml).root if block_given? end end + + private + + def char_index(byte) + if byte >= 65 && byte <= 90 + byte - 64 + elsif byte >= 97 && byte <= 122 + byte - 96 + end + end + + def num_index(byte) + if byte >= 48 && byte <= 57 + byte - 48 + end + end end end diff --git a/lib/roo/version.rb b/lib/roo/version.rb index bdd16dc..7fd96d7 100644 --- a/lib/roo/version.rb +++ b/lib/roo/version.rb @@ -1,3 +1,3 @@ module Roo - VERSION = "2.7.1" + VERSION = "2.8.0" end -- cgit v1.2.3