aboutsummaryrefslogtreecommitdiffstats
path: root/lib/roo
diff options
context:
space:
mode:
Diffstat (limited to 'lib/roo')
-rw-r--r--lib/roo/base.rb121
-rw-r--r--lib/roo/constants.rb8
-rw-r--r--lib/roo/csv.rb32
-rwxr-xr-x[-rw-r--r--]lib/roo/excelx.rb58
-rw-r--r--lib/roo/excelx/cell.rb16
-rw-r--r--lib/roo/excelx/cell/base.rb38
-rw-r--r--lib/roo/excelx/cell/boolean.rb15
-rw-r--r--lib/roo/excelx/cell/date.rb14
-rw-r--r--lib/roo/excelx/cell/datetime.rb32
-rw-r--r--lib/roo/excelx/cell/empty.rb5
-rw-r--r--lib/roo/excelx/cell/number.rb69
-rw-r--r--lib/roo/excelx/cell/string.rb6
-rw-r--r--lib/roo/excelx/cell/time.rb7
-rw-r--r--lib/roo/excelx/comments.rb6
-rw-r--r--lib/roo/excelx/coordinate.rb15
-rwxr-xr-x[-rw-r--r--]lib/roo/excelx/extractor.rb24
-rw-r--r--lib/roo/excelx/format.rb69
-rw-r--r--lib/roo/excelx/images.rb26
-rw-r--r--lib/roo/excelx/relationships.rb6
-rwxr-xr-x[-rw-r--r--]lib/roo/excelx/shared.rb13
-rwxr-xr-xlib/roo/excelx/shared_strings.rb24
-rw-r--r--lib/roo/excelx/sheet.rb59
-rwxr-xr-xlib/roo/excelx/sheet_doc.rb134
-rw-r--r--lib/roo/excelx/styles.rb6
-rw-r--r--lib/roo/excelx/workbook.rb10
-rw-r--r--lib/roo/helpers/default_attr_reader.rb20
-rw-r--r--lib/roo/helpers/weak_instance_cache.rb41
-rw-r--r--lib/roo/open_office.rb14
-rw-r--r--lib/roo/spreadsheet.rb2
-rw-r--r--lib/roo/utils.rb67
-rw-r--r--lib/roo/version.rb2
31 files changed, 628 insertions, 331 deletions
diff --git a/lib/roo/base.rb b/lib/roo/base.rb
index 53e4075..19eb844 100644
--- a/lib/roo/base.rb
+++ b/lib/roo/base.rb
@@ -1,9 +1,7 @@
-# encoding: utf-8
-
-require 'tmpdir'
-require 'stringio'
-require 'nokogiri'
-require 'roo/utils'
+require "tmpdir"
+require "stringio"
+require "nokogiri"
+require "roo/utils"
require "roo/formatters/base"
require "roo/formatters/csv"
require "roo/formatters/matrix"
@@ -19,8 +17,8 @@ class Roo::Base
include Roo::Formatters::XML
include Roo::Formatters::YAML
- MAX_ROW_COL = 999_999.freeze
- MIN_ROW_COL = 0.freeze
+ MAX_ROW_COL = 999_999
+ MIN_ROW_COL = 0
attr_reader :headers
@@ -28,7 +26,7 @@ class Roo::Base
attr_accessor :header_line
def self.TEMP_PREFIX
- warn '[DEPRECATION] please access TEMP_PREFIX via Roo::TEMP_PREFIX'
+ warn "[DEPRECATION] please access TEMP_PREFIX via Roo::TEMP_PREFIX"
Roo::TEMP_PREFIX
end
@@ -56,6 +54,11 @@ class Roo::Base
if self.class.respond_to?(:finalize_tempdirs)
self.class.finalize_tempdirs(object_id)
end
+
+ instance_variables.each do |instance_variable|
+ instance_variable_set(instance_variable, nil)
+ end
+
nil
end
@@ -64,10 +67,10 @@ class Roo::Base
end
# sets the working sheet in the document
- # 'sheet' can be a number (1 = first sheet) or the name of a sheet.
+ # 'sheet' can be a number (0 = first sheet) or the name of a sheet.
def default_sheet=(sheet)
validate_sheet!(sheet)
- @default_sheet = sheet
+ @default_sheet = sheet.is_a?(String) ? sheet : sheets[sheet]
@first_row[sheet] = @last_row[sheet] = @first_column[sheet] = @last_column[sheet] = nil
@cells_read[sheet] = false
end
@@ -100,7 +103,7 @@ class Roo::Base
def collect_last_row_col_for_sheet(sheet)
first_row = first_column = MAX_ROW_COL
last_row = last_column = MIN_ROW_COL
- @cell[sheet].each_pair do|key, value|
+ @cell[sheet].each_pair do |key, value|
next unless value
first_row = [first_row, key.first.to_i].min
last_row = [last_row, key.first.to_i].max
@@ -110,13 +113,12 @@ class Roo::Base
{ first_row: first_row, first_column: first_column, last_row: last_row, last_column: last_column }
end
- %w(first_row last_row first_column last_column).each do |key|
- class_eval <<-EOS, __FILE__, __LINE__ + 1
- def #{key}(sheet = default_sheet) # def first_row(sheet = default_sheet)
- read_cells(sheet) # read_cells(sheet)
- @#{key}[sheet] ||= first_last_row_col_for_sheet(sheet)[:#{key}] # @first_row[sheet] ||= first_last_row_col_for_sheet(sheet)[:first_row]
- end # end
- EOS
+ %i(first_row last_row first_column last_column).each do |key|
+ ivar = "@#{key}".to_sym
+ define_method(key) do |sheet = default_sheet|
+ read_cells(sheet)
+ instance_variable_get(ivar)[sheet] ||= first_last_row_col_for_sheet(sheet)[key]
+ end
end
def inspect
@@ -203,16 +205,16 @@ class Roo::Base
"Number of sheets: #{sheets.size}\n"\
"Sheets: #{sheets.join(', ')}\n"
n = 1
- sheets.each do|sheet|
+ sheets.each do |sheet|
self.default_sheet = sheet
- result << 'Sheet ' + n.to_s + ":\n"
+ result << "Sheet " + n.to_s + ":\n"
if first_row
result << " First row: #{first_row}\n"
result << " Last row: #{last_row}\n"
result << " First column: #{::Roo::Utils.number_to_letter(first_column)}\n"
result << " Last column: #{::Roo::Utils.number_to_letter(last_column)}"
else
- result << ' - empty -'
+ result << " - empty -"
end
result << "\n" if sheet != sheets.last
n += 1
@@ -286,12 +288,12 @@ class Roo::Base
clean_sheet_if_need(options)
search_or_set_header(options)
headers = @headers ||
- Hash[(first_column..last_column).map do |col|
- [cell(@header_line, col), col]
- end]
+ (first_column..last_column).each_with_object({}) do |col, hash|
+ hash[cell(@header_line, col)] = col
+ end
@header_line.upto(last_row) do |line|
- yield(Hash[headers.map { |k, v| [k, cell(line, v)] }])
+ yield(headers.each_with_object({}) { |(k, v), hash| hash[k] = cell(line, v) })
end
end
end
@@ -306,18 +308,22 @@ class Roo::Base
def row_with(query, return_headers = false)
line_no = 0
+ closest_mismatched_headers = []
each do |row|
line_no += 1
headers = query.map { |q| row.grep(q)[0] }.compact
-
if headers.length == query.length
@header_line = line_no
return return_headers ? headers : line_no
- elsif line_no > 100
- raise Roo::HeaderRowNotFoundError
+ else
+ closest_mismatched_headers = headers if headers.length > closest_mismatched_headers.length
+ if line_no > 100
+ break
+ end
end
end
- raise Roo::HeaderRowNotFoundError
+ missing_headers = query.select { |q| closest_mismatched_headers.grep(q).empty? }
+ raise Roo::HeaderRowNotFoundError, missing_headers
end
protected
@@ -330,7 +336,7 @@ class Roo::Base
filename = File.basename(filename, File.extname(filename))
end
- if uri?(filename) && (qs_begin = filename.rindex('?'))
+ if uri?(filename) && (qs_begin = filename.rindex("?"))
filename = filename[0..qs_begin - 1]
end
exts = Array(exts)
@@ -356,7 +362,7 @@ class Roo::Base
# Diese Methode ist eine temp. Loesung, um zu erforschen, ob der
# Zugriff mit numerischen Keys schneller ist.
def key_to_num(str)
- r, c = str.split(',')
+ r, c = str.split(",")
[r.to_i, c.to_i]
end
@@ -418,9 +424,9 @@ class Roo::Base
def find_by_conditions(options)
rows = first_row.upto(last_row)
- header_for = Hash[1.upto(last_column).map do |col|
- [col, cell(@header_line, col)]
- end]
+ header_for = 1.upto(last_column).each_with_object({}) do |col, hash|
+ hash[col] = cell(@header_line, col)
+ end
# are all conditions met?
conditions = options[:conditions]
@@ -435,9 +441,9 @@ class Roo::Base
rows.map { |i| row(i) }
else
rows.map do |i|
- Hash[1.upto(row(i).size).map do |j|
- [header_for.fetch(j), cell(i, j)]
- end]
+ 1.upto(row(i).size).each_with_object({}) do |j, hash|
+ hash[header_for.fetch(j)] = cell(i, j)
+ end
end
end
end
@@ -455,7 +461,7 @@ class Roo::Base
def find_basename(filename)
if uri?(filename)
- require 'uri'
+ require "uri"
uri = URI.parse filename
File.basename(uri.path)
elsif !is_stream?(filename)
@@ -464,9 +470,9 @@ class Roo::Base
end
def make_tmpdir(prefix = nil, root = nil, &block)
- warn '[DEPRECATION] extend Roo::Tempdir and use its .make_tempdir instead'
+ warn "[DEPRECATION] extend Roo::Tempdir and use its .make_tempdir instead"
prefix = "#{Roo::TEMP_PREFIX}#{prefix}"
- root ||= ENV['ROO_TMP']
+ root ||= ENV["ROO_TMP"]
if block_given?
# folder is deleted at end of block
@@ -485,14 +491,17 @@ class Roo::Base
end
def sanitize_value(v)
- v.gsub(/[[:cntrl:]]|^[\p{Space}]+|[\p{Space}]+$/, '')
+ v.gsub(/[[:cntrl:]]|^[\p{Space}]+|[\p{Space}]+$/, "")
end
def set_headers(hash = {})
# try to find header row with all values or give an error
# then create new hash by indexing strings and keeping integers for header array
- @headers = row_with(hash.values, true)
- @headers = Hash[hash.keys.zip(@headers.map { |x| header_index(x) })]
+ header_row = row_with(hash.values, true)
+ @headers = {}
+ hash.each_with_index do |(key, _), index|
+ @headers[key] = header_index(header_row[index])
+ end
end
def header_index(query)
@@ -525,17 +534,17 @@ class Roo::Base
end
def uri?(filename)
- filename.start_with?('http://', 'https://', 'ftp://')
+ filename.start_with?("http://", "https://", "ftp://")
rescue
false
end
def download_uri(uri, tmpdir)
- require 'open-uri'
+ require "open-uri"
tempfilename = File.join(tmpdir, find_basename(uri))
begin
- File.open(tempfilename, 'wb') do |file|
- open(uri, 'User-Agent' => "Ruby/#{RUBY_VERSION}") do |net|
+ File.open(tempfilename, "wb") do |file|
+ open(uri, "User-Agent" => "Ruby/#{RUBY_VERSION}") do |net|
file.write(net.read)
end
end
@@ -546,15 +555,15 @@ class Roo::Base
end
def open_from_stream(stream, tmpdir)
- tempfilename = File.join(tmpdir, 'spreadsheet')
- File.open(tempfilename, 'wb') do |file|
+ tempfilename = File.join(tmpdir, "spreadsheet")
+ File.open(tempfilename, "wb") do |file|
file.write(stream[7..-1])
end
- File.join(tmpdir, 'spreadsheet')
+ File.join(tmpdir, "spreadsheet")
end
def unzip(filename, tmpdir)
- require 'zip/filesystem'
+ require "zip/filesystem"
Zip::File.open(filename) do |zip|
process_zipfile_packed(zip, tmpdir)
@@ -567,7 +576,7 @@ class Roo::Base
when nil
fail ArgumentError, "Error: sheet 'nil' not valid"
when Integer
- sheets.fetch(sheet - 1) do
+ sheets.fetch(sheet) do
fail RangeError, "sheet index #{sheet} not found"
end
when String
@@ -579,16 +588,16 @@ class Roo::Base
end
end
- def process_zipfile_packed(zip, tmpdir, path = '')
+ def process_zipfile_packed(zip, tmpdir, path = "")
if zip.file.file? path
# extract and return filename
- File.open(File.join(tmpdir, path), 'wb') do |file|
+ File.open(File.join(tmpdir, path), "wb") do |file|
file.write(zip.read(path))
end
File.join(tmpdir, path)
else
ret = nil
- path += '/' unless path.empty?
+ path += "/" unless path.empty?
zip.dir.foreach(path) do |filename|
ret = process_zipfile_packed(zip, tmpdir, path + filename)
end
diff --git a/lib/roo/constants.rb b/lib/roo/constants.rb
index 90d54ee..1ae5ca7 100644
--- a/lib/roo/constants.rb
+++ b/lib/roo/constants.rb
@@ -1,5 +1,7 @@
+# frozen_string_literal: true
+
module Roo
- ROO_EXCEL_NOTICE = "Excel support has been extracted to roo-xls due to its dependency on the GPL'd spreadsheet gem. Install roo-xls to use Roo::Excel.".freeze
- ROO_EXCELML_NOTICE = "Excel SpreadsheetML support has been extracted to roo-xls. Install roo-xls to use Roo::Excel2003XML.".freeze
- ROO_GOOGLE_NOTICE = "Google support has been extracted to roo-google. Install roo-google to use Roo::Google.".freeze
+ ROO_EXCEL_NOTICE = "Excel support has been extracted to roo-xls due to its dependency on the GPL'd spreadsheet gem. Install roo-xls to use Roo::Excel."
+ ROO_EXCELML_NOTICE = "Excel SpreadsheetML support has been extracted to roo-xls. Install roo-xls to use Roo::Excel2003XML."
+ ROO_GOOGLE_NOTICE = "Google support has been extracted to roo-google. Install roo-google to use Roo::Google."
end \ No newline at end of file
diff --git a/lib/roo/csv.rb b/lib/roo/csv.rb
index c161c64..516def6 100644
--- a/lib/roo/csv.rb
+++ b/lib/roo/csv.rb
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
require "csv"
require "time"
@@ -63,25 +65,31 @@ module Roo
def read_cells(sheet = default_sheet)
sheet ||= default_sheet
return if @cells_read[sheet]
- set_row_count(sheet)
- set_column_count(sheet)
- row_num = 1
+ row_num = 0
+ max_col_num = 0
each_row csv_options do |row|
- row.each_with_index do |elem, col_num|
- coordinate = [row_num, col_num + 1]
+ row_num += 1
+ col_num = 0
+
+ row.each do |elem|
+ col_num += 1
+ coordinate = [row_num, col_num]
@cell[coordinate] = elem
@cell_type[coordinate] = celltype_class(elem)
end
- row_num += 1
+
+ max_col_num = col_num if col_num > max_col_num
end
+ set_row_count(sheet, row_num)
+ set_column_count(sheet, max_col_num)
@cells_read[sheet] = true
end
def each_row(options, &block)
if uri?(filename)
- each_row_using_temp_dir(filename)
+ each_row_using_tempdir(options, &block)
elsif is_stream?(filename_or_stream)
::CSV.new(filename_or_stream, options).each(&block)
else
@@ -89,24 +97,24 @@ module Roo
end
end
- def each_row_using_tempdir
+ def each_row_using_tempdir(options, &block)
::Dir.mktmpdir(Roo::TEMP_PREFIX, ENV["ROO_TMP"]) do |tmpdir|
tmp_filename = download_uri(filename, tmpdir)
::CSV.foreach(tmp_filename, options, &block)
end
end
- def set_row_count(sheet)
+ def set_row_count(sheet, last_row)
@first_row[sheet] = 1
- @last_row[sheet] = ::CSV.readlines(@filename, csv_options).size
+ @last_row[sheet] = last_row
@last_row[sheet] = @first_row[sheet] if @last_row[sheet].zero?
nil
end
- def set_column_count(sheet)
+ def set_column_count(sheet, last_col)
@first_column[sheet] = 1
- @last_column[sheet] = (::CSV.readlines(@filename, csv_options).first || []).size
+ @last_column[sheet] = last_col
@last_column[sheet] = @first_column[sheet] if @last_column[sheet].zero?
nil
diff --git a/lib/roo/excelx.rb b/lib/roo/excelx.rb
index 82c1431..f9f0ee2 100644..100755
--- a/lib/roo/excelx.rb
+++ b/lib/roo/excelx.rb
@@ -24,8 +24,9 @@ module Roo
require 'roo/excelx/sheet_doc'
require 'roo/excelx/coordinate'
require 'roo/excelx/format'
+ require 'roo/excelx/images'
- delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
+ delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels, :image_files] => :@shared
ExceedsMaxError = Class.new(StandardError)
# initialization and opening of a spreadsheet file
@@ -39,7 +40,10 @@ module Roo
sheet_options = {}
sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
sheet_options[:no_hyperlinks] = (options[:no_hyperlinks] || false)
+ sheet_options[:empty_cell] = (options[:empty_cell] || false)
+ shared_options = {}
+ shared_options[:disable_html_wrapper] = (options[:disable_html_wrapper] || false)
unless is_stream?(filename_or_stream)
file_type_check(filename_or_stream, %w[.xlsx .xlsm], 'an Excel 2007', file_warning, packed)
basename = find_basename(filename_or_stream)
@@ -52,7 +56,7 @@ module Roo
@tmpdir = self.class.make_tempdir(self, basename, options[:tmpdir_root])
ObjectSpace.define_finalizer(self, self.class.finalize(object_id))
- @shared = Shared.new(@tmpdir)
+ @shared = Shared.new(@tmpdir, shared_options)
@filename = local_filename(filename_or_stream, @tmpdir, packed)
process_zipfile(@filename || filename_or_stream)
@@ -62,10 +66,10 @@ module Roo
end
end.compact
@sheets = []
- @sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n|
- @sheets[n] = Sheet.new(sheet_name, @shared, n, sheet_options)
- [sheet_name, @sheets[n]]
- end]
+ @sheets_by_name = {}
+ @sheet_names.each_with_index do |sheet_name, n|
+ @sheets_by_name[sheet_name] = @sheets[n] = Sheet.new(sheet_name, @shared, n, sheet_options)
+ end
if cell_max
cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions)
@@ -94,7 +98,12 @@ module Roo
def sheet_for(sheet)
sheet ||= default_sheet
validate_sheet!(sheet)
- @sheets_by_name[sheet]
+ @sheets_by_name[sheet] || @sheets[sheet]
+ end
+
+ def images(sheet = nil)
+ images_names = sheet_for(sheet).images.map(&:last)
+ images_names.map { |iname| image_files.find { |ifile| ifile[iname] } }
end
# Returns the content of a spreadsheet-cell.
@@ -325,7 +334,7 @@ module Roo
wb.extract(path)
workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
- workbook_doc.xpath('//sheet').map { |s| s.attributes['id'].value }
+ workbook_doc.xpath('//sheet').map { |s| s['id'] }
end
# Internal
@@ -349,17 +358,13 @@ module Roo
wb_rels.extract(path)
rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
- worksheet_type = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'
relationships = rels_doc.xpath('//Relationship').select do |relationship|
- relationship.attributes['Type'].value == worksheet_type
+ worksheet_types.include? relationship['Type']
end
- relationships.inject({}) do |hash, relationship|
- attributes = relationship.attributes
- id = attributes['Id']
- hash[id.value] = attributes['Target'].value
- hash
+ relationships.each_with_object({}) do |relationship, hash|
+ hash[relationship['Id']] = relationship['Target']
end
end
@@ -376,6 +381,15 @@ module Roo
end
end
+ def extract_images(entries, tmpdir)
+ img_entries = entries.select { |e| e.name[/media\/image([0-9]+)/] }
+ img_entries.each do |entry|
+ path = "#{@tmpdir}/roo#{entry.name.gsub(/xl\/|\//, "_")}"
+ image_files << path
+ entry.extract(path)
+ end
+ end
+
# Extracts all needed files from the zip file
def process_zipfile(zipfilename_or_stream)
@sheet_files = []
@@ -409,6 +423,7 @@ module Roo
sheet_ids = extract_worksheet_ids(entries, "#{@tmpdir}/roo_workbook.xml")
sheets = extract_worksheet_rels(entries, "#{@tmpdir}/roo_workbook.xml.rels")
extract_sheets_in_order(entries, sheet_ids, sheets, @tmpdir)
+ extract_images(entries, @tmpdir)
entries.each do |entry|
path =
@@ -435,6 +450,10 @@ module Roo
# drawings, etc.
nr = Regexp.last_match[1].to_i
rels_files[nr - 1] = "#{@tmpdir}/roo_rels#{nr}"
+ when /drawing([0-9]+).xml.rels$/
+ # Extracting drawing relationships to make images lists for each sheet
+ nr = Regexp.last_match[1].to_i
+ image_rels[nr - 1] = "#{@tmpdir}/roo_image_rels#{nr}"
end
entry.extract(path) if path
@@ -442,7 +461,14 @@ module Roo
end
def safe_send(object, method, *args)
- object.send(method, *args) if object && object.respond_to?(method)
+ object.send(method, *args) if object&.respond_to?(method)
+ end
+
+ def worksheet_types
+ [
+ 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet', # OOXML Transitional
+ 'http://purl.oclc.org/ooxml/officeDocument/relationships/worksheet' # OOXML Strict
+ ]
end
end
end
diff --git a/lib/roo/excelx/cell.rb b/lib/roo/excelx/cell.rb
index 2fc78e1..7abd074 100644
--- a/lib/roo/excelx/cell.rb
+++ b/lib/roo/excelx/cell.rb
@@ -40,19 +40,23 @@ module Roo
end
def self.create_cell(type, *values)
+ cell_class(type)&.new(*values)
+ end
+
+ def self.cell_class(type)
case type
when :string
- Cell::String.new(*values)
+ Cell::String
when :boolean
- Cell::Boolean.new(*values)
+ Cell::Boolean
when :number
- Cell::Number.new(*values)
+ Cell::Number
when :date
- Cell::Date.new(*values)
+ Cell::Date
when :datetime
- Cell::DateTime.new(*values)
+ Cell::DateTime
when :time
- Cell::Time.new(*values)
+ Cell::Time
end
end
diff --git a/lib/roo/excelx/cell/base.rb b/lib/roo/excelx/cell/base.rb
index aea8808..51fc75f 100644
--- a/lib/roo/excelx/cell/base.rb
+++ b/lib/roo/excelx/cell/base.rb
@@ -1,13 +1,18 @@
+# frozen_string_literal: true
+
+require "roo/helpers/default_attr_reader"
+
module Roo
class Excelx
class Cell
class Base
+ extend Roo::Helpers::DefaultAttrReader
attr_reader :cell_type, :cell_value, :value
# FIXME: I think style should be deprecated. Having a style attribute
# for a cell doesn't really accomplish much. It seems to be used
# when you want to export to excelx.
- attr_reader :style
+ attr_reader_with_default default_type: :base, style: 1
# FIXME: Updating a cell's value should be able tochange the cell's type,
@@ -34,14 +39,12 @@ module Roo
attr_writer :value
def initialize(value, formula, excelx_type, style, link, coordinate)
- @link = !!link
@cell_value = value
- @cell_type = excelx_type
- @formula = formula
- @style = style
+ @cell_type = excelx_type if excelx_type
+ @formula = formula if formula
+ @style = style unless style == 1
@coordinate = coordinate
- @type = :base
- @value = link? ? Roo::Link.new(link, value) : value
+ @value = link ? Roo::Link.new(link, value) : value
end
def type
@@ -50,16 +53,16 @@ module Roo
elsif link?
:link
else
- @type
+ default_type
end
end
def formula?
- !!@formula
+ !!(defined?(@formula) && @formula)
end
def link?
- !!@link
+ Roo::Link === @value
end
alias_method :formatted_value, :value
@@ -68,9 +71,16 @@ module Roo
formatted_value
end
- # DEPRECATED: Please use link instead.
+ # DEPRECATED: Please use link? instead.
def hyperlink
- warn '[DEPRECATION] `hyperlink` is deprecated. Please use `link` instead.'
+ warn '[DEPRECATION] `hyperlink` is deprecated. Please use `link?` instead.'
+ link?
+ end
+
+ # DEPRECATED: Please use link? instead.
+ def link
+ warn '[DEPRECATION] `link` is deprecated. Please use `link?` instead.'
+ link?
end
# DEPRECATED: Please use cell_value instead.
@@ -88,6 +98,10 @@ module Roo
def empty?
false
end
+
+ def presence
+ empty? ? nil : self
+ end
end
end
end
diff --git a/lib/roo/excelx/cell/boolean.rb b/lib/roo/excelx/cell/boolean.rb
index fe1f691..2cdfc22 100644
--- a/lib/roo/excelx/cell/boolean.rb
+++ b/lib/roo/excelx/cell/boolean.rb
@@ -1,17 +1,20 @@
+# frozen_string_literal: true
+
module Roo
class Excelx
class Cell
class Boolean < Cell::Base
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
+
+ attr_reader_with_default default_type: :boolean, cell_type: :boolean
def initialize(value, formula, style, link, coordinate)
- super(value, formula, nil, style, link, coordinate)
- @type = @cell_type = :boolean
- @value = link? ? Roo::Link.new(link, value) : create_boolean(value)
+ super(value, formula, nil, style, nil, coordinate)
+ @value = link ? Roo::Link.new(link, value) : create_boolean(value)
end
def formatted_value
- value ? 'TRUE'.freeze : 'FALSE'.freeze
+ value ? 'TRUE' : 'FALSE'
end
private
@@ -19,7 +22,7 @@ module Roo
def create_boolean(value)
# FIXME: Using a boolean will cause methods like Base#to_csv to fail.
# Roo is using some method to ignore false/nil values.
- value.to_i == 1 ? true : false
+ value.to_i == 1
end
end
end
diff --git a/lib/roo/excelx/cell/date.rb b/lib/roo/excelx/cell/date.rb
index 8e2c6cb..8627bc5 100644
--- a/lib/roo/excelx/cell/date.rb
+++ b/lib/roo/excelx/cell/date.rb
@@ -4,23 +4,23 @@ module Roo
class Excelx
class Cell
class Date < Roo::Excelx::Cell::DateTime
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
+ attr_reader :value, :formula, :format, :cell_type, :cell_value, :coordinate
+
+ attr_reader_with_default default_type: :date
def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
# NOTE: Pass all arguments to the parent class, DateTime.
super
- @type = :date
@format = excelx_type.last
- @value = link? ? Roo::Link.new(link, value) : create_date(base_date, value)
+ @value = link ? Roo::Link.new(link, value) : create_date(base_date, value)
end
private
- def create_date(base_date, value)
- date = base_date + value.to_i
- yyyy, mm, dd = date.strftime('%Y-%m-%d').split('-')
+ def create_datetime(_,_); end
- ::Date.new(yyyy.to_i, mm.to_i, dd.to_i)
+ def create_date(base_date, value)
+ base_date + value.to_i
end
end
end
diff --git a/lib/roo/excelx/cell/datetime.rb b/lib/roo/excelx/cell/datetime.rb
index 35d93ac..63f3260 100644
--- a/lib/roo/excelx/cell/datetime.rb
+++ b/lib/roo/excelx/cell/datetime.rb
@@ -1,16 +1,21 @@
+# frozen_string_literal: true
+
require 'date'
module Roo
class Excelx
class Cell
class DateTime < Cell::Base
- attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
+ SECONDS_IN_DAY = 60 * 60 * 24
+
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
- def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
- super(value, formula, excelx_type, style, link, coordinate)
- @type = :datetime
+ attr_reader_with_default default_type: :datetime
+
+ def initialize(value, formula, excelx_type, style, link, base_timestamp, coordinate)
+ super(value, formula, excelx_type, style, nil, coordinate)
@format = excelx_type.last
- @value = link? ? Roo::Link.new(link, value) : create_datetime(base_date, value)
+ @value = link ? Roo::Link.new(link, value) : create_datetime(base_timestamp, value)
end
# Public: Returns formatted value for a datetime. Format's can be an
@@ -78,7 +83,7 @@ module Roo
TIME_FORMATS = {
'hh' => '%H', # Hour (24): 01
- 'h' => '%-k'.freeze, # Hour (24): 1
+ 'h' => '%-k', # Hour (24): 1
# 'hh'.freeze => '%I'.freeze, # Hour (12): 08
# 'h'.freeze => '%-l'.freeze, # Hour (12): 8
'mm' => '%M', # Minute: 01
@@ -92,18 +97,9 @@ module Roo
'0' => '%1N' # Fractional Seconds: tenths.
}
- def create_datetime(base_date, value)
- date = base_date + value.to_f.round(6)
- datetime_string = date.strftime('%Y-%m-%d %H:%M:%S.%N')
- t = round_datetime(datetime_string)
-
- ::DateTime.civil(t.year, t.month, t.day, t.hour, t.min, t.sec)
- end
-
- def round_datetime(datetime_string)
- /(?<yyyy>\d+)-(?<mm>\d+)-(?<dd>\d+) (?<hh>\d+):(?<mi>\d+):(?<ss>\d+.\d+)/ =~ datetime_string
-
- ::Time.new(yyyy, mm, dd, hh, mi, ss.to_r).round(0)
+ def create_datetime(base_timestamp, value)
+ timestamp = (base_timestamp + (value.to_f.round(6) * SECONDS_IN_DAY)).round(0)
+ ::Time.at(timestamp).utc.to_datetime
end
end
end
diff --git a/lib/roo/excelx/cell/empty.rb b/lib/roo/excelx/cell/empty.rb
index 49a20e7..f0c683c 100644
--- a/lib/roo/excelx/cell/empty.rb
+++ b/lib/roo/excelx/cell/empty.rb
@@ -3,10 +3,11 @@ module Roo
class Excelx
class Cell
class Empty < Cell::Base
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :hyperlink, :coordinate
+ attr_reader :value, :formula, :format, :cell_type, :cell_value, :coordinate
+
+ attr_reader_with_default default_type: nil, style: nil
def initialize(coordinate)
- @value = @formula = @format = @cell_type = @cell_value = @hyperlink = nil
@coordinate = coordinate
end
diff --git a/lib/roo/excelx/cell/number.rb b/lib/roo/excelx/cell/number.rb
index 2015562..9f23c4f 100644
--- a/lib/roo/excelx/cell/number.rb
+++ b/lib/roo/excelx/cell/number.rb
@@ -1,16 +1,19 @@
+# frozen_string_literal: true
+
module Roo
class Excelx
class Cell
class Number < Cell::Base
- attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
+
+ # FIXME: change default_type to number. This will break brittle tests.
+ attr_reader_with_default default_type: :float
def initialize(value, formula, excelx_type, style, link, coordinate)
super
- # FIXME: change @type to number. This will break brittle tests.
# FIXME: Excelx_type is an array, but the first value isn't used.
- @type = :float
@format = excelx_type.last
- @value = link? ? Roo::Link.new(link, value) : create_numeric(value)
+ @value = link ? Roo::Link.new(link, value) : create_numeric(value)
end
def create_numeric(number)
@@ -21,48 +24,50 @@ module Roo
when /\.0/
Float(number)
else
- (number.include?('.') || (/\A[-+]?\d+E[-+]\d+\z/i =~ number)) ? Float(number) : Integer(number)
+ (number.include?('.') || (/\A[-+]?\d+E[-+]?\d+\z/i =~ number)) ? Float(number) : Integer(number, 10)
end
end
def formatted_value
return @cell_value if Excelx::ERROR_VALUES.include?(@cell_value)
- formatter = formats[@format]
+ formatter = generate_formatter(@format)
if formatter.is_a? Proc
formatter.call(@cell_value)
- elsif zero_padded_number?
- "%0#{@format.size}d" % @cell_value
else
Kernel.format(formatter, @cell_value)
end
end
- def formats
+ def generate_formatter(format)
# FIXME: numbers can be other colors besides red:
# [BLACK], [BLUE], [CYAN], [GREEN], [MAGENTA], [RED], [WHITE], [YELLOW], [COLOR n]
- {
- 'General' => '%.0f',
- '0' => '%.0f',
- '0.00' => '%.2f',
- '0.000000' => '%.6f',
- '#,##0' => number_format('%.0f'),
- '#,##0.00' => number_format('%.2f'),
- '0%' => proc do |number|
- Kernel.format('%d%', number.to_f * 100)
- end,
- '0.00%' => proc do |number|
- Kernel.format('%.2f%', number.to_f * 100)
- end,
- '0.00E+00' => '%.2E',
- '#,##0 ;(#,##0)' => number_format('%.0f', '(%.0f)'),
- '#,##0 ;[Red](#,##0)' => number_format('%.0f', '[Red](%.0f)'),
- '#,##0.00;(#,##0.00)' => number_format('%.2f', '(%.2f)'),
- '#,##0.00;[Red](#,##0.00)' => number_format('%.2f', '[Red](%.2f)'),
+ case format
+ when /^General$/i then '%.0f'
+ when '0' then '%.0f'
+ when /^(0+)$/ then "%0#{$1.size}d"
+ when /^0\.(0+)$/ then "%.#{$1.size}f"
+ when '#,##0' then number_format('%.0f')
+ when '#,##0.00' then number_format('%.2f')
+ when '0%'
+ proc do |number|
+ Kernel.format('%d%%', number.to_f * 100)
+ end
+ when '0.00%'
+ proc do |number|
+ Kernel.format('%.2f%%', number.to_f * 100)
+ end
+ when '0.00E+00' then '%.2E'
+ when '#,##0 ;(#,##0)' then number_format('%.0f', '(%.0f)')
+ when '#,##0 ;[Red](#,##0)' then number_format('%.0f', '[Red](%.0f)')
+ when '#,##0.00;(#,##0.00)' then number_format('%.2f', '(%.2f)')
+ when '#,##0.00;[Red](#,##0.00)' then number_format('%.2f', '[Red](%.2f)')
# FIXME: not quite sure what the format should look like in this case.
- '##0.0E+0' => '%.1E',
- '@' => proc { |number| number }
- }
+ when '##0.0E+0' then '%.1E'
+ when '@' then proc { |number| number }
+ else
+ raise "Unknown format: #{format.inspect}"
+ end
end
private
@@ -77,10 +82,6 @@ module Roo
Kernel.format(formatter, number).reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
end
end
-
- def zero_padded_number?
- @format[/0+/] == @format
- end
end
end
end
diff --git a/lib/roo/excelx/cell/string.rb b/lib/roo/excelx/cell/string.rb
index 7967806..dacf0b6 100644
--- a/lib/roo/excelx/cell/string.rb
+++ b/lib/roo/excelx/cell/string.rb
@@ -2,12 +2,12 @@ module Roo
class Excelx
class Cell
class String < Cell::Base
- attr_reader :value, :formula, :format, :cell_type, :cell_value, :link, :coordinate
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
+
+ attr_reader_with_default default_type: :string, cell_type: :string
def initialize(value, formula, style, link, coordinate)
super(value, formula, nil, style, link, coordinate)
- @type = @cell_type = :string
- @value = link? ? Roo::Link.new(link, value) : value
end
def empty?
diff --git a/lib/roo/excelx/cell/time.rb b/lib/roo/excelx/cell/time.rb
index d661ab8..a1f0864 100644
--- a/lib/roo/excelx/cell/time.rb
+++ b/lib/roo/excelx/cell/time.rb
@@ -4,15 +4,16 @@ module Roo
class Excelx
class Cell
class Time < Roo::Excelx::Cell::DateTime
- attr_reader :value, :formula, :format, :cell_value, :link, :coordinate
+ attr_reader :value, :formula, :format, :cell_value, :coordinate
+
+ attr_reader_with_default default_type: :time
def initialize(value, formula, excelx_type, style, link, base_date, coordinate)
# NOTE: Pass all arguments to DateTime super class.
super
- @type = :time
@format = excelx_type.last
@datetime = create_datetime(base_date, value)
- @value = link? ? Roo::Link.new(link, value) : (value.to_f * 86_400).to_i
+ @value = link ? Roo::Link.new(link, value) : (value.to_f * 86_400).to_i
end
def formatted_value
diff --git a/lib/roo/excelx/comments.rb b/lib/roo/excelx/comments.rb
index 1a89908..65044a9 100644
--- a/lib/roo/excelx/comments.rb
+++ b/lib/roo/excelx/comments.rb
@@ -12,10 +12,10 @@ module Roo
def extract_comments
return {} unless doc_exists?
- Hash[doc.xpath('//comments/commentList/comment').map do |comment|
+ doc.xpath('//comments/commentList/comment').each_with_object({}) do |comment, hash|
value = (comment.at_xpath('./text/r/t') || comment.at_xpath('./text/t')).text
- [::Roo::Utils.ref_to_key(comment.attributes['ref'].to_s), value]
- end]
+ hash[::Roo::Utils.ref_to_key(comment['ref'].to_s)] = value
+ end
end
end
end
diff --git a/lib/roo/excelx/coordinate.rb b/lib/roo/excelx/coordinate.rb
index 53b24ba..4f61b17 100644
--- a/lib/roo/excelx/coordinate.rb
+++ b/lib/roo/excelx/coordinate.rb
@@ -1,11 +1,18 @@
module Roo
class Excelx
- class Coordinate
- attr_accessor :row, :column
+ class Coordinate < ::Array
def initialize(row, column)
- @row = row
- @column = column
+ super() << row << column
+ freeze
+ end
+
+ def row
+ self[0]
+ end
+
+ def column
+ self[1]
end
end
end
diff --git a/lib/roo/excelx/extractor.rb b/lib/roo/excelx/extractor.rb
index 1cdd13b..b87a84e 100644..100755
--- a/lib/roo/excelx/extractor.rb
+++ b/lib/roo/excelx/extractor.rb
@@ -1,16 +1,34 @@
+# frozen_string_literal: true
+
+require "roo/helpers/weak_instance_cache"
+
module Roo
class Excelx
class Extractor
- def initialize(path)
+ include Roo::Helpers::WeakInstanceCache
+
+ COMMON_STRINGS = {
+ t: "t",
+ r: "r",
+ s: "s",
+ ref: "ref",
+ html_tag_open: "<html>",
+ html_tag_closed: "</html>"
+ }
+
+ def initialize(path, options = {})
@path = path
+ @options = options
end
private
def doc
- raise FileNotFound, "#{@path} file not found" unless doc_exists?
+ instance_cache(:@doc) do
+ raise FileNotFound, "#{@path} file not found" unless doc_exists?
- ::Roo::Utils.load_xml(@path).remove_namespaces!
+ ::Roo::Utils.load_xml(@path).remove_namespaces!
+ end
end
def doc_exists?
diff --git a/lib/roo/excelx/format.rb b/lib/roo/excelx/format.rb
index 72b36d9..1c1968a 100644
--- a/lib/roo/excelx/format.rb
+++ b/lib/roo/excelx/format.rb
@@ -1,49 +1,57 @@
+# frozen_string_literal: true
+
module Roo
class Excelx
module Format
+ extend self
EXCEPTIONAL_FORMATS = {
'h:mm am/pm' => :date,
'h:mm:ss am/pm' => :date
}
STANDARD_FORMATS = {
- 0 => 'General'.freeze,
- 1 => '0'.freeze,
- 2 => '0.00'.freeze,
- 3 => '#,##0'.freeze,
- 4 => '#,##0.00'.freeze,
- 9 => '0%'.freeze,
- 10 => '0.00%'.freeze,
- 11 => '0.00E+00'.freeze,
- 12 => '# ?/?'.freeze,
- 13 => '# ??/??'.freeze,
- 14 => 'mm-dd-yy'.freeze,
- 15 => 'd-mmm-yy'.freeze,
- 16 => 'd-mmm'.freeze,
- 17 => 'mmm-yy'.freeze,
- 18 => 'h:mm AM/PM'.freeze,
- 19 => 'h:mm:ss AM/PM'.freeze,
- 20 => 'h:mm'.freeze,
- 21 => 'h:mm:ss'.freeze,
- 22 => 'm/d/yy h:mm'.freeze,
- 37 => '#,##0 ;(#,##0)'.freeze,
- 38 => '#,##0 ;[Red](#,##0)'.freeze,
- 39 => '#,##0.00;(#,##0.00)'.freeze,
- 40 => '#,##0.00;[Red](#,##0.00)'.freeze,
- 45 => 'mm:ss'.freeze,
- 46 => '[h]:mm:ss'.freeze,
- 47 => 'mmss.0'.freeze,
- 48 => '##0.0E+0'.freeze,
- 49 => '@'.freeze
+ 0 => 'General',
+ 1 => '0',
+ 2 => '0.00',
+ 3 => '#,##0',
+ 4 => '#,##0.00',
+ 9 => '0%',
+ 10 => '0.00%',
+ 11 => '0.00E+00',
+ 12 => '# ?/?',
+ 13 => '# ??/??',
+ 14 => 'mm-dd-yy',
+ 15 => 'd-mmm-yy',
+ 16 => 'd-mmm',
+ 17 => 'mmm-yy',
+ 18 => 'h:mm AM/PM',
+ 19 => 'h:mm:ss AM/PM',
+ 20 => 'h:mm',
+ 21 => 'h:mm:ss',
+ 22 => 'm/d/yy h:mm',
+ 37 => '#,##0 ;(#,##0)',
+ 38 => '#,##0 ;[Red](#,##0)',
+ 39 => '#,##0.00;(#,##0.00)',
+ 40 => '#,##0.00;[Red](#,##0.00)',
+ 45 => 'mm:ss',
+ 46 => '[h]:mm:ss',
+ 47 => 'mmss.0',
+ 48 => '##0.0E+0',
+ 49 => '@'
}
def to_type(format)
+ @to_type ||= {}
+ @to_type[format] ||= _to_type(format)
+ end
+
+ def _to_type(format)
format = format.to_s.downcase
if (type = EXCEPTIONAL_FORMATS[format])
type
elsif format.include?('#')
:float
- elsif !format.match(/d+(?![\]])/).nil? || format.include?('y')
+ elsif format.include?('y') || !format.match(/d+(?![\]])/).nil?
if format.include?('h') || format.include?('s')
:datetime
else
@@ -58,7 +66,6 @@ module Roo
end
end
- module_function :to_type
end
- end
+ end
end
diff --git a/lib/roo/excelx/images.rb b/lib/roo/excelx/images.rb
new file mode 100644
index 0000000..cf0221f
--- /dev/null
+++ b/lib/roo/excelx/images.rb
@@ -0,0 +1,26 @@
+require 'roo/excelx/extractor'
+
+module Roo
+ class Excelx
+ class Images < Excelx::Extractor
+
+ # Returns: Hash { id1: extracted_file_name1 },
+ # Example: { "rId1"=>"roo_media_image1.png",
+ # "rId2"=>"roo_media_image2.png",
+ # "rId3"=>"roo_media_image3.png" }
+ def list
+ @images ||= extract_images_names
+ end
+
+ private
+
+ def extract_images_names
+ return {} unless doc_exists?
+
+ doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, hash|
+ hash[rel['Id']] = "roo" + rel['Target'].gsub(/\.\.\/|\//, '_')
+ end
+ end
+ end
+ end
+end
diff --git a/lib/roo/excelx/relationships.rb b/lib/roo/excelx/relationships.rb
index 8a0ed97..19f9919 100644
--- a/lib/roo/excelx/relationships.rb
+++ b/lib/roo/excelx/relationships.rb
@@ -16,9 +16,9 @@ module Roo
def extract_relationships
return [] unless doc_exists?
- Hash[doc.xpath('/Relationships/Relationship').map do |rel|
- [rel.attribute('Id').text, rel]
- end]
+ doc.xpath('/Relationships/Relationship').each_with_object({}) do |rel, hash|
+ hash[rel['Id']] = rel
+ end
end
end
end
diff --git a/lib/roo/excelx/shared.rb b/lib/roo/excelx/shared.rb
index 3677fa2..bcd2c08 100644..100755
--- a/lib/roo/excelx/shared.rb
+++ b/lib/roo/excelx/shared.rb
@@ -4,12 +4,15 @@ module Roo
# reduce memory usage and reduce the number of objects being passed
# to various inititializers.
class Shared
- attr_accessor :comments_files, :sheet_files, :rels_files
- def initialize(dir)
+ attr_accessor :comments_files, :sheet_files, :rels_files, :image_rels, :image_files
+ def initialize(dir, options = {})
@dir = dir
@comments_files = []
@sheet_files = []
@rels_files = []
+ @options = options
+ @image_rels = []
+ @image_files = []
end
def styles
@@ -17,7 +20,7 @@ module Roo
end
def shared_strings
- @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'))
+ @shared_strings ||= SharedStrings.new(File.join(@dir, 'roo_sharedStrings.xml'), @options)
end
def workbook
@@ -27,6 +30,10 @@ module Roo
def base_date
workbook.base_date
end
+
+ def base_timestamp
+ workbook.base_timestamp
+ end
end
end
end
diff --git a/lib/roo/excelx/shared_strings.rb b/lib/roo/excelx/shared_strings.rb
index f7caf7c..e70b623 100755
--- a/lib/roo/excelx/shared_strings.rb
+++ b/lib/roo/excelx/shared_strings.rb
@@ -1,16 +1,10 @@
+# frozen_string_literal: true
+
require 'roo/excelx/extractor'
module Roo
class Excelx
class SharedStrings < Excelx::Extractor
-
- COMMON_STRINGS = {
- t: "t",
- r: "r",
- html_tag_open: "<html>",
- html_tag_closed: "</html>"
- }
-
def [](index)
to_a[index]
end
@@ -26,6 +20,7 @@ module Roo
# Use to_html or to_a for html returns
# See what is happening with commit???
def use_html?(index)
+ return false if @options[:disable_html_wrapper]
to_html[index][/<([biu]|sup|sub)>/]
end
@@ -45,7 +40,7 @@ module Roo
document = fix_invalid_shared_strings(doc)
# read the shared strings xml document
document.xpath('/sst/si').map do |si|
- shared_string = ''
+ shared_string = +""
si.children.each do |elem|
case elem.name
when 'r'
@@ -65,7 +60,7 @@ module Roo
fix_invalid_shared_strings(doc)
# read the shared strings xml document
doc.xpath('/sst/si').map do |si|
- html_string = '<html>'
+ html_string = '<html>'.dup
si.children.each do |elem|
case elem.name
when 'r'
@@ -95,7 +90,7 @@ module Roo
#
# Expected Output ::: "<html><sub|sup><b><i><u>TEXT</u></i></b></sub|/sup></html>"
def extract_html_r(r_elem)
- str = ''
+ str = +""
xml_elems = {
sub: false,
sup: false,
@@ -103,7 +98,6 @@ module Roo
i: false,
u: false
}
- b, i, u, sub, sup = false, false, false, false, false
r_elem.children.each do |elem|
case elem.name
when 'rPr'
@@ -141,13 +135,13 @@ module Roo
# This will return an html string
def create_html(text, formatting)
- tmp_str = ''
+ tmp_str = +""
formatting.each do |elem, val|
tmp_str << "<#{elem}>" if val
end
tmp_str << text
- reverse_format = Hash[formatting.to_a.reverse]
- reverse_format.each do |elem, val|
+
+ formatting.reverse_each do |elem, val|
tmp_str << "</#{elem}>" if val
end
tmp_str
diff --git a/lib/roo/excelx/sheet.rb b/lib/roo/excelx/sheet.rb
index add92f0..840a053 100644
--- a/lib/roo/excelx/sheet.rb
+++ b/lib/roo/excelx/sheet.rb
@@ -4,11 +4,15 @@ module Roo
class Sheet
extend Forwardable
- delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files] => :@shared
+ delegate [:styles, :workbook, :shared_strings, :rels_files, :sheet_files, :comments_files, :image_rels] => :@shared
+
+ attr_reader :images
def initialize(name, shared, sheet_index, options = {})
@name = name
@shared = shared
+ @sheet_index = sheet_index
+ @images = Images.new(image_rels[sheet_index]).list
@rels = Relationships.new(rels_files[sheet_index])
@comments = Comments.new(comments_files[sheet_index])
@sheet = SheetDoc.new(sheet_files[sheet_index], @rels, shared, options)
@@ -19,7 +23,14 @@ module Roo
end
def present_cells
- @present_cells ||= cells.select { |_, cell| cell && !cell.empty? }
+ @present_cells ||= begin
+ warn %{
+[DEPRECATION] present_cells is deprecated. Alternate:
+ with activesupport => cells[key].presence
+ without activesupport => cells[key]&.presence
+ }
+ cells.select { |_, cell| cell&.presence }
+ end
end
# Yield each row as array of Excelx::Cell objects
@@ -39,33 +50,33 @@ module Roo
def row(row_number)
first_column.upto(last_column).map do |col|
- cells[[row_number, col]]
- end.map { |cell| cell && cell.value }
+ cells[[row_number, col]]&.value
+ end
end
def column(col_number)
first_row.upto(last_row).map do |row|
- cells[[row, col_number]]
- end.map { |cell| cell && cell.value }
+ cells[[row, col_number]]&.value
+ end
end
# returns the number of the first non-empty row
def first_row
- @first_row ||= present_cells.keys.map { |row, _| row }.min
+ @first_row ||= first_last_row_col[:first_row]
end
def last_row
- @last_row ||= present_cells.keys.map { |row, _| row }.max
+ @last_row ||= first_last_row_col[:last_row]
end
# returns the number of the first non-empty column
def first_column
- @first_column ||= present_cells.keys.map { |_, col| col }.min
+ @first_column ||= first_last_row_col[:first_column]
end
# returns the number of the last non-empty column
def last_column
- @last_column ||= present_cells.keys.map { |_, col| col }.max
+ @last_column ||= first_last_row_col[:last_column]
end
def excelx_format(key)
@@ -107,6 +118,34 @@ module Roo
(cell.coordinate.column - 1 - last_column).times { pad << nil }
pad
end
+
+ def first_last_row_col
+ @first_last_row_col ||= begin
+ first_row = last_row = first_col = last_col = nil
+
+ cells.each do |(row, col), cell|
+ next unless cell&.presence
+ first_row ||= row
+ last_row ||= row
+ first_col ||= col
+ last_col ||= col
+
+ if row > last_row
+ last_row = row
+ elsif row < first_row
+ first_row = row
+ end
+
+ if col > last_col
+ last_col = col
+ elsif col < first_col
+ first_col = col
+ end
+ end
+
+ {first_row: first_row, last_row: last_row, first_column: first_col, last_column: last_col}
+ end
+ end
end
end
end
diff --git a/lib/roo/excelx/sheet_doc.rb b/lib/roo/excelx/sheet_doc.rb
index a705958..8b0c686 100755
--- a/lib/roo/excelx/sheet_doc.rb
+++ b/lib/roo/excelx/sheet_doc.rb
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
require 'forwardable'
require 'roo/excelx/extractor'
@@ -5,7 +7,7 @@ module Roo
class Excelx
class SheetDoc < Excelx::Extractor
extend Forwardable
- delegate [:styles, :workbook, :shared_strings, :base_date] => :@shared
+ delegate [:workbook] => :@shared
def initialize(path, relationships, shared, options = {})
super(path)
@@ -19,7 +21,12 @@ module Roo
end
def hyperlinks(relationships)
- @hyperlinks ||= extract_hyperlinks(relationships)
+ # If you're sure you're not going to need this hyperlinks you can discard it
+ @hyperlinks ||= if @options[:no_hyperlinks]
+ {}
+ else
+ extract_hyperlinks(relationships)
+ end
end
# Get the dimensions for the sheet.
@@ -39,13 +46,10 @@ module Roo
def each_cell(row_xml)
return [] unless row_xml
row_xml.children.each do |cell_element|
- # If you're sure you're not going to need this hyperlinks you can discard it
- hyperlinks = unless @options[:no_hyperlinks]
- key = ::Roo::Utils.ref_to_key(cell_element['r'])
- hyperlinks(@relationships)[key]
- end
+ coordinate = ::Roo::Utils.extract_coordinate(cell_element["r"])
+ hyperlinks = hyperlinks(@relationships)[coordinate]
- yield cell_from_xml(cell_element, hyperlinks)
+ yield cell_from_xml(cell_element, hyperlinks, coordinate)
end
end
@@ -53,13 +57,13 @@ module Roo
def cell_value_type(type, format)
case type
- when 's'.freeze
+ when 's'
:shared
- when 'b'.freeze
+ when 'b'
:boolean
- when 'str'.freeze
+ when 'str'
:string
- when 'inlineStr'.freeze
+ when 'inlineStr'
:inlinestr
else
Excelx::Format.to_type(format)
@@ -74,42 +78,58 @@ module Roo
# </c>
# hyperlink - a String for the hyperlink for the cell or nil when no
# hyperlink is present.
+ # coordinate - a Roo::Excelx::Coordinate for the coordinate for the cell
+ # or nil to extract coordinate from cell_xml.
+ # empty_cell - an Optional Boolean value.
#
# Examples
#
- # cells_from_xml(<Nokogiri::XML::Element>, nil)
+ # cells_from_xml(<Nokogiri::XML::Element>, nil, nil)
# # => <Excelx::Cell::String>
#
# Returns a type of <Excelx::Cell>.
- def cell_from_xml(cell_xml, hyperlink)
- coordinate = extract_coordinate(cell_xml['r'])
- return Excelx::Cell::Empty.new(coordinate) if cell_xml.children.empty?
+ def cell_from_xml(cell_xml, hyperlink, coordinate, empty_cell=true)
+ coordinate ||= ::Roo::Utils.extract_coordinate(cell_xml["r"])
+ cell_xml_children = cell_xml.children
+ return create_empty_cell(coordinate, empty_cell) if cell_xml_children.empty?
# NOTE: This is error prone, to_i will silently turn a nil into a 0.
# This works by coincidence because Format[0] is General.
- style = cell_xml['s'].to_i
- format = styles.style_format(style)
- value_type = cell_value_type(cell_xml['t'], format)
+ style = cell_xml["s"].to_i
formula = nil
- cell_xml.children.each do |cell|
+ cell_xml_children.each do |cell|
case cell.name
when 'is'
- content_arr = cell.search('t').map(&:content)
- unless content_arr.empty?
- return Excelx::Cell.create_cell(:string, content_arr.join(''), formula, style, hyperlink, coordinate)
+ content = +""
+ cell.children.each do |inline_str|
+ if inline_str.name == 't'
+ content << inline_str.content
+ end
+ end
+ unless content.empty?
+ return Excelx::Cell.cell_class(:string).new(content, formula, style, hyperlink, coordinate)
end
when 'f'
formula = cell.content
when 'v'
- return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
+ format = style_format(style)
+ value_type = cell_value_type(cell_xml["t"], format)
+
+ return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
end
end
- Excelx::Cell::Empty.new(coordinate)
+ create_empty_cell(coordinate)
end
- def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate)
+ def create_empty_cell(coordinate, empty_cell)
+ if empty_cell
+ Excelx::Cell::Empty.new(coordinate)
+ end
+ end
+
+ def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate)
# NOTE: format.to_s can replace excelx_type as an argument for
# Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but
# it will break some brittle tests.
@@ -125,11 +145,12 @@ module Roo
# 3. formula
case value_type
when :shared
- value = shared_strings.use_html?(cell.content.to_i) ? shared_strings.to_html[cell.content.to_i] : shared_strings[cell.content.to_i]
- Excelx::Cell.create_cell(:string, value, formula, style, hyperlink, coordinate)
+ cell_content = cell.content.to_i
+ value = shared_strings.use_html?(cell_content) ? shared_strings.to_html[cell_content] : shared_strings[cell_content]
+ Excelx::Cell.cell_class(:string).new(value, formula, style, hyperlink, coordinate)
when :boolean, :string
value = cell.content
- Excelx::Cell.create_cell(value_type, value, formula, style, hyperlink, coordinate)
+ Excelx::Cell.cell_class(value_type).new(value, formula, style, hyperlink, coordinate)
when :time, :datetime
cell_content = cell.content.to_f
# NOTE: A date will be a whole number. A time will have be > 1. And
@@ -148,35 +169,32 @@ module Roo
else
:date
end
- Excelx::Cell.create_cell(cell_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
+ base_value = cell_type == :date ? base_date : base_timestamp
+ Excelx::Cell.cell_class(cell_type).new(cell_content, formula, excelx_type, style, hyperlink, base_value, coordinate)
when :date
- Excelx::Cell.create_cell(value_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
+ Excelx::Cell.cell_class(:date).new(cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate)
else
- Excelx::Cell.create_cell(:number, cell.content, formula, excelx_type, style, hyperlink, coordinate)
+ Excelx::Cell.cell_class(:number).new(cell.content, formula, excelx_type, style, hyperlink, coordinate)
end
end
- def extract_coordinate(coordinate)
- row, column = ::Roo::Utils.split_coordinate(coordinate)
-
- Excelx::Coordinate.new(row, column)
- end
-
def extract_hyperlinks(relationships)
return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink'))
- Hash[hyperlinks.map do |hyperlink|
- if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text])
- [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text]
+ hyperlinks.each_with_object({}) do |hyperlink, hash|
+ if relationship = relationships[hyperlink['id']]
+ target_link = relationship['Target']
+ target_link += "##{hyperlink['location']}" if hyperlink['location']
+ hash[::Roo::Utils.ref_to_key(hyperlink["ref"].to_s)] = target_link
end
- end.compact]
+ end
end
def expand_merged_ranges(cells)
# Extract merged ranges from xml
merges = {}
doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml|
- tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
+ tl, br = mergecell_xml["ref"].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) }
for row in tl[0]..br[0] do
for col in tl[1]..br[1] do
next if row == tl[0] && col == tl[1]
@@ -191,10 +209,14 @@ module Roo
end
def extract_cells(relationships)
- extracted_cells = Hash[doc.xpath('/worksheet/sheetData/row/c').map do |cell_xml|
- key = ::Roo::Utils.ref_to_key(cell_xml['r'])
- [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])]
- end]
+ extracted_cells = {}
+ empty_cell = @options[:empty_cell]
+
+ doc.xpath('/worksheet/sheetData/row/c').each do |cell_xml|
+ coordinate = ::Roo::Utils.extract_coordinate(cell_xml["r"])
+ cell = cell_from_xml(cell_xml, hyperlinks(relationships)[coordinate], coordinate, empty_cell)
+ extracted_cells[coordinate] = cell if cell
+ end
expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges]
@@ -203,9 +225,25 @@ module Roo
def extract_dimensions
Roo::Utils.each_element(@path, 'dimension') do |dimension|
- return dimension.attributes['ref'].value
+ return dimension["ref"]
end
end
+
+ def style_format(style)
+ @shared.styles.style_format(style)
+ end
+
+ def base_date
+ @shared.base_date
+ end
+
+ def base_timestamp
+ @shared.base_timestamp
+ end
+
+ def shared_strings
+ @shared.shared_strings
+ end
end
end
end
diff --git a/lib/roo/excelx/styles.rb b/lib/roo/excelx/styles.rb
index 87f1713..25f0bf0 100644
--- a/lib/roo/excelx/styles.rb
+++ b/lib/roo/excelx/styles.rb
@@ -55,9 +55,9 @@ module Roo
end
def extract_num_fmts
- Hash[doc.xpath('//numFmt').map do |num_fmt|
- [num_fmt['numFmtId'], num_fmt['formatCode']]
- end]
+ doc.xpath('//numFmt').each_with_object({}) do |num_fmt, hash|
+ hash[num_fmt['numFmtId']] = num_fmt['formatCode']
+ end
end
end
end
diff --git a/lib/roo/excelx/workbook.rb b/lib/roo/excelx/workbook.rb
index 7ef841f..c21bb1f 100644
--- a/lib/roo/excelx/workbook.rb
+++ b/lib/roo/excelx/workbook.rb
@@ -29,13 +29,17 @@ module Roo
# aka labels
def defined_names
- Hash[doc.xpath('//definedName').map do |defined_name|
+ doc.xpath('//definedName').each_with_object({}) do |defined_name, hash|
# "Sheet1!$C$5"
sheet, coordinates = defined_name.text.split('!$', 2)
col, row = coordinates.split('$')
name = defined_name['name']
- [name, Label.new(name, sheet, row, col)]
- end]
+ hash[name] = Label.new(name, sheet, row, col)
+ end
+ end
+
+ def base_timestamp
+ @base_timestamp ||= base_date.to_datetime.to_time.to_i
end
def base_date
diff --git a/lib/roo/helpers/default_attr_reader.rb b/lib/roo/helpers/default_attr_reader.rb
new file mode 100644
index 0000000..a02ba84
--- /dev/null
+++ b/lib/roo/helpers/default_attr_reader.rb
@@ -0,0 +1,20 @@
+# frozen_string_literal: true
+
+module Roo
+ module Helpers
+ module DefaultAttrReader
+ def attr_reader_with_default(attr_hash)
+ attr_hash.each do |attr_name, default_value|
+ instance_variable = :"@#{attr_name}"
+ define_method attr_name do
+ if instance_variable_defined? instance_variable
+ instance_variable_get instance_variable
+ else
+ default_value
+ end
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/roo/helpers/weak_instance_cache.rb b/lib/roo/helpers/weak_instance_cache.rb
new file mode 100644
index 0000000..db26de1
--- /dev/null
+++ b/lib/roo/helpers/weak_instance_cache.rb
@@ -0,0 +1,41 @@
+# frozen_string_literal: true
+
+require "weakref"
+
+module Roo
+ module Helpers
+ module WeakInstanceCache
+ private
+
+ def instance_cache(key)
+ object = nil
+
+ if instance_variable_defined?(key) && (ref = instance_variable_get(key)) && ref.weakref_alive?
+ begin
+ object = ref.__getobj__
+ rescue => e
+ unless (defined?(::WeakRef::RefError) && e.is_a?(::WeakRef::RefError)) || (defined?(RefError) && e.is_a?(RefError))
+ raise e
+ end
+ end
+ end
+
+ unless object
+ object = yield
+ ObjectSpace.define_finalizer(object, instance_cache_finalizer(key))
+ instance_variable_set(key, WeakRef.new(object))
+ end
+
+ object
+ end
+
+ def instance_cache_finalizer(key)
+ proc do |object_id|
+ if instance_variable_defined?(key) && (ref = instance_variable_get(key)) && (!ref.weakref_alive? || ref.__getobj__.object_id == object_id)
+ remove_instance_variable(key)
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/lib/roo/open_office.rb b/lib/roo/open_office.rb
index 6ccbe85..f172363 100644
--- a/lib/roo/open_office.rb
+++ b/lib/roo/open_office.rb
@@ -1,3 +1,5 @@
+# frozen_string_literal: true
+
require 'date'
require 'nokogiri'
require 'cgi'
@@ -11,9 +13,9 @@ module Roo
class OpenOffice < Roo::Base
extend Roo::Tempdir
- ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'.freeze
- XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']".freeze
- XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']".freeze
+ ERROR_MISSING_CONTENT_XML = 'file missing required content.xml'
+ XPATH_FIND_TABLE_STYLES = "//*[local-name()='automatic-styles']"
+ XPATH_LOCAL_NAME_TABLE = "//*[local-name()='table']"
# initialization and opening of a spreadsheet file
# values for packed: :zip
@@ -561,7 +563,7 @@ module Roo
end
def read_labels
- @label ||= Hash[doc.xpath('//table:named-range').map do |ne|
+ @label ||= doc.xpath('//table:named-range').each_with_object({}) do |ne, hash|
#-
# $Sheet1.$C$5
#+
@@ -569,8 +571,8 @@ module Roo
sheetname, coords = attribute(ne, 'cell-range-address').to_s.split('.$')
col, row = coords.split('$')
sheetname = sheetname[1..-1] if sheetname[0, 1] == '$'
- [name, [sheetname, row, col]]
- end]
+ hash[name] = [sheetname, row, col]
+ end
end
def read_styles(style_elements)
diff --git a/lib/roo/spreadsheet.rb b/lib/roo/spreadsheet.rb
index 1eef58d..cdc93f0 100644
--- a/lib/roo/spreadsheet.rb
+++ b/lib/roo/spreadsheet.rb
@@ -24,7 +24,7 @@ module Roo
options[:file_warning] = :ignore
extension.tr('.', '').downcase.to_sym
else
- res = ::File.extname((path =~ /\A#{::URI.regexp}\z/) ? ::URI.parse(::URI.encode(path)).path : path)
+ res = ::File.extname((path =~ /\A#{::URI::DEFAULT_PARSER.make_regexp}\z/) ? ::URI.parse(::URI.encode(path)).path : path)
res.tr('.', '').downcase.to_sym
end
end
diff --git a/lib/roo/utils.rb b/lib/roo/utils.rb
index 17b672d..dcaedde 100644
--- a/lib/roo/utils.rb
+++ b/lib/roo/utils.rb
@@ -4,27 +4,39 @@ module Roo
LETTERS = ('A'..'Z').to_a
- def split_coordinate(str)
- @split_coordinate ||= {}
+ def extract_coordinate(s)
+ num = letter_num = 0
+ num_only = false
- @split_coordinate[str] ||= begin
- letter, number = split_coord(str)
- x = letter_to_number(letter)
- y = number
- [y, x]
+ s.each_byte do |b|
+ if !num_only && (index = char_index(b))
+ letter_num *= 26
+ letter_num += index
+ elsif index = num_index(b)
+ num_only = true
+ num *= 10
+ num += index
+ else
+ fail ArgumentError
+ end
end
+ fail ArgumentError if letter_num == 0 || !num_only
+
+ Excelx::Coordinate.new(num, letter_num)
end
- alias_method :ref_to_key, :split_coordinate
+ alias_method :ref_to_key, :extract_coordinate
- def split_coord(s)
- if s =~ /([a-zA-Z]+)([0-9]+)/
- letter = Regexp.last_match[1]
- number = Regexp.last_match[2].to_i
- else
- fail ArgumentError
- end
- [letter, number]
+ def split_coordinate(str)
+ warn "[DEPRECATION] `Roo::Utils.split_coordinate` is deprecated. Please use `Roo::Utils.extract_coordinate` instead."
+ extract_coordinate(str)
+ end
+
+
+
+ def split_coord(str)
+ coord = extract_coordinate(str)
+ [number_to_letter(coord.column), coord.row]
end
# convert a number to something like 'AB' (1 => 'A', 2 => 'B', ...)
@@ -56,8 +68,8 @@ module Roo
cells = str.split(':')
return 1 if cells.count == 1
raise ArgumentError.new("invalid range string: #{str}. Supported range format 'A1:B2'") if cells.count != 2
- x1, y1 = split_coordinate(cells[0])
- x2, y2 = split_coordinate(cells[1])
+ x1, y1 = extract_coordinate(cells[0])
+ x2, y2 = extract_coordinate(cells[1])
(x2 - (x1 - 1)) * (y2 - (y1 - 1))
end
@@ -69,10 +81,27 @@ module Roo
# Yield each element of a given type ('row', 'c', etc.) to caller
def each_element(path, elements)
+ elements = Array(elements)
Nokogiri::XML::Reader(::File.open(path, 'rb'), nil, nil, Nokogiri::XML::ParseOptions::NOBLANKS).each do |node|
- next unless node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT && Array(elements).include?(node.name)
+ next unless node.node_type == Nokogiri::XML::Reader::TYPE_ELEMENT && elements.include?(node.name)
yield Nokogiri::XML(node.outer_xml).root if block_given?
end
end
+
+ private
+
+ def char_index(byte)
+ if byte >= 65 && byte <= 90
+ byte - 64
+ elsif byte >= 97 && byte <= 122
+ byte - 96
+ end
+ end
+
+ def num_index(byte)
+ if byte >= 48 && byte <= 57
+ byte - 48
+ end
+ end
end
end
diff --git a/lib/roo/version.rb b/lib/roo/version.rb
index bdd16dc..7fd96d7 100644
--- a/lib/roo/version.rb
+++ b/lib/roo/version.rb
@@ -1,3 +1,3 @@
module Roo
- VERSION = "2.7.1"
+ VERSION = "2.8.0"
end