# frozen_string_literal: true require 'forwardable' require 'roo/excelx/extractor' module Roo class Excelx class SheetDoc < Excelx::Extractor extend Forwardable delegate [:workbook] => :@shared def initialize(path, relationships, shared, options = {}) super(path) @shared = shared @options = options @relationships = relationships end def cells(relationships) @cells ||= extract_cells(relationships) end def hyperlinks(relationships) # If you're sure you're not going to need this hyperlinks you can discard it @hyperlinks ||= if @options[:no_hyperlinks] || !relationships.include_type?("hyperlink") {} else extract_hyperlinks(relationships) end end # Get the dimensions for the sheet. # This is the upper bound of cells that might # be parsed. (the document may be sparse so cell count is only upper bound) def dimensions @dimensions ||= extract_dimensions end # Yield each row xml element to caller def each_row_streaming(&block) Roo::Utils.each_element(@path, 'row', &block) end # Yield each cell as Excelx::Cell to caller for given # row xml def each_cell(row_xml) return [] unless row_xml row_xml.children.each do |cell_element| coordinate = ::Roo::Utils.extract_coordinate(cell_element["r"]) hyperlinks = hyperlinks(@relationships)[coordinate] yield cell_from_xml(cell_element, hyperlinks, coordinate) end end private def cell_value_type(type, format) case type when 's' :shared when 'b' :boolean when 'str' :string when 'inlineStr' :inlinestr else Excelx::Format.to_type(format) end end # Internal: Creates a cell based on an XML clell.. # # cell_xml - a Nokogiri::XML::Element. e.g. # # 22606 # # hyperlink - a String for the hyperlink for the cell or nil when no # hyperlink is present. # coordinate - a Roo::Excelx::Coordinate for the coordinate for the cell # or nil to extract coordinate from cell_xml. # empty_cell - an Optional Boolean value. # # Examples # # cells_from_xml(, nil, nil) # # => # # Returns a type of . def cell_from_xml(cell_xml, hyperlink, coordinate, empty_cell=true) coordinate ||= ::Roo::Utils.extract_coordinate(cell_xml["r"]) cell_xml_children = cell_xml.children return create_empty_cell(coordinate, empty_cell) if cell_xml_children.empty? # NOTE: This is error prone, to_i will silently turn a nil into a 0. # This works by coincidence because Format[0] is General. style = cell_xml["s"].to_i formula = nil cell_xml_children.each do |cell| case cell.name when 'is' content = cell.search('t').map(&:content).join unless content.empty? return Excelx::Cell.cell_class(:string).new(content, formula, style, hyperlink, coordinate) end when 'f' formula = cell.content when 'v' format = style_format(style) value_type = cell_value_type(cell_xml["t"], format) return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate) end end create_empty_cell(coordinate, empty_cell) end def create_empty_cell(coordinate, empty_cell) if empty_cell Excelx::Cell::Empty.new(coordinate) end end def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, coordinate) # NOTE: format.to_s can replace excelx_type as an argument for # Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but # it will break some brittle tests. excelx_type = [:numeric_or_formula, format.to_s] # NOTE: There are only a few situations where value != cell.content # 1. when a sharedString is used. value = sharedString; # cell.content = id of sharedString # 2. boolean cells: value = 'TRUE' | 'FALSE'; cell.content = '0' | '1'; # But a boolean cell should use TRUE|FALSE as the formatted value # and use a Boolean for it's value. Using a Boolean value breaks # Roo::Base#to_csv. # 3. formula case value_type when :shared cell_content = cell.content.to_i value = shared_strings.use_html?(cell_content) ? shared_strings.to_html[cell_content] : shared_strings[cell_content] Excelx::Cell.cell_class(:string).new(value, formula, style, hyperlink, coordinate) when :boolean, :string value = cell.content Excelx::Cell.cell_class(value_type).new(value, formula, style, hyperlink, coordinate) when :time, :datetime cell_content = cell.content.to_f # NOTE: A date will be a whole number. A time will have be > 1. And # in general, a datetime will have decimals. But if the cell is # using a custom format, it's possible to be interpreted incorrectly. # cell_content.to_i == cell_content && standard_style?=> :date # # Should check to see if the format is standard or not. If it's a # standard format, than it's a date, otherwise, it is a datetime. # @styles.standard_style?(style_id) # STANDARD_STYLES.keys.include?(style_id.to_i) cell_type = if cell_content < 1.0 :time elsif (cell_content - cell_content.floor).abs > 0.000001 :datetime else :date end base_value = cell_type == :date ? base_date : base_timestamp Excelx::Cell.cell_class(cell_type).new(cell_content, formula, excelx_type, style, hyperlink, base_value, coordinate) when :date Excelx::Cell.cell_class(:date).new(cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate) else Excelx::Cell.cell_class(:number).new(cell.content, formula, excelx_type, style, hyperlink, coordinate) end end def extract_hyperlinks(relationships) return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink')) hyperlinks.each_with_object({}) do |hyperlink, hash| if relationship = relationships[hyperlink['id']] target_link = relationship['Target'] target_link += "##{hyperlink['location']}" if hyperlink['location'] Roo::Utils.coordinates_in_range(hyperlink["ref"].to_s) do |coord| hash[coord] = target_link end end end end def expand_merged_ranges(cells) # Extract merged ranges from xml merges = {} doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml| src, dst = mergecell_xml["ref"].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) } next unless cells[src] for row in src[0]..dst[0] do for col in src[1]..dst[1] do next if row == src[0] && col == src[1] merges[[row, col]] = src end end end # Duplicate value into all cells in merged range merges.each do |dst, src| cells[dst] = cells[src] end end def extract_cells(relationships) extracted_cells = {} empty_cell = @options[:empty_cell] doc.xpath('/worksheet/sheetData/row').each.with_index(1) do |row_xml, ycoord| row_xml.xpath('c').each.with_index(1) do |cell_xml, xcoord| r = cell_xml['r'] coordinate = if r.nil? ::Roo::Excelx::Coordinate.new(ycoord, xcoord) else ::Roo::Utils.extract_coordinate(r) end cell = cell_from_xml(cell_xml, hyperlinks(relationships)[coordinate], coordinate, empty_cell) extracted_cells[coordinate] = cell if cell end end expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges] extracted_cells end def extract_dimensions Roo::Utils.each_element(@path, 'dimension') do |dimension| return dimension["ref"] end end def style_format(style) @shared.styles.style_format(style) end def base_date @shared.base_date end def base_timestamp @shared.base_timestamp end def shared_strings @shared.shared_strings end end end end