diff options
| -rw-r--r-- | .github/workflows/ruby.yml | 34 | ||||
| -rw-r--r-- | .travis.yml | 22 | ||||
| -rw-r--r-- | CHANGELOG.md | 14 | ||||
| -rwxr-xr-x | Gemfile | 1 | ||||
| -rw-r--r-- | README.md | 16 | ||||
| -rw-r--r-- | lib/roo/base.rb | 2 | ||||
| -rw-r--r-- | lib/roo/csv.rb | 14 | ||||
| -rwxr-xr-x | lib/roo/excelx.rb | 20 | ||||
| -rw-r--r-- | lib/roo/excelx/cell/number.rb | 3 | ||||
| -rw-r--r-- | lib/roo/excelx/cell/time.rb | 2 | ||||
| -rwxr-xr-x | lib/roo/excelx/sheet_doc.rb | 18 | ||||
| -rw-r--r-- | lib/roo/spreadsheet.rb | 10 | ||||
| -rw-r--r-- | lib/roo/version.rb | 2 | ||||
| -rw-r--r-- | roo.gemspec | 11 | ||||
| -rw-r--r-- | spec/lib/roo/excelx/cell/time_spec.rb | 15 | ||||
| -rwxr-xr-x | spec/lib/roo/excelx_spec.rb | 10 | ||||
| -rw-r--r-- | spec/lib/roo/spreadsheet_spec.rb | 2 | ||||
| -rw-r--r-- | spec/lib/roo/utils_spec.rb | 9 | ||||
| -rw-r--r-- | test/excelx/cell/test_attr_reader_default.rb | 2 | ||||
| -rw-r--r-- | test/excelx/cell/test_number.rb | 2 | ||||
| -rw-r--r-- | test/files/expand_merged_ranges_issue_506.xlsx | bin | 0 -> 8753 bytes | |||
| -rw-r--r-- | test/files/hidden_sheets.xlsx | bin | 6152 -> 10296 bytes | |||
| -rw-r--r-- | test/files/merged_ranges.xlsx | bin | 10596 -> 7622 bytes | |||
| -rw-r--r-- | test/files/richtext_example.xlsx | bin | 0 -> 3566 bytes | |||
| -rw-r--r-- | test/roo/test_excelx.rb | 21 |
25 files changed, 164 insertions, 66 deletions
diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml new file mode 100644 index 0000000..6892090 --- /dev/null +++ b/.github/workflows/ruby.yml @@ -0,0 +1,34 @@ +name: Ruby +on: + push: + branches: + - master + pull_request: + branches: + - master +jobs: + build: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + ruby: + - '2.7' + - '3.0' + - '3.1' + - ruby-head + - jruby-9.3.3.0 + include: + - ruby: ruby-head + env: + RUBYOPT: '--jit' + steps: + - uses: actions/checkout@v2 + - uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{ matrix.ruby }} + bundler-cache: true + - run: bundle exec rake + env: + LONG_RUN: true + diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 4dc862b..0000000 --- a/.travis.yml +++ /dev/null @@ -1,22 +0,0 @@ -language: ruby -rvm: - - 2.3 - - 2.4 - - 2.5 - - 2.6 - - ruby-head - - jruby-9.1.6.0 -env: - - LONG_RUN=true -matrix: - include: - - rvm: 2.6 - env: RUBYOPT=--jit LONG_RUN=true - - rvm: ruby-head - env: RUBYOPT=--jit LONG_RUN=true - allow_failures: - - rvm: ruby-head - - rvm: ruby-head - env: RUBYOPT=--jit LONG_RUN=true - - rvm: jruby-9.1.6.0 -bundler_args: --without local_development diff --git a/CHANGELOG.md b/CHANGELOG.md index a1c3a40..eaa98ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,19 @@ ## Unreleased +## [2.9.0] 2022-03-19 + +### Changed/Added +- Ruby 3.x Support [555](https://github.com/roo-rb/roo/pull/555) +- Ignore all richdata at 'xl/richData' of XSLX [552](https://github.com/roo-rb/roo/pull/552) +- Only copy if cell is present when `expand_merged_ranges: true` [557](https://github.com/roo-rb/roo/pull/557) +- Fixes issue where the contents of hidden sheet was returned when parsing visible sheets only. 
[536](https://github.com/roo-rb/roo/pull/536) +- Add formats [525](https://github.com/roo-rb/roo/pull/525) +- Fix warnings caused by Ruby 2.7 update [530](https://github.com/roo-rb/roo/pull/530) +- Add formats [525](https://github.com/roo-rb/roo/pull/525) + +### Removed +- Support for ruby 2.4, 2.5, 2.6(excluded jRuby) + ## [2.8.3] 2020-02-03 ### Changed/Added - Updated rubyzip version. Now minimal version is 1.3.0 [515](https://github.com/roo-rb/roo/pull/515) - [CVE-2019-16892](https://github.com/rubyzip/rubyzip/pull/403) @@ -10,6 +10,7 @@ group :test do gem 'simplecov', '>= 0.9.0', require: false gem 'coveralls', require: false gem "minitest-reporters" + gem 'webrick' if RUBY_VERSION >= '3.0.0' end group :local_development do @@ -249,7 +249,7 @@ ods.formula('A', 2) csv = Roo::CSV.new("mycsv.csv") ``` -Because Roo uses the [standard CSV library](), you can use options available to that library to parse csv files. You can pass options using the ``csv_options`` key. +Because Roo uses the standard CSV library, you can use options available to that library to parse csv files. You can pass options using the ``csv_options`` key. For instance, you can load tab-delimited files (``.tsv``), and you can use a particular encoding when opening the file. @@ -262,6 +262,18 @@ csv = Roo::CSV.new("mytsv.tsv", csv_options: {col_sep: "\t"}) csv = Roo::CSV.new("mycsv.csv", csv_options: {encoding: Encoding::ISO_8859_1}) ``` +You can also open csv files through the Roo::Spreadsheet class (useful if you accept both CSV and Excel types from a user file upload, for example). 
+ +```ruby +# Load a spreadsheet from a file path +# Roo figures out the right parser based on file extension +spreadsheet = Roo::Spreadsheet.open(csv_or_xlsx_file) + +# Load a csv and auto-strip the BOM (byte order mark) +# csv files saved from MS Excel typically have the BOM marker at the beginning of the file +spreadsheet = Roo::Spreadsheet.open("mycsv.csv", { csv_options: { encoding: 'bom|utf-8' } }) +``` + ## Upgrading from Roo 1.13.x If you use ``.xls`` or Google spreadsheets, you will need to install ``roo-xls`` or ``roo-google`` to continue using that functionality. @@ -271,7 +283,7 @@ Roo's public methods have stayed relatively consistent between 1.13.x and 2.0.0, ## Contributing ### Features -1. Fork it ( https://github.com/[my-github-username]/roo/fork ) +1. Fork it ( https://github.com/roo-rb/roo/fork ) 2. Install it (`bundle install --with local_development`) 3. Create your feature branch (`git checkout -b my-new-feature`) 4. Commit your changes (`git commit -am 'My new feature'`) diff --git a/lib/roo/base.rb b/lib/roo/base.rb index 19eb844..f4ac9a3 100644 --- a/lib/roo/base.rb +++ b/lib/roo/base.rb @@ -544,7 +544,7 @@ class Roo::Base tempfilename = File.join(tmpdir, find_basename(uri)) begin File.open(tempfilename, "wb") do |file| - open(uri, "User-Agent" => "Ruby/#{RUBY_VERSION}") do |net| + URI.open(uri, "User-Agent" => "Ruby/#{RUBY_VERSION}") do |net| file.write(net.read) end end diff --git a/lib/roo/csv.rb b/lib/roo/csv.rb index 516def6..4431bc2 100644 --- a/lib/roo/csv.rb +++ b/lib/roo/csv.rb @@ -90,17 +90,23 @@ module Roo def each_row(options, &block) if uri?(filename) each_row_using_tempdir(options, &block) - elsif is_stream?(filename_or_stream) - ::CSV.new(filename_or_stream, options).each(&block) else - ::CSV.foreach(filename, options, &block) + csv_foreach(filename_or_stream, options, &block) end end def each_row_using_tempdir(options, &block) ::Dir.mktmpdir(Roo::TEMP_PREFIX, ENV["ROO_TMP"]) do |tmpdir| tmp_filename = download_uri(filename, 
tmpdir) - ::CSV.foreach(tmp_filename, options, &block) + csv_foreach(tmp_filename, options, &block) + end + end + + def csv_foreach(path_or_io, options, &block) + if is_stream?(path_or_io) + ::CSV.new(path_or_io, **options).each(&block) + else + ::CSV.foreach(path_or_io, **options, &block) end end diff --git a/lib/roo/excelx.rb b/lib/roo/excelx.rb index f9f0ee2..91ebc1e 100755 --- a/lib/roo/excelx.rb +++ b/lib/roo/excelx.rb @@ -60,15 +60,16 @@ module Roo @filename = local_filename(filename_or_stream, @tmpdir, packed) process_zipfile(@filename || filename_or_stream) - @sheet_names = workbook.sheets.map do |sheet| - unless options[:only_visible_sheets] && sheet['state'] == 'hidden' - sheet['name'] - end - end.compact + @sheet_names = [] @sheets = [] @sheets_by_name = {} - @sheet_names.each_with_index do |sheet_name, n| - @sheets_by_name[sheet_name] = @sheets[n] = Sheet.new(sheet_name, @shared, n, sheet_options) + + workbook.sheets.each_with_index do |sheet, index| + next if options[:only_visible_sheets] && sheet['state'] == 'hidden' + + sheet_name = sheet['name'] + @sheet_names << sheet_name + @sheets_by_name[sheet_name] = @sheets[index] = Sheet.new(sheet_name, @shared, index, sheet_options) end if cell_max @@ -428,6 +429,11 @@ module Roo entries.each do |entry| path = case entry.name.downcase + when /richdata/ + # FIXME: Ignore richData as parsing is not implemented yet and can cause + # Zip::DestinationFileExistsError when including a second "styles.xml" entry + # see http://schemas.microsoft.com/office/spreadsheetml/2017/richdata2 + nil when /sharedstrings.xml$/ "#{@tmpdir}/roo_sharedStrings.xml" when /styles.xml$/ diff --git a/lib/roo/excelx/cell/number.rb b/lib/roo/excelx/cell/number.rb index 9f23c4f..7ea48b5 100644 --- a/lib/roo/excelx/cell/number.rb +++ b/lib/roo/excelx/cell/number.rb @@ -48,7 +48,7 @@ module Roo when /^(0+)$/ then "%0#{$1.size}d" when /^0\.(0+)$/ then "%.#{$1.size}f" when '#,##0' then number_format('%.0f') - when '#,##0.00' then 
number_format('%.2f') + when /^#,##0.(0+)$/ then number_format("%.#{$1.size}f") when '0%' proc do |number| Kernel.format('%d%%', number.to_f * 100) @@ -64,6 +64,7 @@ module Roo when '#,##0.00;[Red](#,##0.00)' then number_format('%.2f', '[Red](%.2f)') # FIXME: not quite sure what the format should look like in this case. when '##0.0E+0' then '%.1E' + when "_-* #,##0.00\\ _€_-;\\-* #,##0.00\\ _€_-;_-* \"-\"??\\ _€_-;_-@_-" then number_format('%.2f', '-%.2f') when '@' then proc { |number| number } else raise "Unknown format: #{format.inspect}" diff --git a/lib/roo/excelx/cell/time.rb b/lib/roo/excelx/cell/time.rb index a1f0864..5fed1e2 100644 --- a/lib/roo/excelx/cell/time.rb +++ b/lib/roo/excelx/cell/time.rb @@ -13,7 +13,7 @@ module Roo super @format = excelx_type.last @datetime = create_datetime(base_date, value) - @value = link ? Roo::Link.new(link, value) : (value.to_f * 86_400).to_i + @value = link ? Roo::Link.new(link, value) : (value.to_f * 86_400).round.to_i end def formatted_value diff --git a/lib/roo/excelx/sheet_doc.rb b/lib/roo/excelx/sheet_doc.rb index 6da4c26..adbb77a 100755 --- a/lib/roo/excelx/sheet_doc.rb +++ b/lib/roo/excelx/sheet_doc.rb @@ -101,12 +101,7 @@ module Roo cell_xml_children.each do |cell| case cell.name when 'is' - content = +"" - cell.children.each do |inline_str| - if inline_str.name == 't' - content << inline_str.content - end - end + content = cell.search('t').map(&:content).join unless content.empty? 
return Excelx::Cell.cell_class(:string).new(content, formula, style, hyperlink, coordinate) end @@ -197,11 +192,12 @@ module Roo # Extract merged ranges from xml merges = {} doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml| - tl, br = mergecell_xml["ref"].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) } - for row in tl[0]..br[0] do - for col in tl[1]..br[1] do - next if row == tl[0] && col == tl[1] - merges[[row, col]] = tl + src, dst = mergecell_xml["ref"].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) } + next unless cells[src] + for row in src[0]..dst[0] do + for col in src[1]..dst[1] do + next if row == src[0] && col == src[1] + merges[[row, col]] = src end end end diff --git a/lib/roo/spreadsheet.rb b/lib/roo/spreadsheet.rb index cdc93f0..54063b6 100644 --- a/lib/roo/spreadsheet.rb +++ b/lib/roo/spreadsheet.rb @@ -24,8 +24,14 @@ module Roo options[:file_warning] = :ignore extension.tr('.', '').downcase.to_sym else - res = ::File.extname((path =~ /\A#{::URI::DEFAULT_PARSER.make_regexp}\z/) ? ::URI.parse(::URI.encode(path)).path : path) - res.tr('.', '').downcase.to_sym + parsed_path = + if path =~ /\A#{::URI::DEFAULT_PARSER.make_regexp}\z/ + # path is 7th match + Regexp.last_match[7] + else + path + end + ::File.extname(parsed_path).tr('.', '').downcase.to_sym end end end diff --git a/lib/roo/version.rb b/lib/roo/version.rb index 262847c..d2bb2bf 100644 --- a/lib/roo/version.rb +++ b/lib/roo/version.rb @@ -1,3 +1,3 @@ module Roo - VERSION = "2.8.3" + VERSION = "2.9.0" end diff --git a/roo.gemspec b/roo.gemspec index 814c5fc..c6561c2 100644 --- a/roo.gemspec +++ b/roo.gemspec @@ -17,12 +17,19 @@ Gem::Specification.new do |spec| spec.files.reject! 
{ |fn| fn.include?('test/files') } spec.require_paths = ['lib'] - spec.required_ruby_version = ">= 2.3.0" + if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'jruby' + spec.required_ruby_version = ">= 2.6.0" + else + spec.required_ruby_version = ">= 2.7.0" + end spec.add_dependency 'nokogiri', '~> 1' spec.add_dependency 'rubyzip', '>= 1.3.0', '< 3.0.0' - spec.add_development_dependency 'rake', '~> 10.1' + spec.add_development_dependency 'rake' spec.add_development_dependency 'minitest', '~> 5.4', '>= 5.4.3' spec.add_development_dependency 'rack', '~> 1.6', '< 2.0.0' + if RUBY_VERSION >= '3.0.0' + spec.add_development_dependency 'matrix' + end end diff --git a/spec/lib/roo/excelx/cell/time_spec.rb b/spec/lib/roo/excelx/cell/time_spec.rb new file mode 100644 index 0000000..214443f --- /dev/null +++ b/spec/lib/roo/excelx/cell/time_spec.rb @@ -0,0 +1,15 @@ +require 'spec_helper'
+
+RSpec.describe Roo::Excelx::Cell::Time do
+ it "should set the cell value to the correct number of seconds" do
+ value = 0.05513888888888888 # '1:19:24' expressed as a fraction of a day
+ excelx_type = [:numeric_or_formula, "h:mm:ss"]
+ base_timestamp = Date.new(1899, 12, 30).to_time.to_i
+ time_cell = Roo::Excelx::Cell::Time.new(value, nil, excelx_type, 1, nil, base_timestamp, nil)
+ expect(time_cell.value).to eq(1*60*60 + 19*60 + 24) # '1:19:24' in seconds
+ # use case from https://github.com/roo-rb/roo/issues/310
+ value = 0.523761574074074 # '12:34:13' expressed as a fraction of a day (not seconds)
+ time_cell = Roo::Excelx::Cell::Time.new(value, nil, excelx_type, 1, nil, base_timestamp, nil)
+ expect(time_cell.value).to eq(12*60*60 + 34*60 + 13) # 12:34:13 in seconds
+ end
+end
\ No newline at end of file diff --git a/spec/lib/roo/excelx_spec.rb b/spec/lib/roo/excelx_spec.rb index 1b67a4d..7cc9b13 100755 --- a/spec/lib/roo/excelx_spec.rb +++ b/spec/lib/roo/excelx_spec.rb @@ -86,6 +86,14 @@ describe Roo::Excelx do end end + describe 'for a workbook with hidden sheets' do + let(:path) { 'test/files/hidden_sheets.xlsx' } + + it 'returns the cell contents from the visible sheet' do + expect(Roo::Excelx.new(path, only_visible_sheets: true).cell('A', 1)).to eq "visible sheet 1" + end + end + describe '#parse' do let(:path) { 'test/files/numeric-link.xlsx' } @@ -653,4 +661,4 @@ describe 'Roo::Excelx with options set' do end end end -end
\ No newline at end of file +end diff --git a/spec/lib/roo/spreadsheet_spec.rb b/spec/lib/roo/spreadsheet_spec.rb index 19fb8a7..08b6bf4 100644 --- a/spec/lib/roo/spreadsheet_spec.rb +++ b/spec/lib/roo/spreadsheet_spec.rb @@ -25,7 +25,7 @@ describe Roo::Spreadsheet do let(:filename) { tempfile.path } it 'loads the proper type' do - expect(Roo::CSV).to receive(:new).with(filename, file_warning: :ignore).and_call_original + expect(Roo::CSV).to receive(:new).with(filename, {file_warning: :ignore}).and_call_original expect(Roo::Spreadsheet.open(tempfile, extension: :csv)).to be_a(Roo::CSV) end end diff --git a/spec/lib/roo/utils_spec.rb b/spec/lib/roo/utils_spec.rb index c000ae7..a32a32d 100644 --- a/spec/lib/roo/utils_spec.rb +++ b/spec/lib/roo/utils_spec.rb @@ -43,15 +43,6 @@ RSpec.describe ::Roo::Utils do end end - context '.split_coordinate' do - it "returns the expected result" do - expect(described_class.split_coordinate('A1')).to eq [1, 1] - expect(described_class.split_coordinate('B2')).to eq [2, 2] - expect(described_class.split_coordinate('R2')).to eq [2, 18] - expect(described_class.split_coordinate('AR31')).to eq [31, 18 + 26] - end - end - context '.extract_coordinate' do it "returns the expected result" do expect(described_class.extract_coordinate('A1')).to eq [1, 1] diff --git a/test/excelx/cell/test_attr_reader_default.rb b/test/excelx/cell/test_attr_reader_default.rb index c1ae277..be2e562 100644 --- a/test/excelx/cell/test_attr_reader_default.rb +++ b/test/excelx/cell/test_attr_reader_default.rb @@ -60,7 +60,7 @@ class TestAttrReaderDefault < Minitest::Test object.instance_variable_defined?(attr_name) ? 
object.instance_variable_get(attr_name) : nil else object.public_send(attr_name) - end + end if expected_value assert_equal expected_value, value diff --git a/test/excelx/cell/test_number.rb b/test/excelx/cell/test_number.rb index 5c8d726..ddcffeb 100644 --- a/test/excelx/cell/test_number.rb +++ b/test/excelx/cell/test_number.rb @@ -66,6 +66,7 @@ class TestRooExcelxCellNumber < Minitest::Test ['0.000000000', '1042.000000000'], ['#,##0', '1,042'], ['#,##0.00', '1,042.00'], + ['#,##0.000', '1,042.000'], ['0%', '104200%'], ['0.00%', '104200.00%'], ['0.00E+00', '1.04E+03'], @@ -74,6 +75,7 @@ class TestRooExcelxCellNumber < Minitest::Test ['#,##0.00;(#,##0.00)', '1,042.00'], ['#,##0.00;[Red](#,##0.00)', '1,042.00'], ['##0.0E+0', '1.0E+03'], + ["_-* #,##0.00\\ _€_-;\\-* #,##0.00\\ _€_-;_-* \"-\"??\\ _€_-;_-@_-", '1,042.00'], ['@', '1042'] ].each do |style_format, result| cell = Roo::Excelx::Cell::Number.new '1042', nil, [style_format], nil, nil, nil diff --git a/test/files/expand_merged_ranges_issue_506.xlsx b/test/files/expand_merged_ranges_issue_506.xlsx Binary files differnew file mode 100644 index 0000000..89527e7 --- /dev/null +++ b/test/files/expand_merged_ranges_issue_506.xlsx diff --git a/test/files/hidden_sheets.xlsx b/test/files/hidden_sheets.xlsx Binary files differindex 78eccaa..b61589c 100644 --- a/test/files/hidden_sheets.xlsx +++ b/test/files/hidden_sheets.xlsx diff --git a/test/files/merged_ranges.xlsx b/test/files/merged_ranges.xlsx Binary files differindex d8922bf..3709b5d 100644 --- a/test/files/merged_ranges.xlsx +++ b/test/files/merged_ranges.xlsx diff --git a/test/files/richtext_example.xlsx b/test/files/richtext_example.xlsx Binary files differnew file mode 100644 index 0000000..48dc1ee --- /dev/null +++ b/test/files/richtext_example.xlsx diff --git a/test/roo/test_excelx.rb b/test/roo/test_excelx.rb index 90c7167..61161f0 100644 --- a/test/roo/test_excelx.rb +++ b/test/roo/test_excelx.rb @@ -133,6 +133,20 @@ class TestRworkbookExcelx < 
Minitest::Test end end + def test_expand_merged_range_doesnt_insert_nil_values + options = { expand_merged_ranges: true } + xlsx = roo_class.new(File.join(TESTDIR, "merged_ranges.xlsx"), options) + + refute_includes xlsx.sheet_for(0).cells.values, nil, "`nil` was copied into the cells hash from an empty merged range" + end + + def test_expand_merged_range_doesnt_raise_issue_506 + # Issue 506 sent an example test.xlsx file that would raise an error upon parsing. + xl = Roo::Spreadsheet.open(File.join(TESTDIR, "expand_merged_ranges_issue_506.xlsx"), expand_merged_ranges: true) + data = xl.parse(one: /one/i, two: /two/i, clean: true) + assert_equal [{:one=>"John", :two=>"Johnson"}, {:one=>"Sam", :two=>nil}, {:one=>"Dave", :two=>nil}], data + end + def test_noexpand_merged_range xlsx = roo_class.new(File.join(TESTDIR, "merged_ranges.xlsx")) @@ -320,6 +334,13 @@ class TestRworkbookExcelx < Minitest::Test end end + def test_parsing_xlsx_with_richtext + xlsx = roo_class.new(File.join(TESTDIR, "richtext_example.xlsx")) + + assert_equal "Example richtext", xlsx.cell("a", 1) + assert_equal "Example richtext", xlsx.cell("b", 1) + end + def roo_class Roo::Excelx end |
