aboutsummaryrefslogtreecommitdiffstats
path: root/lib/roo/excelx/shared_strings.rb
diff options
context:
space:
mode:
Diffstat (limited to 'lib/roo/excelx/shared_strings.rb')
-rwxr-xr-xlib/roo/excelx/shared_strings.rb157
1 files changed, 157 insertions, 0 deletions
diff --git a/lib/roo/excelx/shared_strings.rb b/lib/roo/excelx/shared_strings.rb
new file mode 100755
index 0000000..f7caf7c
--- /dev/null
+++ b/lib/roo/excelx/shared_strings.rb
@@ -0,0 +1,157 @@
+require 'roo/excelx/extractor'
+
+module Roo
+ class Excelx
+ class SharedStrings < Excelx::Extractor
+
+ COMMON_STRINGS = {
+ t: "t",
+ r: "r",
+ html_tag_open: "<html>",
+ html_tag_closed: "</html>"
+ }
+
+ def [](index)
+ to_a[index]
+ end
+
+ def to_a
+ @array ||= extract_shared_strings
+ end
+
+ def to_html
+ @html ||= extract_html
+ end
+
+ # Use to_html or to_a for html returns
+ # See what is happening with commit???
+ def use_html?(index)
+ to_html[index][/<([biu]|sup|sub)>/]
+ end
+
+ private
+
+ def fix_invalid_shared_strings(doc)
+ invalid = { '_x000D_' => "\n" }
+ xml = doc.to_s
+ return doc unless xml[/#{invalid.keys.join('|')}/]
+
+ ::Nokogiri::XML(xml.gsub(/#{invalid.keys.join('|')}/, invalid))
+ end
+
+ def extract_shared_strings
+ return [] unless doc_exists?
+
+ document = fix_invalid_shared_strings(doc)
+ # read the shared strings xml document
+ document.xpath('/sst/si').map do |si|
+ shared_string = ''
+ si.children.each do |elem|
+ case elem.name
+ when 'r'
+ elem.children.each do |r_elem|
+ shared_string << r_elem.content if r_elem.name == 't'
+ end
+ when 't'
+ shared_string = elem.content
+ end
+ end
+ shared_string
+ end
+ end
+
+ def extract_html
+ return [] unless doc_exists?
+ fix_invalid_shared_strings(doc)
+ # read the shared strings xml document
+ doc.xpath('/sst/si').map do |si|
+ html_string = '<html>'
+ si.children.each do |elem|
+ case elem.name
+ when 'r'
+ html_string << extract_html_r(elem)
+ when 't'
+ html_string << elem.content
+ end # case elem.name
+ end # si.children.each do |elem|
+ html_string << '</html>'
+ end # doc.xpath('/sst/si').map do |si|
+ end # def extract_html
+
+ # The goal of this function is to take the following XML code snippet and create a html tag
+ # r_elem ::: XML Element that is in sharedStrings.xml of excel_book.xlsx
+ # {code:xml}
+ # <r>
+ # <rPr>
+ # <i/>
+ # <b/>
+ # <u/>
+ # <vertAlign val="subscript"/>
+ # <vertAlign val="superscript"/>
+ # </rPr>
+ # <t>TEXT</t>
+ # </r>
+ # {code}
+ #
+ # Expected Output ::: "<html><sub|sup><b><i><u>TEXT</u></i></b></sub|/sup></html>"
+ def extract_html_r(r_elem)
+ str = ''
+ xml_elems = {
+ sub: false,
+ sup: false,
+ b: false,
+ i: false,
+ u: false
+ }
+ b, i, u, sub, sup = false, false, false, false, false
+ r_elem.children.each do |elem|
+ case elem.name
+ when 'rPr'
+ elem.children.each do |rPr_elem|
+ case rPr_elem.name
+ when 'b'
+ # set formatting for Bold to true
+ xml_elems[:b] = true
+ when 'i'
+ # set formatting for Italics to true
+ xml_elems[:i] = true
+ when 'u'
+ # set formatting for Underline to true
+ xml_elems[:u] = true
+ when 'vertAlign'
+ # See if the Vertical Alignment is subscript or superscript
+ case rPr_elem.xpath('@val').first.value
+ when 'subscript'
+ # set formatting for Subscript to true and Superscript to false ... Can't have both
+ xml_elems[:sub] = true
+ xml_elems[:sup] = false
+ when 'superscript'
+ # set formatting for Superscript to true and Subscript to false ... Can't have both
+ xml_elems[:sup] = true
+ xml_elems[:sub] = false
+ end
+ end
+ end
+ when 't'
+ str << create_html(elem.content, xml_elems)
+ end
+ end
+ str
+ end # extract_html_r
+
+ # This will return an html string
+ def create_html(text, formatting)
+ tmp_str = ''
+ formatting.each do |elem, val|
+ tmp_str << "<#{elem}>" if val
+ end
+ tmp_str << text
+ reverse_format = Hash[formatting.to_a.reverse]
+ reverse_format.each do |elem, val|
+ tmp_str << "</#{elem}>" if val
+ end
+ tmp_str
+ end
+ end # class SharedStrings < Excelx::Extractor
+ end # class Excelx
+end # module Roo