Skip to content

Commit 990de9c

Browse files
committed
Allow access to other XML docs in docx file like the headers and footers
1 parent 8c40b4c commit 990de9c

File tree

3 files changed

+67
-6
lines changed

3 files changed

+67
-6
lines changed

lib/docx/document.rb

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -18,15 +18,24 @@ module Docx
1818
# puts d.text
1919
# end
2020
class Document
21-
attr_reader :xml, :doc, :zip, :styles
21+
22+
# Maps each reader attribute to the path of its XML part inside the .docx
# archive. A path containing * is a glob that may match several parts,
# e.g. word/header1.xml, word/header2.xml.
# Frozen so the shared lookup table cannot be modified at runtime.
DOCUMENT_PATHS = {
  doc: "word/document.xml",
  styles: "word/styles.xml",
  headers: "word/header*.xml",
  footers: "word/footer*.xml",
  numbering: "word/numbering.xml"
}.freeze
31+
32+
attr_reader :xml, :doc, :zip, :styles, :headers, :footers, :numbering
2233

2334
def initialize(path, &block)
2435
@replace = {}
2536
@zip = Zip::File.open(path)
26-
@document_xml = @zip.read('word/document.xml')
27-
@doc = Nokogiri::XML(@document_xml)
28-
@styles_xml = @zip.read('word/styles.xml')
29-
@styles = Nokogiri::XML(@styles_xml)
37+
extract_documents
38+
3039
if block_given?
3140
yield self
3241
@zip.close
@@ -123,13 +132,49 @@ def replace_entry(entry_path, file_contents)
123132

124133
private
125134

135+
# Loads every XML part listed in DOCUMENT_PATHS from the opened archive.
#
# A path containing "*" is handed to the glob extractor (several parts may
# match); any other path is handed to the single-document extractor.
def extract_documents
  DOCUMENT_PATHS.each do |attr_name, path|
    # A plain substring test avoids the ambiguous `match /regex/` call form
    # and does not build a MatchData just to answer yes/no.
    if path.include?("*")
      extract_multiple_documents_from_globbed_path(attr_name, path)
    else
      extract_single_document_from_path(attr_name, path)
    end
  end
end
144+
145+
# Parses the archive entry at +path+ and stores the resulting XML document
# in the instance variable named after +attr_name+.
#
# When the archive has no such entry nothing is assigned and nil is returned.
def extract_single_document_from_path(attr_name, path)
  return unless @zip.find_entry(path)

  parsed = Nokogiri::XML(@zip.read(path))
  instance_variable_set(:"@#{attr_name}", parsed)
end
151+
152+
# Parses every archive entry matching +glob_path+ and stores the results, as
# a hash, in the instance variable named after +hash_attr_name+.
#
# Keys are the entry names with the leading "word/" and trailing ".xml"
# removed; values are the parsed XML documents. An empty hash is stored when
# nothing matches.
def extract_multiple_documents_from_globbed_path(hash_attr_name, glob_path)
  documents_by_name = {}

  @zip.glob(glob_path).each do |entry|
    entry_name = entry.name
    key = entry_name.sub(/^word\//, "").sub(/\.xml$/, "")
    documents_by_name[key] = Nokogiri::XML(@zip.read(entry_name))
  end

  instance_variable_set(:"@#{hash_attr_name}", documents_by_name)
end
161+
126162
#--
127163
# TODO: Flesh this out to be compatible with other files
128164
# TODO: Method to set flag on files that have been edited, probably by inserting something at the
129165
# end of methods that make edits?
130166
#++
131167
# Writes every loaded XML part listed in DOCUMENT_PATHS back into the archive
# through replace_entry.
#
# Glob entries hold a hash of simple file name => document, and each one is
# written to "word/<name>.xml". Other entries hold a single document that is
# written to its configured path. Parts that were never loaded (nil) are
# skipped in both cases.
def update
  DOCUMENT_PATHS.each do |attr_name, path|
    loaded = instance_variable_get("@#{attr_name}")
    # One guard for both branches: a missing part must not raise.
    next unless loaded

    if path.include?("*")
      loaded.each do |simple_file_name, xml_document|
        replace_entry("word/#{simple_file_name}.xml", xml_document.serialize(save_with: 0))
      end
    else
      replace_entry(path, loaded.serialize(save_with: 0))
    end
  end
end
134179

135180
# generate Elements::Containers::Paragraph from paragraph XML node

spec/docx/document_spec.rb

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,22 @@
279279
end
280280
end
281281

282+
# Verifies that a .docx with several inner XML parts exposes each of them.
describe 'multiple documents' do
  before { @doc = Docx::Document.open(@fixtures_path + '/multi_doc.docx') }

  it 'should extract all inner documents' do
    # Parts exposed as a single document
    expect(@doc.doc).not_to be_nil
    expect(@doc.styles).not_to be_nil

    # Parts exposed as a hash, looked up here by "header1" / "footer1"
    expect(@doc.headers).not_to be_nil
    expect(@doc.headers["header1"].text).to eq "Hello from the header."
    expect(@doc.footers).not_to be_nil
    expect(@doc.footers["footer1"].text).to eq "Hello from the footer."

    expect(@doc.numbering).not_to be_nil
  end
end
297+
282298
describe 'saving' do
283299
before do
284300
@doc = Docx::Document.open(@fixtures_path + '/saving.docx')

spec/fixtures/multi_doc.docx

6.13 KB
Binary file not shown.

0 commit comments

Comments
 (0)