diff --git a/src/DiDom/Document.php b/src/DiDom/Document.php index 4d202bc..53be63b 100644 --- a/src/DiDom/Document.php +++ b/src/DiDom/Document.php @@ -15,29 +15,35 @@ class Document */ protected $document; + /** + * @var string + */ + protected $type; + /** * Constructor. * - * @param string $html HTML code or file path + * @param string $string HTML or XML string or file path * @param bool $isFile indicates that in first parameter was passed to the file path * @param string $encoding The document encoding + * @param string $type The document type */ - public function __construct($html = null, $isFile = false, $encoding = 'UTF-8') + public function __construct($string = null, $isFile = false, $encoding = 'UTF-8', $type = 'html') { - if ($html instanceof DOMDocument) { - $this->document = $html; + if ($string instanceof DOMDocument) { + $this->document = $string; return; } + if (!is_string($encoding)) { + throw new InvalidArgumentException(sprintf('%s expects parameter 3 to be string, %s given', __METHOD__, gettype($encoding))); + } + $this->document = new DOMDocument('1.0', $encoding); - if ($html !== null) { - if ($isFile) { - $this->loadHtmlFile($html); - } else { - $this->loadHtml($html); - } + if ($string !== null) { + $this->load($string, $isFile, $type); } } @@ -85,34 +91,58 @@ public function appendChild($node) } /** - * Load HTML from a string. + * Load HTML or XML. * - * @param string $html The HTML string - * - * @return \DiDom\Document - * - * @throws \InvalidArgumentException if the provided argument is not a string + * @param string $string HTML or XML string or file path + * @param bool $isFile indicates that in first parameter was passed to the file path + * @param string $type Type of document */ - public function loadHtml($html) + public function load($string, $isFile = false, $type = 'html') { - if (!is_string($html)) { - throw new InvalidArgumentException(sprintf('%s expects parameter 1 to be string, %s given', __METHOD__, (is_object($html) ? 
get_class($html) : gettype($html)))); + if (!is_string($string)) { + throw new InvalidArgumentException(sprintf('%s expects parameter 1 to be string, %s given', __METHOD__, (is_object($string) ? get_class($string) : gettype($string)))); + } + + if ($isFile) { + $string = $this->loadFile($string); + } + + if (!is_string($type) or !in_array(strtolower($type), ['xml', 'html'], true)) { + throw new InvalidArgumentException(sprintf('Document type must be "xml" or "html", %s given', (is_string($type) ? '"'.$type.'"' : (is_object($type) ? get_class($type) : gettype($type))))); + } + + if (substr($string, 0, 5) !== '', $this->document->encoding); + $string = $prolog.$string; } - $prolog = sprintf('', $this->document->encoding); - $html = $prolog.$html; + $this->type = strtolower($type); libxml_use_internal_errors(true); libxml_disable_entity_loader(true); - $this->document->loadHtml($html); + $this->type === 'xml' ? $this->document->loadXml($string) : $this->document->loadHtml($string); libxml_clear_errors(); libxml_disable_entity_loader(false); - libxml_use_internal_errors(false); + libxml_use_internal_errors(false); - return $this; + return $this; + } + + /** + * Load HTML from a string. + * + * @param string $html The HTML string + * + * @return \DiDom\Document + * + * @throws \InvalidArgumentException if the provided argument is not a string + */ + public function loadHtml($html) + { + return $this->load($html, false, 'html'); } /** @@ -127,6 +157,41 @@ public function loadHtml($html) * @throws \RuntimeException if you are unable to load the file */ public function loadHtmlFile($filepath) + { + return $this->load($filepath, true, 'html'); + } + + /** + * Load XML from a string. + * + * @param string $xml The XML string + * + * @return \DiDom\Document + * + * @throws \InvalidArgumentException if the provided argument is not a string + */ + public function loadXml($xml) + { + return $this->load($xml, false, 'xml'); + } + + /** + * Load XML from a file. 
+ * + * @param string $filepath The path to the XML file + * + * @return \DiDom\Document + * + * @throws \InvalidArgumentException if the file path is not a string + * @throws \RuntimeException if the file does not exist + * @throws \RuntimeException if you are unable to load the file + */ + public function loadXmlFile($filepath) + { + return $this->load($filepath, true, 'xml'); + } + + protected function loadFile($filepath) { if (!is_string($filepath)) { throw new InvalidArgumentException(sprintf('%s expects parameter 1 to be string, %s given', __METHOD__, gettype($filepath))); @@ -138,15 +203,13 @@ public function loadHtmlFile($filepath) } } - $html = file_get_contents($filepath); + $content = file_get_contents($filepath); - if ($html === false) { + if ($content === false) { throw new RuntimeException(sprintf('Could not load file %s', $filepath)); } - $this->loadHtml($html); - - return $this; + return $content; } /** @@ -215,6 +278,16 @@ public function html() return trim($this->document->saveXML($this->getElement())); } + /** + * Dumps the internal document into a string using XML formatting. + * + * @return string The document xml + */ + public function xml() + { + return trim($this->document->saveXML()); + } + /** * Nicely formats output with indentation and extra space. * @@ -263,6 +336,16 @@ public function is($document) return $this->getElement()->isSameNode($element); } + /** + * Returns the type of document (XML or HTML). + * + * @return string|null + */ + public function getType() + { + return $this->type; + } + /** * @return \DOMDocument */ @@ -294,7 +377,7 @@ public function toElement() */ public function __toString() { - return $this->html(); + return $this->type === 'xml' ? 
$this->xml() : $this->html(); } /** diff --git a/src/DiDom/Element.php b/src/DiDom/Element.php index cc60c74..54cf570 100644 --- a/src/DiDom/Element.php +++ b/src/DiDom/Element.php @@ -164,6 +164,16 @@ public function html() return $this->toDocument()->html(); } + /** + * Dumps the internal document into a string using XML formatting. + * + * @return string The node xml + */ + public function xml() + { + return $this->toDocument()->xml(); + } + /** * Get the text content of this node and its descendants. * diff --git a/tests/DiDom/DocumentTest.php b/tests/DiDom/DocumentTest.php index ff9e165..8936c91 100644 --- a/tests/DiDom/DocumentTest.php +++ b/tests/DiDom/DocumentTest.php @@ -16,6 +16,14 @@ public function testConstructWithInvalidArgument() $document = new Document(array('foo')); } + /** + * @expectedException InvalidArgumentException + */ + public function testConstructWithInvalidEncoding() + { + $document = new Document('foo', false, null); + } + /** * @expectedException RuntimeException */ @@ -24,6 +32,58 @@ public function testConstructWithNotExistingFile() $document = new Document('path/to/file', true); } + /** + * @expectedException InvalidArgumentException + */ + public function testConstructorWithInvalidArgumentType() + { + $document = new Document('foo', false, 'UTF-8', null); + } + + /** + * @expectedException InvalidArgumentException + */ + public function testConstructorWithInvalidDocumentType() + { + $document = new Document('foo', false, 'UTF-8', 'bar'); + } + + /** + * @expectedException InvalidArgumentException + */ + public function testLoadWithInvalidContentArgument() + { + $document = new Document(); + $document->load(null); + } + + /** + * @expectedException RuntimeException + */ + public function testLoadWithNotExistingFile() + { + $document = new Document(); + $document->load('path/to/file', true); + } + + /** + * @expectedException InvalidArgumentException + */ + public function testLoadWithInvalidDocumentTypeArgument() + { + $document = new 
Document(); + $document->load('foo', false, null); + } + + /** + * @expectedException InvalidArgumentException + */ + public function testLoadWithInvalidDocumentType() + { + $document = new Document(); + $document->load('foo', false, 'bar'); + } + /** * @expectedException InvalidArgumentException */ @@ -51,12 +111,41 @@ public function testLoadHtmlFileWithInvalidArgument() $document->loadHtmlFile(array('foo')); } + /** + * @expectedException InvalidArgumentException + */ + public function testLoadXmlWithInvalidArgument() + { + $document = new Document(); + $document->loadXml(null); + } + + /** + * @expectedException RuntimeException + */ + public function testLoadXmlFileWithNotExistingFile() + { + $document = new Document(); + $document->loadXmlFile('path/to/file'); + } + + /** + * @expectedException InvalidArgumentException + */ + public function testLoadXmlFileWithInvalidArgument() + { + $document = new Document(); + $document->loadXmlFile(array('foo')); + } + /** * @expectedException InvalidArgumentException */ public function testAppendChildWithInvalidArgument() { - $document = new Document(''); + $html = $this->loadFixture('posts.html'); + + $document = new Document($html); $document->appendChild(null); } @@ -184,6 +273,14 @@ public function testHtml() $this->assertTrue(is_string($document->html())); } + public function testXml() + { + $xml = $this->loadFixture('books.xml'); + $document = new Document($xml, false, 'UTF-8', 'xml'); + + $this->assertTrue(is_string($document->xml())); + } + public function testFormat() { $html = $this->loadFixture('posts.html'); @@ -204,6 +301,23 @@ public function testText() $this->assertEquals('foo', $document->text()); } + public function testGetType() + { + $document = new Document(); + + $this->assertNull($document->getType()); + + $html = $this->loadFixture('posts.html'); + $document = new Document($html); + + $this->assertEquals('html', $document->getType()); + + $xml = $this->loadFixture('books.xml'); + $document = new 
Document($xml, false, 'UTF-8', 'xml'); + + $this->assertEquals('xml', $document->getType()); + } + public function testIs() { $html = $this->loadFixture('posts.html'); @@ -239,7 +353,7 @@ public function testToElement() $this->assertInstanceOf('DiDom\Element', $document->toElement()); } - public function testToString() + public function testToStringHtml() { $html = $this->loadFixture('posts.html'); $document = new Document($html, false); @@ -247,6 +361,14 @@ public function testToString() $this->assertEquals($document->html(), $document->__toString()); } + public function testToStringXml() + { + $xml = $this->loadFixture('books.xml'); + $document = new Document($xml, false, 'UTF-8', 'xml'); + + $this->assertEquals($document->xml(), $document->__toString()); + } + /** * @dataProvider findTests */ diff --git a/tests/fixtures/books.xml b/tests/fixtures/books.xml new file mode 100644 index 0000000..e3d1fe8 --- /dev/null +++ b/tests/fixtures/books.xml @@ -0,0 +1,120 @@ + + + + Gambardella, Matthew + XML Developer's Guide + Computer + 44.95 + 2000-10-01 + An in-depth look at creating applications + with XML. + + + Ralls, Kim + Midnight Rain + Fantasy + 5.95 + 2000-12-16 + A former architect battles corporate zombies, + an evil sorceress, and her own childhood to become queen + of the world. + + + Corets, Eva + Maeve Ascendant + Fantasy + 5.95 + 2000-11-17 + After the collapse of a nanotechnology + society in England, the young survivors lay the + foundation for a new society. + + + Corets, Eva + Oberon's Legacy + Fantasy + 5.95 + 2001-03-10 + In post-apocalypse England, the mysterious + agent known only as Oberon helps to create a new life + for the inhabitants of London. Sequel to Maeve + Ascendant. + + + Corets, Eva + The Sundered Grail + Fantasy + 5.95 + 2001-09-10 + The two daughters of Maeve, half-sisters, + battle one another for control of England. Sequel to + Oberon's Legacy. 
+ + + Randall, Cynthia + Lover Birds + Romance + 4.95 + 2000-09-02 + When Carla meets Paul at an ornithology + conference, tempers fly as feathers get ruffled. + + + Thurman, Paula + Splish Splash + Romance + 4.95 + 2000-11-02 + A deep sea diver finds true love twenty + thousand leagues beneath the sea. + + + Knorr, Stefan + Creepy Crawlies + Horror + 4.95 + 2000-12-06 + An anthology of horror stories about roaches, + centipedes, scorpions and other insects. + + + Kress, Peter + Paradox Lost + Science Fiction + 6.95 + 2000-11-02 + After an inadvertant trip through a Heisenberg + Uncertainty Device, James Salway discovers the problems + of being quantum. + + + O'Brien, Tim + Microsoft .NET: The Programming Bible + Computer + 36.95 + 2000-12-09 + Microsoft's .NET initiative is explored in + detail in this deep programmer's reference. + + + O'Brien, Tim + MSXML3: A Comprehensive Guide + Computer + 36.95 + 2000-12-01 + The Microsoft MSXML3 parser is covered in + detail, with attention to XML DOM interfaces, XSLT processing, + SAX and more. + + + Galos, Mike + Visual Studio 7: A Comprehensive Guide + Computer + 49.95 + 2001-04-16 + Microsoft Visual Studio 7 is explored in depth, + looking at how Visual Basic, Visual C++, C#, and ASP+ are + integrated into a comprehensive development + environment. + + \ No newline at end of file