Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Examples #36

Draft
wants to merge 14 commits into
base: main
Choose a base branch
from
Draft
4 changes: 3 additions & 1 deletion composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@
],
"require": {
"php": "^7.1 || ~8.0.0 || ~8.1.0 || ~8.2.0 || ~8.3.0",
"masterminds/html5": "^2.0"
"masterminds/html5": "^2.0",
"ext-dom": "*",
"ext-simplexml": "*"
},
"autoload": {
"psr-4": {
Expand Down
28 changes: 0 additions & 28 deletions examples/at_a_glance.php

This file was deleted.

File renamed without changes.
2,035 changes: 2,035 additions & 0 deletions examples/basic-docx-parser/example.xml

Large diffs are not rendered by default.

121 changes: 121 additions & 0 deletions examples/basic-docx-parser/index.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
<?php
/**
* DocX Parser
*
* In the selectors below, a namespace prefix is separated with | instead of : (e.g. the selector w|p is used for <w:p> nodes)
*
*
* @author Emily Brand
* @license LGPL The GNU Lesser GPL (LGPL) or an MIT-like license.
* @see http://www.urbandictionary.com/
*/

use QueryPath\CSS\ParseException;
use QueryPath\DOMQuery;
use QueryPath\Exception;

require_once __DIR__ . '/../../vendor/autoload.php';

echo '<h1>Create a Basic Docx Parser</h1>';

echo '<p>This example parses a .docx file, traverse the nodes and displays the text with basic formatting. The contents of the example.xml file is the data extracted from the .docx file that QueryPath processes.</p>';

echo '<h2>Content of example.docx file...</h2>';

try {
    // Load example.docx relative to this script rather than the current working
    // directory (assumes the file sits next to this script - TODO confirm).
    $documentXml = docx2text(__DIR__ . '/example.docx');

    // Walk every w:p node, then every w:r node nested inside it, and print
    // the formatted text of each; a line break is emitted after each w:p.
    foreach (qp($documentXml, 'w|p') as $qp) {
        /** @var DOMQuery $qp */
        foreach ($qp->find('w|r') as $qr) {
            /** @var DOMQuery $qr */
            echo format($qr);
        }

        echo '<br />';
    }
} catch (Exception | RuntimeException $e) {
    // "Exception" is QueryPath\Exception (see the use statements); docx2text()
    // throws the global RuntimeException, so both must be listed here.
    die($e->getMessage());
}

/**
 * Get the node text as HTML and apply basic formatting, if necessary
 *
 * The text of the w:t descendants (plus a trailing space) is HTML-escaped first,
 * so characters such as "<" or "&" coming from the document are displayed
 * literally instead of being interpreted as markup. The escaped text is then
 * wrapped in <u> and/or <b> depending on checkUnderline() and checkBold().
 *
 * @param DOMQuery $qp The node whose text and formatting are inspected
 *
 * @return string HTML-safe text, optionally wrapped in <u> and/or <b>
 * @throws ParseException
 * @throws Exception
 */
function format(DOMQuery $qp): string
{
    $text = htmlspecialchars(
        $qp->find('w|t')->text() . ' ',
        ENT_QUOTES | ENT_SUBSTITUTE,
        'UTF-8'
    );

    // Underline is applied first so that bold ends up as the outer tag
    if (checkUnderline($qp)) {
        $text = sprintf('<u>%s</u>', $text);
    }

    if (checkBold($qp)) {
        $text = sprintf('<b>%s</b>', $text);
    }

    return $text;
}

/**
 * Tell whether the given node has a <w:rPr> child that itself has a <w:b> child,
 * which this example treats as "the text is bold"
 *
 * @param DOMQuery $qp The node to inspect
 *
 * @return bool True when at least one matching <w:b> node exists
 * @throws ParseException
 * @throws Exception
 */
function checkBold(DOMQuery $qp): bool
{
    $propertyNodes = $qp->children('w|rPr');
    $boldNodes = $propertyNodes->children('w|b');

    return $boldNodes->count() > 0;
}

/**
 * Tell whether the given node has a <w:rPr> child that itself has a <w:u> child,
 * which this example treats as "the text is underlined"
 *
 * @param DOMQuery $qp The node to inspect
 *
 * @return bool True when at least one matching <w:u> node exists
 * @throws ParseException
 * @throws Exception
 */
function checkUnderline(DOMQuery $qp): bool
{
    $propertyNodes = $qp->children('w|rPr');
    $underlineNodes = $propertyNodes->children('w|u');

    return $underlineNodes->count() > 0;
}

/**
 * Extract the raw XML of the word/document.xml entry from a docx file
 *
 * The .docx file is opened as a zip archive. When the archive has no
 * word/document.xml entry an empty string is returned; when the entry exists
 * but cannot be read an exception is thrown instead of silently returning
 * an empty string.
 *
 * @param string $archiveFile The path to the .docx file to extract information from
 * @return string The content of word/document.xml, or '' when the entry is absent
 * @throws RuntimeException When ZipArchive is unavailable, the archive cannot be
 *                          opened, or the entry cannot be read
 */
function docx2text(string $archiveFile): string
{
    $dataFile = 'word/document.xml';

    if (!class_exists('ZipArchive', false)) {
        throw new RuntimeException('ZipArchive extension must be enabled to parse .docx files');
    }

    $zip = new ZipArchive();
    // open() returns true on success and an error code otherwise, hence the strict comparison
    if (true !== $zip->open($archiveFile)) {
        throw new RuntimeException('Could not open the file using ZipArchive: ' . $zip->getStatusString());
    }

    try {
        // Search for the docx data file
        $index = $zip->locateName($dataFile);
        if ($index === false) {
            return '';
        }

        $data = $zip->getFromIndex($index);
        if ($data === false) {
            throw new RuntimeException(
                sprintf('Could not read %s from the archive: %s', $dataFile, $zip->getStatusString())
            );
        }

        return $data;
    } finally {
        // Always close the zip, including on the early return and the exception path
        $zip->close();
    }
}
202 changes: 202 additions & 0 deletions examples/basic-manipulation-filter-and-retrieval/index.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
<?php

require_once __DIR__ . '/../../vendor/autoload.php';

//TODO - consider writeHTML() or writeXML() instead of html() / xml()

/*
 * HTML fixture used by the HTML examples: a table with two rows
 * (ids "row1" and "row2") of three cells each
 */
$html = <<<'HTML'
<table>
<tr id="row1">
<td>one</td>
<td>two</td>
<td>three</td>
</tr>

<tr id="row2">
<td>four</td>
<td>five</td>
<td>six</td>
</tr>
</table>
HTML;

/*
 * XML fixture used by the XML examples: a <categories> root holding four
 * named <category> elements, each with a single <desc> child
 */
$xml = <<<'XML'
<?xml version="1.0"?>
<categories>
<category name="DOM">
<desc>This is the DOM description...</desc>
</category>

<category name="Traversing">
<desc>This is the Traversing description...</desc>
</category>

<category name="Filtering">
<desc>This is the Filtering description...</desc>
</category>

<category name="Selectors">
<desc>This is the Selectors description...</desc>
</category>
</categories>
XML;

// Renders the HTML and XML walkthroughs. Each example prints the sample code
// as escaped text and then the result produced by actually running the same
// QueryPath calls against $html / $xml. The multi-line string literals are
// kept flush-left on purpose: their content is emitted verbatim inside <pre>.
try {
    echo '<h1>Basic HTML Usage</h1>';
    echo 'The following HTML chunk will get parsed, traverse, filtered, and manipulated:';
    echo '<pre><code>' . htmlspecialchars($html) . '</code></pre>';

    // HTML example 1: set an attribute on every <td>, then print the table
    echo '<h2>Example 1</h2>';
    echo 'Add the attribute <code>class="cell"</code> to all <code>&lt;td&gt;</code> elements:';

    echo '<pre><code>&lt;?php

echo html5qp($html, "td")
-&gt;attr("class", "cell")
-&gt;parents("table")
-&gt;html();
</code></pre>';

    echo 'This will output the following HTML:';

    echo '<pre><code>';

    echo htmlspecialchars(
        html5qp($html, 'td')
            ->attr('class', 'cell')
            ->parents('table') // traverse up the DOM until we match the table
            ->html() // get the HTML of the table
    );

    echo '</code></pre>';

    echo 'If you want to output a valid HTML document, replace <code>parents(\'table\')</code> with <code>top()</code>:';

    echo '<pre><code>';

    echo htmlspecialchars(
        html5qp($html, 'td')
            ->attr('class', 'cell')
            ->top()
            ->html()
    );

    echo '</code></pre>';

    // HTML example 2: read the text of a single cell
    echo '<h2>Example 2</h2>';
    echo 'Find and output the text of the second cell in the second row of the table:';

    $text = html5qp($html)
        ->find('#row2 > td:nth-child(2)')
        ->text();

    // $text comes from the parsed document, so escape it before embedding it in the page
    echo '<pre><code>&lt;?php

echo html5qp($html)
-&gt;find("#row2 > td:nth-child(2)")
-&gt;text();

// Result: '. htmlspecialchars($text) . '
</code></pre>';

    // HTML example 3: insert a new row after the last <tr>.
    // The displayed sample uses the same "tr:last" selector as the code that runs.
    echo '<h2>Example 3</h2>';
    echo 'Append an additional row at the end of the table:';
    echo '<pre><code>&lt;?php

echo html5qp($html, "tr:last")
-&gt;after("&lt;tr&gt;&lt;td&gt;seven&lt;/td&gt;&lt;td&gt;eight&lt;/td&gt;&lt;td&gt;nine&lt;/td&gt;&lt;/tr&gt;")
-&gt;parents("table") // traverse up the DOM until we match the table
-&gt;html();
</code></pre>';

    echo 'This will output the following HTML:';

    echo '<pre><code>';

    echo htmlspecialchars(
        html5qp($html, 'tr:last')
            ->after("\n\n\t<tr>\n\t\t<td>seven</td>\n\t\t<td>eight</td>\n\t\t<td>nine</td>\n\t</tr>")
            ->parents('table')
            ->html()
    );

    echo '</code></pre>';

    echo '<h1>Basic XML Usage</h1>';
    echo 'The following XML will get parsed, traverse, filtered, and manipulated:';
    echo '<pre><code>' . htmlspecialchars($xml) . '</code></pre>';

    // XML example 1: set an attribute on every <desc>, then print the document
    echo '<h2>Example 1</h2>';
    echo 'Add the attribute <code>class="item"</code> to all <code>&lt;desc&gt;</code> elements:';

    echo '<pre><code>&lt;?php

echo qp($xml, "desc")
-&gt;attr("class", "item")
-&gt;top() // return to the root node (&lt;categories&gt;)
-&gt;xml(); // output a valid XML document.
</code></pre>';

    echo 'This will output the following XML:';

    echo '<pre><code>';

    echo htmlspecialchars(
        qp($xml, 'desc')
            ->attr('class', 'item')
            ->top() // return to the root node
            ->xml() // output a valid XML document
    );

    echo '</code></pre>';

    echo 'You can omit the XML declaration by setting the first argument to true: <code>-&gt;xml(true)</code>.';

    // XML example 2: read the text of a single <desc>
    echo '<h2>Example 2</h2>';
    echo 'Find and output the text of the third <code>&lt;desc&gt;</code> tag:';

    $text = qp($xml)
        ->find('categories > category:nth-child(3) desc')
        ->text();

    // $text comes from the parsed document, so escape it before embedding it in the page
    echo '<pre><code>&lt;?php

echo qp($xml)
-&gt;find("categories > category:nth-child(3) desc")
-&gt;text();

// Result: ' . htmlspecialchars($text) . '
</code></pre>';

    // XML example 3: insert a new <category> after the last one
    echo '<h2>Example 3</h2>';
    echo 'Append a category at the end of the group:';
    echo '<pre><code>&lt;?php

echo qp($xml, "category:last")
-&gt;after("&lt;category name=\'Appended\'&gt;&lt;desc&gt;The appended node...&lt;/desc&gt;&lt;/category&gt;")
-&gt;top()
-&gt;xml();
</code></pre>';

    echo 'This will output the following XML:';

    echo '<pre><code>';

    echo htmlspecialchars(
        qp($xml, 'category:last')
            ->after("\n\n\t<category name=\"Appended\">\n\t\t<desc>The appended node...</desc>\n\t</category>")
            ->top()
            ->xml()
    );

    echo '</code></pre>';
} catch (\QueryPath\Exception $e) {
    // Handle QueryPath exceptions
    die($e->getMessage());
}
Loading
Loading