2017-03-09 84 views
0

我正在为 Google 购物(Google Shopping)生成一个 XML 文档,在产品说明上遇到了编码问题。我尝试了不同的方法:用 str_replace 转义某些字符、utf8_encode、iconv 以及一些自定义函数,但都会产生编码错误。后来我发现有人使用 createTextNode,这种方法对我有效,没有再出现任何错误。(标题:使用命名空间的 PHP DOMDocument createTextNode)

我现在唯一的问题是,无法把 createTextNode 创建的文本放到正确命名空间下的元素里(希望我的表述是准确的)。

下面这段代码可以工作,但由于编码问题,对某些产品说明会失败:

// Creates a <description> element in the Atom namespace, passing the text as the
// element value, and appends it to the entry element.
$addProduct->appendChild($domtree->createElementNS($xmlns['atom'], 'description', 'test content')); 

它生成了正确的一行:

<description>test content</description> 

现在我想改用 createTextNode,但无法让文本出现在正确的标签里。下面这行可以运行:

// Appends the text node directly to the entry element itself, so the text is not
// wrapped in any child element.
$addProduct->appendChild($domtree->createTextNode('test content')); 

但这只是把内容直接放进了我的 entry 主元素里,而它应该位于 description 标签之间。

我怎样才能把它放在描述标签中? 或者,如果您知道在使用旧代码时解决编码问题的好方法,那也可以。


这里是我使用的全部代码:

/**
 * Escapes text for use inside XML while keeping HTML named entities visible.
 *
 * The text is first passed through htmlentities() with double encoding turned
 * off. Named entities other than the five XML predefined ones (&quot; &amp;
 * &apos; &lt; &gt;) are not known to an XML parser, so their leading ampersand
 * is escaped: "&eacute;" becomes "&amp;eacute;". Numeric references such as
 * "&#233;" are left alone because the pattern only matches alphanumeric names.
 *
 * @param string|null $text    Text to escape; null is treated as an empty string.
 * @param string      $charset Character set of $text, handed to htmlentities().
 *
 * @return string|null The escaped text (null only if preg_replace() fails).
 */
function function_xml_entities($text = null, $charset = 'ISO-8859-1'){
    // Cast first: passing null to htmlentities() is deprecated on current PHP.
    $text = htmlentities((string) $text, ENT_COMPAT, $charset, false);

    // Entities every XML parser understands; these must not be touched.
    $arr_xml_special_char = array("&quot;","&amp;","&apos;","&lt;","&gt;");

    // One negative lookahead per predefined entity, simply chained together.
    // They must NOT be wrapped in "(?" ... ")": PCRE reads that as a conditional
    // group whose condition is the first lookahead, which lets "&quot;" slip
    // through the empty "no" branch and turns it into "&amp;quot;".
    $arr_xml_special_char_regex = "";
    foreach($arr_xml_special_char as $value){
        $arr_xml_special_char_regex .= "(?!" . preg_quote($value, '/') . ")";
    }

    $pattern = "/$arr_xml_special_char_regex&([a-zA-Z0-9]+;)/";
    $replacement = '&amp;${1}';
    return preg_replace($pattern, $replacement, $text);
}

/**
 * Converts an HTML fragment to plain text.
 *
 * Opening tags of headings, tables, rows, list items, line breaks, paragraphs
 * and divs become line breaks, adjacent table cells are joined with " - ", all
 * remaining tags are removed, and runs of spaces and newlines are collapsed.
 * HTML entities in the input are left as they are.
 *
 * @param string|null $html HTML fragment; null is treated as an empty string.
 *
 * @return string|null The plain-text version (null only if preg_replace() fails).
 */
function function_html2text($html = null){
    // Tags that start a new line. "\b" pins the tag name so "<p" does not match
    // "<pre>" and "<li" does not match "<link>"; "[^>]*" (instead of "[^>]+")
    // also accepts tags without attributes such as "<p>" or "<br>".
    $tags = array (
        0 => '~<h[123]\b[^>]*>~si',
        1 => '~<h[456]\b[^>]*>~si',
        2 => '~<table\b[^>]*>~si',
        3 => '~<tr\b[^>]*>~si',
        4 => '~<li\b[^>]*>~si',
        5 => '~<br\b[^>]*>~si',
        6 => '~<p\b[^>]*>~si',
        7 => '~<div\b[^>]*>~si',
    );
    $html = preg_replace($tags,"\n",(string) $html);
    // A cell boundary (closing cell followed by an opening cell) becomes " - ".
    $html = preg_replace('~</t(d|h)>\s*<t(d|h)\b[^>]*>~si',' - ',$html);
    // Drop every remaining tag.
    $html = preg_replace('~<[^>]+>~s','',$html);
    // reducing spaces
    $html = preg_replace('~ +~s',' ',$html);
    $html = preg_replace('~^\s+~m','',$html);
    $html = preg_replace('~\s+$~m','',$html);
    // reducing newlines
    $html = preg_replace('~\n+~s',"\n",$html);
    return $html;
}

$sql_products = "QUERY WHICH IS NOT RELEVANT";
// NOTE(review): the mysql_* functions were removed in PHP 7. Moving to mysqli/PDO
// needs the connection setup, which is not part of this snippet.
$result_products = mysql_query($sql_products);

// Create the feed document. It is declared as UTF-8, and DOM expects every
// string handed to it to be UTF-8 as well.
$domtree = new DOMDocument('1.0', 'UTF-8');

$xmlns = array('atom' => 'http://www.w3.org/2005/Atom','g' =>'http://base.google.com/ns/1.0');

// Root element in the Atom namespace. The "g" prefix is declared once on the
// root so the g:* children do not each repeat the declaration.
$xmlRoot = $domtree->appendChild($domtree->createElementNS($xmlns['atom'], 'feed'));
$xmlRoot->setAttributeNS('http://www.w3.org/2000/xmlns/', 'xmlns:g', $xmlns['g']);

// Appends a namespaced element to $parent and returns it. Content is added as a
// text node, so special characters such as "&" and "<" are escaped on output.
// The value argument of createElement()/createElementNS() does not escape "&".
$appendElement = function ($parent, $namespace, $qualifiedName, $text = null) use ($domtree) {
    $element = $parent->appendChild($domtree->createElementNS($namespace, $qualifiedName));
    if ($text !== null && $text !== '') {
        $element->appendChild($domtree->createTextNode((string) $text));
    }
    return $element;
};

// Cuts a string to at most $length characters without splitting a multi-byte
// UTF-8 sequence (byte-wise truncation can leave invalid UTF-8 behind).
$truncate = function ($text, $length) {
    return mb_substr((string) $text, 0, $length, 'UTF-8');
};

// Standard feed metadata such as title, link and author
$appendElement($xmlRoot, $xmlns['atom'], 'title', 'title');
$link = $appendElement($xmlRoot, $xmlns['atom'], 'link');
$link->setAttribute('rel', 'self');
$link->setAttribute('href', $global_websitenaam_include);
// Atom timestamps use the RFC 3339 format.
$appendElement($xmlRoot, $xmlns['atom'], 'updated', date(DATE_ATOM));
$addAuthor = $appendElement($xmlRoot, $xmlns['atom'], 'author');
    $appendElement($addAuthor, $xmlns['atom'], 'name', 'author name');
$appendElement($xmlRoot, $xmlns['atom'], 'id', 'tag:website.com,'.date('Y-m-d'));

// Loop over the products
while($product = mysql_fetch_assoc($result_products)){
    //HERE ARE OTHER QUERIES AND DEFINING VARIABLES WHICH AREN'T RELEVANT TO THE CODE

    // Plain-text description: strip the markup, turn HTML entities back into
    // characters (the text node re-escapes what XML needs) and cap the length.
    // NOTE(review): assumes $product['content'] is UTF-8 - convert it first if
    // the database delivers another encoding.
    $product_content = str_replace('&nbsp;', ' ', function_html2text($product['content']));
    $product_content = $truncate(html_entity_decode($product_content, ENT_QUOTES, 'UTF-8'), 5000);

    // create the products
    $addProduct = $appendElement($xmlRoot, $xmlns['atom'], 'entry');
    $appendElement($addProduct, $xmlns['atom'], 'id', $product['id']);
    $appendElement($addProduct, $xmlns['atom'], 'title', $truncate($product['name'], 150));
    $linkProd = $appendElement($addProduct, $xmlns['atom'], 'link');
     $linkProd->setAttribute('href', $global_websitenaam_include.'/'.rawurlencode($product['category_slug']).'/'.rawurlencode($product['slug']));
    $appendElement($addProduct, $xmlns['g'], 'g:price', number_format($product_price, 2, ',', '.'));
    $appendElement($addProduct, $xmlns['g'], 'g:condition', $condition_product);
    $appendElement($addProduct, $xmlns['g'], 'g:brand', $truncate($product['manufacturer_name'], 70));
    $appendElement($addProduct, $xmlns['g'], 'g:mpn', $product['typenumber']);
    $appendElement($addProduct, $xmlns['g'], 'g:ean', $product['ean']);
    $appendElement($addProduct, $xmlns['g'], 'g:image_link', $product_image);
    // Raw "&" and ">" here: the text node escapes them when the XML is written.
    $appendElement($addProduct, $xmlns['g'], 'g:product_type', 'Huis & Tuin > '.$parentcategory_name.$product['category_name']);
    $appendElement($addProduct, $xmlns['g'], 'g:availability', $product_stock);
    $appendElement($addProduct, $xmlns['g'], 'g:manufacturer', $product['supplier_name']);
    $appendElement($addProduct, $xmlns['g'], 'g:weight', $product['weight']);
    $appendElement($addProduct, $xmlns['g'], 'g:featured_product', $product_advertisement);
    $appendElement($addProduct, $xmlns['g'], 'g:size', $product['size']);
    $addProductShipping = $appendElement($addProduct, $xmlns['g'], 'g:shipping');
     $appendElement($addProductShipping, $xmlns['g'], 'g:country', 'NL');
     $appendElement($addProductShipping, $xmlns['g'], 'g:service', 'Standaard');
     $appendElement($addProductShipping, $xmlns['g'], 'g:price', number_format($shipment_price, 2, ',', '.'));
    $appendElement($addProduct, $xmlns['atom'], 'description', $product_content);
}

// Send the XML to the client. The charset parameter is written "charset=...".
header('Content-Type: text/xml; charset=utf-8');
$domtree->formatOutput = true;
echo $domtree->saveXML();

回答

1

字符节点没有命名空间。字符节点有两种类型:文本节点(使用特殊语法对 XML 特殊字符进行编码)和 CDATA 节。两者都可用于 atom:description 和 atom:summary。(Atom 解析器)预期的内容取决于 type 属性。

type 的默认值是 text(纯文本);html 表示期望的是以文本形式编码的 HTML 片段;xhtml 表示内容是 XHTML 命名空间中的子节点。

您不应使用 createElement()/createElementNS() 的 content 参数,也不应设置 $nodeValue 属性,除非您确定这里的值(空字符串、整数……)中没有特殊字符。它们的转义处理是有缺陷的。请使用 DOMDocument::createTextNode() 或 DOMDocument::createCDATASection() 创建字符节点。

这里是一个小例子:

// Namespace URIs used by this example, keyed by a short label.
$namespaces = ['atom' => 'http://www.w3.org/2005/Atom'];
$fragment = '<div>Description HTML Fragment</div>';

$dom = new DOMDocument();
$dom->formatOutput = TRUE;

// <entry> is the document element, created in the Atom namespace.
$entryNode = $dom->createElementNS($namespaces['atom'], 'entry');
$entryNode = $dom->appendChild($entryNode);

// <summary type="text"> holds its content as a regular text node.
$summaryNode = $dom->createElementNS($namespaces['atom'], 'summary');
$summaryNode = $entryNode->appendChild($summaryNode);
$summaryNode->setAttribute('type', 'text');
$summaryText = $dom->createTextNode('Summary Text');
$summaryNode->appendChild($summaryText);

// <description type="html"> wraps the HTML fragment in a CDATA section.
$descriptionNode = $dom->createElementNS($namespaces['atom'], 'description');
$descriptionNode = $entryNode->appendChild($descriptionNode);
$descriptionNode->setAttribute('type', 'html');
$descriptionData = $dom->createCDATASection($fragment);
$descriptionNode->appendChild($descriptionData);

// Print the serialized document.
echo $dom->saveXML();

输出:

<?xml version="1.0"?> 
<entry xmlns="http://www.w3.org/2005/Atom"> 
    <summary type="text">Summary Text</summary> 
    <description type="html"><![CDATA[<div>Description HTML Fragment</div>]]></description> 
</entry> 
+0

感谢精心答案!它帮助我更好地理解了文档结构。 – Femke