2011-02-11 105 views
1

我有两个选择这个插件。preg_match帮助互斥条件匹配

(1)nofollow的内容所有外部链接

和/或

(2)没有后续的联系到该目标文件夹(进入绝对URL到目标文件夹)

在选项2,链接可以是内部或外部的。

可以设置两个选项,既不可以设置选项,也可以设置单个选项。

if(get_option('my_nofollow') || get_option('my_nofollow_folder')){add_filter('wp_insert_post_data', 'save_my_nofollow');} 

因此,当设置这些选项中的任何一个时,我将设置一个筛选器,以下面的函数。我的问题是,如何修改函数,以便如果设置(2)但不是(1)我只将nofollow添加到与目标文件夹URL相匹配的链接?

function save_my_nofollow($content) { 
$my_folder = get_option('my_nofollow_folder'); 
$matches = array(); 
    preg_match_all('~<a.*>~isU',$content["post_content"],$matches); 
    for ($i = 0; $i <= sizeof($matches[0]); $i++){ 
     if (isset($matches[0][$i]) && (preg_match('~' . $my_folder . '~', $matches[0][$i]) 
       || !preg_match('~'.get_bloginfo('url').'~',$matches[0][$i]))){ 
     $result = trim($matches[0][$i],">"); 
     $result .= ' rel="nofollow">'; 
     $content["post_content"] = str_replace($matches[0][$i], $result, $content["post_content"]); 
     } 
    } 
    return $content; 
} 

更新的代码与最佳答案:

if(get_option('rseo_nofollow') 
    || get_option('rseo_nofollow_folder')){ 
    add_filter('wp_insert_post_data', 'save_rseo_nofollow'); 
    } 

function save_rseo_nofollow($content) { 
    $folder = get_option('rseo_nofollow_folder'); 
    $externalNoFollow = get_option('rseo_nofollow_external'); 
    $folderNoFollow = get_option('rseo_nofollow_folder'); 
    $extRegex = '~'.preg_quote(get_bloginfo('url'), '~') . '~i'; 
    $intRegex = '~'.preg_quote($folder, '~') . '~i'; 

    $dom = new DomDocument(); 
    libxml_use_internal_errors(true); 
    $dom->loadXml('<root>' . $content['post_content'] . '</root>'); 

    $links = $dom->getElementsByTagName('a'); 
    foreach ($links as $link) { 
     $href = $link->getAttribute('href'); 
     if ($href && $externalNoFollow && !preg_match($extRegex, $href)) { 
      $link->setAttribute('rel', 'nofollow'); 
     } elseif ($href && $folderNoFollow && preg_match($intRegex, $href)) { 
      $link->setAttribute('rel', 'nofollow'); 
     } 
    } 
// print $dom->saveXml();die; 
    //Since we want to strip the root element, we must do so: 
    $newContent = ''; 
    $root = $dom->getElementsByTagName('root')->item(0); 
    foreach ($root->childNodes as $child) { 
     $newContent .= $dom->saveXml($child); 
    } 
    $content['post_content'] = $newContent; 
return $content; 
} 

输入

This is the <a href="http://cnn.com">test</a>. This is the test. 

输出

This is the <a rel="nofollow" href="&quot;http://cnn.com&quot;">test</a>. This is the test. 

回答

1

不要用正则表达式解析HTML。这不是一个好主意......而是使用Dom功能。请注意,您可能需要包装中的内容外根标签(我加<root>这里)(。

$externalNoFollow = get_option('my_nofollow_external'); 
$folderNoFollow = get_option('my_nofollow_folder'); 
$extRegex = '~'.preg_quote(get_bloginfo('url'), '~') . '~i'; 
$intRegex = '~'.preg_quote($folder, '~') . '~i'; 

$dom = new DomDocument(); 
libxml_use_internal_errors(true); 
if (!$dom->loadHtml('<html><body>' . $content['post_content'] . '</body></html>')) { 
    /** Error out, since the loading failed. 
     Make sure `$content['post_content']` is valid html 
    **/ 
    die('Invalid HTML detected'); 
} 

$links = $dom->getElementsByTagName('a'); 
foreach ($links as $link) { 
    $href = $link->getAttribute('href'); 
    if ($href && $externalNoFollow && !preg_match($extRegex, $href)) { 
     $link->setAttribute('rel', 'nofollow'); 
    } elseif ($href && $folderNoFollow && preg_match($intRegex, $href)) { 
     $link->setAttribute('rel', 'nofollow'); 
    } 
} 
//Since we want to strip the root element, we must do so: 
$newContent = ''; 
$root = $dom->getElementsByTagName('body')->item(0); 
foreach ($root->childNodes as $child) { 
    $newContent .= $dom->saveXml($child); 
} 

$content['post_content'] = $newContent; 
return $content; 

注意,你应该增加实际的错误处理柜面无效的HTML的...

+0

@ ircmaxell:哇!现在检查出来,会回报:) – 2011-02-11 18:48:46