2010-07-20 55 views
3

除了好玩之外,我没有任何理由今天实施了Trie。目前它支持add()和search(),remove()也应该被实现,但我认为这很简单。优化Trie实施

它功能完善,但填充数据需要一点点太多,我的口味。我使用这个列表作为数据源:http://www.isc.ro/lists/twl06.zip(在SO的其他地方找到)。加载需要大约11秒。我最初的实施花了15秒左右,所以我已经给它一个很好的性能提升,但我仍然不满意:)

我的问题是:还有什么可以给我一个(大幅)的性能提升?我不受这种设计的约束,可以接受彻底的检修。

class Trie 
{ 
    private $trie; 
    public function __construct(TrieNode $trie = null) 
    { 
     if($trie !== null) $this->trie = $trie; 
     else $this->trie = new TrieNode(); 
     $this->counter = 0; 
    } 
    public function add($value, $val = null) 
    { 
     $str = ''; 
     $trie_ref = $this->trie; 
     foreach(str_split($value) as $char) 
     { 
      $str .= $char; 
      $trie_ref = $trie_ref->addNode($str); 
     } 
     $trie_ref->value = $val; 
     return true; 
    } 
    public function search($value, $only_words = false) 
    { 
     if($value === '') return $this->trie; 
     $trie_ref = $this->trie; 
     $str = ''; 
     foreach(str_split($value) as $char) 
     { 
      $str .= $char; 
      if($trie_ref = $trie_ref->getNode($str)) 
      { 
       if($str === $value) return ($only_words ? $this->extractWords($trie_ref) : new self($trie_ref)); 
       continue; 
      } 
      return false; 
     } 
     return false; 
    } 
    public function extractWords(TrieNode $trie) 
    { 
     $res = array(); 
     foreach($trie->getChildren() as $child) 
     { 
      if($child->value !== null) $res[] = $child->value; 
      if($child->hasChildren()) $res = array_merge($res, $this->extractWords($child)); 
     } 
     return $res; 
    } 
} 
class TrieNode 
{ 
    public $value; 
    protected $children = array(); 
    public function addNode($index) 
    { 
     if(isset($this->children[$index])) return $this->children[$index]; 
     return $this->children[$index] = new self(); 
    } 
    public function getNode($index) 
    { 
     return (isset($this->children[$index]) ? $this->children[$index] : false); 
    } 
    public function getChildren() 
    { 
     return $this->children; 
    } 
    public function hasChildren() 
    { 
     return count($this->children)>0; 
    } 
} 
+0

您是否已经成型使用[XHProf的](http://pecl.php.net/package/xhprof)的代码或[Xdebug的](HTTP:// PECL .php.net /包/ Xdebug的)? – Charles 2010-07-20 17:38:03

+0

我还没有,很好的电话。我明天会! – 2010-07-20 18:55:59

回答

3

不知道PHP,但

在下面的方法:

public function add($value, $val = null) 
    { 
     $str = ''; 
     $trie_ref = $this->trie; 
     foreach(str_split($value) as $char) 
     { 
      $str .= $char; 
      $trie_ref = $trie_ref->addNode($str); 
     } 
     $trie_ref->value = $val; 
     return true; 
    } 
    public function search($value, $only_words = false) 
    { 
     if($value === '') return $this->trie; 
     $trie_ref = $this->trie; 
     $str = ''; 
     foreach(str_split($value) as $char) 
     { 
      $str .= $char; 
      if($trie_ref = $trie_ref->getNode($str)) 
      { 
       if($str === $value) return ($only_words ? $this->extractWords($trie_ref) : new self($trie_ref)); 
       continue; 
      } 
      return false; 
     } 
     return false; 
    } 

为什么你甚至不需要$str .= $char(我想是附加)?这本身会改变你的O(n)时间加法/搜索到Omega(n^2)(n的长度为$value)而不是O(n)。

在线索中,通常在走绳时走线,即找到基于当前字符的下一个节点,而不是当前前缀。

+0

好点,绝对没有必要。但会增加速度吗?但内存肯定会更容易。无论如何,我会明天实施并在此发布结果。 – 2010-07-20 18:59:37

+0

@ Dennis:是的,IMO。这实际上是尝试可以如此快速的主要原因之一 – 2010-07-20 20:36:12

+0

@Dennis:我很好奇你要发布的结果:-) – 2010-08-09 21:00:22

1

我想这个实现是针对键值类型的插入和查找?这是处理[英文]单词的单词。

class Trie { 


static function insert_word(Node $root, $text) 
{ 
    $v = $root; 
    foreach(str_split($text) as $char) { 
    $next = $v->children[$char]; 
     if ($next === null) 
     { 
      $v->children[$char] = $next = new Node(); 
     } 
     $v = $next; 
    } 

    $v->leaf = true; 
} 


static function get_words_sorted(Node $node, $text) 
{ 

    $res = array(); 
    for($ch = 0; $ch < 128; $ch++) { 
    $child = $node->children[chr($ch)]; 

     if ($child !== null) 
     { 
      $res = array_merge($res, Trie::get_words_sorted($child, $text . chr($ch))); 

     } 
    } 
    if ($node->leaf === true) 
    { 
     $res[] = $text; 
    } 
    return $res; 

} 

static function search(Node $root, $text) 
{ 
    $v = $root; 
    while($v !== null) 
    { 
     foreach(str_split($text) as $char) { 
      $next = $v->children[$char]; 
      if ($next === null) 
      { 
       return false; 
      } 
      else 
      { 
       $v = $next; 
      } 
     } 

     if($v->leaf === true) 
     { 
      return true; 
     } 
     else 
     { 
      return false; 
     } 

    } 
    return false; 

} 

} 


class Node { 

    public $children; 
    public $leaf; 


    function __construct() 
    { 
     $children = Array(); 

    } 
} 

示例用法

$root = new Node(); 
    $words = Array("an", "ant", "all", "allot", "alloy", "aloe", "are", "ate", "be"); 


    for ($i = 0; $i < sizeof($words); $i++) 
    { 

     Trie::insert_word($root, $words[$i]); 
    } 

    $search_words = array("alloy", "ant", "bee", "aren't", "allot"); 

    foreach($search_words as $word) 
    { 
     if(Trie::search($root, $word) === true) 
     { 
      echo $word . " IS in my dictionary<br/>"; 
     } 
     else 
     { 
      echo $word . " is NOT in my dictionary <br/>"; 
     } 
    }