2011-11-03 169 views
3

我正在尝试使用 Levenshtein 距离函数，将指定文档的文本与目录中所有其他文档的文本逐一比较，并计算它们之间的 Levenshtein 距离。

我有一个基本的思路，但不知道如何用 PHP 编写代码。我是 C# 背景出身，所以我会尽可能详细地说明。

/**
 * Sketch of a class that compares one document against every document in a
 * directory using levenshtein().
 *
 * NOTE(review): this is pseudocode, not valid PHP ("for each ... in ...",
 * "new Array()" as a property initializer, ".add(...)" and an unnamed method
 * will not parse). It documents intent only.
 */
class ComputeLevenshtein 
{ 
    public $filePathList = new Array(); //The array that stores the absolute path of all documents within a specified directory 
    public $directory; 
    public $filePath; //This is the document that will be compared for each document in a directory 

    /**
     * Constructor. The directory to scan is left unassigned in this sketch;
     * see the author's note below about deriving it from the session.
     */
    public function __construct() { 
     $this->directory = //; 
     /* I'm stuck here, once a user registers, a separate directory is 
      named after the user. I need to be able to read the username 
      from the Session Variable once the user logs in. 
      I'll just have to pass it in as a parameter. 
      Do I have to create a session wrapper? 
      If it's too complex, 
      then I'll just start off with a static directory */ 
    } 

     // Returns the array containing each filePath for every document in a directory. 
     // NOTE(review): the body below only appends to $this->filePathList and has
     // no return statement, so nothing is actually returned as written.
     function computeFilePathList($directory) 
     { 
      for each file in Directory 
      { 
      $filepath = file.FilePath(); //store the filepath in a variable 
      // NOTE(review): $filepath (above) and $filePath (below) differ in case,
      // so they are two different variables in PHP.
      $this->filePathList.add($filePath) //add the filepath to the array 
      } 

     } 

     // NOTE(review): the method name is missing from this declaration.
     // $lev is overwritten on every iteration, so only the distance for the
     // last path in filePathList reaches the return statement.
     function ($docFilePath) // returns the Levenshtein Distance 
     { 

      for each path in filePathList 
      { 
       $input= readDoc($docFilePath); 
       $lev = levenshtein($input, readDoc($path)); 
      } 

      return $lev; 
     } 

    // Placeholder: $text is never assigned in the visible body.
    function readDoc($docFilePath) // Returns the raw text of that doc 
    { 
     //I have the code for reading the doc in a separate function 
     return $text; 
    } 
} 
+4

你有什么理由要重新实现 PHP 中已经存在的功能吗？ [php levenshtein](http://php.net/manual/en/function.levenshtein.php) – birryree

+3

我不是在重新实现 levenshtein，我只是用它把某个文档的原始文本与目录中列出的其他文档文件逐一比较。 – user478636

+0

恕我直言，写一个会话包装器是可行的办法。你可以通过创建一个名为 User 的类来实现。 – greg0ire

回答

1

这样如何：

/**
 * Holds two strings and exposes them, plus their Levenshtein distance,
 * through read-only magic properties:
 *
 *   - input   : the first string passed to the constructor
 *   - compare : the second string passed to the constructor
 *   - dist    : levenshtein(input, compare), computed on first access
 *               and cached afterwards
 *
 * Any other property name yields null.
 *
 * NOTE(review): PHP versions before 8.0 are documented to limit
 * levenshtein() arguments to 255 characters; confirm the target PHP
 * version before using this on whole documents.
 */
class Levenshtein
{
    /** Backing store for the magic properties. */
    private $_p = array();

    /**
     * @param string $input   Text to compare.
     * @param string $compare Text to check $input against.
     */
    public function __construct($input, $compare)
    {
        $this->_p = array(
            'input'   => $input,
            'compare' => $compare,
        );
    }

    /**
     * Magic getter for "input", "compare" and the lazily computed "dist".
     *
     * @param string $property Name of the requested property.
     *
     * @return mixed The stored value, or null for unknown names.
     */
    public function __get($property)
    {
        // Compute the distance only once, the first time it is requested.
        if ($property === 'dist' && !array_key_exists('dist', $this->_p)) {
            $this->_p['dist'] = levenshtein(
                $this->_p['input'],
                $this->_p['compare']
            );
        }

        if (!array_key_exists($property, $this->_p)) {
            return null;
        }

        return $this->_p[$property];
    }
}

/**
 * Compares one reference file against files inside a directory.
 *
 * Directory entries are addressed through magic property access, e.g.
 * $dirLev->{'notes.txt'} or $dirLev->$file. Each access returns a
 * Levenshtein object built from the reference file's contents and the
 * entry's contents; the object is cached per entry name.
 */
class DirectoryLevenshtein
{
    /** Directory path, always stored with a trailing '/'. */
    private $_directory;

    /** Path of the reference file every entry is compared with. */
    private $_filePath;

    /** Cache of Levenshtein objects keyed by entry name. */
    private $_distances = array();

    /**
     * @param string      $directoryPath Directory whose files are compared.
     * @param string|null $filePath      Optional reference file.
     *
     * @throws Exception If the directory does not exist or the reference
     *                   file is not readable.
     */
    public function __construct($directoryPath, $filePath = null)
    {
        if (!is_dir($directoryPath)) {
            throw new Exception("Path '$directoryPath' does not exist");
        }

        // Normalise so entry names can simply be appended later.
        if (substr($directoryPath, -1) !== '/') {
            $directoryPath .= '/';
        }

        $this->_directory = $directoryPath;

        if ($filePath !== null && !$this->setFilePath($filePath)) {
            throw new Exception("File '$filePath' is not readable");
        }
    }

    /**
     * Returns the comparison object for a directory entry.
     *
     * @param string $file Entry name relative to the directory.
     *
     * @return Levenshtein|null Null when the entry is not a readable regular
     *                          file or no reference file has been set.
     */
    public function __get($file)
    {
        if (array_key_exists($file, $this->_distances)) {
            return $this->_distances[$file];
        }

        $path = $this->_directory . $file;

        // is_readable() alone is also true for subdirectories, which cannot
        // be read with file_get_contents(); require a regular file.
        if (!is_file($path) || !is_readable($path)) {
            return null;
        }

        if (empty($this->_filePath)) {
            return null;
        }

        $input = file_get_contents($this->_filePath);
        $compare = file_get_contents($path);
        $this->_distances[$file] = new Levenshtein($input, $compare);

        return $this->_distances[$file];
    }

    /**
     * Lists the entries of the directory without the '.' and '..' entries.
     *
     * The dot entries are filtered by value rather than by position:
     * scandir() sorts by name, so entries such as '-a.txt' sort before '.'
     * and a "strip the first elements" approach would leave the dot entries
     * in the result.
     *
     * @return array Sequentially indexed list of entry names.
     *
     * @throws Exception If the directory can no longer be read.
     */
    public function getDirectoryContents()
    {
        $entries = scandir($this->_directory);

        if ($entries === false) {
            throw new Exception("Path '{$this->_directory}' could not be read");
        }

        // array_values() restores 0..n-1 keys after array_diff().
        return array_values(array_diff($entries, array('.', '..')));
    }

    /**
     * Sets the reference file, but only if none has been set yet.
     *
     * @param string $filePath Path of the reference file.
     *
     * @return bool True when the path was stored; false when a reference
     *              file is already set or the path is not readable.
     */
    public function setFilePath($filePath)
    {
        if (empty($this->_filePath) && is_readable($filePath)) {
            $this->_filePath = $filePath;
            return true;
        }

        return false;
    }
}

要使用它，可以像下面这样做：

// Could use a session wrapper instead.
// NOTE(review): $_SESSION['user'] is concatenated into a path as-is;
// confirm it is validated elsewhere before relying on it here.
$userDir = '/path/to/user/dirs/' . $_SESSION['user'];
// File to compare all files with (must be a quoted string literal).
$filePath = '/path/to/file.txt';

$dirLev = new DirectoryLevenshtein($userDir, $filePath);

// Files in directory
$files = $dirLev->getDirectoryContents();

// Distances
foreach ($files as $file) {
    // Variable property access: look up the entry named by $file, not a
    // literal property called "file".
    $comparison = $dirLev->$file;

    // Entries that cannot be compared yield null; skip them.
    if ($comparison === null) {
        continue;
    }

    echo "$file: {$comparison->dist}\n";
}