2012-04-23 99 views
2

我正在寻找一个能够可视化显示 HTML 结构、字符/单词以及样式差异的 API。该工具还必须支持双字节字符,并且足够灵活,可以集成到我现有的网站中,方便地显示比较结果。我目前使用的是一个组件软件的 COM 实现,它不支持双字节字符,而且大约六年没有更新了。(标题:HTML Diff 工具 API)

回答

0

这是我使用的工具:

[http://code.google.com/p/google-diff-match-patch/][1]

我不得不自己编写比较方法,但经过一番努力后,效果看起来不错。下面这个实现比较的是传入的文本,所以如果你只是比较两个文本字符串,它可以正常工作。如果你想对两段 HTML 字符串做比较预览,做法就有些不同。

/// <summary>
/// Renders a list of diffs as an HTML report in which the diffed text itself
/// is shown literally (it is HTML-escaped and newlines become &lt;br&gt;).
/// </summary>
/// <param name="diffs">The diffs to render, in order.</param>
/// <returns>
/// Inserted text wrapped in &lt;ins class='diff'&gt;, deleted text wrapped in
/// &lt;del class='diff'&gt; and unchanged text wrapped in &lt;span&gt;.
/// </returns>
public string diff_prettyHtml(List<Diff> diffs)
{
    StringBuilder report = new StringBuilder();

    foreach (Diff entry in diffs)
    {
        // Escape markup characters so the compared text is displayed, not interpreted.
        string escaped = entry.text
            .Replace("&", "&amp;")
            .Replace("<", "&lt;")
            .Replace(">", "&gt;")
            .Replace("\n", "<br>");

        Operation kind = entry.operation;
        if (kind == Operation.INSERT)
        {
            report.Append("<ins class='diff'>");
            report.Append(escaped);
            report.Append("</ins>");
        }
        else if (kind == Operation.DELETE)
        {
            report.Append("<del class='diff'>");
            report.Append(escaped);
            report.Append("</del>");
        }
        else if (kind == Operation.EQUAL)
        {
            report.Append("<span>");
            report.Append(escaped);
            report.Append("</span>");
        }
    }

    return report.ToString();
}

现在,我把对 diff_prettyHtml 的调用改成了对 diff_previewHtml 的调用。我的做法如下:

// Usage fragment (body of a caller that returns the preview HTML):
// diff the old and new HTML strings, then render the result with
// diff_previewHtml instead of diff_prettyHtml.
// NOTE(review): this fragment passes oldHtml/newHtml to diff_main directly and
// never calls the Unicoder class shown further down; presumably html2plain /
// plain2html are meant to wrap this call - confirm with the author.
DiffMatchPatch.diff_match_patch diff = new DiffMatchPatch.diff_match_patch(); 
       List<DiffMatchPatch.Diff> differences = diff.diff_main(oldHtml, 
        newHtml); 
       return diff.diff_previewHtml(differences); 


/// <summary>
/// Renders a list of diffs as HTML without escaping the diffed text, so that
/// markup contained in the text is emitted as markup.
/// </summary>
/// <param name="diffs">The diffs to render, in order.</param>
/// <returns>
/// Inserted text wrapped in &lt;ins class='diff'&gt;, deleted text wrapped in
/// &lt;del class='diff'&gt; and unchanged text emitted as-is.
/// </returns>
public string diff_previewHtml(List<Diff> diffs)
{
    StringBuilder preview = new StringBuilder();

    foreach (Diff entry in diffs)
    {
        string fragment = entry.text;
        Operation kind = entry.operation;

        if (kind == Operation.INSERT)
        {
            preview.Append("<ins class='diff'>");
            preview.Append(fragment);
            preview.Append("</ins>");
        }
        else if (kind == Operation.DELETE)
        {
            preview.Append("<del class='diff'>");
            preview.Append(fragment);
            preview.Append("</del>");
        }
        else if (kind == Operation.EQUAL)
        {
            // Unchanged text is passed through without any wrapper element.
            preview.Append(fragment);
        }
    }

    return preview.ToString();
}

Unicoder 类如下:

using System.Collections; 
using System.Collections.Generic; 
using System.Text; 
using System.Text.RegularExpressions; 
using System.Linq; 

namespace HtmlCompare
{
    /// <summary>
    /// Lets a character-based diff run over HTML without ever splitting a tag.
    /// <see cref="html2plain"/> replaces every tag with a single placeholder
    /// character, and <see cref="plain2html"/> turns the placeholders back into
    /// tags and then repairs ins/del wrappers that ended up around unbalanced
    /// tags. The same instance must be used for both directions (and for both
    /// documents being compared) because it owns the tag/placeholder table.
    /// </summary>
    class Unicoder
    {
        // Placeholders are taken from the BMP Private Use Area first, because
        // those code points are unlikely to occur in real text (the previous
        // start value, U+AC00, is an ordinary Hangul syllable and collided with
        // Korean content). Only when the Private Use Area is exhausted do we
        // fall back to the Hangul syllable block.
        private const int PrivateUseFirst = 0xE000;
        private const int PrivateUseLast = 0xF8FF;
        private const int HangulFirst = 0xAC00;
        private const int HangulLast = 0xD7A3;

        // Matches one HTML tag: '<', anything except '>', then '>'.
        // (Equivalent to the matches produced by the previous, more convoluted pattern.)
        private static readonly Regex HtmlTagRegex = new Regex(@"<[^>]*>");

        // Group 1: opening ins/del tag, group 2: its contents, group 3: closing tag.
        // Singleline is required so that '.' also matches '\n'; diffed HTML
        // routinely spans several lines.
        private static readonly Regex DiffTagRegex = new Regex(
            @"(<(?:ins|del)\b[^>]*>)(.*?)(</(?:ins|del)\b[^>]*>)",
            RegexOptions.IgnoreCase | RegexOptions.Singleline);

        // tag text -> placeholder, and the reverse lookup used by plain2html.
        private readonly Dictionary<string, string> _htmlHash = new Dictionary<string, string>();
        private readonly Dictionary<char, string> _placeholderToTag = new Dictionary<char, string>();

        // Stand-alone elements that always stay inside the ins/del wrapper.
        private readonly HashSet<string> _blockElements =
            new HashSet<string>(System.StringComparer.OrdinalIgnoreCase) { "img", "br" };

        private int _currentHash = PrivateUseFirst;

        /// <summary>
        /// Returns the placeholder for <paramref name="tag"/>, allocating a new
        /// one the first time a given tag text is seen.
        /// </summary>
        /// <param name="tag">The exact tag text (compared case-sensitively).</param>
        /// <returns>A one-character string that stands for the tag.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="tag"/> is null.</exception>
        /// <exception cref="System.InvalidOperationException">No placeholder characters are left.</exception>
        public string pushHash(string tag)
        {
            if (tag == null)
            {
                throw new System.ArgumentNullException("tag");
            }

            string placeholder;
            if (!_htmlHash.TryGetValue(tag, out placeholder))
            {
                char next = NextPlaceholder();
                placeholder = next.ToString();
                _htmlHash[tag] = placeholder;
                _placeholderToTag[next] = tag;
            }
            return placeholder;
        }

        /// <summary>Hands out the next unused placeholder character.</summary>
        private char NextPlaceholder()
        {
            if (_currentHash == PrivateUseLast + 1)
            {
                _currentHash = HangulFirst; // Private Use Area used up: fall back.
            }
            else if (_currentHash > HangulLast && _currentHash < PrivateUseFirst)
            {
                // Going further would run into the surrogate range.
                throw new System.InvalidOperationException(
                    "Too many distinct HTML tags: no placeholder characters left.");
            }
            return (char)_currentHash++;
        }

        private string tagMatch(Match tag)
        {
            return pushHash(tag.Value);
        }

        /// <summary>
        /// Replaces every HTML tag in <paramref name="html"/> with its
        /// single-character placeholder; text between tags is left untouched.
        /// </summary>
        public string html2plain(string html)
        {
            return HtmlTagRegex.Replace(html, new MatchEvaluator(tagMatch));
        }

        /// <summary>
        /// Restores the tags behind every placeholder in <paramref name="plain"/>
        /// and then runs <see cref="CleanUpMisplacedDiffTags"/> on the result.
        /// </summary>
        /// <exception cref="System.ArgumentNullException"><paramref name="plain"/> is null.</exception>
        public string plain2html(string plain)
        {
            if (plain == null)
            {
                throw new System.ArgumentNullException("plain");
            }

            // Single pass, literal substitution: a '$' inside a tag is copied
            // verbatim (Regex.Replace would treat "$0", "$1", ... as
            // substitutions), and characters inside an already restored tag are
            // never looked at again.
            StringBuilder html = new StringBuilder(plain.Length);
            foreach (char c in plain)
            {
                string tag;
                if (_placeholderToTag.TryGetValue(c, out tag))
                {
                    html.Append(tag);
                }
                else
                {
                    html.Append(c);
                }
            }
            return CleanUpMisplacedDiffTags(html.ToString());
        }

        /// <summary>
        /// Rewrites every ins/del element in <paramref name="html"/> so that it
        /// no longer wraps unbalanced tags: unmatched opening tags are moved in
        /// front of the element, unmatched closing tags behind it. Empty ins/del
        /// elements are removed; whitespace-only ones are made visible.
        /// </summary>
        public string CleanUpMisplacedDiffTags(string html)
        {
            return DiffTagRegex.Replace(html, new MatchEvaluator(DiffTagMatch));
        }

        /// <summary>Produces the replacement for one matched ins/del element.</summary>
        private string DiffTagMatch(Match tag)
        {
            string tagStart = tag.Groups[1].Value;
            string contents = tag.Groups[2].Value;
            string tagEnd = tag.Groups[3].Value;

            if (contents.Length == 0)
            {
                return string.Empty; // we don't want the ins/del tag around nothing
            }
            if (string.IsNullOrWhiteSpace(contents))
            {
                return string.Concat(tagStart, KeepSpacing(contents), tagEnd);
            }
            return ProcessDiffTag(tagStart, tagEnd, contents);
        }

        /// <summary>Makes a whitespace-only change visible in the rendered page.</summary>
        private static string KeepSpacing(string whitespace)
        {
            return "&nbsp;" + whitespace.Replace("\n", "<br />");
        }

        /// <summary>
        /// Rebuilds one ins/del element whose contents are not just whitespace.
        /// </summary>
        private string ProcessDiffTag(string tagStart, string tagEnd, string contents)
        {
            if (!HtmlTagRegex.IsMatch(contents))
            {
                return string.Concat(tagStart, contents, tagEnd); // plain text only
            }

            List<TagDefinition> parts = SplitIntoParts(contents);
            PairTags(parts);
            return GoThroughDiffParts(parts, tagStart, tagEnd);
        }

        /// <summary>
        /// Splits <paramref name="contents"/> into text and tag parts, in
        /// document order, without losing any character (including text that
        /// follows the last tag).
        /// </summary>
        private static List<TagDefinition> SplitIntoParts(string contents)
        {
            List<TagDefinition> parts = new List<TagDefinition>();
            int position = 0;

            foreach (Match match in HtmlTagRegex.Matches(contents))
            {
                if (match.Index > position) // text in front of this tag
                {
                    parts.Add(TextPart(contents.Substring(position, match.Index - position)));
                }

                TagDefinition tagPart = new TagDefinition();
                tagPart.Tag = true;
                tagPart.Text = match.Value;
                tagPart.OpeningTag = !IsClosingTag(match.Value);
                tagPart.TagType = GetTagType(match.Value);
                parts.Add(tagPart);

                position = match.Index + match.Length;
            }

            if (position < contents.Length) // text after the last tag
            {
                parts.Add(TextPart(contents.Substring(position)));
            }
            return parts;
        }

        private static TagDefinition TextPart(string text)
        {
            TagDefinition part = new TagDefinition();
            part.Text = text;
            return part;
        }

        private static bool IsClosingTag(string tag)
        {
            return tag.StartsWith("</", System.StringComparison.Ordinal);
        }

        /// <summary>
        /// Extracts the element name from a tag: the characters after '&lt;' or
        /// '&lt;/' up to the first whitespace, '/' or '&gt;'. Returns an empty
        /// string when there is no name (for example "&lt;&gt;").
        /// </summary>
        private static string GetTagType(string tag)
        {
            int start = IsClosingTag(tag) ? 2 : 1;
            int end = start;
            while (end < tag.Length
                && tag[end] != '>'
                && tag[end] != '/'
                && !char.IsWhiteSpace(tag[end]))
            {
                end++;
            }
            return end > start ? tag.Substring(start, end - start) : string.Empty;
        }

        /// <summary>
        /// Links every closing tag to the nearest preceding, still unmatched
        /// opening tag of the same element name (ignoring case) by setting
        /// <see cref="TagDefinition.MatchingIndex"/> on both. Text parts and
        /// block elements are never paired.
        /// </summary>
        private void PairTags(List<TagDefinition> parts)
        {
            // Per element name: indices of opening tags still waiting for their closing tag.
            Dictionary<string, Stack<int>> pendingByType =
                new Dictionary<string, Stack<int>>(System.StringComparer.OrdinalIgnoreCase);

            for (int index = 0; index < parts.Count; index++)
            {
                TagDefinition part = parts[index];
                if (!part.Tag || _blockElements.Contains(part.TagType))
                {
                    continue;
                }

                Stack<int> pending;
                if (part.OpeningTag)
                {
                    if (!pendingByType.TryGetValue(part.TagType, out pending))
                    {
                        pending = new Stack<int>();
                        pendingByType[part.TagType] = pending;
                    }
                    pending.Push(index);
                }
                else if (pendingByType.TryGetValue(part.TagType, out pending) && pending.Count > 0)
                {
                    int openingIndex = pending.Pop();
                    part.MatchingIndex = openingIndex;
                    parts[openingIndex].MatchingIndex = index;
                }
            }
        }

        /// <summary>
        /// Assembles "[before]startTag[middle]endTag[after]": unmatched opening
        /// tags go to [before], unmatched closing tags to [after], everything
        /// else (text, block elements, matched tag pairs) stays in [middle] in
        /// its original order. The ins/del wrapper is omitted when [middle] is
        /// empty.
        /// </summary>
        private string GoThroughDiffParts(List<TagDefinition> parts, string startTag, string endTag)
        {
            StringBuilder before = new StringBuilder();
            StringBuilder middle = new StringBuilder();
            StringBuilder after = new StringBuilder();

            foreach (TagDefinition part in parts)
            {
                bool unmatchedTag = part.Tag
                    && part.MatchingIndex == -1
                    && !_blockElements.Contains(part.TagType);

                if (!unmatchedTag)
                {
                    middle.Append(part.Text);
                }
                else if (part.OpeningTag)
                {
                    before.Append(part.Text);
                }
                else
                {
                    after.Append(part.Text);
                }
            }

            string inner = middle.ToString();
            StringBuilder result = new StringBuilder();
            result.Append(before.ToString());
            if (inner.Length > 0) // we don't want the ins/del tag around nothing
            {
                if (string.IsNullOrWhiteSpace(inner)) // spacing should be kept
                {
                    inner = KeepSpacing(inner);
                }
                result.Append(startTag).Append(inner).Append(endTag);
            }
            result.Append(after.ToString());
            return result.ToString();
        }

        /// <summary>One piece of an ins/del element's contents: a tag or a run of text.</summary>
        private class TagDefinition
        {
            /// <summary>True for an HTML tag, false for text.</summary>
            public bool Tag { get; set; }

            /// <summary>Element name for tags; empty for text.</summary>
            public string TagType { get; set; }

            /// <summary>The exact source text of this part.</summary>
            public string Text { get; set; }

            /// <summary>Index of the paired tag in the part list, or -1 when unpaired.</summary>
            public int MatchingIndex { get; set; }

            /// <summary>True for tags that do not start with "&lt;/".</summary>
            public bool OpeningTag { get; set; }

            public TagDefinition()
            {
                this.Tag = false;
                this.Text = string.Empty;
                this.TagType = string.Empty;
                this.MatchingIndex = -1;
                this.OpeningTag = false;
            }
        }
    }
}
0

我发现的唯一可以做这种事情的工具是 http://changedetection.com 和 http://imnosy.com。两者都可以让你指定一个 URL 并监视它的变化。

+0

其实我的 HTML 内容都在本地,不能暴露给外部网站。我真正需要的是这些服务用来比较所监控网站不同版本的那种工具。最重要的是,我需要它能够比较文本、HTML 和双字节字符。 – jnoreiga 2013-05-30 19:01:24