我正在寻找一个可视化显示html结构,字符/单词和样式差异的api。该工具还必须支持双字节字符,并且足够灵活,可以将其添加到我的现有网站以轻松显示比较结果。我目前正在使用组件软件COM实现,它不支持双字节字符,并且在大约六年内没有更新。HTML Diff工具API
2
A
回答
0
这是我用什么:
[http://code.google.com/p/google-diff-match-patch/][1]
我不得不写我自己的方法做比较,但一些工作后,它看起来很好。这个实现比较了传入的测试,所以如果你只是比较两个文本字符串,它就可以正常工作。如果你想要做的2串HTML一个比较预览这是一个有点不同
public string diff_prettyHtml(List<Diff> diffs)
{
StringBuilder html = new StringBuilder();
foreach (Diff aDiff in diffs)
{
string text = aDiff.text.Replace("&", "&").Replace("<", "<")
.Replace(">", ">").Replace("\n", "<br>");
switch (aDiff.operation)
{
case Operation.INSERT:
html.Append("<ins class='diff'>").Append(text)
.Append("</ins>");
break;
case Operation.DELETE:
html.Append("<del class='diff'>").Append(text)
.Append("</del>");
break;
case Operation.EQUAL:
html.Append("<span>").Append(text).Append("</span>");
break;
}
}
return html.ToString();
}
现在:我diff_prettyHtml电话变更为。这是我做过什么:
DiffMatchPatch.diff_match_patch diff = new DiffMatchPatch.diff_match_patch();
List<DiffMatchPatch.Diff> differences = diff.diff_main(oldHtml,
newHtml);
return diff.diff_previewHtml(differences);
public string diff_previewHtml(List<Diff> diffs) {
StringBuilder html = new StringBuilder();
foreach (Diff aDiff in diffs) {
string text = aDiff.text;
switch (aDiff.operation) {
case Operation.INSERT:
html.Append("<ins class='diff'>").Append(text)
.Append("</ins>");
break;
case Operation.DELETE:
html.Append("<del class='diff'>").Append(text)
.Append("</del>");
break;
case Operation.EQUAL:
html.Append(text);
break;
}
}
return html.ToString();
}
Unicode的类如下:
using System.Collections;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Linq;
namespace HtmlCompare
{
class Unicoder
{
private Hashtable _htmlHash = new Hashtable();
private const string _htmlPattern = @"<(S*?)[^>]*>.*?|<.*?\/>";
private List<string> _blockElements = "img,br".Split(',').ToList<string>();
private int _currentHash = 44032;
public string pushHash(string tag)
{
if (_htmlHash[tag] == null)
{
//_htmlHash[tag] = char.Parse("\\u" + Convert.ToString(_currentHash,16));
_htmlHash[tag] = char.ConvertFromUtf32(_currentHash);
_currentHash++;
}
return _htmlHash[tag].ToString();
}
private string tagMatch(Match tag)
{
return pushHash(tag.Value);
}
public string html2plain(string html)
{
MatchEvaluator tagEvaluator = new MatchEvaluator(tagMatch);
return Regex.Replace(html, _htmlPattern, tagEvaluator, RegexOptions.IgnoreCase | RegexOptions.Multiline);
}
private string ProcessDiffTag(string tagStart, string tagEnd, string contents)
{
ArrayList diffTagParts = new ArrayList();
MatchCollection matches = Regex.Matches(contents,
_htmlPattern,
RegexOptions.IgnoreCase | RegexOptions.Multiline);
if (matches.Count > 0)
{
int contentsStringIndex = 0;
int contentsStringEndIndex = 0;
int lastContentStringIndex = 0;
bool lastTag = false;
TagDefinition definition;
foreach (Match currentMatch in matches)
{
contentsStringIndex = currentMatch.Index;
contentsStringEndIndex = contentsStringIndex + currentMatch.Length;
lastTag = (currentMatch == matches[matches.Count - 1]);
// did we miss text that isn't a tag?
if (contentsStringIndex > lastContentStringIndex)
{
definition = new TagDefinition();
definition.Tag = false;
definition.Text = contents.Substring(lastContentStringIndex, contentsStringIndex - lastContentStringIndex);
AddTagDefinition(diffTagParts, definition);
}
else if (lastTag && contents.Length > contentsStringEndIndex) // something after the last tag?
{
definition = new TagDefinition();
definition.Tag = false;
definition.Text = contents.Substring(contentsStringEndIndex, contents.Length - contentsStringEndIndex);
AddTagDefinition(diffTagParts, definition);
}
// work on current tag
definition = new TagDefinition();
definition.Tag = true;
definition.OpeningTag = !IsClosingTag(currentMatch.Value);
definition.TagType = GetTagType(currentMatch.Value);
definition.Text = currentMatch.Value;
AddTagDefinition(diffTagParts, definition);
lastContentStringIndex = contentsStringEndIndex;
}
return GoThroughDiffParts(diffTagParts,
tagStart,
tagEnd);
}
else
return string.Concat(tagStart, contents, tagEnd);
}
private string GetTagType(string tag)
{
int startIndex = 1; // skip <
if (tag.StartsWith("</"))
startIndex = 2; // skip </
int endIndex = tag.IndexOf(" ");
if (endIndex == -1)
endIndex = tag.IndexOf(">");
return tag.Substring(startIndex, endIndex - startIndex);
}
private string GoThroughDiffParts(ArrayList parts, string startTag, string endTag)
{
IEnumerator enumerator = parts.GetEnumerator();
StringBuilder before = new StringBuilder(string.Empty);
StringBuilder middle = new StringBuilder(string.Empty);
StringBuilder after = new StringBuilder(string.Empty);
TagDefinition definition;
while (enumerator.MoveNext())
{
definition = (TagDefinition)enumerator.Current;
if (!definition.Used) // have we already used this part?
{
definition.Used = true;
if (_blockElements.Contains(definition.TagType))
middle.Append(definition.Text);
else if (definition.MatchingIndex == -1) // no matching tag
{
if (definition.Tag) // html tag?
{
if (definition.OpeningTag)
before.Append(definition.Text);
else
after.Append(definition.Text);
}
else
middle.Append(definition.Text);
}
else
{
if (!definition.Tag) // text and has a matching tag
{
TagDefinition matchingTag = (TagDefinition)parts[definition.MatchingIndex];
if (matchingTag.OpeningTag)
matchingTag.Text += definition.Text;
else
matchingTag.Text = string.Concat(definition.Text, matchingTag.Text);
definition.Used = true;
}
else
middle.Append(definition.Text);
}
}
}
bool includeDiffTag = true;
if (string.IsNullOrEmpty(middle.ToString()))
includeDiffTag = false; // we don't want the ins/del tag around nothing
else if (string.IsNullOrWhiteSpace(middle.ToString())) // spacing should be kept
middle = new StringBuilder(" " + middle.Replace("\n", "<br />"));
if(includeDiffTag)
middle.Insert(0, startTag); // <ins>[middle]
middle.Insert(0, before); // [before]<ins>[middle]
if (includeDiffTag)
middle.Append(endTag); // [before]<ins>[middle]</ins>
middle.Append(after); // [before]<ins>[middle]</ins>[end]
return middle.ToString();
}
private string DiffTagMatch(Match tag)
{
string tagStart = tag.Groups[1].Value;
string tagEnd = tag.Groups[5].Value;
string contents = tag.Groups[4].Value;
if (string.IsNullOrEmpty(contents))
return string.Empty; // we don't want the ins/del tag around nothing
else if (string.IsNullOrWhiteSpace(contents)) // spacing should be kept
return string.Concat(tagStart, " ", contents.Replace("\n", "<br />"), tagEnd);
else
return ProcessDiffTag(tagStart,
tagEnd,
contents);
}
private bool IsClosingTag(string tag)
{
return tag.Contains("</") && !tag.ToLower().Contains("<img") && !tag.ToLower().Contains("<br");
}
public string CleanUpMisplacedDiffTags(string html)
{
return Regex.Replace(html, @"(\<((ins|del).*?)\>)(.*?)(\<\/((ins|del).*?)\>)", DiffTagMatch, RegexOptions.IgnoreCase | RegexOptions.Multiline);
}
public string plain2html(string plain)
{
IDictionaryEnumerator enumerator = _htmlHash.GetEnumerator();
while (enumerator.MoveNext())
{
plain = Regex.Replace(plain,
_htmlHash[enumerator.Key].ToString(),
enumerator.Key.ToString(),
RegexOptions.IgnoreCase | RegexOptions.Multiline);
}
return CleanUpMisplacedDiffTags(plain);
}
private void AddTagDefinition(ArrayList list, TagDefinition tag)
{
IEnumerator enumerator = list.GetEnumerator();
TagDefinition currentDefinition;
int index = 0;
int insertingIndex = list.Count;
while (enumerator.MoveNext())
{
currentDefinition = (TagDefinition)enumerator.Current;
//if (!tag.OpeningTag && currentDefinition.MatchingIndex == -1)
// currentDefinition.MatchingIndex = insertingIndex;
if (tag.MatchingIndex == -1 && // matching tag not found yet
(currentDefinition.OpeningTag && !tag.OpeningTag) && // opening & closing
currentDefinition.TagType == currentDefinition.TagType) // same tag type
{
tag.MatchingIndex = index;
currentDefinition.MatchingIndex = insertingIndex;
}
}
list.Add(tag);
}
private class TagDefinition
{
public bool Tag { get; set; }
public string TagType { get; set; }
public string Text { get; set; }
public int MatchingIndex { get; set; }
public bool OpeningTag { get; set; }
public bool Used { get; set; }
public TagDefinition()
{
this.Tag = false;
this.Text = string.Empty;
this.TagType = string.Empty;
this.MatchingIndex = -1;
this.OpeningTag = false;
this.Used = false;
}
}
}
}
0
我发现的唯一可以做这种事情的工具是http://changedetection.com和http://imnosy.com。两者都可以让你指定一个url并观察它们的变化。
相关问题
- 1. postgres diff工具
- 2. SQL Server diff工具
- 3. 汇编语言diff工具
- 4. diff工具为整个OS
- 5. UML diff工具有哪些?
- 6. SourceTree中的IntelliJ Diff工具
- 7. 用于Diff工具的JSF组件
- 8. 关于如何构建HTML Diff工具的建议?
- 9. 生成文档的Diff工具?
- 10. Git diff工具重置为git默认
- 11. 从Git GUI启动diff工具
- 12. 基于Emacs的Git Diff工具?
- 13. Git word-diff to html
- 14. HTML中的ClearCase diff
- 15. REST API工具
- 16. 有没有办法将git或Mercurial diff输出传递给GUI Diff工具?
- 17. API监视工具
- 18. HTML开发工具
- 19. 热点使用python diff html
- 20. Cocoa API - 它是否包含diff api?
- 21. HTML- /造型谷歌地图api v3的工具提示
- 22. 如何为Visual Studio 2015设置p4merge作为diff工具| git
- 23. 如何让mercurial外部diff工具处理文件重命名?
- 24. 是否有知道缩进的diff工具(补丁)?
- 25. Mercurial Diff Merge:这是什么工具,我该如何使用它?
- 26. 如何使用p4merge作为Mercurial的merge/diff工具?
- 27. 您的许可证在diff工具中无效
- 28. 是否有可以处理UTF-8字符的diff工具?
- 29. Visual Studio 2015:Git Diff工具窗口为空
- 30. Diff工具,允许保存比较标记
其实我有HTML内容和本地不能暴露于外部网站。我真正需要的是这些服务用来比较他们正在监控的网站的不同版本的工具。最重要的是,我需要它来比较文本,HTML和双字节字符。 – jnoreiga 2013-05-30 19:01:24