Javascript（jQuery）删除长文本的最后一句

我正在寻找一个足够聪明的 JavaScript 函数，用来删除一长段文本（实际上是一个段落）的最后一句。下面的示例文本展示了其中的复杂性：

<p>Blabla, some more text here. Sometimes <span>basic</span> html code is used but that should not make the "selection" of the sentence any harder! I looked up the window and I saw a plane flying over. I asked the first thing that came to mind: "What is it doing up there?" She did not know, "I think we should move past the fence!", she quickly said. He later described it as: "Something insane."</p>

目前我可以按 . 拆分文本并删除数组的最后一项，但这对以 ? 或 ! 结尾的句子不起作用，而且有些句子是以引号结尾的，例如 something: "stuff."

// Stub from the question: it is meant to return `text` with its last sentence
// removed. The `...` is a placeholder, not runnable code; the algorithm is
// what the question asks for.
function removeLastSentence(text) { 
    sWithoutLastSentence = ...; // ?? 
    return sWithoutLastSentence; 
}

如何做到这一点？什么是适当的算法？

编辑 - 我所说的“长文本”是指一个段落，“句子”是指一个真正的句子（而不是一行）。所以在我的例子中，最后一句就是：He later described it as: "Something insane."；把它删除之后，下一个最后一句是：She did not know, "I think we should move past the fence!", she quickly said.

来源

2011-09-23 bartolsthoorn

请定义“最后一句”和“长字符串”。如果您是在寻找限制文本行数的方法，请参阅 **[此答案](http://stackoverflow.com/questions/7519337/given-a-textarea-is-there-a-way-to-restrict-length-based-on-of-lines/7521855#7521855)**。 –

编辑我的问题，通过句子我的意思是一个真正的句子，见上文。 :) – bartolsthoorn

***他后来形容为：“疯狂的东西。”***我不是英语专业..但这是正确的吗？或者它应该是***他后来形容它是“疯狂的东西”。*** – rlemon

定义您的规则： // 1. 句子以大写字母开头 // 2. 句子之前要么什么都没有，要么是 [.!?]，但不是 [,:;] // 3. 如果格式不规范，句子之前也可以是引号，例如 ["'] // 4. 如果引号之后的单词是一个名字（专有名词），这种判断可能出错

任何其他规则

定义你的目的： // 1.删除最后一句

假设： 如果从文本字符串的最后一个字符开始向前回溯，那么可以按以下条件找出句子的开头： 1. 该字符之前的文本是 [.?!]，或者 2. 该字符之前的文本是 ["']，并且其后紧跟大写字母 3. 每个 [.] 后面都有一个空格 4. 我们不处理 html 标签 5. 这些假设并不可靠，需要经常调整

可能的解决方案： 读入您的字符串并将其拆分到空格字符上，以给我们大量字符串进行反向查看。

// Take the paragraph's HTML, split it on single spaces, and reverse the pieces
// so that the last word of the text comes first in the array.
var characterGroups = $('#this-paragraph').html().split(' ').reverse();

如果你的字符串是：

BLABLA，这里一些文字。有时使用基本的html代码，但不应该使句子的“选择”更难！我抬头看窗户，我看到一架飞机飞过。我问了第一个想到的事情：“那里有什么？”她不知道，“我认为我们应该越过围栏！”，她很快说。他后来形容为：“疯狂的东西。“

// Sample paragraph used throughout the walkthrough; it mixes '.', '!' and '?'
// terminators, quoted speech, and an inline <span> tag.
var originalString = 'Blabla, some more text here. Sometimes <span>basic</span> html code is used but that should not make the "selection" of the sentence any harder! I looked up the window and I saw a plane flying over. I asked the first thing that came to mind: "What is it doing up there?" She did not know, "I think we should move past the fence!", she quickly said. He later described it as: "Something insane."';

然后你在characterGroups阵列将是：

["insane."", ""Something", "as:", "it", "described", "later", "He", 
"said.", "quickly", "she", "fence!",", "the", "past", "move", "should", "we", 
"think", ""I", "know,", "not", "did", "She", "there?"", "up", "doing", "it", 
"is", ""What", "mind:", "to", "came", "that", "thing", "first", "the", "asked", 
"I", "over.", "flying", "plane", "a", "saw", "I", "and", "window", "the", "up", 
"looked", "I", "harder!", "any", "sentence", "the", "of", ""selection"", "the", 
"make", "not", "should", "that", "but", "used", "is", "code", "html", "basic", 
"Sometimes", "here.", "text", "more", "some", "Blabla,"]

注：'<span>' 等标签可以使用 jQuery 的 .text() 方法去除。

每个块后面都跟着一个空格，所以当我们（通过数组索引）确定了最后一句的开始位置后，就能知道它前面那个空格的位置，并可以在前一句结尾之后的那个空格处把原始字符串截断。

准备一个变量来标记是否已经找到，再准备一个变量来保存我们认定为最后一句开头的那个数组元素的索引位置：

// Flag for the scan loop; it is meant to become true once a sentence boundary is located.
var found = false; 
// Index in the reversed word array of the first word of the last sentence; null until found.
var index = null;

遍历数组，寻找任何以 [.!?] 结尾的元素，或者以 " 结尾、且上一个访问的元素（即原文中紧随其后的单词）以大写字母开头的元素。

// Scan the reversed word list (i.e. from the end of the text toward its start)
// for the word that closes the sentence BEFORE the last one. A word closes a
// sentence when it ends in '.', '!' or '?', or when it ends in '"' and the word
// visited on the previous iteration (the one that follows it in the text)
// starts with a character equal to its own upper-case form.
// On success: `found` is true, `index` is the position of the last sentence's
// first word, and `lookFor` is "<terminator> <first word>", for use with
// lastIndexOf on the original string.
var position  = 1, // skip the first one since we know that's the end anyway
    elements  = characterGroups.length,
    element  = null,
    prevHadUpper = false,
    last   = null,
    lookFor  = ''; // declared explicitly: assigning it undeclared creates an implicit global and throws in strict mode

while(!found && position < elements) {
    element = characterGroups[position].split('');

    if(element.length > 0) {
        last = element[element.length-1];

        // test last character rule
        if(
            last === '.'      // ends in '.'
            || last === '!'     // ends in '!'
            || last === '?'     // ends in '?'
            || (last === '"' && prevHadUpper) // ends in '"' and previous started [A-Z]
        ) {
            found = true;
            index = position-1;
            lookFor = last+' '+characterGroups[position-1];
        } else {
            // Note: non-letters (quotes, digits) also equal their upper-case form.
            if(element[0] === element[0].toUpperCase()) {
                prevHadUpper = true;
            } else {
                prevHadUpper = false;
            }
        }
    } else {
        // Empty piece (produced by consecutive spaces): it cannot start a sentence.
        prevHadUpper = false;
    }
    position++;
}

如果你运行上面的脚本，它会正确识别出“He”是最后一句的开头：

现在，用找到的位置截断原始字符串：

// lookFor starts with the terminator character of the previous sentence, so
// lastIndexOf points at that terminator; +1 keeps the terminator in the result.
var trimPosition = originalString.lastIndexOf(lookFor)+1; 
// Everything up to and including that terminator, i.e. the text without its last sentence.
var updatedString = originalString.substr(0,trimPosition); 
console.log(updatedString); 

// Blabla, some more text here. Sometimes <span>basic</span> html code is used but that should not make the "selection" of the sentence any harder! I looked up the window and I saw a plane flying over. I asked the first thing that came to mind: "What is it doing up there?" She did not know, "I think we should move past the fence!", she quickly said.

现在，你可以把得到的字符串再次传入运行，并得到： Blabla，这里一些文字。

// Prints the first word of the last sentence.
console.log(characterGroups[index]); // He at index=6

有时使用基本的html代码，但不应该使句子的“选择”更难！我抬头看窗户，我看到一架飞机飞过。我问了第一个想到的事情：“那里有什么？”

再次运行它并得到： Blabla，一些更多的文字在这里。有时使用基本的html代码，但不应该使句子的“选择”更难！我抬头看窗户，我看到一架飞机飞过。

再次运行它并得到： Blabla，一些更多的文字在这里。有时使用基本的html代码，但不应该使句子的“选择”更难！

再次运行它并得到： Blabla，一些更多的文字在这里。

所以，我认为这符合你在找什么？

作为一个功能：

/**
 * Strip the last sentence from `string`.
 *
 * The text is split on single spaces and walked from the end toward the start.
 * The first word (skipping the very last one) that ends in '.', '!' or '?', or
 * that ends in '"' while the word after it starts with a character equal to its
 * own upper-case form, is taken as the end of the previous sentence.
 *
 * @param {string} string Text to shorten.
 * @returns {string} The text up to and including that terminator character.
 *     When no such word exists the input is returned unchanged.
 */
function trimSentence(string) {
    var TERMINATORS = '.!?';
    var reversedWords = string.split(' ').reverse();
    var followerLooksCapitalised = false;
    var boundary = '';

    // Start at 1: element 0 is the final word, which ends the last sentence anyway.
    for (var i = 1; i < reversedWords.length; i++) {
        var word = reversedWords[i];

        // Consecutive spaces yield empty pieces; they reset the capital-letter flag.
        if (word.length === 0) {
            followerLooksCapitalised = false;
            continue;
        }

        var tail = word.charAt(word.length - 1);
        var closesSentence =
            TERMINATORS.indexOf(tail) !== -1 ||
            (tail === '"' && followerLooksCapitalised);

        if (closesSentence) {
            // "<terminator> <next word>" pins down where the last sentence begins.
            boundary = tail + ' ' + reversedWords[i - 1];
            break;
        }

        // Non-letters (quotes, digits) equal their upper-case form and count as capitalised.
        var head = word.charAt(0);
        followerLooksCapitalised = head === head.toUpperCase();
    }

    // With an empty boundary lastIndexOf returns the string length, so the whole input is kept.
    return string.slice(0, string.lastIndexOf(boundary) + 1);
}

把它做成一个插件很简单，但要小心这些假设！ :)

这有帮助吗？

感谢， AE

来源

2011-10-01 08:31:29 MyStream

这是个好问题。你为什么不创建一个临时变量，把其中所有的 '!' 和 '?' 替换成 '.'，再按 '.' 拆分这个临时变量，删除最后一句，把这个临时数组重新合并成一个字符串，并取它的长度？然后对原来的段落取子串，截到该长度即可。

来源

2011-09-23 16:02:27 EHorodyski

或者嘿，只是使用正则表达式，它更容易= P – EHorodyski

实际上，只要先把句子结尾处的 '."' 替换掉，我或许只用 '/[\.!?]/'（即 @omnosis 的正则表达式）就可以了 – bartolsthoorn

您仍然会遇到包含含有结尾标点符号的句子的问题，如您的示例中所示。 – samiz

这应该做到这一点。

/**
 * Remove the last sentence from a paragraph of text.
 *
 * Assumptions:
 * - Sentence separators are a combination of terminators (.!?) + doublequote
 *   (optional) + spaces + capital letter.
 * - Tags are not preserved once it gets down to removing the only remaining
 *   sentence (the result is then the empty string).
 *
 * If the last closing tag ("</...") sits after the last terminator, the text
 * from that tag onward is re-appended to the result with quote characters
 * stripped, so a trailing "</p>" survives.
 *
 * @param {string} text Paragraph text, optionally containing simple HTML tags.
 * @returns {string} The text without its last sentence, or '' when no sentence
 *     separator is found.
 */
function removeLastSentence(text) {
    // All locals are declared: the undeclared assignments of the earlier version
    // created implicit globals and threw a ReferenceError in strict mode.
    var lastSeparator = Math.max(
        text.lastIndexOf("."),
        text.lastIndexOf("!"),
        text.lastIndexOf("?")
    );

    // Work on the reversed text so the FIRST regex match is the LAST separator:
    // reversed, "terminator, optional quote, spaces, Capital" reads
    // "Capital, spaces, optional quote, terminator".
    var revtext = text.split('').reverse().join('');
    var sep = revtext.search(/[A-Z]\s+(\")?[\.\!\?]/);
    if (sep === -1) {
        return '';
    }

    // "</" appears as "/<" in the reversed text. Without a closing tag there is
    // no tail to keep; previously the -1 from search() leaked into the
    // arithmetic and a stray final character was appended to the result.
    var closeTagInRev = revtext.search(/\/\</);
    var lastTag = (closeTagInRev === -1) ? -1 : text.length - closeTagInRev - 2;

    var lastPtr = (lastTag > lastSeparator) ? lastTag : text.length;

    // Everything before the capital letter that starts the last sentence.
    var text1 = revtext.substring(sep + 1).trim().split('').reverse().join('');
    // Trailing closing tag(s), if any, minus quote characters.
    var text2 = text.substring(lastPtr).replace(/['"]/g, '').trim();

    return text1 + text2;
}

/* 
TESTS: 

var text = '<p>Blabla, some more text here. Sometimes <span>basic</span> html code is used but that should not make the "selection" of the text any harder! I looked up the window and I saw a plane flying over. I asked the first thing that came to mind: "What is it doing up there?" She did not know, "I think we should move past the fence!", she quickly said. He later described it as: "Something insane. "</p>'; 

alert(text + '\n\n' + removeLastSentence(text)); 
alert(text + '\n\n' + removeLastSentence(removeLastSentence(text))); 
alert(text + '\n\n' + removeLastSentence(removeLastSentence(removeLastSentence(text)))); 
alert(text + '\n\n' + removeLastSentence(removeLastSentence(removeLastSentence(removeLastSentence(text))))); 
alert(text + '\n\n' + removeLastSentence(removeLastSentence(removeLastSentence(removeLastSentence(removeLastSentence(text)))))); 
alert(text + '\n\n' + removeLastSentence(removeLastSentence(removeLastSentence(removeLastSentence(removeLastSentence(removeLastSentence(text))))))); 
alert(text + '\n\n' + removeLastSentence('<p>Blabla, some more text here. Sometimes <span>basic</span> html code is used but that should not make the "selection" of the text any harder! I looked up the ')); 
*/

来源

2011-10-03 06:48:21 Poojan

谢谢你的代码！ – bartolsthoorn

我已经在coffeescript https://gist.github.com/1270335中重写了你的条目 – bartolsthoorn

Javascript（jQuery）删除长文本的最后一句

回答

相关问题