2012-03-28 69 views
2

我正在开发一个根据XML描述生成HTML的工具,想找一个能帮我为HTML元素生成CSS样式的Scala库。(标题:在Scala中寻找CSS解析器)

+0

您的CSS和XML(特别是XSL?)解析器有两个单独的要求吗?您是否计划使用Scala网络框架,例如Lift或Play? – aitchnyu 2012-03-28 12:44:58

+1

您是否在寻找一个CSS *解析器*(如标题中所写)或CSS *生成器*(如问题中所述)? – paradigmatic 2012-03-28 12:51:21

+0

我正在使用Scalatra Web框架,因为我发现Lift对于我的用例来说太重了。我既需要CSS解析器,也需要CSS生成器,就像NekoHTML之于HTML那样(既能生成HTML,也能解析HTML)。 – 2012-03-29 09:18:52

回答

0

我不确定是否完全理解你要找什么,但我知道Lift(http://liftweb.net/)提供了很多处理XML、HTML和CSS的工具。您可以只使用Lift的库,而不必使用完整的框架。

+0

是的,请参阅 http://liftweb.net/api/25-M3/api/scala/#net.liftweb.util.CSSParser – 2012-11-24 03:13:00

0

您要找的可能是这样的项目:https://github.com/axiak/scala-css-parser

像这样运行它:

java -jar CSSRewriter.jar sources/input.css -t where/this/will/go 

https://github.com/axiak/scala-css-parser/blob/master/src/SimpleCSSParser.scala

import annotation.tailrec 
import collection.mutable.ArrayBuffer 
import util.parsing.combinator._ 
import java.io.File 

// See http://www.w3.org/TR/css3-syntax/#grammar0 
/**
 * Combinator grammar for a pragmatic subset of CSS, plus a few vendor extensions
 * (IE `expression(...)`, `filter: alpha(opacity=xx)` keyword arguments, `@-moz-document`).
 *
 * Every production yields a tree of `String`, `~`, `List` and `Option` values, with some
 * tokens wrapped in `NeedsSpace` or `URL`, so the tree can be flattened back to text.
 * Whitespace, `// ...` line comments and block comments are skipped between tokens.
 */
class SimpleCSSParser extends JavaTokenParsers {
    protected override val whiteSpace = """(\s|//.*|(?m)/\*(\*(?!/)|[^*])+\*/)+""".r

    // Lexical symbols
    def h = "[0-9a-fA-F]".r
    // The character-class helpers below must be regexes: a bare string literal is converted to a
    // parser for the literal text of the class. `\xhh` is used instead of octal string escapes,
    // which current Scala compilers reject.
    def nonascii = """[\x80-\xFF]""".r
    // A backslash followed by one to six hex digits.
    def unicode = """\\[0-9a-fA-F]{1,6}""".r
    // Either a unicode escape or a backslash followed by a single printable/non-ASCII character.
    def escape = unicode | """\\[ -~\x80-\xFF]""".r
    def nmstart = "[a-zA-Z]".r | nonascii | escape
    def nmchar = "[a-zA-Z0-9-]".r | nonascii | escape
    override def stringLiteral = ("\""+"""([^"\p{Cntrl}\\]|\\[\\/bfnrt"]|\\u[a-fA-F0-9]{4})*"""+"\"").r | ("\'"+"""([^'\p{Cntrl}\\]|\\[\\/bfnrt']|\\u[a-fA-F0-9]{4})*"""+"\'").r
    // Accepts the `*`, `@` and `_` property-name hacks as an optional leading character.
    override def ident = """[*@_]?-?[a-zA-Z_][a-zA-Z0-9_-]*""".r
    def name = rep1(nmchar)
    def CDO = "<!--"
    def CDC = "-->"
    def INCLUDES = "~="
    def DASHMATCH = "|="

    // The character class was previously unterminated, which made the regex fail to compile.
    def url = rep("[!#$%&*-~]".r | nonascii | escape)
    def IMPORT_SYM = "(?i)@import".r
    def PAGE_SYM = "(?i)@page".r
    def MEDIA_SYM = "(?i)@media".r
    def FONT_FACE_SYM = "(?i)@font-face".r
    def CHARSET_SYM = "(?i)@charset".r
    def NAMESPACE_SYM = "(?i)@namespace".r
    def IMPORTANT_SYM = "!important" | ("!" ~ "important")
    def EMS = decimalNumber ~ "em"
    def EXS = decimalNumber ~ "ex"
    def RESOLUTION = decimalNumber ~ "(?i)dpi".r
    def LENGTH = decimalNumber ~ "(?i)(?:px|cm|mm|in|pt|pc)".r
    def ANGLE = decimalNumber ~ "(?i)(?:deg|rad|grad)".r
    def TIME = decimalNumber ~ "(?i)(?:s|ms)".r
    def FREQ = decimalNumber ~ "(?i)(?:Hz|kHz)".r
    def DIMEN = decimalNumber ~ ident
    def PERCENTAGE = decimalNumber ~ "%"
    // `~` binds tighter than `|`: a plain number, or a backslash followed by a number.
    def NUMBER = decimalNumber | "\\" ~ decimalNumber
    // The text inside url(...) is wrapped in URL so it can be rewritten when the tree is flattened.
    def URI = "url(" ~ ((stringLiteral | "[^)]+".r) ^^ (URL(_))) ~ ")"

    def hexcolor = "#(?:[0-9A-Fa-f]{3}){1,2}".r
    def function = "[a-zA-Z:._0-9-]+\\(".r ~ funcexpr ~ ")"
    def unary_operator = "-" | "+"
    // Every term except a bare unary operator is tagged NeedsSpace so a separator follows it.
    def term: Parser[Any] = unary_operator | ((PERCENTAGE | LENGTH | EMS | EXS | ANGLE | RESOLUTION |
          TIME | FREQ | URI | hexcolor | stringLiteral | NUMBER | ie_expression | function | ident) ^^ (NeedsSpace(_)))
    def expr = rep1(term ~ opt(operator))

    // IE `expression(...)`: arbitrary text with balanced parentheses.
    def ie_expression_no_paren = "[^\\(\\)]+".r
    def ie_expression_paren: Parser[Any] = "(" ~ rep(ie_expression_no_paren | ie_expression_paren) ~ ")"
    def ie_expression = "expression" ~ ie_expression_paren

    // This is an extension of the css spec to allow filter: alpha(opacity=xx) syntax (kwargs).
    def funcexpr = rep(opt(ident ~ "=") ~ term ~ opt(operator))
    def operator = "/" | ","
    def combinator = "+" | ">" | "~"
    def prio = IMPORTANT_SYM
    def declaration = property ~ ":" ~ expr ~ opt(prio)
    def transform_declaration = """(?i)(?:from|to)""".r ~ "{" ~ rep1(declaration ~ rep(";")) ~ "}"
    def nth_expr = ("\\d+".r ~ "n" ~ opt(("+" | "-") ~ "\\d+".r)) | (opt("\\d+".r ~ "n") ~ ("+" | "-") ~ "\\d+".r) | "\\d+".r
    def pseudo = ":" ~ opt((ident ~ "(" ~ (HASH | class_ | ident | nth_expr | (":" ~ ident)) ~ ")") | ident)
    def attrib = "[" ~ ident ~ opt(opt("=" | INCLUDES | DASHMATCH) ~ (ident | stringLiteral)) ~ "]"
    def element_name = "*" | ident | "/**/"
    def class_ = "." ~ ident
    def HASH = "#" ~ ident
    def selector_modifier = HASH | class_ | attrib | pseudo
    def simple_selector = (element_name ~ rep(selector_modifier)) | (rep1(selector_modifier))
    def selector = simple_selector ~ opt(combinator | ",")
    def declaration_body = "{" ~ rep(transform_declaration | declaration ~ rep(";")) ~ "}"
    def ruleset = rep1(selector ^^ (NeedsSpace(_))) ~ declaration_body
    def property = ident
    def font_face = FONT_FACE_SYM ~ declaration_body
    def moz_document = ("(?i)@-moz-document".r ^^ (NeedsSpace(_))) ~ opt(function) ~ "{" ~ rep(ruleset) ~ "}"
    def pseudo_page = ":" ~ ident
    def medium = ident
    def media_qualifier = "(" ~ ident ~ ":" ~ term ~ ")"
    def media_term = (ident | media_qualifier) ~ opt(",")
    // Declarations in an @page body are separated by ";" (not ","). The separators are kept in
    // the parse tree, as in transform_declaration, so they survive flattening back to text.
    def page = (PAGE_SYM ^^ (NeedsSpace(_))) ~ opt(ident) ~ opt(pseudo_page) ~ "{" ~ rep1(declaration ~ rep(";")) ~ "}"
    def media = (MEDIA_SYM ^^ (NeedsSpace(_))) ~ rep1(media_term) ~ "{" ~ rep(ruleset) ~ "}"
    def namespace_prefix = ident
    def namespace = (NAMESPACE_SYM ^^ (NeedsSpace(_))) ~ opt(namespace_prefix) ~ opt(stringLiteral | URI) ~ ";"
    def import_ = (IMPORT_SYM ^^ (NeedsSpace(_))) ~ (stringLiteral | URI) ~ repsep(medium, ",") ~ ";"
    // Top-level production: optional @charset, then imports, namespaces and the rule bodies.
    def stylesheet = opt((CHARSET_SYM^^ (NeedsSpace(_))) ~ stringLiteral ~ ";") ~
        rep(import_) ~ rep(namespace) ~
        rep(media | page | font_face | moz_document | ruleset)
}

/**
 * The text captured inside a CSS `url(...)` term, possibly still wrapped in quotes.
 *
 * @param url raw text found between the parentheses, including any surrounding quotes
 */
case class URL(url: String) {
    // References that must never be re-based: root-relative paths ("/..."), fragment-only
    // references ("#id"), and anything that starts with a URI scheme ("http:", "data:", ...).
    val AbsolutePattern = """^(?is)(?:/|#|[a-z][a-z0-9+.-]*:).*""".r
    // Optional matching quote pair around the content; the content itself may be empty.
    val InsideQuote = """^(['\"]?)(.*)\1$""".r

    /**
     * Returns this URL with `prefix` prepended when it is a relative reference.
     * Surrounding quotes are preserved; absolute and empty references are returned unchanged.
     *
     * @param prefix relative path (normally ending in "/") to put in front of relative references
     * @return the rewritten text, quoted the same way as the input
     */
    def rewrite(prefix: String): String = url match {
        case InsideQuote(quote, content) => quote + rewriteInside(content, prefix) + quote
        case _ => rewriteInside(url, prefix)
    }

    // Re-bases the unquoted reference; an empty reference has nothing to re-base.
    private def rewriteInside(inside: String, prefix: String): String = inside match {
        case "" => inside
        case AbsolutePattern() => inside
        case _ => prefix + inside
    }
}


/**
 * Marks a parsed token that must be followed by a single space when the parse tree is
 * flattened back to text.
 *
 * @param token the wrapped parse result (any node the flattening step understands)
 */
case class NeedsSpace(token: Any)


/**
 * Command-line entry point: parses a stylesheet, re-bases its relative `url(...)` references
 * and prints the compacted CSS to standard output.
 *
 * Usage: `Main [input.css] [-s original/source/path.css] [-t target/path.css]`.
 * The stylesheet is read from the first argument unless that argument is a flag, in which case
 * it is read from standard input. URLs are only re-based when both a source path (`-s` or the
 * first argument) and a target path (`-t`) are known.
 */
object Main extends SimpleCSSParser {
    /** Relative prefix prepended to every non-absolute URL while flattening; set by `main`. */
    var prefix: String = ""

    def main(args: Array[String]): Unit = {
        // Tokens that never need a separating space in front of them.
        val noSpace = Set(";", "}", ")", "{", "(", ",", ">", "<", "+")

        val target = optionValue(args, "-t")
        val sourcePath = optionValue(args, "-s")
        // The first argument is the input file unless it is one of the flags.
        val inputFile = args.headOption.filter(first => first != "-t" && first != "-s")
        val originalSourcePath = sourcePath.orElse(inputFile)

        for (source <- originalSourcePath; dest <- target) {
            val relative = computePrefix(parentSegments(source), parentSegments(dest)).mkString("/")
            this.prefix = if (relative.isEmpty) relative else relative + "/"
        }

        val css = inputFile match {
            case Some(path) =>
                val file = io.Source.fromFile(path)
                // Release the file handle even if reading fails.
                try file.getLines().mkString("\n") finally file.close()
            case None =>
                io.Source.stdin.getLines().mkString("\n")
        }

        parseAll(stylesheet, css) match {
            case Success(tree, _) =>
                val flatResult = flatResultList(tree)
                // Beautify by removing needless spaces.
                for (idx <- 1 until flatResult.length) {
                    if (noSpace.contains(flatResult(idx)) && flatResult(idx - 1) == " ")
                        flatResult(idx - 1) = ""
                }
                print(flatResult.mkString(""))
            case failure: NoSuccess =>
                // Report the parser's own message and position instead of printing partial output.
                System.err.println(failure)
        }
    }

    // Returns the argument following the first occurrence of `flag`, if there is one.
    private def optionValue(args: Array[String], flag: String): Option[String] = {
        val at = args.indexOf(flag)
        if (at >= 0 && at + 1 < args.length) Some(args(at + 1)) else None
    }

    // Splits the parent directory of `path` into its components (Nil when there is no parent).
    private def parentSegments(path: String): List[String] =
        Option(new File(path).getParent).filter(_.nonEmpty) match {
            // The separator is quoted because split() takes a regex and "\" alone is not a valid one.
            case Some(parent) => parent.split(java.util.regex.Pattern.quote(File.separator)).toList
            case None => Nil
        }

    /**
     * Computes the relative path that leads from the target directory back to the source
     * directory: one ".." for each target component below the common ancestor, followed by the
     * source components below it. For example, source `a/x/c` and target `b/x/d` give
     * `../../../a/x/c`.
     *
     * Only the leading run of equal components counts as common; once the paths diverge,
     * same-named directories at the same depth are distinct.
     *
     * @param sourceDir components of the directory the stylesheet originally lived in
     * @param target    components of the directory the stylesheet is written to
     * @param acc       components to place between the ".." run and the source components
     * @return the components of the relative path (without a trailing separator)
     */
    def computePrefix(sourceDir: List[String], target: List[String], acc: List[String] = List()):
      List[String] = {
        val common = sourceDir.zip(target).takeWhile { case (s, t) => s == t }.length
        List.fill(target.length - common)("..") ::: acc ::: sourceDir.drop(common)
    }

    /**
     * Flattens a parse tree into the sequence of text fragments it was built from, applying the
     * rewrite rules attached to `URL` and `NeedsSpace` nodes along the way.
     *
     * @param result a parse result or any node nested inside one
     * @return the text fragments in document order
     */
    def flatResultList(result: Any): ArrayBuffer[String] = result match {
        case Some(inner) => flatResultList(inner)
        case parsed: ParseResult[_] => flatResultList(parsed.get)
        case left ~ right => flatResultList(left) ++ flatResultList(right)
        case first :: rest => flatResultList(first) ++ flatResultList(rest)
        case text: String => ArrayBuffer(text)
        case None => ArrayBuffer()
        case Nil => ArrayBuffer()

        /* Put any rewrite rule here, and annotate the above tokens with ^^ to do it. */
        case url: URL => ArrayBuffer(url.rewrite(this.prefix))
        case needsSpace: NeedsSpace => flatResultList(needsSpace.token) ++ ArrayBuffer(" ")
    }
}

https://github.com/axiak/scala-css-parser/blob/master/src/testurl.sh

#!/bin/bash
# Fetch the stylesheet at the URL given as $1 and pipe it into the CSS rewriter on stdin.
# Fail the whole pipeline if the download fails.
set -o pipefail

if [ -z "$1" ]; then
    echo "usage: $0 <stylesheet-url>" >&2
    exit 1
fi

# -f: treat HTTP errors as failures instead of feeding the error page to the parser.
# -sS: hide the progress meter but still report errors.
curl -fsS "$1" | java -cp /opt/scala/lib/scala-library.jar:. Main
+1

能否把解析器的关键部分直接写进回答里,比如附上源代码? – 2012-11-16 14:01:07

+0

@JFFx 好主意,解析器只有200 SLOC(源代码行)。 – 2012-11-24 03:10:33