在尝试字节编译时提高球拍代码和错误的性能

9

大概类似于soegaard的解决方案，除了这一次推出了自己的“语法分析器”，所以它是自包含的。它在我的机器上在6秒内生成完整的100年期清单。这个代码使用了一堆技巧，但它并不是真正被称为“优化”的东西：我确信通过一些备忘录可以使它更快，关注最大化树共享等。但是对于这样一个小的域名来说，这是不值得的努力...（同样的质量这个代码...）

顺便说一句，超过解析，原来的解决方案（S）使用eval哪让事情变得更快......对于这样的事情，通常手动编写“评估者”会更好。顺便说一句，这并不意味着Racket比Mathematica快 - 我确信那篇文章中的解决方案也会让它磨碎冗余的CPU周期，而类似的解决方案会更快。

#lang racket 

(define (tuples list n) 
    (let loop ([n n]) 
    (if (zero? n) 
     '(()) 
     (for*/list ([y (in-list (loop (sub1 n)))] [x (in-list list)]) 
     (cons x y))))) 

(define precedence 
    (let ([t (make-hasheq)]) 
    (for ([ops '((#f) (+ -) (* /) (||))] [n (in-naturals)]) 
     (for ([op ops]) (hash-set! t op n))) 
    t)) 

(define (do op x y) 
    (case op 
    [(+) (+ x y)] [(-) (- x y)] [(*) (* x y)] [(/) (/ x y)] 
    [(||) (+ (* 10 x) y)])) 

(define (run ops nums) 
    (unless (= (add1 (length ops)) (length nums)) (error "poof")) 
    (let loop ([nums  (cddr nums)] 
      [ops  (cdr ops)] 
      [numstack (list (cadr nums) (car nums))] 
      [opstack (list (car ops))]) 
    (if (and (null? ops) (null? opstack)) 
     (car numstack) 
     (let ([op (and (pair? ops) (car ops))] 
      [topop (and (pair? opstack) (car opstack))]) 
     (if (> (hash-ref precedence op) 
       (hash-ref precedence topop)) 
      (loop (cdr nums) 
       (cdr ops) 
       (cons (car nums) numstack) 
       (cons op opstack)) 
      (loop nums 
       ops 
       (cons (do topop (cadr numstack) (car numstack)) 
         (cddr numstack)) 
       (cdr opstack))))))) 

(define (expr ops* nums*) 
    (define ops (map symbol->string ops*)) 
    (define nums (map number->string nums*)) 
    (string-append* (cons (car nums) (append-map list ops (cdr nums))))) 

(define nums (for/list ([i (in-range 10 0 -1)]) i)) 
(define year1 2012) 
(define nyears 100) 
(define year2 (+ year1 nyears)) 
(define years (make-vector nyears '())) 
(for ([ops (in-list (tuples '(+ - */||) 9))]) 
    (define r (run ops nums)) 
    (when (and (integer? r) (<= year1 r) (< r year2)) 
    (vector-set! years (- r year1) 
       (cons ops (vector-ref years (- r year1)))))) 

(for ([solutions (in-vector years)] [year (in-range year1 year2)]) 
    (if (pair? solutions) 
    (printf "~a = ~a~a\n" 
      year (expr (car solutions) nums) 
      (if (null? (cdr solutions)) 
       "" 
       (format " (~a more)" (length (cdr solutions))))) 
    (printf "~a: no combination!\n" year)))

来源

2012-04-16 07:54:19

+0

超级！只是为了好奇，我试图对你的代码进行字节编译，希望它比解释器模式更快，但事实并非如此。 – lifebalance 2012-04-16 08:24:39

+0

这是正确的，请参阅[这个答案]（http://stackoverflow.com/questions/10135327/） – 2012-04-16 08:29:33

5

下面是我的实现。我在你的代码中调整和优化了一两件事，在我的笔记本电脑中，大约需要35分钟才能完成（当然是一种改进！）我发现表达式的评估是真正的性能杀手 - 如果它不是用于调用程序，该程序将在一分钟内完成。

我猜想，在本机使用中缀表示法编程语言评价会快得多，但在方案解析成本，然后评估字符串中缀表达式实在太多。

也许有人可以指出一个合适的替代soegaard/infix包？或者直接评估考虑运算符优先级的中缀表达式列表，如 - 其中&代表数字级联并具有最高优先级（例如：4 & 7 = 47），其他算术运算符（+, -, *, /）遵循通常的优先规则。

#lang at-exp racket 

(require (planet soegaard/infix) 
     (planet soegaard/infix/parser)) 

(define (product lst1 lst2) 
    (for*/list ([x (in-list lst1)] 
       [y (in-list lst2)]) 
    (cons x y))) 

(define (tuples lst n) 
    (if (zero? n) 
     '(()) 
     (product lst (tuples lst (sub1 n))))) 

(define (riffle numbers ops) 
    (if (null? ops) 
     (list (car numbers)) 
     (cons (car numbers) 
      (cons (car ops) 
        (riffle (cdr numbers) 
          (cdr ops)))))) 

(define (expression-string numbers optuple) 
    (apply string-append 
     (riffle numbers optuple))) 

(define (to-expression exp-str) 
    (eval 
    (parse-expression 
    #'here (open-input-string exp-str)))) 

(define (make-all-combinations numbers ops) 
    (let loop ((opts (tuples ops (sub1 (length numbers)))) 
      (acc '())) 
    (if (null? opts) 
     acc 
     (let ((exp-str (expression-string numbers (car opts)))) 
      (loop (cdr opts) 
       (cons (cons exp-str (to-expression exp-str)) acc)))))) 

(define (show-n-expressions all-combinations years) 
    (for-each (lambda (year) 
       (for-each (lambda (comb) 
          (when (= (cdr comb) year) 
          (printf "~s ~a~n" year (car comb)))) 
         all-combinations) 
       (printf "~n")) 
      years))

使用它像这样在原blog post复制的结果：

(define numbers '("10" "9" "8" "7" "6" "5" "4" "3" "2" "1")) 
(define ops '("" "+" "-" "*" "/")) 
; beware: this takes around 35 minutes to finish in my laptop 
(define all-combinations (make-all-combinations numbers ops)) 
(show-n-expressions all-combinations 
        (build-list 5 (lambda (n) (+ n 2012))))

UPDATE：

我snarfed礼Barzilay的表达式求值，并将它插入我的解决方案，现在所有组合的预先计算都在5秒左右完成！ show-n-expressions程序仍然需要一些工作来避免每次迭代整个组合列表，但这只是读者的练习。重要的是，现在蛮力强调所有可能的表达组合的价值正在快速发展。

#lang racket 

(define (tuples lst n) 
    (if (zero? n) 
     '(()) 
     (for*/list ((y (in-list (tuples lst (sub1 n)))) 
        (x (in-list lst))) 
     (cons x y)))) 

(define (riffle numbers ops) 
    (if (null? ops) 
     (list (car numbers)) 
     (cons (car numbers) 
      (cons (car ops) 
        (riffle (cdr numbers) 
          (cdr ops)))))) 

(define (expression-string numbers optuple) 
    (string-append* 
    (map (lambda (x) 
      (cond ((eq? x '&) "") 
       ((symbol? x) (symbol->string x)) 
       ((number? x) (number->string x)))) 
     (riffle numbers optuple)))) 

(define eval-ops 
    (let ((precedence (make-hasheq 
        '((& . 3) (/ . 2) (* . 2) 
         (- . 1) (+ . 1) (#f . 0)))) 
     (apply-op (lambda (op x y) 
         (case op 
         ((+) (+ x y)) ((-) (- x y)) 
         ((*) (* x y)) ((/) (/ x y)) 
         ((&) (+ (* 10 x) y)))))) 
    (lambda (nums ops) 
     (let loop ((nums  (cddr nums)) 
       (ops  (cdr ops)) 
       (numstack (list (cadr nums) (car nums))) 
       (opstack (list (car ops)))) 
     (if (and (null? ops) (null? opstack)) 
      (car numstack) 
      (let ((op (and (pair? ops) (car ops))) 
        (topop (and (pair? opstack) (car opstack)))) 
       (if (> (hash-ref precedence op) 
        (hash-ref precedence topop)) 
        (loop (cdr nums) 
         (cdr ops) 
         (cons (car nums) numstack) 
         (cons op opstack)) 
        (loop nums 
         ops 
         (cons (apply-op topop (cadr numstack) (car numstack)) 
           (cddr numstack)) 
         (cdr opstack))))))))) 

(define (make-all-combinations numbers ops) 
    (foldl (lambda (optuple tail) 
      (cons (cons (eval-ops numbers optuple) optuple) tail)) 
     empty (tuples ops (sub1 (length numbers))))) 

(define (show-n-expressions all-combinations numbers years) 
    (for-each (lambda (year) 
       (for-each (lambda (comb) 
          (when (= (car comb) year) 
          (printf "~s ~a~n" 
            year 
            (expression-string numbers (cdr comb))))) 
         all-combinations) 
       (printf "~n")) 
      years))

使用方法如下：

(define numbers '(10 9 8 7 6 5 4 3 2 1)) 
(define ops '(& + - * /)) 
; this is very fast now! 
(define all-combinations (make-all-combinations numbers ops)) 
(show-n-expressions all-combinations numbers 
        (build-list 5 (lambda (n) (+ n 2012))))

来源

2012-04-15 22:42:20

+0

将是不错的有你作为一个替代的解决方案，所以我会等到你决定如何定义优先级的树。 – lifebalance 2012-04-16 12:45:43

+0

@lifebalance我会努力工作，但在几天内。我的工作一周现在开始......我会让你知道！ – 2012-04-16 14:38:10

+0

@lifebalance我无法抗拒诱惑:)我使用Eli的表情评估器更新了我的答案，现在它的速度非常快！ – 2012-04-17 03:54:24

3

这不是一个完整的答案，但我认为这是图书馆奥斯卡·洛佩斯是要求的替代品。不幸的是它在clojure，但希望它足够清楚...

(def default-priorities 
    {'+ 1, '- 1, '* 2, '/ 2, '& 3}) 

(defn- extend-tree [tree priorities operator value] 
    (if (seq? tree) 
    (let [[op left right] tree 
      [old new] (map priorities [op operator])] 
     (if (> new old) 
     (list op left (extend-tree right priorities operator value)) 
     (list operator tree value))) 
    (list operator tree value))) 

(defn priority-tree 
    ([operators values] (priority-tree operators values default-priorities)) 
    ([operators values priorities] (priority-tree operators values priorities nil)) 
    ([operators values priorities tree] 
    (if-let [operators (seq operators)] 
     (if tree 
     (recur 
      (rest operators) (rest values) priorities 
      (extend-tree tree priorities (first operators) (first values))) 
     (let [[v1 v2 & values] values] 
      (recur (rest operators) values priorities (list (first operators) v1 v2)))) 
     tree))) 

; [] [+ & *] [1 2 3 4] 1+23*4 
; [+ 1 2] [& *] [3 4] - initial tree 
; [+ 1 [& 2 3]] [*] [4] - binds more strongly than + so replace right-most node 
; [+ 1 [* [& 2 3] 4]] [] [] - descend until do not bind more tightly, and extend 

(println (priority-tree ['+ '& '*] [1 2 3 4])) ; 1+23*4 
(println (priority-tree ['& '- '* '+ '&] [1 2 3 4 5 6])) ; 12 - 3*4 + 56

输出为：

(+ 1 (* (& 2 3) 4)) 
(+ (- (& 1 2) (* 3 4)) (& 5 6))

[更新]添加以下

(defn & [a b] (+ b (* 10 a))) 

(defn all-combinations [tokens length] 
    (if (> length 0) 
    (for [token tokens 
      smaller (all-combinations tokens (dec length))] 
     (cons token smaller)) 
    [[]])) 

(defn all-expressions [operators digits] 
    (map #(priority-tree % digits) 
    (all-combinations operators (dec (count digits))))) 

(defn all-solutions [target operators digits] 
    (doseq [expression 
      (filter #(= (eval %) target) 
      (all-expressions operators digits))] 
    (println expression))) 

(all-solutions 2012 ['+ '- '* '/ '&] (range 10 0 -1))

解决了这个问题，但它是缓慢的 - 27分钟才能完成。这是一个不错的，相当新的笔记本电脑（i7-2640M）。

(+ (- (+ 10 (* 9 (& 8 7))) (& 6 5)) (* 4 (& (& 3 2) 1))) 
(+ (- (+ (+ (* (* 10 9) 8) 7) 6) 5) (* 4 (& (& 3 2) 1))) 
(- (- (+ (- (& 10 9) (* 8 7)) (* (& (& 6 5) 4) 3)) 2) 1)

（我只打印2012年 - 见上面的代码 - 但它会评估整个序列）。

所以，不幸的是，这并没有真正回答这个问题，因为它不比ÓscarLópez的代码快。我想下一步就是在评估中加入一些智慧，并节省一些时间。但是什么？

[更新2]这里读其他职位后我取代eval与

(defn my-eval [expr] 
    (if (seq? expr) 
    (let [[op left right] expr] 
     (case op 
     + (+ (my-eval left) (my-eval right)) 
     - (- (my-eval left) (my-eval right)) 
     * (* (my-eval left) (my-eval right)) 
     /(/ (my-eval left) (my-eval right)) 
     & (& (my-eval left) (my-eval right)))) 
    expr))

和运行时间降低到45秒。仍然不是很好，但它是一个非常低效的解析/评估。为了完整性，下面是分流码算法（简单的总是左联合）和相关的eval的实现，但是只将时间减少到35s。

(defn shunting-yard 
    ([operators values] (shunting-yard operators values default-priorities)) 
    ([operators values priorities] 
    (let [[value & values] values] 
     (shunting-yard operators values priorities nil (list value)))) 
    ([operators values priorities stack-ops stack-vals] 
; (println operators values stack-ops stack-vals) 
    (if-let [[new & short-operators] operators] 
     (let [[value & short-values] values] 
     (if-let [[old & short-stack-ops] stack-ops] 
      (if (> (priorities new) (priorities old)) 
      (recur short-operators short-values priorities (cons new stack-ops) (cons value stack-vals)) 
      (recur operators values priorities short-stack-ops (cons old stack-vals))) 
      (recur short-operators short-values priorities (list new) (cons value stack-vals)))) 
     (concat (reverse stack-vals) stack-ops)))) 

(defn stack-eval 
    ([stack] (stack-eval (rest stack) (list (first stack)))) 
    ([stack values] 
    (if-let [[op & stack] stack] 
     (let [[right left & tail] values] 
     (case op 
      + (recur stack (cons (+ left right) tail)) 
      - (recur stack (cons (- left right) tail)) 
      * (recur stack (cons (* left right) tail)) 
     /(recur stack (cons (/ left right) tail)) 
      & (recur stack (cons (& left right) tail)) 
      (recur stack (cons op values)))) 
     (first values))))

来源

2012-04-16 01:28:17

+0

+1在clojure做。你能发布你的代码的米奇鼠标基准吗？只是对clojure和球拍之间的相对表现有一个小想法会被痒痒 – lurscher 2012-04-16 02:35:06

+0

28m--见上文。 – 2012-04-16 03:15:58

+0

@andrewcooke我相信，如果我们可以在我的答案中插入你的'priority-tree'过程（请参阅上次编辑），它会比这更快。可悲的是我不能流畅地在clojure中自己做转换。 – 2012-04-16 03:19:54

4

正如Oscar指出的那样，问题在于soegaard/infix对于这类问题是缓慢的。

我发现在GitHub上缀表达式标准分流码解析器和球拍写了下面的程序：

#lang racket 
(require "infix-calc.scm") 

(define operators '("*" "/" "+" "-" "")) 
(time 
(for*/list ([o1 (in-list operators)] 
      [o2 (in-list operators)] 
      [o3 (in-list operators)] 
      [o4 (in-list operators)] 
      [o5 (in-list operators)] 
      [o6 (in-list operators)] 
      [o7 (in-list operators)] 
      [o8 (in-list operators)] 
      [o9 (in-list operators)] 
      [expr (in-value 
        (apply string-append 
         (list "1" o1 "2" o2 "3" o3 "4" o4 "5" o5 "6" o6 "7" o7 "8" o8 "9" o9 "10")))] 
      #:when (= (first (calc expr)) 2012)) 
expr))

了不到3分钟后的结果是：

Welcome to DrRacket, version 5.2.900.2--2012-03-29(8c22c6c/a) [3m]. 
Language: racket; memory limit: 128 MB. 
cpu time: 144768 real time: 148818 gc time: 25252 
'("1*2*3+4*567*8/9-10" 
    "1*2+34*56+7+89+10" 
    "1*23+45*6*7+89+10" 
    "1+2+3/4*5*67*8+9-10" 
    "1+2+3+4*567*8/9-10" 
    "1+2+34*56+7+8+9*10" 
    "1+23+45*6*7+8+9*10" 
    "1-2+345*6-7*8+9-10" 
    "12*34*5+6+7*8-9*10" 
    "12*34*5+6-7-8-9-10" 
    "1234+5-6+789-10")

中缀语法分析器由Andrew Levenson编写。解析器和上面的代码可以在这里找到：

https://github.com/soegaard/Scheme-Infix-Calculator

来源

2012-04-16 07:32:47 soegaard

+1

在这里soegaard/infix的主要原因是缓慢的，是在编译过程中使用scheme/infix的输出。有几种开销来源：首先解析器需要一个端口，所以在解析结果是一个语法对象（通常会被编译）之后，使用open-input-string将字符串转换为端口，但是这里给出了评估（并使用eval很慢）。 – soegaard 2012-04-16 07:40:35

+0

感谢您的洞察力。不幸的是，我只能在很多好的选择中选择一个答案！ – lifebalance 2012-04-17 03:15:56

3

有趣！我不得不尝试它，它是Python，希望你不介意。它运行在大约28秒时，PyPy 1.8，Core 2 Duo处理器1.4

from __future__ import division 
from math import log 
from operator import add, sub, mul 
div = lambda a, b: float(a)/float(b) 

years = set(range(2012, 2113)) 

none = lambda a, b: a * 10 ** (int(log(b, 10)) + 1) + b 
priority = {none: 3, mul: 2, div: 2, add: 1, sub: 1} 
symbols = {none: '', mul: '*', div: '/', add: '+', sub: '-', None: ''} 

def evaluate(numbers, operators): 
    ns, ops = [], [] 
    for n, op in zip(numbers, operators): 
     while ops and (op is None or priority[ops[-1]] >= priority[op]): 
      last_n = ns.pop() 
      last_op = ops.pop() 
      n = last_op(last_n, n) 
     ns.append(n) 
     ops.append(op) 
    return n 

def display(numbers, operators): 
    return ''.join([ 
     i for n, op in zip(numbers, operators) for i in (str(n), symbols[op])]) 

def expressions(years): 
    numbers = 10, 9, 8, 7, 6, 5, 4, 3, 2, 1 
    operators = none, add, sub, mul, div 
    pools = [operators] * (len(numbers) - 1) + [[None]] 
    result = [[]] 
    for pool in pools: 
     result = [x + [y] for x in result for y in pool] 
    for ops in result: 
     expression = evaluate(numbers, ops) 
     if expression in years: 
      yield '%d = %s' % (expression, display(numbers, ops)) 

for year in sorted(expressions(years)): 
    print year

来源

2012-04-16 21:09:03

+0

你能再检查一遍吗？对于2102，即使按照原始博客文章，也不存在组合。你的答案在win32上生成12个组合（至少在Python IDLE，Python 2.7.2（默认，2011年6月12日，14:24:46）[MSC v.1500 64 bit（AMD64）]：2102 = 10 * 9/8 * 765/4-3 + 2 * 1, 2102 = 10 * 9/8 * 765/4-3 + 2/1,2102 = 10 * 9/8 * 765/4-3/2 * 1,2102 = 10 * 9/8 * 765/4-3/2/1,2102 = 10-9 + 876/5 * 4 * 3 + 2-1,2102 = 10/9 * 876/5 * 4 * 3 + 2 * 1 2102 = 10/9 * 876/5 * 4 * 3 + 2/1 2102 = 10/9 + 876/5 * 4 * 3 + 2-1 2102 = 109 * 8 * 7/6 + 543 * 2-1 2102 = 109 * 87/6 + 543-21 2102 = 10987/6 + 543/2 * 1 2102 = 10987/6 + 543/2/1 – lifebalance 2012-04-21 18:04:01

+0

尝试“eval（）上面的组合，例如'eval（'10 -9 + 876/5 * 4 * 3 + 2-1'）'说'2012'。不知道为什么其他方法显示不同的结果。可能它与整数除法有关，不知道是否... – 2012-04-24 14:19:03

+0

好吧，我认为这是因为整数除法。用运算符import add，sub，mul'和'div = lambda a，b：float（a）/ float（b）'，用两行代替第二行'from operator import add，sub，mul，div'应该像其他方法一样工作。更多在这里：http://www.python.org/dev/peps/pep-0238/ – 2012-04-24 14:30:12

在尝试字节编译时提高球拍代码和错误的性能

回答

相关问题