13

任务仅仅是看您可以计算n选择n / 2（甚至n）比python中的内置函数快多少。当然，对于大n来说，这是一个相当大的数字，因此您应该输出数字的总和，而不是输出整数。例如，n = 100000答案为135702。因为n=1000000是1354815。

这是python代码：

from scipy.misc import comb
def sum_digits(n):
   r = 0
   while n:
       r, n = r + n % 10, n / 10
   return r
sum_digits(comb(n,n/2,exact=True))

你的分数是(highest n on your machine using your code)/(highest n on your machine using my code)。您的代码必须在60秒或更短时间内终止。

您的程序必须为所有偶数n提供正确的输出：2 <= n <=（您的最高n）

您不能使用任何内置代码或库来计算二项式系数或可以快速转换为二项式系数的值。

您可以使用任何选择的语言。

领先答案 当前领先的680.09领先答案是justhalf。

fastest-code number-theory

2

我们应该使用python还是选择的语言提交解决方案？

可以编写一个在现代计算机上执行此操作的例程，并将其n带入数百万个例程，而我怀疑Python函数会处理比n = 1e5不阻塞更强大的功能。

— COTO 2014年

@Alessandro您可以使用任何选择的语言。唯一的限制是您不能使用内置函数来计算系数。

2

是否允许阶乘函数？我认为不是因为可以将它们“迅速转换为二项式系数”（整个事情只是一个阶乘除以另一个阶乘平方），但是由于答案现在正在使用，因此清晰度会很好。

— Geobits 2014年

1

@Comintern：我已成功在1分钟内以2.87亿或35秒内以1.69亿复制了该参考点！:)

— justhalf 2014年

9

C ++（GMP）-（287,000,000 / 422,000）= 680.09

无耻地将xnor的Kummer定理和qwr的GMP相结合。 ~~甚至还不能接近Go解决方案，不确定为什么。~~

编辑：感谢Keith Randall的提醒，如果数字大小相似，则乘法运算会更快。我实现了多级乘法，类似于内存管理中的内存合并概念。结果令人印象深刻。以前需要51秒，现在只需0.5秒（即提高了100倍！！）

旧代码（n = 14,000,000）
在0.343秒内完成筛分
在51.929年代完成了计算二项式
完成0.901秒的求和
14000000：18954729

真正的0m53.194s
用户0m53.116s
sys 0m0.060s

新代码（n = 14,000,000）
在0.343秒内完成筛分
在0.552秒内完成计算二项式
完成总计0.902秒
14000000：18954729

真正的0m1.804s
用户0m1.776s
SYS 0分0.023秒

奔跑 n=287,000,000

在4.211秒内完成筛分
在17.934年代完成了计算二项式
在37.677秒内完成求和
287000000：388788354

真正的0m59.928s
用户0m58.759s
sys 0m1.116s

代码。编译-lgmp -lgmpxx -O3

#include <gmpxx.h>
#include <iostream>
#include <time.h>
#include <cstdio>

const int MAX=287000000;
const int PRIME_COUNT=15700000;

int primes[PRIME_COUNT], factors[PRIME_COUNT], count;
bool sieve[MAX];
int max_idx=0;

void run_sieve(){
    sieve[2] = true;
    primes[0] = 2;
    count = 1;
    for(int i=3; i<MAX; i+=2){
        sieve[i] = true;
    }
    for(int i=3; i<17000; i+=2){
        if(!sieve[i]) continue;
        for(int j = i*i; j<MAX; j+=i){
            sieve[j] = false;
        }
    }
    for(int i=3; i<MAX; i+=2){
        if(sieve[i]) primes[count++] = i;
    }
}

mpz_class sum_digits(mpz_class n){
    clock_t t = clock();
    char* str = mpz_get_str(NULL, 10, n.get_mpz_t());
    int result = 0;
    for(int i=0;str[i]>0;i++){
        result+=str[i]-48;
    }
    printf("Done summing in %.3fs\n", ((float)(clock()-t))/CLOCKS_PER_SEC);
    return result;
}

mpz_class nc2_fast(const mpz_class &x){
    clock_t t = clock();
    int prime;
    const unsigned int n = mpz_get_ui(x.get_mpz_t());
    const unsigned int n2 = n/2;
    unsigned int m;
    unsigned int digit;
    unsigned int carry=0;
    unsigned int carries=0;
    mpz_class result = 1;
    mpz_class prime_prods = 1;
    mpz_class tmp;
    mpz_class tmp_prods[32], tmp_prime_prods[32];
    for(int i=0; i<32; i++){
        tmp_prods[i] = (mpz_class)NULL;
        tmp_prime_prods[i] = (mpz_class)NULL;
    }
    for(int i=0; i< count; i++){
        prime = primes[i];
        carry=0;
        carries=0;
        if(prime > n) break;
        if(prime > n2){
            tmp = prime;
            for(int j=0; j<32; j++){
                if(tmp_prime_prods[j] == NULL){
                    tmp_prime_prods[j] = tmp;
                    break;
                } else {
                    mpz_mul(tmp.get_mpz_t(), tmp.get_mpz_t(), tmp_prime_prods[j].get_mpz_t());
                    tmp_prime_prods[j] = (mpz_class)NULL;
                }
            }
            continue;
        }
        m=n2;
        while(m>0){
            digit = m%prime;
            carry = (2*digit + carry >= prime) ? 1 : 0;
            carries += carry;
            m/=prime;
        }
        if(carries>0){
            tmp = 0;
            mpz_ui_pow_ui(tmp.get_mpz_t(), prime, carries);
            for(int j=0; j<32; j++){
                if(tmp_prods[j] == NULL){
                    tmp_prods[j] = tmp;
                    break;
                } else {
                    mpz_mul(tmp.get_mpz_t(), tmp.get_mpz_t(), tmp_prods[j].get_mpz_t());
                    tmp_prods[j] = (mpz_class)NULL;
                }
            }
        }
    }
    result = 1;
    prime_prods = 1;
    for(int j=0; j<32; j++){
        if(tmp_prods[j] != NULL){
            mpz_mul(result.get_mpz_t(), result.get_mpz_t(), tmp_prods[j].get_mpz_t());
        }
        if(tmp_prime_prods[j] != NULL){
            mpz_mul(prime_prods.get_mpz_t(), prime_prods.get_mpz_t(), tmp_prime_prods[j].get_mpz_t());
        }
    }
    mpz_mul(result.get_mpz_t(), result.get_mpz_t(), prime_prods.get_mpz_t());
    printf("Done calculating binom in %.3fs\n", ((float)(clock()-t))/CLOCKS_PER_SEC);
    return result;
}

int main(int argc, char* argv[]){
    const mpz_class n = atoi(argv[1]);
    clock_t t = clock();
    run_sieve();
    printf("Done sieving in %.3fs\n", ((float)(clock()-t))/CLOCKS_PER_SEC);
    std::cout << n << ": " << sum_digits(nc2_fast(n)) << std::endl;
    return 0;
}

— 正义的
source

2

如果两个操作数的大小相同，则乘法效率更高。你总是乘大数字乘小数字。如果反复将小数字成对组合，则可能会更快（但会占用更多内存）。

— 基思·兰德尔

哇，那有很大的不同。它成倍地快。我现在可以在35秒内达到1.69亿。

— justhalf 2014年

哇！您的代码不同部分在时间上的细分是什么？

我已经在回答中提出了。生成质数最多为4s n，计算中心二项式系数为18s，其余37s为将结果转换为字符串并累加数字。

— justhalf 2014年

1

我觉得这个答案应该对任何计算二项式系数的开源库都有帮助。我简直不敢相信别人有这么快的代码！

7

转到33.96 =（16300000/480000）

package main

import "math/big"

const n = 16300000

var (
    sieve     [n + 1]bool
    remaining [n + 1]int
    count     [n + 1]int
)

func main() {
    println("finding primes")
    for p := 2; p <= n; p++ {
        if sieve[p] {
            continue
        }
        for i := p * p; i <= n; i += p {
            sieve[i] = true
        }
    }

    // count net number of times each prime appears in the result.
    println("counting factors")
    for i := 2; i <= n; i++ {
        remaining[i] = i
    }
    for p := 2; p <= n; p++ {
        if sieve[p] {
            continue
        }

        for i := p; i <= n; i += p {
            for remaining[i]%p == 0 { // may have multiple factors of p
                remaining[i] /= p

                // count positive for n!
                count[p]++
                // count negative twice for ((n/2)!)^2
                if i <= n/2 {
                    count[p] -= 2
                }
            }
        }
    }

    // ignore all the trailing zeros
    count[2] -= count[5]
    count[5] = 0

    println("listing factors")
    var m []uint64
    for i := 0; i <= n; i++ {
        for count[i] > 0 {
            m = append(m, uint64(i))
            count[i]--
        }
    }

    println("grouping factors")
    m = group(m)

    println("multiplying")
    x := mul(m)

    println("converting to base 10")
    d := 0
    for _, c := range x.String() {
        d += int(c - '0')
    }
    println("sum of digits:", d)
}

// Return product of elements in a.
func mul(a []uint64) *big.Int {
    if len(a) == 1 {
        x := big.NewInt(0)
        x.SetUint64(a[0])
        return x
    }
    m := len(a) / 2
    x := mul(a[:m])
    y := mul(a[m:])
    x.Mul(x, y) // fast because x and y are about the same length
    return x
}

// return a slice whose members have the same product
// as the input slice, but hopefully shorter.
func group(a []uint64) []uint64 {
    var g []uint64
    r := uint64(1)
    b := 1
    for _, x := range a {
        c := bits(x)
        if b+c <= 64 {
            r *= x
            b += c
        } else {
            g = append(g, r)
            r = x
            b = c
        }
    }
    g = append(g, r)
    return g
}

// bits returns the number of bits in the representation of x
func bits(x uint64) int {
    n := 0
    for x != 0 {
        n++
        x >>= 1
    }
    return n
}

通过计算分子和分母中的所有素因子并消除匹配因子来工作。将剩菜剩饭乘以得到结果。

转换为基数10的时间超过了80％。必须有一种更好的方法来实现这一目标。

— 基思·兰德尔
source

对于需要在基数10中打印大量数字的问题，我通常会发现编写自己的BigInteger类（将数字存储在基数1E9〜2 ^ 30中）很有帮助。

— 彼得·泰勒

正如他们所说，您目前正在赢得一英里的胜利。

@PeterTaylor：我尝试过，但是在乘法代码中需要很多％1e9，这会使乘法变慢。

— 基思·兰德尔

6

Python 3（8.8 = 220万/ 25万）

这是用Python编写的，它不以速度闻名，因此您最好将其移植到另一种语言。

从此StackOverflow竞赛中提取的Prime生成器。

import numpy
import time

def primesfrom2to(n):
    """ Input n>=6, Returns a array of primes, 2 <= p < n """
    sieve = numpy.ones(n//3 + (n%6==2), dtype=numpy.bool)
    for i in range(1,int(n**0.5)//3+1):
        if sieve[i]:
            k=3*i+1|1
            sieve[       k*k/3     ::2*k] = False
            sieve[k*(k-2*(i&1)+4)/3::2*k] = False
    return numpy.r_[2,3,((3*numpy.nonzero(sieve)[0][1:]+1)|1)]

t0 = time.clock()

N=220*10**4
n=N//2

print("N = %d" % N)
print()

print("Generating primes.")
primes = primesfrom2to(N)

t1 = time.clock()
print ("Time taken: %f" % (t1-t0))

print("Computing product.")
product = 1

for p in primes:
    p=int(p)
    carries = 0 
    carry = 0

    if p>n:
        product*=p
        continue

    m=n

    #Count carries of n+n in base p as per Kummer's Theorem
    while m:
        digit = m%p
        carry = (2*digit + carry >= p)
        carries += carry
        m//=p

    if carries >0:
        for _ in range(carries):
            product *= p

    #print(p,carries,product)

t2 = time.clock()
print ("Time taken: %f" % (t2-t1))

print("Converting number to string.")

# digit_sum = 0
# result=product

# while result:
    # digit_sum+=result%10
    # result//=10

digit_sum = 0
digit_string = str(product)

t3 = time.clock()
print ("Time taken: %f" % (t3-t2))

print("Summing digits.")
for d in str(digit_string):digit_sum+=int(d)

t4 = time.clock()
print ("Time taken: %f" % (t4-t3))
print ()

print ("Total time: %f" % (t4-t0))
print()
print("Sum of digits = %d" % digit_sum)

该算法的主要思想是使用Kummer定理来获得二项式的素因式分解。对于每个素数，我们将学习其中最大的幂，该幂将答案除，然后将乘积乘以素的幂。这样，我们只需要为答案的素数分解中的每个素数乘一次即可。

显示时间分解的输出：

N = 2200000
Generating primes.
Time taken: 0.046408
Computing product.
Time taken: 17.931472
Converting number to string.
Time taken: 39.083390
Summing digits.
Time taken: 1.502393

Total time: 58.563664

Sum of digits = 2980107

令人惊讶的是，大部分时间都花在将数字转换为字符串以求其数字加和上。同样令人惊讶的是，即使必须将整个字符串保存在内存中，转换为字符串也要比从重复的%10和获取数字快得多//10。

生成素数的时间可以忽略不计（因此，复制现有代码不会感到不公平）。数字求和很快。实际的乘法需要三分之一的时间。

考虑到数字求和似乎是一个限制因素，也许一种通过将二进制/十进制转换捷径来将数字乘以十进制表示的算法可以节省总时间。

— 异或
source

这非常令人印象深刻，使您想知道为什么cpython不使用您的实现！

3

Java（分数22500/365000 = 0.062）

我在这台计算机上没有Python，所以如果有人能对此评分，我将不胜感激。如果没有，则必须等待。

此实现的基础是

(\binom{2 n}{n}) = \sum_{k = 0}^{n} {(\binom{n}{k})}^{2}

$\binom{2n}{n} = \sum_{k=0}^n \binom{n}{k}^2$

瓶颈是计算Pascal三角形的相关部分（运行时间的90％）的附加项，因此使用更好的乘法算法实际上并没有帮助。

请注意，问题称为n我所说的2n。命令行参数是问题所调用的n。

public class CodeGolf37270 {
    public static void main(String[] args) {
        if (args.length != 1) {
            System.err.println("Usage: java CodeGolf37270 <n>");
            System.exit(1);
        }

        int two_n = Integer.parseInt(args[0]);
        // \binom{2n}{n} = \sum_{k=0}^n \binom{n}{k}^2
        // Two cases:
        //   n = 2m: \binom{4m}{2m} = \binom{2m}{m}^2 + 2\sum_{k=0}^{m-1} \binom{2m}{k}^2
        //   n = 2m+1: \binom{4m+2}{2m+1} = 2\sum_{k=0}^{m} \binom{2m+1}{k}^2
        int n = two_n / 2;
        BigInt[] nCk = new BigInt[n/2 + 1];
        nCk[0] = new BigInt(1);
        for (int k = 1; k < nCk.length; k++) nCk[k] = nCk[0];
        for (int row = 2; row <= n; row++) {
            BigInt tmp = nCk[0];
            for (int col = 1; col < row && col < nCk.length; col++) {
                BigInt replacement = tmp.add(nCk[col]);
                tmp = nCk[col];
                nCk[col] = replacement;
            }
        }

        BigInt central = nCk[0]; // 1^2 = 1
        int lim = (n & 1) == 1 ? nCk.length : (nCk.length - 1);
        for (int k = 1; k < lim; k++) central = central.add(nCk[k].sq());
        central = central.add(central);
        if ((n & 1) == 0) central = central.add(nCk[nCk.length - 1].sq());

        System.out.println(central.digsum());
    }

    private static class BigInt {
        static final int B = 1000000000;
        private int[] val;

        public BigInt(int x) {
            val = new int[] { x };
        }

        private BigInt(int[] val) {
            this.val = val;
        }

        public BigInt add(BigInt that) {
            int[] left, right;
            if (val.length < that.val.length) {
                left = that.val;
                right = val;
            }
            else {
                left = val;
                right = that.val;
            }

            int[] sum = left.clone();
            int carry = 0, k = 0;
            for (; k < right.length; k++) {
                int a = sum[k] + right[k] + carry;
                sum[k] = a % B;
                carry = a / B;
            }
            while (carry > 0 && k < sum.length) {
                int a = sum[k] + carry;
                sum[k] = a % B;
                carry = a / B;
                k++;
            }
            if (carry > 0) {
                int[] wider = new int[sum.length + 1];
                System.arraycopy(sum, 0, wider, 0, sum.length);
                wider[sum.length] = carry;
                sum = wider;
            }

            return new BigInt(sum);
        }

        public BigInt sq() {
            int[] rv = new int[2 * val.length];
            // Naive multiplication
            for (int i = 0; i < val.length; i++) {
                for (int j = i; j < val.length; j++) {
                    int k = i+j;
                    long c = val[i] * (long)val[j];
                    if (j > i) c <<= 1;
                    while (c > 0) {
                        c += rv[k];
                        rv[k] = (int)(c % B);
                        c /= B;
                        k++;
                    }
                }
            }

            int len = rv.length;
            while (len > 1 && rv[len - 1] == 0) len--;
            if (len < rv.length) {
                int[] rv2 = new int[len];
                System.arraycopy(rv, 0, rv2, 0, len);
                rv = rv2;
            }

            return new BigInt(rv);
        }

        public long digsum() {
            long rv = 0;
            for (int i = 0; i < val.length; i++) {
                int x = val[i];
                while (x > 0) {
                    rv += x % 10;
                    x /= 10;
                }
            }
            return rv;
        }
    }
}

— 彼得·泰勒
source

我为您的程序获得29,500，为参考程序获得440,000，因此，您的得分将为0.067。它使用Java 1.7（javac CodeGolf37270.java）进行编译，并使用Java 1.8（java CodeGolf37270 n）执行。我不确定是否有我不知道的优化选项。我无法尝试使用Java 1.8进行编译，因为它没有随Java包一起安装...

— Dennis

有趣的方法。您为什么认为迭代计算比使用简单公式更快？

— justhalf 2014年

@justhalf，我对是否会更快没有直觉，也没有尝试进行复杂度计算。我浏览了中央二项式系数的身份列表，试图找到使用为提取基10位数而优化的自定义大整数类易于实现的公式。并且发现它不是很有效之后，我不妨将其发布并保存其他人以免重复实验。（FWIW我正在研究Toom乘法，但是不确定何时进行测试和调试）。

— 彼得·泰勒

2

GMP-1500000/300000 = 5.0

尽管此答案无法与筛子竞争，但有时短代码仍然可以得到结果。

#include <gmpxx.h>
#include <iostream>

mpz_class sum_digits(mpz_class n)
{
    char* str = mpz_get_str(NULL, 10, n.get_mpz_t());
    int result = 0;
    for(int i=0; str[i]>0; i++)

    result += str[i] - 48;

    return result;
}


mpz_class comb_2(const mpz_class &x)
{
    const unsigned int k = mpz_get_ui(x.get_mpz_t()) / 2;
    mpz_class result = k + 1;

    for(int i=2; i<=k; i++)
    {
        result *= k + i;
        mpz_divexact_ui(result.get_mpz_t(), result.get_mpz_t(), i);
    }

    return result;
}

int main()
{
    const mpz_class n = 1500000;
    std::cout << sum_digits(comb_2(n)) << std::endl;

    return 0;
}

— wr
source

2

Java，自定义大整数类：32.9（120000000/365000）

主类非常简单：

import java.util.*;

public class PPCG37270 {
    public static void main(String[] args) {
        long start = System.nanoTime();

        int n = 12000000;
        if (args.length == 1) n = Integer.parseInt(args[0]);

        boolean[] sieve = new boolean[n + 1];
        int[] remaining = new int[n + 1];
        int[] count = new int[n + 1];

        for (int p = 2; p <= n; p++) {
            if (sieve[p]) continue;
            long p2 = p * (long)p;
            if (p2 > n) continue;
            for (int i = (int)p2; i <= n; i += p) sieve[i] = true;
        }

        for (int i = 2; i <= n; i++) remaining[i] = i;
        for (int p = 2; p <= n; p++) {
            if (sieve[p]) continue;
            for (int i = p; i <= n; i += p) {
                while (remaining[i] % p == 0) {
                    remaining[i] /= p;
                    count[p]++;
                    if (i <= n/2) count[p] -= 2;
                }
            }
        }

        count[2] -= count[5];
        count[5] = 0;

        List<BigInt> partialProd = new ArrayList<BigInt>();
        long accum = 1;
        for (int i = 2; i <= n; i++) {
            for (int j = count[i]; j > 0; j--) {
                long tmp = accum * i;
                if (tmp < 1000000000L) accum = tmp;
                else {
                    partialProd.add(new BigInt((int)accum));
                    accum = i;
                }
            }
        }
        partialProd.add(new BigInt((int)accum));
        System.out.println(prod(partialProd).digsum());
        System.out.println((System.nanoTime() - start) / 1000000 + "ms");
    }

    private static BigInt prod(List<BigInt> vals) {
        while (vals.size() > 1) {
            int n = vals.size();
            List<BigInt> next = new ArrayList<BigInt>();
            for (int i = 0; i < n; i += 2) {
                if (i == n - 1) next.add(vals.get(i));
                else next.add(vals.get(i).mul(vals.get(i+1)));
            }
            vals = next;
        }
        return vals.get(0);
    }
}

它依赖于为乘法而优化的大整数类toString()，两者都是使用java.math.BigInteger。实现的重要瓶颈。

/**
 * A big integer class which is optimised for conversion to decimal.
 * For use in simple applications where BigInteger.toString() is a bottleneck.
 */
public class BigInt {
    // The base of the representation.
    private static final int B = 1000000000;
    // The number of decimal digits per digit of the representation.
    private static final int LOG10_B = 9;

    public static final BigInt ZERO = new BigInt(0);
    public static final BigInt ONE = new BigInt(1);

    // We use sign-magnitude representation.
    private final boolean negative;

    // Least significant digit is at val[off]; most significant is at val[off + len - 1]
    // Unless len == 1 we guarantee that val[off + len - 1] is non-zero.
    private final int[] val;
    private final int off;
    private final int len;

    // Toom-style multiplication parameters from
    // Zuras, D. (1994). More on squaring and multiplying large integers. IEEE Transactions on Computers, 43(8), 899-908.
    private static final int[][][] Q = new int[][][]{
        {},
        {},
        {{1, -1}},
        {{4, 2, 1}, {1, 1, 1}, {1, 2, 4}},
        {{8, 4, 2, 1}, {-8, 4, -2, 1}, {1, 1, 1, 1}, {1, -2, 4, -8}, {1, 2, 4, 8}}
    };
    private static final int[][][] R = new int[][][]{
        {},
        {},
        {{1, -1, 1}},
        {{-21, 2, -12, 1, -6}, {7, -1, 10, -1, 7}, {-6, 1, -12, 2, -21}},
        {{-180, 6, 2, -80, 1, 3, -180}, {-510, 4, 4, 0, -1, -1, 120}, {1530, -27, -7, 680, -7, -27, 1530}, {120, -1, -1, 0, 4, 4, -510}, {-180, 3, 1, -80, 2, 6, -180}}
    };
    private static final int[][] S = new int[][]{
        {},
        {},
        {1, 1, 1},
        {1, 6, 2, 6, 1},
        {1, 180, 120, 360, 120, 180, 1}
    };

    /**
     * Constructs a big version of an integer value.
     * @param x The value to represent.
     */
    public BigInt(int x) {
        this(Integer.toString(x));
    }

    /**
     * Constructs a big version of a long value.
     * @param x The value to represent.
     */
    public BigInt(long x) {
        this(Long.toString(x));
    }

    /**
     * Parses a decimal representation of an integer.
     * @param str The value to represent.
     */
    public BigInt(String str) {
        this(str.charAt(0) == '-', split(str));
    }

    /**
     * Constructs a sign-magnitude representation taking the entire span of the array as the range of interest.
     * @param neg Is the value negative?
     * @param val The base-B digits, least significant first.
     */
    private BigInt(boolean neg, int[] val) {
        this(neg, val, 0, val.length);
    }

    /**
     * Constructs a sign-magnitude representation taking a range of an array as the magnitude.
     * @param neg Is the value negative?
     * @param val The base-B digits, least significant at offset off, most significant at off + val - 1.
     * @param off The offset within the array.
     * @param len The number of base-B digits.
     */
    private BigInt(boolean neg, int[] val, int off, int len) {
        // Bounds checks
        if (val == null) throw new IllegalArgumentException("val");
        if (off < 0 || off >= val.length) throw new IllegalArgumentException("off");
        if (len < 1 || off + len > val.length) throw new IllegalArgumentException("len");

        this.negative = neg;
        this.val = val;
        this.off = off;
        // Enforce the invariant that this.len is 1 or val[off + len - 1] is non-zero.
        while (len > 1 && val[off + len - 1] == 0) len--;
        this.len = len;

        // Sanity check
        for (int i = 0; i < len; i++) {
            if (val[off + i] < 0) throw new IllegalArgumentException("val contains negative digits");
        }
    }

    /**
     * Splits a string into base-B digits.
     * @param str The string to parse.
     * @return An array which can be passed to the (boolean, int[]) constructor.
     */
    private static int[] split(String str) {
        if (str.charAt(0) == '-') str = str.substring(1);

        int[] arr = new int[(str.length() + LOG10_B - 1) / LOG10_B];
        int i, off;
        // Each element of arr represents LOG10_B characters except (probably) the last one.
        for (i = 0, off = str.length() - LOG10_B; off > 0; off -= LOG10_B) {
            arr[i++] = Integer.parseInt(str.substring(off, off + LOG10_B));
        }
        arr[i] = Integer.parseInt(str.substring(0, off + LOG10_B));
        return arr;
    }

    public boolean isZero() {
        return len == 1 && val[off] == 0;
    }

    public BigInt negate() {
        return new BigInt(!negative, val, off, len);
    }

    public BigInt add(BigInt that) {
        // If the signs differ, then since we use sign-magnitude representation we want to do a subtraction.
        boolean isSubtraction = negative ^ that.negative;

        BigInt left, right;
        if (len < that.len) {
            left = that;
            right = this;
        }
        else {
            left = this;
            right = that;

            // For addition I just care about the lengths of the arrays.
            // For subtraction I want the largest absolute value on the left.
            if (isSubtraction && len == that.len) {
                int cmp = compareAbsolute(that);
                if (cmp == 0) return ZERO; // Cheap special case
                if (cmp < 0) {
                    left = that;
                    right = this;
                }
            }
        }

        if (right.isZero()) return left;

        BigInt result;
        if (!isSubtraction) {
            int[] sum = new int[left.len + 1];
            // A copy here rather than using left.val in the main loops and copying remaining values
            // at the end gives a small performance boost, probably due to cache locality.
            System.arraycopy(left.val, left.off, sum, 0, left.len);

            int carry = 0, k = 0;
            for (; k < right.len; k++) {
                int a = sum[k] + right.val[right.off + k] + carry;
                sum[k] = a % B;
                carry = a / B;
            }
            for (; carry > 0 && k < left.len; k++) {
                int a = sum[k] + carry;
                sum[k] = a % B;
                carry = a / B;
            }
            sum[left.len] = carry;

            result = new BigInt(negative, sum);
        }
        else {
            int[] diff = new int[left.len];
            System.arraycopy(left.val, left.off, diff, 0, left.len);

            int carry = 0, k = 0;
            for (; k < right.len; k++) {
                int a = diff[k] - right.val[right.off + k] + carry;
                // Why did anyone ever think that rounding positive and negative divisions differently made sense?
                if (a < 0) {
                    diff[k] = a + B;
                    carry = -1;
                }
                else {
                    diff[k] = a % B;
                    carry = a / B;
                }
            }
            for (; carry != 0 && k < left.len; k++) {
                int a = diff[k] + carry;
                if (a < 0) {
                    diff[k] = a + B;
                    carry = -1;
                }
                else {
                    diff[k] = a % B;
                    carry = a / B;
                }
            }

            result = new BigInt(left.negative, diff, 0, k > left.len ? k : left.len);
        }

        return result;
    }

    private int compareAbsolute(BigInt that) {
        if (len > that.len) return 1;
        if (len < that.len) return -1;

        for (int i = len - 1; i >= 0; i--) {
            if (val[off + i] > that.val[that.off + i]) return 1;
            if (val[off + i] < that.val[that.off + i]) return -1;
        }

        return 0;
    }

    public BigInt mul(BigInt that) {
        if (isZero() || that.isZero()) return ZERO;

        if (len == 1) return that.mulSmall(negative ? -val[off] : val[off]);
        if (that.len == 1) return mulSmall(that.negative ? -that.val[that.off] : that.val[that.off]);

        int shorter = len < that.len ? len : that.len;
        BigInt result;
        // Cutoffs have been hand-tuned.
        if (shorter > 300) result = mulToom(3, that);
        else if (shorter > 28) result = mulToom(2, that);
        else result = mulNaive(that);

        return result;
    }

    BigInt mulSmall(int m) {
        if (m == 0) return ZERO;
        if (m == 1) return this;
        if (m == -1) return negate();

        // We want to do the magnitude calculation with a positive multiplicand.
        boolean neg = negative;
        if (m < 0) {
            neg = !neg;
            m = -m;
        }

        int[] pr = new int[len + 1];
        int carry = 0;
        for (int i = 0; i < len; i++) {
            long t = val[off + i] * (long)m + carry;
            pr[i] = (int)(t % B);
            carry = (int)(t / B);
        }
        pr[len] = carry;
        return new BigInt(neg, pr);
    }

    // NB This truncates.
    BigInt divSmall(int d) {
        if (d == 0) throw new ArithmeticException();
        if (d == 1) return this;
        if (d == -1) return negate();

        // We want to do the magnitude calculation with a positive divisor.
        boolean neg = negative;
        if (d < 0) {
            neg = !neg;
            d = -d;
        }

        int[] div = new int[len];
        int rem = 0;
        for (int i = len - 1; i >= 0; i--) {
            long t = val[off + i] + rem * (long)B;
            div[i] = (int)(t / d);
            rem = (int)(t % d);
        }

        return new BigInt(neg, div);
    }

    BigInt mulNaive(BigInt that) {
        int[] rv = new int[len + that.len];
        // Naive multiplication
        for (int i = 0; i < len; i++) {
            for (int j = 0; j < that.len; j++) {
                int k = i + j;
                long c = val[off + i] * (long)that.val[that.off + j];
                while (c > 0) {
                    c += rv[k];
                    rv[k] = (int)(c % B);
                    c /= B;
                    k++;
                }
            }
        }

        return new BigInt(this.negative ^ that.negative, rv);
    }

    private BigInt mulToom(int k, BigInt that) {
        // We split each number into k parts of m base-B digits each.
        // m = ceil(longer / k)
        int m = ((len > that.len ? len : that.len) + k - 1) / k;

        // Perform the splitting and evaluation steps of Toom-Cook.
        BigInt[] f1 = this.toomFwd(k, m);
        BigInt[] f2 = that.toomFwd(k, m);

        // Pointwise multiplication.
        for (int i = 0; i < f1.length; i++) f1[i] = f1[i].mul(f2[i]);

        // Inverse (or interpolation) and recomposition.
        return toomBk(k, m, f1, negative ^ that.negative, val[off], that.val[that.off]);
    }

    // Splits a number into k parts of m base-B digits each and does the polynomial evaluation.
    private BigInt[] toomFwd(int k, int m) {
        // Split.
        BigInt[] a = new BigInt[k];
        for (int i = 0; i < k; i++) {
            int o = i * m;
            if (o >= len) a[i] = ZERO;
            else {
                int l = m;
                if (o + l > len) l = len - o;
                // Ignore signs for now.
                a[i] = new BigInt(false, val, off + o, l);
            }
        }

        // Evaluate
        return transform(Q[k], a);
    }

    private BigInt toomBk(int k, int m, BigInt[] f, boolean neg, int lsd1, int lsd2) {
        // Inverse (or interpolation).
        BigInt[] b = transform(R[k], f);

        // Recomposition: add at suitable offsets, dividing by the normalisation factors
        BigInt prod = ZERO;
        int[] s = S[k];
        for (int i = 0; i < b.length; i++) {
            int[] shifted = new int[i * m + b[i].len];
            System.arraycopy(b[i].val, b[i].off, shifted, i * m, b[i].len);
            prod = prod.add(new BigInt(neg ^ b[i].negative, shifted).divSmall(s[i]));
        }

        // Handle the remainders.
        // In the worst case the absolute value of the sum of the remainders is s.length, so pretty small.
        // It should be easy enough to work out whether to go up or down.
        int lsd = (int)((lsd1 * (long)lsd2) % B);
        int err = lsd - prod.val[prod.off];
        if (err > B / 2) err -= B / 2;
        if (err < -B / 2) err += B / 2;
        return prod.add(new BigInt(err));
    }

    /**
     * Multiplies a matrix of small integers and a vector of big ones.
     * The matrix has a implicit leading row [1 0 ... 0] and an implicit trailing row [0 ... 0 1].
     * @param m The matrix.
     * @param v The vector.
     * @return m v
     */
    private BigInt[] transform(int[][] m, BigInt[] v) {
        BigInt[] b = new BigInt[m.length + 2];
        b[0] = v[0];
        for (int i = 0; i < m.length; i++) {
            BigInt s = ZERO;
            for (int j = 0; j < m[i].length; j++) s = s.add(v[j].mulSmall(m[i][j]));
            b[i + 1] = s;
        }
        b[b.length - 1] = v[v.length - 1];

        return b;
    }

    /**
     * Sums the digits of this integer.
     * @return The sum of the digits of this integer.
     */
    public long digsum() {
        long rv = 0;
        for (int i = 0; i < len; i++) {
            int x = val[off + i];
            while (x > 0) {
                rv += x % 10;
                x /= 10;
            }
        }
        return rv;
    }
}

最大的瓶颈是朴素的乘法（60％），其次是其他乘法（37％）和筛分（3％）。这个digsum()电话微不足道。

使用OpenJDK 7（64位）测量的性能。

— 彼得·泰勒
source

非常好。谢谢。

1

Python 2（PyPy），1,134,000 / 486,000 = 2.32

#/!usr/bin/pypy
n=input(); a, b, c=1, 1, 2**((n+2)/4)
for i in range(n-1, n/2, -2): a*=i
for i in range(2, n/4+1): b*=i
print sum(map(int, str(a*c/b)))

结果：1,537,506

有趣的事实：代码的瓶颈在于添加数字，而不是计算二项式系数。

— 丹尼斯
source

为什么python添加数字这么慢？您和xnor都说是。这让我感到好奇，所以我给我打了个钟。总和（Java）不到一秒钟。

— Geobits 2014年

@Geobits嗯，很好奇。Java是否也可以同样快速地进行二进制十进制转换？它确实以二进制表示整数，对不对？

— xnor 2014年

这是个好问题。对于整数/整数/长整数/长整数，我知道它是二进制的。我不确定BigInteger的内部表示形式是什么。如果是小数，那肯定可以解释为什么数学运算速度慢，但转换为字符串速度却很快。明天可能会查找。

— Geobits 2014年

@ Geobits，BigInteger的内部表示形式是2。–

— Peter Taylor

我一直都这么认为，但这让我感到奇怪。看起来至少要在OpenJDK中将其分解为大块，然后以这种方式进行转换。

— Geobits 2014年

1

爪哇（2,020,000 / 491,000）= 4.11

已更新，以前是2.24

Java BigInteger不是最快的数字处理器，但是总比没有好。

这个的基本公式似乎是n! / ((n/2)!^2)，但这似乎是一堆多余的乘法。

通过消除分子和分母中发现的所有素数，可以显着提高速度。为此，我首先运行一个简单的主筛。然后，对于每个素数，我都会计算需要提高的能力。每当我看到分子中有一个因数时，就增加，而分母则减少。

我将二（和第一）分开处理，因为在进行分解之前很容易计算/消除二。

完成此操作后，您将具有所需的最少乘法量，这很不错，因为BigInt乘法很慢。

import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;

public class CentBiCo {
    public static void main(String[] args) {
        int n = 2020000;
        long time = System.currentTimeMillis();
        sieve(n);
        System.out.println(sumDigits(cbc(n)));
        System.out.println(System.currentTimeMillis()-time);
    }

    static boolean[] sieve;
    static List<Integer> primes;
    static void sieve(int n){
        primes = new ArrayList<Integer>((int)(Math.sqrt(n)));
        sieve = new boolean[n];
        sieve[2]=true;
        for(int i=3;i<sieve.length;i+=2)
            if(i%2==1)
                sieve[i] = true;
        for(int i=3;i<sieve.length;i+=2){
            if(!sieve[i])
                continue;
            for(int j=i*2;j<sieve.length;j+=i)
                sieve[j] = false;
        }
        for(int i=2;i<sieve.length;i++)
            if(sieve[i])
                primes.add(i);
    }

    static int[] factors;
    static void addFactors(int n, int flip){
        for(int k=0;primes.get(k)<=n;){
            int i = primes.get(k);
            if(n%i==0){
                factors[i] += flip;
                n /= i;
            } else {
                if(++k == primes.size())
                    break;
            }
        }
        factors[n]++;
    }

    static BigInteger cbc(int n){
        factors = new int[n+1];
        int x = n/2;
        for(int i=x%2<1?x+1:x+2;i<n;i+=2)
            addFactors(i,1);
        factors[2] = x;
        for(int i=1;i<=x/2;i++){
            int j=i;
            while(j%2<1 && factors[2] > 1){
                j=j/2;
                factors[2]--;
            }
            addFactors(j,-1);
            factors[2]--;
        }
        BigInteger cbc = BigInteger.ONE;
        for(int i=3;i<factors.length;i++){
            if(factors[i]>0)
                cbc = cbc.multiply(BigInteger.valueOf(i).pow(factors[i]));
        }
        return cbc.shiftLeft(factors[2]);
    }

    static long sumDigits(BigInteger in){
        long sum = 0;
        String str = in.toString();
        for(int i=0;i<str.length();i++)
            sum += str.charAt(i)-'0';
        return sum;
    }
}

哦，2735298出于验证目的，n = 2020000的输出总和是。

— 地位
source

中心二项式系数的数字总和

C ++（GMP）-（287,000,000 / 422,000）= 680.09

转到33.96 =（16300000/480000）

Python 3（8.8 = 220万/ 25万）

Java（分数22500/365000 = 0.062）

GMP-1500000/300000 = 5.0

Java，自定义大整数类：32.9（120000000/365000）

Python 2（PyPy），1,134,000 / 486,000 = 2.32

结果：1,537,506

爪哇（2,020,000 / 491,000）= 4.11