……(我想,除了标准功能之外)。计时时应使用 PyPy JIT,因为它的运行速度大约是 CPython 的 4–5 倍。
from my_math import *
from math import log
from time import clock
from argparse import ArgumentParser
# Multiple Polynomial Quadratic Sieve
def mpqs(n, verbose=False):
    """Return a factor of ``n`` using the multiple polynomial quadratic sieve.

    n       -- the integer to factor (presumably an odd composite; behaviour
               for primes or perfect powers is not established by this code
               -- TODO confirm against callers)
    verbose -- when true, progress and timing information is printed

    The returned value is either a small prime found while building the
    factor base, the prime ``root_A`` when it happens to divide ``n``, or a
    gcd obtained from a congruence of squares.
    """
    if verbose:
        import sys
        time1 = clock()

    root_2n = isqrt(n + n)

    # formula chosen by experimentation
    # seems to be close to optimal for n < 10^50
    bound = int(5 * log(n, 10)**2)

    prime = []
    mod_root = []
    log_p = []
    num_prime = 0

    # find a number of small primes for which n is a quadratic residue
    p = 2
    while p < bound or num_prime < 3:
        # legendre (n|p) is only defined for odd p
        if p > 2:
            leg = legendre(n, p)
        else:
            leg = n & 1

        if leg == 1:
            prime.append(p)
            mod_root.append(int(mod_sqrt(n, p)))
            log_p.append(log(p, 10))
            num_prime += 1
        elif leg == 0:
            if verbose:
                print('trial division found factors:')
                print('%s x %s' % (p, n // p))
            return p

        p = next_prime(p)

    # size of the sieve
    x_max = len(prime) * 60

    # maximum value on the sieved range
    m_val = (x_max * root_2n) >> 1

    # fudging the threshold down a bit makes it easier to find powers of primes as factors
    # as well as partial-partial relationships, but it also makes the smoothness check slower.
    # there's a happy medium somewhere, depending on how efficient the smoothness check is
    thresh = log(m_val, 10) * 0.735

    # skip small primes. they contribute very little to the log sum
    # and add a lot of unnecessary entries to the table
    # instead, fudge the threshold down a bit, assuming ~1/4 of them pass
    min_prime = int(thresh * 3)
    fudge = sum(lp for q, lp in zip(prime, log_p) if q < min_prime) / 4.0
    thresh -= fudge

    if verbose:
        print('smoothness bound: %s' % bound)
        print('sieve size: %s' % x_max)
        print('log threshold: %s' % thresh)
        print('skipping primes less than: %s' % min_prime)

    smooth = []
    used_prime = set()
    partial = {}
    num_partial = 0
    num_poly = 0
    root_A = isqrt(root_2n // x_max)

    if verbose:
        print('sieving for smooths...')

    while True:
        # find an integer value A such that:
        # A is =~ sqrt(2*n) / x_max
        # A is a perfect square
        # sqrt(A) is prime, and n is a quadratic residue mod sqrt(A)
        while True:
            root_A = next_prime(root_A)
            leg = legendre(n, root_A)
            if leg == 1:
                break
            elif leg == 0:
                if verbose:
                    print('dumb luck found factors:')
                    print('%s x %s' % (root_A, n // root_A))
                return root_A

        A = root_A * root_A

        # solve for an adequate B
        # B*B is a quadratic residue mod n, such that B*B-A*C = n
        # this is unsolvable if n is not a quadratic residue mod sqrt(A)
        b = mod_sqrt(n, root_A)
        B = (b + (n - b*b) * mod_inv(b + b, root_A)) % A

        # B*B-A*C = n <=> C = (B*B-n)/A (the division is exact)
        C = (B*B - n) // A

        num_poly += 1

        # sieve for prime factors
        # sums[0:x_max] holds x = 0..x_max-1, sums[x_max:] holds negative x
        sums = [0.0] * (2 * x_max)
        for p, root, logp in zip(prime, mod_root, log_p):
            if p < min_prime:
                continue

            inv_A = mod_inv(A, p)
            # modular roots of the quadratic
            r1 = int(((root - B) * inv_A) % p)
            r2 = int(((p - root - B) * inv_A) % p)

            k = 0
            while k < x_max:
                if k + r1 < x_max:
                    sums[k + r1] += logp
                if k + r2 < x_max:
                    sums[k + r2] += logp
                if k:
                    sums[k - r1 + x_max] += logp
                    sums[k - r2 + x_max] += logp
                k += p

        # check for smooths
        for i, v in enumerate(sums):
            if v <= thresh:
                continue

            x = x_max - i if i > x_max else i
            vec = set()
            sqr = []
            # because B*B-n = A*C
            # (A*x+B)^2 - n = A*A*x*x+2*A*B*x + B*B - n
            #               = A*(A*x*x+2*B*x+C)
            # gives the congruency
            # (A*x+B)^2 = A*(A*x*x+2*B*x+C) (mod n)
            # because A is chosen to be square, it doesn't need to be sieved
            sieve_val = A*x*x + 2*B*x + C

            if sieve_val < 0:
                vec = set([-1])
                sieve_val = -sieve_val

            for p in prime:
                while sieve_val % p == 0:
                    if p in vec:
                        # keep track of perfect square factors
                        # to avoid taking the sqrt of a gigantic number at the end
                        sqr.append(p)
                    vec ^= set([p])
                    sieve_val //= p

            if sieve_val == 1:
                # smooth
                smooth.append((vec, (sqr, (A*x + B), root_A)))
                used_prime |= vec
            elif sieve_val in partial:
                # combine two partials to make a (xor) smooth
                # that is, every prime factor with an odd power is in our factor base
                pair_vec, pair_vals = partial[sieve_val]
                sqr += list(vec & pair_vec) + [sieve_val]
                vec ^= pair_vec
                smooth.append((vec, (sqr + pair_vals[0],
                                     (A*x + B) * pair_vals[1],
                                     root_A * pair_vals[2])))
                used_prime |= vec
                num_partial += 1
            else:
                # save partial for later pairing
                partial[sieve_val] = (vec, (sqr, A*x + B, root_A))

        num_smooth = len(smooth)
        num_used_prime = len(used_prime)

        if verbose:
            # '\r' with no newline keeps the progress display on one line
            sys.stdout.write('%s percent complete\r'
                             % (100 * num_smooth // num_prime))

        # a dependency is only guaranteed with more relations than primes
        if num_smooth <= num_used_prime:
            continue

        if verbose:
            print('%d polynomials sieved (%d values)' % (num_poly, num_poly * x_max * 2))
            print('found %d smooths (%d from partials) in %f seconds'
                  % (num_smooth, num_partial, clock() - time1))
            print('solving for non-trivial congruencies...')

        used_prime_list = sorted(used_prime)

        # set up bit fields for gaussian elimination
        # one row per used prime, one bit (column) per smooth relation
        masks = []
        mask = 1
        bit_fields = [0] * num_used_prime
        for vec, vals in smooth:
            masks.append(mask)
            for i, p in enumerate(used_prime_list):
                if p in vec:
                    bit_fields[i] |= mask
            mask <<= 1

        # row echelon form
        col_offset = 0
        null_cols = []
        for col in range(num_smooth):
            top = col - col_offset
            pivot = top == num_used_prime or (bit_fields[top] & masks[col]) == 0
            for row in range(top + 1, num_used_prime):
                if bit_fields[row] & masks[col]:
                    if pivot:
                        bit_fields[top], bit_fields[row] = bit_fields[row], bit_fields[top]
                        pivot = False
                    else:
                        bit_fields[row] ^= bit_fields[top]
            if pivot:
                null_cols.append(col)
                col_offset += 1

        # reduced row echelon form
        for row in range(num_used_prime):
            # lowest set bit
            mask = bit_fields[row] & -bit_fields[row]
            for up_row in range(row):
                if bit_fields[up_row] & mask:
                    bit_fields[up_row] ^= bit_fields[row]

        # check for non-trivial congruencies
        factor = None
        for col in null_cols:
            all_vec, (lh, rh, rA) = smooth[col]
            # work on copies: the entries of `smooth` are reused if this
            # attempt fails and sieving continues
            all_vec = set(all_vec)
            lhs = list(lh)  # sieved values (left hand side)
            rhs = [rh]      # sieved values - n (right hand side)
            rAs = [rA]      # root_As (cofactor of lhs)
            for i, field in enumerate(bit_fields):
                if field & masks[col]:
                    vec, (lh, rh, rA) = smooth[i]
                    lhs += list(all_vec & vec) + lh
                    all_vec ^= vec
                    rhs.append(rh)
                    rAs.append(rA)

            # an empty product is 1
            lhs_prod = list_prod(lhs) if lhs else 1
            candidate = gcd(list_prod(rAs) * lhs_prod - list_prod(rhs), n)
            if candidate != 1 and candidate != n:
                factor = candidate
                break

        if factor is not None:
            break
        if verbose:
            print('none found.')

    if verbose:
        print('factors found:')
        print('%s x %s' % (factor, n // factor))
        print('time elapsed: %f seconds' % (clock() - time1))
    return factor
if __name__ == "__main__":
    # Command-line entry point: factor the number given on the command line.
    parser = ArgumentParser(description='Uses a MPQS to factor a composite number')
    # int() parses arbitrarily large decimal strings (promoting to long on
    # Python 2), so it accepts everything `long` did.
    parser.add_argument('composite', metavar='number_to_factor', type=int,
                        help='the composite number to factor')
    parser.add_argument('--verbose', dest='verbose', action='store_true',
                        help="enable verbose output")
    args = parser.parse_args()

    if args.verbose:
        # verbose mode prints the factors and its own timing
        mpqs(args.composite, args.verbose)
    else:
        time1 = clock()
        print(mpqs(args.composite))
        print('time elapsed: %f seconds' % (clock() - time1))
# divide and conquer list product
def list_prod(a):
    """Return the product of the numbers in the sequence ``a``.

    The list is split in half recursively so that the two operands of each
    multiplication are products over equally many elements. An empty sequence
    yields 1 (the empty product).
    """
    size = len(a)
    if size == 0:
        return 1
    if size == 1:
        return a[0]
    half = size >> 1
    return list_prod(a[:half]) * list_prod(a[half:])
# greatest common divisor of a and b
def gcd(a, b):
    """Return the non-negative greatest common divisor of ``a`` and ``b``.

    Uses the Euclidean algorithm; ``gcd(a, 0)`` is ``abs(a)``.
    """
    while b:
        a, b = b, a % b
    # Python's % takes the sign of the divisor, so a negative argument can
    # leave a negative result; normalize it.
    return abs(a)
# modular inverse of a mod m
def mod_inv(a, m):
    """Return an integer ``x`` with ``a*x == 1 (mod m)``.

    Extended Euclidean algorithm. The result is NOT reduced into ``[0, m)``
    and may be negative; callers reduce it modulo ``m`` themselves. If ``a``
    and ``m`` are not coprime no inverse exists and the returned value is
    meaningless.
    """
    a = int(a % m)
    x, u = 0, 1
    while a:
        # floor division: plain `/` would produce a float on Python 3
        x, u = u, x - (m // a) * u
        m, a = a, m % a
    return x
# legendre symbol (a|m)
# note: returns m-1 if a is a non-residue, instead of -1
def legendre(a, m):
    """Return ``a**((m-1)/2) mod m`` (Euler's criterion).

    For an odd prime ``m`` the result is 1 when ``a`` is a quadratic residue,
    0 when ``m`` divides ``a``, and ``m-1`` (not -1) for a non-residue.
    """
    return pow(a, (m - 1) >> 1, m)
# modular sqrt(n) mod p
# p must be prime
def mod_sqrt(n, p):
    """Return a square root of ``n`` modulo the prime ``p``.

    The method depends on ``p``: a single exponentiation when p = 3 (mod 4),
    a closed form when p = 5 (mod 8), Shanks' method when p = 1 (mod 8), and
    ``n mod 2`` for p == 2. The result is only meaningful when ``n`` is a
    quadratic residue modulo ``p``.
    """
    a = n % p
    if p % 4 == 3:
        return pow(a, (p + 1) >> 2, p)
    elif p % 8 == 5:
        v = pow(a << 1, (p - 5) >> 3, p)
        i = ((a * v * v << 1) % p) - 1
        return (a * v * i) % p
    elif p % 8 == 1:
        # Shank's method
        # write p-1 = q * 2**e with q odd
        q = p - 1
        e = 0
        while q & 1 == 0:
            e += 1
            q >>= 1

        # smallest quadratic non-residue mod p
        non_residue = 2
        while legendre(non_residue, p) != p - 1:
            non_residue += 1

        w = pow(a, q, p)
        x = pow(a, (q + 1) >> 1, p)
        y = pow(non_residue, q, p)
        r = e
        while True:
            if w == 1:
                return x

            # square w until it reaches 1, giving up at r - 1 squarings
            v = w
            k = 0
            while v != 1 and k + 1 < r:
                v = (v * v) % p
                k += 1

            # no progress is possible; also keeps the shift below non-negative
            if k == 0:
                return x

            d = pow(y, 1 << (r - k - 1), p)
            x = (x * d) % p
            y = (d * d) % p
            w = (w * y) % p
            r = k
    else:  # p == 2
        return a
# integer sqrt of n
def isqrt(n):
    """Return the floor of the square root of the non-negative integer ``n``.

    An initial estimate is derived from the bit length of ``4n/3`` and then
    refined with integer Newton iterations until they stop decreasing.
    Raises ValueError for negative ``n``.
    """
    if n < 0:
        raise ValueError('isqrt() argument must be non-negative')

    c = n * 4 // 3
    d = c.bit_length()

    a = d >> 1
    if d & 1:
        # odd bit length: seed with 2**a
        x = 1 << a
        y = (x + (n >> a)) >> 1
    else:
        # even bit length: seed with 3/4 * 2**a
        x = (3 << a) >> 2
        y = (x + (c >> a)) >> 1

    if x != y:
        x = y
        y = (x + n // x) >> 1
        while y < x:
            x = y
            y = (x + n // x) >> 1
    return x
# strong probable prime
def is_sprp(n, b=2):
    """Return True if ``n`` is a strong probable prime to base ``b``.

    Writes n-1 = d * 2**s with d odd and checks that b**d == 1 (mod n) or
    b**(d * 2**r) == -1 (mod n) for some 0 <= r < s. The base should not be
    a multiple of ``n``.
    """
    if n < 2:
        return False
    # with the default base, n == 2 would reduce the base to 0 and be rejected
    if n == 2:
        return True
    if n & 1 == 0:
        return False

    d = n - 1
    s = 0
    while d & 1 == 0:
        s += 1
        d >>= 1

    x = pow(b, d, n)
    if x == 1 or x == n - 1:
        return True

    for _ in range(1, s):
        x = (x * x) % n
        if x == 1:
            # reached 1 without passing through -1
            return False
        elif x == n - 1:
            return True

    return False
# lucas probable prime
# assumes D = 1 (mod 4), (D|n) = -1
def is_lucas_prp(n, D):
    """Return True if ``n`` divides the (n+1)-th Lucas number U_{n+1}.

    The Lucas sequence uses P = 1 and Q = (1 - D) / 4. ``n`` must be odd,
    because (n+1)/2 is used as the inverse of 2 modulo ``n``. Given the
    assumptions above, every prime ``n`` passes.
    """
    # P = 1 is implicit in the recurrences below
    Q = (1 - D) >> 2

    # n+1 = 2**r*s where s is odd
    s = n + 1
    r = 0
    while s & 1 == 0:
        r += 1
        s >>= 1

    # calculate the bit reversal of (odd) s
    # e.g. 19 (10011) <=> 25 (11001)
    t = 0
    while s:
        if s & 1:
            t += 1
            s -= 1
        else:
            t <<= 1
            s >>= 1

    # use the same bit reversal process to calculate the sth Lucas number
    # keep track of q = Q**k as we go, k being the current index
    U = 0
    V = 2
    q = 1
    # mod_inv(2, n)
    inv_2 = (n + 1) >> 1
    while t:
        if t & 1:
            # U, V of k+1
            U, V = ((U + V) * inv_2) % n, ((D * U + V) * inv_2) % n
            q = (q * Q) % n
            t -= 1
        else:
            # U, V of k*2
            U, V = (U * V) % n, (V * V - 2 * q) % n
            q = (q * q) % n
            t >>= 1

    # double s until we have the 2**r*sth Lucas number
    while r:
        U, V = (U * V) % n, (V * V - 2 * q) % n
        q = (q * q) % n
        r -= 1

    # primality check
    # if n is prime, n divides the n+1st Lucas number, given the assumptions
    return U == 0
# primes less than 212
small_primes = set([
      2,  3,  5,  7, 11, 13, 17, 19, 23, 29,
     31, 37, 41, 43, 47, 53, 59, 61, 67, 71,
     73, 79, 83, 89, 97,101,103,107,109,113,
    127,131,137,139,149,151,157,163,167,173,
    179,181,191,193,197,199,211])
# pre-calced sieve of eratosthenes for n = 2, 3, 5, 7
# (the 48 residues modulo 210 that are coprime to 2, 3, 5 and 7)
indices = [
      1, 11, 13, 17, 19, 23, 29, 31, 37, 41,
     43, 47, 53, 59, 61, 67, 71, 73, 79, 83,
     89, 97,101,103,107,109,113,121,127,131,
    137,139,143,149,151,157,163,167,169,173,
    179,181,187,191,193,197,199,209]
# distances between sieve values
# offsets[i] is the gap from indices[i] to the next entry, wrapping mod 210
offsets = [
    10, 2, 4, 2, 4, 6, 2, 6, 4, 2, 4, 6,
     6, 2, 6, 4, 2, 6, 4, 6, 8, 4, 2, 4,
     2, 4, 8, 6, 4, 6, 2, 4, 6, 2, 6, 6,
     4, 2, 4, 6, 2, 6, 4, 2, 4, 2,10, 2]
# largest signed 32-bit integer
max_int = 2147483647
# an 'almost certain' primality check
def is_prime(n):
    """Return True if ``n`` is (almost certainly) prime.

    Values below 212 are looked up in ``small_primes``; values up to
    ``max_int`` are settled by trial division over the mod-210 wheel; larger
    values get a Baillie-PSW style test (strong probable prime to bases 2 and
    3, followed by a Lucas probable prime test).
    """
    if n < 212:
        return n in small_primes

    for p in small_primes:
        if n % p == 0:
            return False

    # if n is a 32-bit integer, perform full trial division
    if n <= max_int:
        i = 211
        while i * i < n:
            # one full turn of the wheel: 48 candidates spanning 210
            for o in offsets:
                i += o
                if n % i == 0:
                    return False
        return True

    # Baillie-PSW
    # this is technically a probabalistic test, but there are no known pseudoprimes
    if not is_sprp(n, 2):
        return False

    # idea shamelessly stolen from Mathmatica
    # if n is a 2-sprp and a 3-sprp, n is necessarily square-free
    if not is_sprp(n, 3):
        return False

    # search 5, -7, 9, -11, ... for a value that is a non-residue of n
    a = 5
    s = 2
    # if n is a perfect square, this will never terminate
    while legendre(a, n) != n - 1:
        s = -s
        a = s - a
    return is_lucas_prp(n, a)
# next prime strictly larger than n
def next_prime(n):
    """Return the smallest prime strictly greater than ``n``.

    Small results come from the ``small_primes`` table; larger candidates are
    generated on the mod-210 wheel (``indices``/``offsets``) and tested with
    ``is_prime``.
    """
    if n < 2:
        return 2
    # first odd larger than n
    n = (n + 1) | 1
    if n < 212:
        while True:
            if n in small_primes:
                return n
            n += 2

    # find our position in the sieve rotation via binary search
    # (smallest m with indices[m] >= x)
    x = int(n % 210)
    s = 0
    e = 47
    m = 24
    while m != e:
        if indices[m] < x:
            s = m
            m = (s + e + 1) >> 1
        else:
            e = m
            m = (s + e) >> 1

    # first wheel candidate >= n
    i = int(n + (indices[m] - x))
    # adjust offsets
    offs = offsets[m:] + offsets[:m]
    while True:
        for o in offs:
            if is_prime(i):
                return i
            i += o
样本I / O:
$ pypy mpqs.py --verbose 94968915845307373740134800567566911
smoothness bound: 6117
sieve size: 24360
log threshold: 14.3081031579
skipping primes less than: 47
sieving for smooths...
144 polynomials sieved (7015680 values)
found 405 smooths (168 from partials) in 0.513794 seconds
solving for non-trivial congruencies...
factors found:
216366620575959221 x 438925910071081891
time elapsed: 0.685765 seconds
$ pypy mpqs.py --verbose 523022617466601111760007224100074291200000001
smoothness bound: 9998
sieve size: 37440
log threshold: 15.2376302725
skipping primes less than: 59
sieving for smooths...
428 polynomials sieved (32048640 values)
found 617 smooths (272 from partials) in 1.912131 seconds
solving for non-trivial congruencies...
factors found:
14029308060317546154181 x 37280713718589679646221
time elapsed: 2.064387 seconds
$ pypy mpqs.py 94968915845307373740134800567566911
time elapsed: 0.630235 seconds
$ pypy mpqs.py 523022617466601111760007224100074291200000001
time elapsed: 1.886068 seconds
请注意,如果能找到任何这样的 x 和 d,就能立即得到 n 的一个(不一定是素数的)因子,因为按定义 x + d 和 x − d 都整除 n。由于允许潜在的平凡同余,这一关系可以进一步弱化为以下形式:
因此,一般来说,如果我们找到两个模 n 同余的完全平方数,就很可能通过 gcd(x ± d, n) 直接得到 n 的一个因子。看起来很简单,对吧?
可惜并非如此。如果要对所有可能的 x 进行穷举搜索,就需要搜索整个区间 [√n, √(2n)],这只比完整试除法的范围略小,而且每次迭代还需要一次昂贵的 is_square 操作来确认 d 的值。除非事先知道 n 有非常接近 √n 的因子,否则试除法很可能更快。
这意味着在求出多项式模 p 的根之后——也就是说,找到了一个 x 使得 y(x) ≡ 0 (mod p),即 y 可被 p 整除——你实际上就找到了无穷多个这样的 x。这样就可以对 x 的一个区间进行筛选,找出 y 的小素因子,希望找到一些所有素因子都很小的 y 值。这样的数称为 k-光滑数(k-smooth),其中 k 是所用到的最大素数。
但是,这种方法存在一些问题。并非所有 x 的取值都合适,实际上只有极少数合适,且都集中在 √n 附近。较小的 x 会使结果变成绝对值很大的负数(由于 −n 项),而较大的 x 会使结果过大,其素因数分解不太可能只包含小素数。这样的 x 确实会有一些,但除非待分解的合数很小,否则很难找到足够多的光滑数来完成分解。因此,对于更大的 n,有必要对多个给定形式的多项式进行筛选。
可以的 请注意,A和B实际上可以是任何整数值,并且数学仍然成立。我们需要做的就是选择一些随机值,求解多项式的根,然后筛分接近零的值。在这一点上,我们可以说它足够好:如果您在随机方向上扔了足够多的石头,那么早晚一定会打碎窗户的。
此外,如果选择 A 为完全平方数,则筛选时可以忽略首项系数 A,从而得到更小的值和更平坦的曲线。要使这样的解存在,n 必须是模 √A 的二次剩余,这可以通过计算勒让德符号立即判断:
(n | √A) = 1。请注意,为了求解 B,需要知道 √A 的完整素因数分解(以便计算模平方根 √n (mod √A)),这就是通常选择 √A 为素数的原因。
然后可以看出,如果,那么对于所有的值X ∈[ -M中,M ]:
于是问题就变成了找到向量 v,使得 vM ≡ 0 (mod 2),其中 0 是零向量。也就是说,求解 M 的左零空间。这可以通过多种方式完成,最简单的方法是对 M^T 执行高斯消元,并用行异或代替行加法。这会得到若干零空间基向量,它们的任意组合都是有效解。
x 的构造相当简单:它就是每个用到的 y 所对应的 Ax + B 的乘积。d 的构造稍微复杂一些。如果直接取所有 y 的乘积,最终会得到一个数万位(甚至数十万位)的数,还需要求它的平方根,这种计算的代价高得不切实际。相反,我们可以在筛选过程中记录素数的偶数次幂,然后对非平方因子的向量使用与(AND)和异或(XOR)运算来重建平方根。
但是,使用8980935344490257(86028157 * 104395301),我的脚本在家用计算机(2.61GHz AMD Phenom 9950)上管理的时间为25.963秒。比我的工作计算机快得多,在2.93GHz Core 2 Duo上工作将近31秒。
/**
 * Return the current Unix timestamp in seconds as a float, including the
 * fractional (microsecond) part.
 */
function getTime() {
    // microtime(true) yields the same "seconds + microseconds" sum that was
    // previously assembled by splitting the string form of microtime().
    return microtime(true);
}
/**
 * Return true when $val is numeric and has a non-zero fractional part.
 */
function isDecimal($val) {
    if (!is_numeric($val)) {
        return false;
    }
    return floor($val) != $val;
}
// Trial division of a semi-prime by successive odd numbers, with timing.
$start = getTime();
$semi_prime = 8980935344490257;
// search limit: the largest number with half as many digits as the input
$slice = round(strlen($semi_prime)/2);
$max = (pow(10, ($slice))-1);
$i = 3;
echo "\nFactoring the semi-prime:\n$semi_prime\n\n";

while ($i < $max) {
    $sec_factor = ($semi_prime/$i);
    // only candidates whose quotient has no fractional part are examined
    if (isDecimal($sec_factor) != 1) {
        $mod_f = bcmod($i, 1);
        $mod_s = bcmod($sec_factor, 1);
        if ($mod_f == 0 && $mod_s == 0) {
            echo "First factor = $i\n";
            echo "Second factor = $sec_factor\n";
            // take the end timestamp before reporting the elapsed time
            $end = getTime();
            $xtime=round($end-$start,4).' seconds';
            echo "\n$xtime\n";
            // both factors are known; stop searching
            break;
        }
    }
    $i += 2;
}
lcm(2, 3, 5, 7) == 210