使用SAT求解器(Python)查找特定区域内自由多联骨牌(free polyomino)的所有组合


15

我是SAT求解器领域的新手,并且需要有关以下问题的一些指导。

考虑到:

❶我选择了4 * 4网格中的14个相邻单元格

❷我有5个大小分别为4、2、5、2和1的多联骨牌(polyomino,记为A、B、C、D、E)

❸这些多联骨牌是自由的(free),即它们的形状不固定,可以形成不同的图案

在此处输入图片说明

如何使用SAT求解器计算选定区域(灰色单元格)内这5个自由多联骨牌的所有可能组合?

借助@spinkus富有洞察力的答案和OR-Tools文档,我写出了以下示例代码(在Jupyter Notebook中运行):

from ortools.sat.python import cp_model

import numpy as np
import more_itertools as mit
import matplotlib.pyplot as plt
%matplotlib inline


W, H = 4, 4  # Dimensions of grid
sizes = (4, 2, 5, 2, 1)  # Size of each polyomino
labels = np.arange(len(sizes))  # Label of each polyomino

colors = ('#FA5454', '#21D3B6', '#3384FA', '#FFD256', '#62ECFA')
cdict = dict(zip(labels, colors))  # Color dictionary for plotting

inactiveCells = (0, 1)  # Indices of disabled cells (in 1D, address = x + y * W)
activeCells = set(np.arange(W*H)).difference(inactiveCells)  # Cells where polyominoes can be fitted

# Inclusive (first, last) interval for every run of consecutive active addresses.
# The addresses are sorted explicitly because consecutive_groups needs ordered
# input and a set guarantees no order. Each run is materialised before taking
# its ends so that a run made of a single cell yields (c, c) instead of raising
# IndexError. Values are converted to plain ints for Domain.FromIntervals.
ranges = [
    (int(run[0]), int(run[-1]))
    for run in map(list, mit.consecutive_groups(sorted(activeCells)))
]



def main():
    """Build the CP-SAT model for five fixed-shape polyominoes and enumerate all placements.

    Every cell of every polyomino gets an (x, y) pair of integer variables.
    Linear equalities pin the cells of one polyomino relative to each other
    (this fixes its shape and orientation), and an all-different constraint on
    the 1D cell addresses keeps the pieces from overlapping and restricts them
    to the active cells. Each solution is handed to ``SolutionPrinter``; the
    solver status and the number of solutions are printed at the end.
    """
    model = cp_model.CpModel()

    # Create an Int var for each cell of each polyomino, constrained to be
    # within the width and height of the grid.
    pminos = [[] for s in sizes]
    for idx, s in enumerate(sizes):
        for i in range(s):
            pminos[idx].append([
                model.NewIntVar(0, W-1, 'p%i'%idx + 'c%i'%i + 'x'),
                model.NewIntVar(0, H-1, 'p%i'%idx + 'c%i'%i + 'y'),
            ])

    # Define the shapes by constraining the cells relative to each other.

    ## 1st polyomino -> tetromino ##
    #                              #
    #            #                 #
    #           ###                #
    #                              #
    ################################

    p0 = pminos[0]
    model.Add(p0[1][0] == p0[0][0] + 1)  # 'x' of 2nd cell == 'x' of 1st cell + 1
    model.Add(p0[2][0] == p0[1][0] + 1)  # 'x' of 3rd cell == 'x' of 2nd cell + 1
    model.Add(p0[3][0] == p0[0][0] + 1)  # 'x' of 4th cell == 'x' of 1st cell + 1

    model.Add(p0[1][1] == p0[0][1])      # 'y' of 2nd cell == 'y' of 1st cell
    model.Add(p0[2][1] == p0[1][1])      # 'y' of 3rd cell == 'y' of 2nd cell
    model.Add(p0[3][1] == p0[1][1] - 1)  # 'y' of 4th cell == 'y' of 2nd cell - 1

    ## 2nd polyomino -> domino ##
    #                           #
    #           #               #
    #           #               #
    #                           #
    #############################

    p1 = pminos[1]
    model.Add(p1[1][0] == p1[0][0])
    model.Add(p1[1][1] == p1[0][1] + 1)

    ## 3rd polyomino -> pentomino ##
    #                              #
    #            ##                #
    #            ##                #
    #            #                 #
    #                              #
    ################################

    p2 = pminos[2]
    model.Add(p2[1][0] == p2[0][0] + 1)
    model.Add(p2[2][0] == p2[0][0])
    model.Add(p2[3][0] == p2[0][0] + 1)
    model.Add(p2[4][0] == p2[0][0])

    model.Add(p2[1][1] == p2[0][1])
    model.Add(p2[2][1] == p2[0][1] + 1)
    model.Add(p2[3][1] == p2[0][1] + 1)
    model.Add(p2[4][1] == p2[0][1] + 2)

    ## 4th polyomino -> domino ##
    #                           #
    #           #               #
    #           #               #
    #                           #
    #############################

    p3 = pminos[3]
    model.Add(p3[1][0] == p3[0][0])
    model.Add(p3[1][1] == p3[0][1] + 1)

    ## 5th polyomino -> monomino ##
    #                             #
    #           #                 #
    #                             #
    ###############################
    # No constraints because 1 cell only

    # No blocks can overlap: give every cell a 1D address (x + y * W) whose
    # domain is limited to the active cells, then require all addresses to differ.
    block_addresses = []
    n = 0
    for p in pminos:
        for c in p:
            n += 1
            block_address = model.NewIntVarFromDomain(cp_model.Domain.FromIntervals(ranges), '%i' % n)
            model.Add(c[0] + c[1] * W == block_address)
            block_addresses.append(block_address)

    model.AddAllDifferent(block_addresses)

    # Solve and print solutions as we find them
    solver = cp_model.CpSolver()

    solution_printer = SolutionPrinter(pminos)
    status = solver.SearchForAllSolutions(model, solution_printer)

    print('Status = %s' % solver.StatusName(status))
    print('Number of solutions found: %i' % solution_printer.count)




class SolutionPrinter(cp_model.CpSolverSolutionCallback):
    """Solution callback that counts solutions and draws each one with matplotlib."""

    def __init__(self, variables):
        """Store the per-polyomino lists of [x, y] variable pairs and reset the counter."""
        super().__init__()
        self.variables = variables
        self.count = 0

    def on_solution_callback(self):
        """Draw the current solution on a new small figure and bump the counter."""
        self.count += 1

        # New figure with a y axis running downwards (H at the bottom, 0 at the top).
        plt.figure(figsize=(2, 2))
        plt.grid(True)
        plt.axis([0, W, H, 0])
        plt.yticks(np.arange(0, H, 1.0))
        plt.xticks(np.arange(0, W, 1.0))

        # One unit square per polyomino cell, coloured by polyomino label.
        for label, piece in enumerate(self.variables):
            for x_var, y_var in piece:
                corner = (self.Value(x_var), self.Value(y_var))
                plt.gca().add_patch(plt.Rectangle(corner, 1, 1, fc=cdict[label]))

        # Hatched, unfilled squares for the disabled cells (1D address -> column, row).
        for address in inactiveCells:
            row, col = divmod(address, W)
            plt.gca().add_patch(plt.Rectangle((col, row), 1, 1, fc='None', hatch='///'))

在此处输入图片说明

问题是我硬编码了5个唯一/固定形状的多联骨牌,而我不知道该如何定义约束,才能把每个多联骨牌的每一种可能形状都考虑进去(假设这是可行的)。


我是第一次听说Google OR-Tools。是否可以使用标准的Python库,例如itertools、numpy、networkx?
mathfux

我更倾向于使用SAT求解器或OR-Tools。
solub

@solub可以很容易地使用MiniZinc语言对这种问题进行建模/解决,因为在曲面上放置不规则物体存在高层约束。如果您在Coursera上完成免费课程“离散优化的高级建模”,则实际上将被教导如何做到这一点,并提供一些实用(且更复杂)的示例。Or-Tools具有MiniZinc语言的界面,因此您仍然可以利用其功能来找到快速的解决方案。
Patrick Trentin

1
似乎很有趣,感谢您的指导。不确定它是否会回答我遇到的特定问题(定义涉及自由多氨基酸的约束,而不是静态约束),但我一定会看一下。
solub

1
我必须道歉,我已经完全忘记了这个问题。在 minizinc 标签下有一个相关问题,附有详细答案,涵盖了我之前关于使用 minizinc 的建议。
Patrick Trentin

Answers:


10

编辑:我漏看了原问题中的“自由(free)”一词,因此用OR-Tools给出的是针对固定多联骨牌的答案。现已补充一节来给出自由多联骨牌的解法——据我所知(AFAICT),这种解法很难用OR-Tools的约束编程精确表达。

使用OR-Tools求解固定多联骨牌:

是的,您可以使用OR-Tools中的约束编程来做到这一点。OR-Tools对2D网格几何形状一无所知,因此您必须根据位置约束对每个形状的几何形状进行编码。也就是说,形状是块/单元的集合,这些块/单元必须彼此具有一定的关系,必须在网格的范围内并且不能重叠。一旦有了约束模型,您就可以要求CP-SAT解算器为所有可能的解决方案求解。

这是一个非常简单的概念验证,在4x4网格上放置两个矩形形状(对于更大规模的问题,您可能还希望添加某种解释器代码,把形状描述自动转换为一组OR-Tools变量和约束,因为手动输入约束有点繁琐)。

from ortools.sat.python import cp_model

W = H = 3  # Largest x / y coordinate used below; coordinates run from 0 to W (resp. H) inclusive.
X, Y = 0, 1  # Convenience constants: positions of x and y inside a block's [x, y] pair.


def main():
  '''Place a 3x1 and a 1x2 rectangle on the grid and enumerate every non-overlapping layout.

  Each block of each shape gets an (x, y) pair of integer variables with
  0 <= x <= W and 0 <= y <= H. Equalities between the blocks of one shape fix
  its geometry, and an all-different constraint on the linearised block
  addresses forbids overlaps. Every solution is passed to SolutionPrinter; the
  solver status and the solution count are printed at the end.
  '''
  model = cp_model.CpModel()

  # Create an Int var for each block of each shape constrained to be within
  # width and height of grid. Variables are named s<shape>b<block>_<axis>.
  block_counts = (3, 2)  # Shape 1 has three blocks, shape 2 has two.
  shapes = [
    [
      [
        model.NewIntVar(0, W, 's%db%d_x' % (s, b)),
        model.NewIntVar(0, H, 's%db%d_y' % (s, b)),
      ]
      for b in range(1, n_blocks + 1)
    ]
    for s, n_blocks in enumerate(block_counts, start=1)
  ]

  # Define the shapes by constraining the blocks relative to each other.
  # 3x1 rectangle:
  s0 = shapes[0]
  model.Add(s0[0][Y] == s0[1][Y])
  model.Add(s0[0][Y] == s0[2][Y])
  model.Add(s0[0][X] == s0[1][X] - 1)
  model.Add(s0[0][X] == s0[2][X] - 2)
  # 1x2 rectangle:
  s1 = shapes[1]
  model.Add(s1[0][X] == s1[1][X])
  model.Add(s1[0][Y] == s1[1][Y] - 1)

  # No blocks can overlap: map (x, y) to the row-major address x + (W+1)*y.
  # The stride must be the number of columns (W+1). Using the number of rows
  # would make distinct cells share an address whenever W > H.
  block_addresses = []
  for i, block in enumerate(blocks(shapes)):
    block_address = model.NewIntVar(0, (W+1)*(H+1) - 1, 'b%d' % (i,))
    model.Add(block[X] + (W+1)*block[Y] == block_address)
    block_addresses.append(block_address)
  model.AddAllDifferent(block_addresses)

  # Solve and print solutions as we find them
  solver = cp_model.CpSolver()
  solution_printer = SolutionPrinter(shapes)
  status = solver.SearchForAllSolutions(model, solution_printer)
  print('Status = %s' % solver.StatusName(status))
  print('Number of solutions found: %i' % solution_printer.count)


def blocks(shapes):
  '''Yield every block of every shape, shape by shape, in order.'''
  for members in shapes:
    yield from members


class SolutionPrinter(cp_model.CpSolverSolutionCallback):
    '''Solution callback that counts solutions and prints each one as an ASCII grid.'''

    def __init__(self, variables):
        '''Keep the nested shape -> block -> [x, y] variables and reset the counter.'''
        super().__init__()
        self.variables = variables
        self.count = 0

    def on_solution_callback(self):
        '''Print the current solution: '#' for an occupied cell, ' ' for a free one.'''
        self.count += 1

        # Coordinates of every block in the current solution.
        occupied = set()
        for shape in self.variables:
            for block in shape:
                occupied.add((self.Value(block[X]), self.Value(block[Y])))

        # Rows are W+1 cells wide plus the two '|' borders, hence W+3 dashes.
        rule = '-' * (W + 3)
        print(rule)
        for y in range(H + 1):
            row = ''.join('#' if (x, y) in occupied else ' ' for x in range(W + 1))
            print('|' + row + '|')
        print(rule)


# Run the model only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()

给出:

...
------
|    |
| ###|
|  # |
|  # |
------
------
|    |
| ###|
|   #|
|   #|
------
Status = OPTIMAL
Number of solutions found: 60

自由多联骨牌:

如果我们将单元格网格视为图,则可以把问题重新表述为:寻找网格单元格的一个k-划分,其中每个部分都有指定的大小,并且每个部分都是一个连通分量。也就是说,据我所知(AFAICT),连通分量与多联骨牌之间没有区别,本答案的其余部分都基于这一假设。

在OR-Tools约束编程中,找出所有可能的“每个部分都有指定大小的网格单元k-划分”相当容易表达。但据我所知,连通性这一部分很难(我尝试了很长时间都失败了……)。我认为OR-Tools约束编程不是正确的方法。我注意到OR-Tools的C++网络优化库参考文档中有一些关于连通分量的内容,可能值得一看,但我并不熟悉。另一方面,用Python写一个朴素的递归搜索解法是可行的。

这是一个“手写”的朴素解法。它很慢,但对4x4的情况还可以接受。代码用地址来标识网格中的每个单元格。(另请注意,维基百科页面在某种程度上提到了类似的朴素算法,并且似乎针对类似的多联骨牌问题给出了一些更高效的方法。)

import numpy as np
from copy import copy
from tabulate import tabulate

D = 4  # Dimension of square grid (D x D cells, addressed 0 .. D*D-1).
KCC = [5, 4, 2, 2]  # List of the sizes of the required k connected components (KCCs).
VALID_CELLS = range(2, D*D)  # Addresses the search may use; 0 and 1 are excluded.
# The components must fit into the usable cells, not merely into the full grid.
assert sum(KCC) <= len(VALID_CELLS)

def search():
  ''' Run the recursive search from every valid cell, then print each distinct solution and the total. '''
  found = set()  # Distinct solutions, each stored as a tuple of per-cell labels.
  for origin in VALID_CELLS:
    # Fresh all-zero marking for every starting cell (0 means "not assigned").
    blank = [0.0] * (D*D)
    _search(origin, blank, set(), found, 0, 0)
  for grid in found:
    print(tabulate(np.array(grid).reshape(D, D)))
  print('Number of solutions found:', len(found))

def _search(i, marked, fringe, solutions, curr_count, curr_part):
  ''' Mark cell i for the current component and recurse until every component in KCC is placed.

  marked holds one label per cell (0 = unassigned, k+1 = component k) and is
  modified in place. fringe is the set of candidate cells for growing the
  current component. Finished markings are added to solutions as tuples.
  '''
  marked[i] = curr_part + 1
  curr_count += 1

  if curr_count != KCC[curr_part]:
    # Component still too small: grow it through each unassigned fringe cell.
    fringe.update(neighbours(i, D))
    while fringe:
      j = fringe.pop()
      if marked[j] == 0:
        _search(j, copy(marked), copy(fringe), solutions, curr_count, curr_part)
    return

  # Current component is complete.
  next_part = curr_part + 1
  if next_part == len(KCC):
    # Nothing left to place: record the marking (duplicates collapse in the set).
    solutions.add(tuple(marked))
    return

  # Start the next component from every cell that is still unassigned.
  for start in VALID_CELLS:
    if marked[start] == 0:
      _search(start, copy(marked), set(), solutions, 0, next_part)

def neighbours(i, D):
  ''' Return an iterator over the valid cells directly left, right, above and below cell i in a DxD grid. '''
  row = int(i/D)
  left, right, up, down = i-1, i+1, i-D, i+D
  candidates = []
  # Horizontal neighbours must stay on the same row as cell i.
  if left >= 0 and int(left/D) == row:
    candidates.append(left)
  if right < D**2 and int(right/D) == row:
    candidates.append(right)
  # Vertical neighbours only need to stay inside the address range.
  if up >= 0:
    candidates.append(up)
  if down < D**2:
    candidates.append(down)
  return filter(lambda cell: cell in VALID_CELLS, candidates)

# Run the search only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  search()

给出:

...
-  -  -  -
0  0  1  1
2  2  1  1
4  2  3  1
4  2  3  0
-  -  -  -
-  -  -  -
0  0  4  3
1  1  4  3
1  2  2  2
1  1  0  2
-  -  -  -
Number of solutions found: 3884

这非常有帮助,非常感谢。有一点问题是,您的示例仅适用于固定形状的多联骨牌,而问题问的是自由多联骨牌(单元格数目固定,但形状不固定;为清晰起见,我会对问题进行编辑)。按照您的示例,我们将不得不为每个大小为S的多联骨牌硬编码每一种可能的形状(+旋转+反射),这是不可行的。问题仍然是:是否可以使用OR-Tools实现此类约束?
solub

哦,我漏看了“自由”这一点。嗯,问题可以这样表述:“找到一个25-omino的5-划分,其中该25-omino被约束在WxH网格内,并且5个部分中的每一个也是一个X-omino,X =(7,6,6,4,2)”。我想可以在OR-Tools中做到这一点,但直觉上直接实现CSP回溯深度优先搜索会更容易:先找出所有可能的25-omino;对于每个可能的25-omino,通过选择一个X并在该25-omino内构造X-omino来进行CSP回溯搜索,直到找到完整的解或者必须回溯为止。
spinkus

添加了一些内容,例如出于完整性的考虑,我在之前的评论中提到了基于天真的直接搜索的解决方案。
spinkus

5

在OR-Tools中约束单连通区域的一种相对直接的方法,是把它的边界约束为一条回路(circuit)。由于您所有多联骨牌的大小都小于8,我们不必担心非单连通(带孔)的多联骨牌。

此代码找到所有3884解决方案:

from ortools.sat.python import cp_model

# Usable cells of the 4x4 grid: everything except (0, 0) and (1, 0).
cells = {(x, y) for x in range(4) for y in range(4) if not (x <= 1 and y <= 0)}
sizes = [4, 2, 5, 2, 1]
num_polyominos = len(sizes)
model = cp_model.CpModel()

# member[cell, p] is true when the cell belongs to polyomino p.
member = {}
for cell in cells:
    for p in range(num_polyominos):
        member[cell, p] = model.NewBoolVar(f"member{cell, p}")

# Each cell is a member of exactly one polyomino.
for cell in cells:
    owners = [member[cell, p] for p in range(num_polyominos)]
    model.Add(sum(owners) == 1)

# Each polyomino contains the given number of cells.
for p, size in enumerate(sizes):
    occupied = [member[cell, p] for cell in cells]
    model.Add(sum(occupied) == size)

# Find the border of each polyomino
# Give every corner point of every usable cell a distinct integer index; these
# indices are what the circuit constraint further down uses as node ids.
# (The `i` inside the set comprehension is a corner offset and is unrelated to
# the enumerate index `i`; comprehensions have their own scope.)
vertices = {
    v: i
    for i, v in enumerate(
        {(x + i, y + j) for x, y in cells for i in [0, 1] for j in [0, 1]}
    )
}
# Four directed edges per cell, walking its corners in the order
# (x, y) -> (x+1, y) -> (x+1, y+1) -> (x, y+1) -> (x, y).
edges = [
    edge
    for x, y in cells
    for edge in [
        ((x, y), (x + 1, y)),
        ((x + 1, y), (x + 1, y + 1)),
        ((x + 1, y + 1), (x, y + 1)),
        ((x, y + 1), (x, y)),
    ]
]
# border[edge, p] is true when the directed edge lies on the boundary of polyomino p.
border = {
    (edge, p): model.NewBoolVar(f"border{edge, p}")
    for edge in edges
    for p in range(num_polyominos)
}
for (((x0, y0), (x1, y1)), p), border_var in border.items():
    # The two cells on either side of the edge. With the edge order used above,
    # left_cell works out to the cell that generated the edge (so it is always a
    # key of `member`), and right_cell is the adjacent cell across the edge,
    # which may fall outside `cells`.
    left_cell = ((x0 + x1 + y0 - y1) // 2, (y0 + y1 - x0 + x1) // 2)
    right_cell = ((x0 + x1 - y0 + y1) // 2, (y0 + y1 + x0 - x1) // 2)
    left_var = member[left_cell, p]
    # border => left cell belongs to p
    model.AddBoolOr([border_var.Not(), left_var])
    if (right_cell, p) in member:
        right_var = member[right_cell, p]
        # border => right cell does not belong to p
        model.AddBoolOr([border_var.Not(), right_var.Not()])
        # left cell in p and right cell not in p => border
        model.AddBoolOr([border_var, left_var.Not(), right_var])
    else:
        # No usable cell across the edge: left cell in p => border
        model.AddBoolOr([border_var, left_var.Not()])

# Each border is a circuit: for every polyomino, the selected border edges must
# form a single cycle. Every vertex also gets an optional self-loop arc backed
# by a fresh boolean variable.
for p in range(num_polyominos):
    arcs = [(vertices[v0], vertices[v1], border[(v0, v1), p]) for v0, v1 in edges]
    arcs.extend(
        (i, i, model.NewBoolVar(f"vertex_loop{v, p}")) for v, i in vertices.items()
    )
    model.AddCircuit(arcs)

# Print all solutions
# Inclusive bounding box of the usable cells, used to lay out the printed grid.
x_range = range(min(cx for cx, _ in cells), 1 + max(cx for cx, _ in cells))
y_range = range(min(cy for _, cy in cells), 1 + max(cy for _, cy in cells))
solutions = 0  # Running count, incremented by the solution callback.


class SolutionPrinter(cp_model.CpSolverSolutionCallback):
    """Solution callback that counts solutions and prints each one as a grid of polyomino indices."""

    def OnSolutionCallback(self):
        """Increment the global counter and print one row of labels per grid row."""
        global solutions
        solutions += 1
        for y in y_range:
            row = []
            for x in x_range:
                if (x, y) not in cells:
                    # Positions outside the usable cells are shown as "-".
                    row.append("-")
                    continue
                # Index of the first polyomino whose membership variable is true here.
                owner = next(
                    p
                    for p in range(num_polyominos)
                    if self.Value(member[(x, y), p])
                )
                row.append(owner)
            print(*row)
        print()


# Enumerate every solution; the callback prints each one and updates `solutions`.
solver = cp_model.CpSolver()
printer = SolutionPrinter()
solver.SearchForAllSolutions(model, printer)
print("Number of solutions found:", solutions)

4

对于每个多联骨牌(polyomino)和每个可能的左上角单元格,都有一个布尔变量,指示该单元格是否是该多联骨牌外接矩形的左上角。

对于每个单元格和每个多联骨牌,都有一个布尔变量,指示该单元格是否被该多联骨牌占据。

现在,对于每个单元格和每个多联骨牌,都有一系列蕴含关系:选定某个左上角单元格,就意味着相应的每个单元格确实被该多联骨牌占据。

然后是约束:每个单元格最多被一个多联骨牌占据;每个多联骨牌恰好有一个单元格作为其左上角。

这是一个纯粹的布尔问题。


非常感谢你的回复 !老实说,我不知道如何使用or-tools来实现这一点,您是否建议(从提供的可用python示例中)提供任何示例来帮助我入门?
solub

非常抱歉,因为我不太了解您的答案。不确定“封闭的矩形”是指什么,或者“对于每个单元格和每个多义胺”将如何在代码中翻译(嵌套“ for”循环?)。无论如何,您介意告诉我您的解释是否解决了游离多氨基酸的问题(为清楚起见,对问题进行了编辑)。
solub
By using our site, you acknowledge that you have read and understand our Cookie Policy and Privacy Policy.
Licensed under cc by-sa 3.0 with attribution required.