对于任何好奇的人,我自己使用PySAL的region.Maxp算法想出了一个解决方案。本质上,Max-p允许我生成一组满足第一个条件(每个区域的最小雇主数量)的区域;我把它放入一个while循环中,该循环会拒绝任何不同时满足第二个条件(区域内最大雇主所占的就业百分比)的Max-p解。我已经将其实现为一个ArcGIS工具。
我决定放弃之前为标记街区(block)/街区组(block group)/普查区(tract)所做的工作,改为直接在街区上运行Max-p(不过我的所有测试都是在普查区上进行的,因为输入多边形数量的少量增加就会对处理时间产生巨大影响)。我的代码的相关部分如下。
generate_regions()函数所需的输入“shapefile”(以包含shapefile完整路径的字符串形式传递)必须已经与雇主点要素做过空间连接,并且将雇主数量、单个雇主的最大雇员数以及雇员总数存储为每个输入要素的属性。
import arcpy, math, pysal, random
import numpy as np
# Suppression criteria (default arguments of generate_regions):
MIN_EMP_CT = 3 # Minimum number of employers per region; generate_regions uses it as the starting Max-p floor
MAX_EMP_FRAC = 0.8 # Share of a region's employees working for its single largest employer; a region at or above this value is rejected
def _largest_employer_share(region, emp_by_geo):
    '''Return (largest single employer) / (total employees) for one region.

    `emp_by_geo` maps each input feature ID to a
    (max_employees_of_one_employer, total_employees) pair. A region with no
    employees at all is reported as a share of 1 so that it always fails
    the suppression test.
    '''
    total_emp = sum(emp_by_geo[geo][1] for geo in region)
    largest_emp = max([0] + [emp_by_geo[geo][0] for geo in region])
    if total_emp > 0:
        return largest_emp / total_emp
    return 1


def generate_regions(shapefile, min_emp_ct=MIN_EMP_CT, max_emp_frac=MAX_EMP_FRAC):
    '''Use pysal's region.Maxp method to generate regions that meet suppression criteria.

    Max-p is asked for regions containing at least `floor` employers each
    (starting at `min_emp_ct`). Every candidate solution is then checked
    against the second criterion: in every region, the largest single
    employer must account for less than `max_emp_frac` of the region's
    total employees. Solutions that fail are discarded and Max-p is run
    again; after every `bump_iter` runs the floor is raised by one so the
    search cannot stall on an unattainable floor.

    Parameters:
        shapefile: full path to the input shapefile as a string. The
            attribute table is read from the same path with its last four
            characters replaced by '.dbf'. It must contain a 'GEOID10'
            field plus the fields named by the module-level
            `employer_count_fieldname`, `max_employees_fieldname` and
            `total_employees_fieldname`.
        min_emp_ct: initial minimum number of employers per region.
        max_emp_frac: a region whose largest-employer share is greater than
            or equal to this value fails.

    The number of runs between floor increments is read from ArcGIS tool
    parameter index 3.

    Returns:
        The first pysal.region.Maxp solution in which every region passes.

    Raises:
        ValueError: if tool parameter 3 is not an integer >= 1.
    '''
    w = pysal.rook_from_shapefile(shapefile, idVariable='GEOID10')

    # Read every needed column up front so the DBF handle can be released
    # before the (potentially very long) search loop starts.
    dbf = pysal.open(shapefile[:-4] + '.dbf')
    try:
        ids = np.array(dbf.by_col['GEOID10'])
        columns = np.array((
            dbf.by_col[employer_count_fieldname],
            dbf.by_col[max_employees_fieldname],
            dbf.by_col[total_employees_fieldname],
        ))
    finally:
        dbf.close()

    employers = columns[0]            # floor variable: employer count per feature
    attributes = columns.transpose()  # one row per feature, as Maxp expects

    # Feature ID -> (largest single employer, total employees).
    emp_by_geo = {
        geo_id: (float(row[1]), float(row[2]))
        for geo_id, row in zip(ids, attributes)
    }

    random.seed(100)     # Using non-random seeds ensures repeatability of results
    np.random.seed(100)  # Using non-random seeds ensures repeatability of results

    # Number of failed iterations after which to increment the minimum number
    # of employers per region (otherwise we could be stuck in the loop forever).
    bump_iter = int(arcpy.GetParameterAsText(3))
    if bump_iter < 1:
        # 0 would divide by zero below; a negative value would silently push
        # the floor *below* min_emp_ct and violate the first criterion.
        raise ValueError(
            'Tool parameter 3 (iterations per floor increment) must be >= 1, '
            'got {0}'.format(bump_iter)
        )

    iteration = 0
    while True:
        # Floor division gives the same result on Python 2 and Python 3.
        floor = int(min_emp_ct + iteration // bump_iter)
        solution = pysal.region.Maxp(w, attributes, floor, employers)
        iteration += 1

        # NOTE(review): if Maxp yields no regions at all, this check passes
        # vacuously -- confirm how Maxp reports an infeasible solution.
        passed = all(
            _largest_employer_share(region, emp_by_geo) < max_emp_frac
            for region in solution.regions
        )

        status = 'PASSED!' if passed else 'failed...'
        arcpy.AddMessage(
            'Iteration {0} (MIN_EMP_CT = {1}) - {2}'.format(iteration, floor, status)
        )
        if passed:
            return solution
# Run the region search on the spatially joined shapefile; `regions` is
# iterated below as a sequence of regions, each a sequence of GEOID10 values.
solution = generate_regions(spatially_joined_shapefile)
regions = solution.regions

### Write input-to-region conversion table to a CSV file.
# REGION_ID is the 1-based position of the region in `regions`.
# The `with` block guarantees the file is closed even if a write fails.
with open(conversion_table, 'w') as table_file:
    table_file.write('"GEOID10","REGION_ID"\n')
    for region_id, region in enumerate(regions, 1):
        for geo in region:
            table_file.write('"' + geo + '","' + str(region_id) + '"\n')