Ensemble(集成)
它运行一组相关模型的集成。各个模型考虑的历史记录长度不同,并且可以选择:要么始终选择使预期收益差最大化的动作,要么按预期收益差的比例随机选择动作。
然后,集成中的每个成员为自己偏好的动作投票。每个成员拥有的票数等于它累计比对手多赢得的分数(这意味着糟糕的模型会得到负票数)。最终选择得票最多的动作。
(它们或许应该按照对各个动作的偏好程度按比例分配自己的票数,但我目前懒得这么做。)
它击败了到目前为止发布的所有内容,除了EvaluaterBot和PatternFinder。(一对一,它击败了EvaluaterBot并输给了PatternFinder)。
from collections import defaultdict
import random
class Number6:
    """Opponent model for the three-move (C / N / D) payoff game.

    The bot counts which move the opponent played for each suffix of the
    bot's own recent moves (up to ``markov_order`` moves long), turns the
    counts for the current history into a probability distribution and then
    picks a move according to ``strategy``:

    * ``"maxExpected"`` -- the move with the highest expected payoff
      difference (our score minus the opponent's score).
    * ``"randomize"``   -- a random move drawn in proportion to the positive
      part of each move's expected payoff difference.
    * ``"argMax"``      -- the best response to the opponent's single most
      likely move.
    """

    class Choices:
        """Mutable triple holding one value per move (C, N, D).

        Used for raw counts, probability distributions and expected payoffs.
        """

        def __init__(self, C=0, N=0, D=0):
            self.C = C
            self.N = N
            self.D = D

    def __init__(self, strategy="maxExpected", markov_order=3):
        """Create a model.

        strategy     -- "maxExpected", "randomize" or "argMax" (see class doc).
        markov_order -- maximum number of own past moves kept as history.
        """
        self.MARKOV_ORDER = markov_order
        self.my_choices = ""  # our most recent moves, oldest first
        # history string -> counts of the opponent's moves seen with it
        self.opponent = defaultdict(lambda: self.Choices())
        self.choice = None  # previous choice
        # payoff[mine][theirs] = (our score) - (opponent's score)
        self.payoff = {
            "C": {"C": 3-3, "N": 4-1, "D": 0-5},
            "N": {"C": 1-4, "N": 2-2, "D": 3-2},
            "D": {"C": 5-0, "N": 2-3, "D": 1-1},
        }
        self.total_payoff = 0
        self.strategy = strategy

    def update_opponent_model(self, last):
        """Record the opponent's move ``last`` under every history suffix."""
        for start in range(0, self.MARKOV_ORDER):
            counts = self.opponent[self.my_choices[start:]]
            counts.C += ("C" == last)
            counts.N += ("N" == last)
            counts.D += ("D" == last)

    def normalize(self, counts):
        """Return ``counts`` scaled to sum to 1 (uniform if the sum is 0)."""
        total = float(counts.C + counts.N + counts.D)
        if total == 0:
            return self.Choices(1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0)
        return self.Choices(
            counts.C / total, counts.N / total, counts.D / total)

    def get_distribution(self):
        """Return the estimated distribution of the opponent's next move.

        Uses the longest history suffix that has been recorded; falls back
        to a uniform distribution when none has.
        """
        for start in range(0, self.MARKOV_ORDER):
            hist = self.my_choices[start:]
            if hist in self.opponent:
                return self.normalize(self.opponent[hist])
        return self.Choices(1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0)

    def choose(self, dist):
        """Pick "C", "N" or "D" given the opponent distribution ``dist``.

        Raises ValueError if ``self.strategy`` is not a known strategy.
        """
        # We're interested in *beating the opponent*, not maximizing our
        # score, so we optimize the expected payoff *difference*.
        payoff = self.Choices(*[
            sum(self.payoff[mine][theirs] * getattr(dist, theirs)
                for theirs in "CND")
            for mine in "CND"
        ])

        if self.strategy == "maxExpected":
            # D has slightly better payoff on a uniform opponent,
            # so we select it on ties.
            if payoff.C > payoff.N:
                return "C" if payoff.C > payoff.D else "D"
            return "N" if payoff.N > payoff.D else "D"

        if self.strategy == "randomize":
            # Expected differences can be negative, which cannot serve as
            # sampling weights: clamp them to zero so that only moves with
            # a positive expectation can be drawn.  normalize() falls back
            # to a uniform draw when no move has a positive expectation.
            weights = self.normalize(self.Choices(
                max(0.0, payoff.C), max(0.0, payoff.N), max(0.0, payoff.D)))
            r = random.uniform(0.0, 1.0)
            # Thresholds must be cumulative for a proportional draw.
            if r < weights.C:
                return "C"
            if r < weights.C + weights.N:
                return "N"
            return "D"

        if self.strategy == "argMax":
            # Best response to the opponent's most likely move:
            # C -> D, N -> C, D -> N.
            if dist.C > dist.N:
                return "D" if dist.C > dist.D else "N"
            return "C" if dist.N > dist.D else "N"

        raise ValueError("unknown strategy: %r" % (self.strategy,))

    def update_history(self):
        """Append our latest move and trim history to MARKOV_ORDER moves."""
        self.my_choices += self.choice
        if len(self.my_choices) > self.MARKOV_ORDER:
            # Exactly one move is added per call, so at most one is dropped.
            assert len(self.my_choices) == self.MARKOV_ORDER + 1
            self.my_choices = self.my_choices[1:]

    def round(self, last):
        """Play one round.

        last -- the opponent's previous move, or a falsy value on the
                first round.
        Returns our move: "C", "N" or "D".
        """
        if last:
            self.update_opponent_model(last)
        dist = self.get_distribution()
        self.choice = self.choose(dist)
        self.update_history()
        return self.choice
class Ensemble:
    """Committee of Number6 models that selects a move by weighted vote.

    Each member's vote weight is the running sum of the payoff differences
    its own previous suggestion would have scored against the opponent's
    actual moves, so a member that keeps losing ends up with negative weight.
    """

    def __init__(self):
        # Two members per Markov order 0..5: the deterministic
        # "maxExpected" variant followed by the "randomize" variant.
        self.models = []
        for order in range(0, 6):
            self.models.append(Number6("maxExpected", order))
            self.models.append(Number6("randomize", order))
            #self.models.append(Number6("argMax", order))
        member_count = len(self.models)
        # One vote weight and one remembered suggestion per member.
        self.votes = [0] * member_count
        self.prev_choice = ["D"] * member_count
        # payoff[mine][theirs] = (our score) - (opponent's score)
        self.payoff = {
            "C": { "C": 3-3, "N": 4-1, "D": 0-5 },
            "N": { "C": 1-4, "N": 2-2, "D": 3-2 },
            "D": { "C": 5-0, "N": 2-3, "D": 1-1 },
        }

    def round(self, last):
        """Play one round and return the winning move ("C", "N" or "D").

        last -- the opponent's previous move, or a falsy value on the
                first round.
        """
        if last:
            # Score every member's previous suggestion against what the
            # opponent actually played.
            for idx, suggested in enumerate(self.prev_choice):
                self.votes[idx] += self.payoff[suggested][last]

        # Vote. Sufficiently terrible models get negative votes.
        tally = {"C": 0, "N": 0, "D": 0}
        for idx, model in enumerate(self.models):
            pick = model.round(last)
            if pick in tally:
                tally[pick] += self.votes[idx]
            self.prev_choice[idx] = pick

        # C needs a strict win over both others; N only needs to beat D;
        # every remaining case (including all ties) goes to D.
        if tally["C"] > tally["D"] and tally["C"] > tally["N"]:
            return "C"
        if tally["N"] > tally["D"]:
            return "N"
        return "D"
测试框架
万一其他人觉得有用,这里提供一个测试框架,用于查看各个一对一对决的结果。使用 Python 2。只需将所有你感兴趣的对手放入 "opponents.py",然后把对 "Ensemble" 的引用改成你自己的机器人即可。
import sys, inspect
import opponents
from ensemble import Ensemble
def count_payoff(label, them):
    """Add to ``label``'s running total the payoff of its recorded move.

    label -- key identifying the player in the module-level ``choices`` and
             ``total_payoff`` dicts.
    them  -- the other player's move ("C", "N" or "D"); ``None`` means there
             is nothing to score yet and the call is a no-op.
    """
    if them is None:
        return
    me = choices[label]
    # table[mine][theirs] = (our score) - (their score)
    table = {
        "C": { "C": 3-3, "N": 4-1, "D": 0-5 },
        "N": { "C": 1-4, "N": 2-2, "D": 3-2 },
        "D": { "C": 5-0, "N": 2-3, "D": 1-1 },
    }
    total_payoff.setdefault(label, 0)
    total_payoff[label] += table[me][them]
def update_hist(label, choice):
    """Remember ``choice`` as the latest move of the player ``label``.

    Writes to the module-level ``choices`` dict, replacing any earlier entry.
    """
    choices.update({label: choice})
# Collect every playable class exposed by the `opponents` module.
# inspect.getmembers(..., isclass) also returns classes that the module
# merely imported (e.g. collections.defaultdict), which cannot play, so
# keep only classes that provide a callable `round` method.
opponents = [
    cls for _, cls
    in inspect.getmembers(sys.modules['opponents'], inspect.isclass)
    if callable(getattr(cls, 'round', None))
]

for k in opponents:
    # Payoff totals for this opponent, accumulated over all matches.
    total_payoff = {}
    for j in range(0, 100):
        # A fresh pair of bots for every 100-round match.
        A = Ensemble()
        B = k()
        choices = {}
        aChoice = None
        bChoice = None
        for i in range(0, 100):
            # Score the previous round (skipped while the other side's
            # move is still None), then ask the bot for its next move.
            count_payoff(A.__class__.__name__, bChoice)
            a = A.round(bChoice)
            update_hist(A.__class__.__name__, a)
            count_payoff(B.__class__.__name__, aChoice)
            b = B.round(aChoice)
            update_hist(B.__class__.__name__, b)
            aChoice = a
            bChoice = b
    # Parenthesised single-argument form prints identically on
    # Python 2 and Python 3.
    print(total_payoff)