Since length was listed as a criterion, here's the golfed version at 1681 characters (which could probably still be improved by 10%):
import java.io.*;import java.util.*;public class W{public static void main(String[]
a)throws Exception{int n=a.length<1?5:a[0].length(),p,q;String f,t,l;S w=new S();Scanner
s=new Scanner(new
File("sowpods"));while(s.hasNext()){f=s.next();if(f.length()==n)w.add(f);}if(a.length<1){String[]x=w.toArray(new
String[0]);Random
r=new Random();q=x.length;p=r.nextInt(q);q=r.nextInt(q-1);f=x[p];t=x[p>q?q:q+1];}else{f=a[0];t=a[1];}H<S>
A=new H(),B=new H(),C=new H();for(String W:w){A.put(W,new
S());for(p=0;p<n;p++){char[]c=W.toCharArray();c[p]='.';l=new
String(c);A.get(W).add(l);S z=B.get(l);if(z==null)B.put(l,z=new
S());z.add(W);}}for(String W:A.keySet()){C.put(W,w=new S());for(String
L:A.get(W))for(String b:B.get(L))if(b!=W)w.add(b);}N m,o,ñ;H<N> N=new H();N.put(f,m=new
N(f,t));N.put(t,o=new N(t,t));m.k=0;N[]H=new
N[3];H[0]=m;p=H[0].h;while(0<1){if(H[0]==null){if(H[1]==H[2])break;H[0]=H[1];H[1]=H[2];H[2]=null;p++;continue;}if(p>=o.k-1)break;m=H[0];H[0]=m.x();if(H[0]==m)H[0]=null;for(String
v:C.get(m.s)){ñ=N.get(v);if(ñ==null)N.put(v,ñ=new N(v,t));if(m.k+1<ñ.k){if(ñ.k<ñ.I){q=ñ.k+ñ.h-p;N
Ñ=ñ.x();if(H[q]==ñ)H[q]=Ñ==ñ?null:Ñ;}ñ.b=m;ñ.k=m.k+1;q=ñ.k+ñ.h-p;if(H[q]==null)H[q]=ñ;else{ñ.n=H[q];ñ.p=ñ.n.p;ñ.n.p=ñ.p.n=ñ;}}}}if(o.b==null)System.out.println(f+"\n"+t+"\nOY");else{String[]P=new
String[o.k+2];P[o.k+1]=o.k-1+"";m=o;for(q=m.k;q>=0;q--){P[q]=m.s;m=m.b;}for(String
W:P)System.out.println(W);}}}class N{String s;int k,h,I=(1<<30)-1;N b,p,n;N(String S,String
d){s=S;for(k=0;k<d.length();k++)if(d.charAt(k)!=S.charAt(k))h++;k=I;p=n=this;}N
x(){N r=n;n.p=p;p.n=n;n=p=this;return r;}}class S extends HashSet<String>{}class H<V>extends
HashMap<String,V>{}
The ungolfed version, which uses package names and methods and doesn't give warnings or extend classes just to alias them:
package com.akshor.pjt33;
import java.io.*;
import java.util.*;
// WordLadder partially golfed and with reduced dependencies
//
// Variables used in complexity analysis:
// n is the word length
// V is the number of words (vertex count of the graph)
// E is the number of edges
// hash is the cost of a hash insert / lookup - I will assume it's constant, but without completely brushing it under the carpet
public class WordLadder2
{
private Map<String, Set<String>> wordsToWords = new HashMap<String, Set<String>>();
// Initialisation cost: O(V * n * (n + hash) + E * hash)
private WordLadder2(Set<String> words)
{
Map<String, Set<String>> wordsToLinks = new HashMap<String, Set<String>>();
Map<String, Set<String>> linksToWords = new HashMap<String, Set<String>>();
// Cost: O(Vn * (n + hash))
for (String word : words)
{
// Cost: O(n*(n + hash))
for (int i = 0; i < word.length(); i++)
{
// Cost: O(n + hash)
char[] ch = word.toCharArray();
ch[i] = '.';
String link = new String(ch).intern();
add(wordsToLinks, word, link);
add(linksToWords, link, word);
}
}
// Cost: O(V * n * hash + E * hash)
for (Map.Entry<String, Set<String>> from : wordsToLinks.entrySet()) {
String src = from.getKey();
wordsToWords.put(src, new HashSet<String>());
for (String link : from.getValue()) {
Set<String> to = linksToWords.get(link);
for (String snk : to) {
// Note: equality test is safe here. Cost is O(hash)
if (snk != src) add(wordsToWords, src, snk);
}
}
}
}
public static void main(String[] args) throws IOException
{
// Cost: O(filelength + num_words * hash)
Map<Integer, Set<String>> wordsByLength = new HashMap<Integer, Set<String>>();
BufferedReader br = new BufferedReader(new FileReader("sowpods"), 8192);
String line;
while ((line = br.readLine()) != null) add(wordsByLength, line.length(), line);
if (args.length == 2) {
String from = args[0].toUpperCase();
String to = args[1].toUpperCase();
new WordLadder2(wordsByLength.get(from.length())).findPath(from, to);
}
else {
// 5-letter words are the most interesting.
String[] _5 = wordsByLength.get(5).toArray(new String[0]);
Random rnd = new Random();
int f = rnd.nextInt(_5.length), g = rnd.nextInt(_5.length - 1);
if (g >= f) g++;
new WordLadder2(wordsByLength.get(5)).findPath(_5[f], _5[g]);
}
}
// O(E * hash)
private void findPath(String start, String dest) {
Node startNode = new Node(start, dest);
startNode.cost = 0; startNode.backpointer = startNode;
Node endNode = new Node(dest, dest);
// Node lookup
Map<String, Node> nodes = new HashMap<String, Node>();
nodes.put(start, startNode);
nodes.put(dest, endNode);
// Heap
Node[] heap = new Node[3];
heap[0] = startNode;
int base = heap[0].heuristic;
// O(E * hash)
while (true) {
if (heap[0] == null) {
if (heap[1] == heap[2]) break;
heap[0] = heap[1]; heap[1] = heap[2]; heap[2] = null; base++;
continue;
}
// If the lowest cost isn't at least 1 less than the current cost for the destination,
// it can't improve the best path to the destination.
if (base >= endNode.cost - 1) break;
// Get the cheapest node from the heap.
Node v0 = heap[0];
heap[0] = v0.remove();
if (heap[0] == v0) heap[0] = null;
// Relax the edges from v0.
int g_v0 = v0.cost;
// O(hash * #neighbours)
for (String v1Str : wordsToWords.get(v0.key))
{
Node v1 = nodes.get(v1Str);
if (v1 == null) {
v1 = new Node(v1Str, dest);
nodes.put(v1Str, v1);
}
// If it's an improvement, use it.
if (g_v0 + 1 < v1.cost)
{
// Update the heap.
if (v1.cost < Node.INFINITY)
{
int bucket = v1.cost + v1.heuristic - base;
Node t = v1.remove();
if (heap[bucket] == v1) heap[bucket] = t == v1 ? null : t;
}
// Next update the backpointer and the costs map.
v1.backpointer = v0;
v1.cost = g_v0 + 1;
int bucket = v1.cost + v1.heuristic - base;
if (heap[bucket] == null) {
heap[bucket] = v1;
}
else {
v1.next = heap[bucket];
v1.prev = v1.next.prev;
v1.next.prev = v1.prev.next = v1;
}
}
}
}
if (endNode.backpointer == null) {
System.out.println(start);
System.out.println(dest);
System.out.println("OY");
}
else {
String[] path = new String[endNode.cost + 1];
Node t = endNode;
for (int i = t.cost; i >= 0; i--) {
path[i] = t.key;
t = t.backpointer;
}
for (String str : path) System.out.println(str);
System.out.println(path.length - 2);
}
}
private static <K, V> void add(Map<K, Set<V>> map, K key, V value) {
Set<V> vals = map.get(key);
if (vals == null) map.put(key, vals = new HashSet<V>());
vals.add(value);
}
private static class Node
{
public static int INFINITY = Integer.MAX_VALUE >> 1;
public String key;
public int cost;
public int heuristic;
public Node backpointer;
public Node prev = this;
public Node next = this;
public Node(String key, String dest) {
this.key = key;
cost = INFINITY;
for (int i = 0; i < dest.length(); i++) if (dest.charAt(i) != key.charAt(i)) heuristic++;
}
public Node remove() {
Node rv = next;
next.prev = prev;
prev.next = next;
next = prev = this;
return rv;
}
}
}
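Assuming a SOWPODS word list saved as a file called sowpods in the working directory (which is what both versions read), the program is invoked either with an explicit pair, e.g. java com.akshor.pjt33.WordLadder2 HOUSE GORGE, or with no arguments, in which case it picks a random pair of 5-letter words.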
As you can see, the running cost analysis is O(filelength + num_words * hash + V * n * (n + hash) + E * hash). If you accept my assumption that a hash table insertion/lookup is constant time, that's O(filelength + V n^2 + E). The particular statistics of the graphs in SOWPODS mean that O(V n^2) really dominates O(E) for most n.
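To make that concrete with the 5-letter statistics quoted below: V = 12478, E = 40759 and n = 5 give V * n^2 = 12478 * 25 = 311950 versus E = 40759, so the V * n^2 term is bigger by a factor of about 7.6.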
Sample output:
IDOLA,IDOLS,IDYLS,ODYLS,ODALS,OVALS,OVELS,OVENS,EVENS,ETENS,STENS,SKENS,SKINS,SPINS,SPINE,13
WICCA,PROSY,OY
BRINY,BRINS,TRINS,TAINS,TARNS,YARNS,YAWNS,YAWPS,YAPPS,7
GALES,GASES,GASTS,GESTS,GESTE,GESSE,DESSE,5
SURES,DURES,DUNES,[three words unrecoverable from the translation],4
LICHT,LIGHT,BIGHT,BIGOT,BIGOS,BIROS,GIROS,GIRNS,GURNS,GUANS,GUANA,RUANA,10
SARGE,SERGE,SERRE,SERRS,SEERS,DEERS,DYERS,OYERS,OVERS,OVELS,OVALS,ODALS,ODYLS,IDYLS,12
KEIRS,SEIRS,SEERS,BEERS,BRERS,BRERE,BREME,CREME,CREPE,7
And here is one of the 6 pairs with the longest shortest path:
[the words of this ladder are unrecoverable from the translation],56
And one of the worst-case soluble 8-letter pairs:
[the words of this ladder are unrecoverable from the translation],52
Now that I think I've addressed all the requirements of the question, on to my discussion.
For a computer scientist the problem obviously reduces to shortest path in a graph G whose vertices are words and whose edges connect words differing in one letter. Generating the graph efficiently isn't trivial - in fact I need to revisit it to get the complexity down to O(V n hash + E). The way I do it involves creating a graph which inserts extra vertices (corresponding to words with one wildcard character) and which is homeomorphic to the graph in question. I did consider using that graph rather than reducing to G - and I suppose that from a golfing point of view I should have - on the basis that a wildcard node with more than 3 edges reduces the number of edges in the graph, and the standard worst-case running time of shortest path algorithms is O(V heap-op + E).
However, the first thing I did was to run some analyses of the graphs G for different word lengths, and I discovered that they're extremely sparse for words of 5 or more letters. The 5-letter graph has 12478 vertices and 40759 edges; adding link nodes makes the graph worse. By the time you're up to 8 letters there are fewer edges than nodes, and 3/7 of the words are "aloof". So I rejected that optimisation idea as not really helpful.
The idea which did prove useful was to examine the heap. I can honestly say that I've implemented some moderately exotic heaps in the past, but none as exotic as this one. I use A-star (since C provides no benefit given the heap I'm using) with the obvious heuristic of the number of letters different from the target, and a bit of analysis shows that at any time there are no more than 3 different priorities in the heap. When I pop a node whose priority is (cost + heuristic) and look at its neighbours, there are three cases I consider: 1) the neighbour's cost is cost+1 and its heuristic is heuristic-1 (because the letter it changes becomes "right"); 2) cost+1 and heuristic+0 (because the letter it changes goes from "wrong" to "still wrong"); 3) cost+1 and heuristic+1 (because the letter it changes goes from "right" to "wrong"). So if I relax a neighbour I insert it at the same priority, priority+1, or priority+2. As a result I can use a 3-element array of linked lists for the heap.
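To make the 3-element heap concrete, here is a minimal standalone sketch of the same bucket idea (the BucketQueue name and its methods are hypothetical, not part of the solution; it uses ArrayDeque buckets instead of the intrusive doubly-linked lists in the code above, so it omits the O(1) removal needed when an already-queued node is relaxed again):

import java.util.ArrayDeque;
import java.util.Deque;

// Bucket "heap": at any moment only priorities base, base+1, base+2 occur.
class BucketQueue<T> {
    @SuppressWarnings("unchecked")
    private final Deque<T>[] buckets = new Deque[3];
    private int base;

    BucketQueue(int base) {
        this.base = base;
        for (int i = 0; i < 3; i++) buckets[i] = new ArrayDeque<T>();
    }

    // O(1) insert; by the three-case analysis, priority - base is 0, 1 or 2.
    void push(T item, int priority) {
        buckets[priority - base].add(item);
    }

    // Pop an item of minimum priority, advancing base past empty buckets;
    // returns null when the queue is exhausted.
    T popMin() {
        while (buckets[0].isEmpty()) {
            if (buckets[1].isEmpty() && buckets[2].isEmpty()) return null;
            Deque<T> spare = buckets[0];
            buckets[0] = buckets[1];
            buckets[1] = buckets[2];
            buckets[2] = spare;
            base++;
        }
        return buckets[0].poll();
    }
}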
I should add a note about my assumption that hash lookup is constant time. Very well, you might say, but what about the hash computations? The answer is that I'm amortising them: java.lang.String caches its hashCode(), so the total time spent computing hashes is O(V n^2) (in generating the graph).
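The caching is easy to observe directly (a trivial illustration, not part of the solution):

String w = "OVALS";
int h1 = w.hashCode(); // first call walks the characters: O(n)
int h2 = w.hashCode(); // subsequent calls return the cached value: O(1)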
There is another change which affects the complexity, but whether or not it's an optimisation depends on your assumptions about the statistics. (IMO putting "best Big O solution" as a criterion is a mistake, because there is no best complexity, for a simple reason: there isn't a single variable.) This change affects the graph generation step. In the code above, it is:
Map<String, Set<String>> wordsToLinks = new HashMap<String, Set<String>>();
Map<String, Set<String>> linksToWords = new HashMap<String, Set<String>>();
// Cost: O(Vn * (n + hash))
for (String word : words)
{
// Cost: O(n*(n + hash))
for (int i = 0; i < word.length(); i++)
{
// Cost: O(n + hash)
char[] ch = word.toCharArray();
ch[i] = '.';
String link = new String(ch).intern();
add(wordsToLinks, word, link);
add(linksToWords, link, word);
}
}
// Cost: O(V * n * hash + E * hash)
for (Map.Entry<String, Set<String>> from : wordsToLinks.entrySet()) {
String src = from.getKey();
wordsToWords.put(src, new HashSet<String>());
for (String link : from.getValue()) {
Set<String> to = linksToWords.get(link);
for (String snk : to) {
// Note: equality test is safe here. Cost is O(hash)
if (snk != src) add(wordsToWords, src, snk);
}
}
}
That's O(V * n * (n + hash) + E * hash). But the O(V * n^2) part comes from generating a new n-character string for each link and then computing its hash code. This can be avoided with a helper class:
private static class Link
{
private String str;
private int hash;
private int missingIdx;
public Link(String str, int hash, int missingIdx) {
this.str = str;
this.hash = hash;
this.missingIdx = missingIdx;
}
@Override
public int hashCode() { return hash; }
@Override
public boolean equals(Object obj) {
Link l = (Link)obj; // Unsafe, but I know the contexts where I'm using this class...
if (this == l) return true; // Essential
if (hash != l.hash || missingIdx != l.missingIdx) return false;
for (int i = 0; i < str.length(); i++) {
if (i != missingIdx && str.charAt(i) != l.str.charAt(i)) return false;
}
return true;
}
}
Then the first half of the graph generation becomes
Map<String, Set<Link>> wordsToLinks = new HashMap<String, Set<Link>>();
Map<Link, Set<String>> linksToWords = new HashMap<Link, Set<String>>();
// Cost: O(V * n * hash)
for (String word : words)
{
// apidoc: The hash code for a String object is computed as
// s[0]*31^(n-1) + s[1]*31^(n-2) + ... + s[n-1]
// Cost: O(n * hash)
int hashCode = word.hashCode();
int pow = 1;
for (int j = word.length() - 1; j >= 0; j--) {
Link link = new Link(word, hashCode - word.charAt(j) * pow, j);
add(wordsToLinks, word, link);
add(linksToWords, link, word);
pow *= 31;
}
}
By using the structure of the hash code we can generate the links in O(V * n). However, this has a knock-on effect: underlying my assumption that hash lookup is constant time is an assumption that comparing objects for equality is cheap. Link's equality test, though, is O(n) in the worst case. The worst case is a hash collision between two equal links generated from different words - i.e. it occurs O(E) times in the second half of the graph generation. Apart from that, barring the unlikely event of a hash collision between non-equal links, we're fine. So we've traded O(V * n^2) for O(E * n * hash). See my earlier point about statistics.
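As a sanity check of the hash arithmetic, here is a worked example (not part of the solution): for the 3-letter words DOG and DIG, hashCode("DOG") = 'D'*31^2 + 'O'*31 + 'G', and removing the contribution of position 1 (where pow = 31) leaves 'D'*31^2 + 'G' in both cases, so the two links D.G hash alike:

int hDog = "DOG".hashCode() - 'O' * 31;
int hDig = "DIG".hashCode() - 'I' * 31;
System.out.println(hDog == hDig); // prints true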
It seems odd to me that HOUSE to GORGE is reported as 2. I realise that there are 2 intermediate words, so it does make sense, but the number of operations would be more intuitive.