二叉树编码


12

假设您有一棵完整的二叉树(即每个内部节点恰好有两个非空后代)。每个节点包含一个非零整数。您将获得将树编码为整数列表或从整数列表中解码树的任务。

该树在内部存储如下:

/* One node of the binary tree that encode()/decode() operate on. */
struct node {
  int data;            /* integer payload of this node */
  struct node *left;   /* left child */
  struct node *right;  /* right child */
};

您必须实现两个功能:

/* Serializes the tree rooted at `root` into an array of integers. */
int *encode(struct node *root);
/* Rebuilds a tree from an integer array (the counterpart of encode). */
struct node *decode(int *array);

由您决定如何编码和解码。

要点:

  • 最小编码长度
  • 复杂度(理想情况下,与节点数量呈线性关系)
  • 独创性

源代码长度没有意义,您不限于C。

树示例:

     5
    / \
   3   2
      / \
     2   1
    / \
   9   9

1
输入和输出要求不会受到影响。
Yasir Arsanukaev'2

2
@Yasir:编码算法是您的工作,因此我无法提供任何输入和输出。int *是用户的黑匣子。
亚历山德鲁

整数范围是否有任何限制?更具体地说,如果我们使用具有任意大整数的语言,是否可以使用该语言?编码数据的大小是以整数或字节数衡量的吗?
sepp2k 2011年

编码和解码功能是否需要无副作用(除了内存分配)?还是例如将数据存储在全局变量中?
sepp2k 2011年

1
假设数据整数本身是实际的32位整数,则有一个仅使用32 * n位的简单编码。
Anon。

Answers:


2

〜1.03 N

到目前为止,所有答案似乎都需要至少2 * N * 32位来存储。(除了Haskell和Ruby解决方案之类的允许整数值大于32位的语言的解决方案之外,但是只要数据大于16K,这些解决方案仍将占用额外的字节来进行编码。)

这是一个仅占用N + ceiling(N / 32)+1 int存储的解决方案。对于较大的N,此值接近1.03125 N,对于大于20的所有N,此值均低于1.1N。

这个想法是为每个节点存储一个额外的位,其中1是“ hasChildren”。这些位预先打包成N / 32个字。

/*
 * Recursively serializes the subtree rooted at n into code[].
 *
 * For every node one "has children" bit is written at bit index *flag
 * (word *flag/32, bit *flag&31); bits are emitted in pre-order. The node's
 * data is stored at code[*pos]; data words are emitted in in-order (left
 * subtree, node, right subtree). Both cursors are advanced as a side effect.
 *
 * The bit is OR-ed in, so the flag words must be zero on entry.
 * Returns code, for the caller's convenience.
 */
int* encodeHelper(Node* n, int* code, int* pos, int* flag)
{
   int hasKids = (n->left!=0);
   /* Shift as unsigned: shifting a 1 into bit 31 of a signed int is undefined. */
   code[*flag/32] = (int)((unsigned)code[*flag/32] | ((unsigned)hasKids<<(*flag&31)));
   *flag+=1;
   if (hasKids) encodeHelper(n->left, code, pos, flag);
   code[*pos]=n->data;
   *pos+=1;
   if (hasKids) encodeHelper(n->right, code, pos, flag);
   return code;
}

/*
 * Encodes the tree rooted at h into a freshly calloc'ed int array and stores
 * the array length (in ints) in *sizeOut.
 *
 * Layout: out[0] is the index of the first data word (it stays 0 for an
 * empty tree), out[1..nflags] hold the packed "has children" bits, and the
 * remaining words hold the node data.
 *
 * countNodes() is defined elsewhere; presumably it returns the number of
 * nodes in the tree (0 for NULL).
 *
 * Returns NULL if the allocation fails; otherwise the caller owns the array.
 */
int* encode(Node* h, int* sizeOut)
{
   int nnodes=countNodes(h);
   /* Integer ceiling division: one flag bit per node, 32 bits per word. */
   int nflags = (nnodes+31)/32;
   int pos=nflags+1;
   int flag=32;   /* bit index 32 is bit 0 of out[1]; out[0] is the header */
   int* out;
   *sizeOut = 1+nnodes+nflags;
   out = calloc(*sizeOut, sizeof(int));
   if (!out) return NULL;
   if (!h) return out;
   out[0]=nflags+1; //store start of data
   return encodeHelper(h,out,&pos,&flag);
}

/*
 * Recursively rebuilds one subtree from code[].
 *
 * Reads the node's "has children" bit at bit index *flag and its data at
 * code[*pos], advancing both cursors. When the bit is set, the left subtree
 * is decoded before the node's data is read and the right subtree after it,
 * mirroring the order in which encodeHelper() wrote them.
 *
 * Returns a calloc'ed node; a node without children keeps NULL child links.
 */
Node* decodeHelper(int* code, int* pos, int* flag)
{
   Node*n = calloc(1, sizeof(Node));
   /* Extract the bit through unsigned arithmetic so the sign bit is handled portably. */
   int hasKids = (int)(((unsigned)code[*flag/32]>>(*flag&31))&1u);
   *flag+=1;
   if (hasKids) n->left = decodeHelper(code, pos, flag);
   n->data = code[*pos];
   *pos+=1;
   if (hasKids) n->right = decodeHelper(code, pos, flag);
   return n;
}

/*
 * Rebuilds a tree from an array produced by encode().
 * code[0] is the index of the first data word; 0 means an empty tree.
 */
Node* decode(int* code)
{
   int dataPos = code[0];
   int flagBit = 32;   /* flag bits start in code[1] */

   if (dataPos == 0)
      return NULL;

   return decodeHelper(code, &dataPos, &flagBit);
}

(在此处完成实现)


5

该Haskell程序对n个Integer中的n个节点的树进行编码。诀窍是它对节点的数据加倍编码,然后使用低位指示它是叶节点还是内部节点。

从技术上讲,Parser这里的monad太过强大了,因为只创建了一个解析器,decoder我本可以将解析器链接逻辑直接放在那儿。但是这种方法使解码器非常清晰,Parser尽管它的体积很小,但它是一个合理的简单解析框架。

import Control.Monad (ap)

-- | A binary tree in which every inner node has exactly two subtrees.
data Tree
  = Leaf Integer            -- ^ node without children
  | Node Integer Tree Tree  -- ^ payload, left subtree, right subtree
  deriving (Eq, Show)

-- | Pre-order serialisation: each payload is doubled and the low bit records
-- whether the node is a leaf (0) or an inner node (1).
encode :: Tree -> [Integer]
encode tree = case tree of
  Leaf v         -> [2 * v]
  Node v lhs rhs -> (2 * v + 1) : (encode lhs ++ encode rhs)

-- | Inverse of 'encode'. Yields 'Nothing' when the list runs out early or
-- has elements left over after one complete tree.
decode :: [Integer] -> Maybe Tree
decode = fullyParse decoder
  where
    decoder :: Parser Integer Tree
    decoder = next >>= build

    -- Even codes are leaves; odd codes are inner nodes followed by the
    -- encodings of their two subtrees.
    build code
      | even code = return (Leaf payload)
      | otherwise = return (Node payload) `ap` decoder `ap` decoder
      where
        payload = code `div` 2

-- A simple Parsing Monad
-- | A parser takes a list of tokens of type @a@ and either fails ('Nothing')
-- or yields a result of type @b@ together with the unconsumed tokens.
data Parser a b = P { runParser :: [a] -> Maybe (b, [a]) }

-- Functor and Applicative are superclasses of Monad in current GHC, so the
-- Monad instance below does not compile without them.
instance Functor (Parser a) where
  fmap f p = P ( \ts -> fmap (\(v,ts') -> (f v, ts')) (runParser p ts) )

instance Applicative (Parser a) where
  pure a = P ( \ts -> Just (a, ts) )
  (<*>)  = ap

instance Monad (Parser a) where
  return   = pure
  -- Run p, then feed its result and the remaining tokens to q.
  p >>= q  = P ( \ts -> runParser p ts >>= (\(v,ts') -> runParser (q v) ts') )

-- 'fail' is no longer a method of Monad; it lives in MonadFail.
instance MonadFail (Parser a) where
  fail _   = P ( const Nothing )

-- | Consumes and returns the first token; fails on empty input.
next :: Parser a a
next = P $ \input -> case input of
  (tok:rest) -> Just (tok, rest)
  []         -> Nothing

-- | Runs a parser and accepts its result only if every token was consumed.
fullyParse :: Parser a b -> [a] -> Maybe b
fullyParse p ts = case runParser p ts of
  Just (v, []) -> Just v
  _            -> Nothing

-- Example
-- | Encodes the sample tree, decodes it again and reports whether the
-- round trip reproduced the original.
main :: IO ()
main = mapM_ putStrLn report
  where
    report =
      [ "example:  " ++ show sample
      , "encoding: " ++ show encoded
      , "decoding: " ++ show decoded
      , "worked?   " ++ show roundTripOk
      ]
    sample =
      Node 5 (Leaf 3) (Node 2 (Node 2 (Leaf 9) (Leaf 9)) (Leaf 1))
    encoded = encode sample
    decoded = decode encoded
    roundTripOk = decoded == Just sample

运行该程序会得到如下输出:

> runhaskell TreeEncoding.hs 
example:  Node 5 (Leaf 3) (Node 2 (Node 2 (Leaf 9) (Leaf 9)) (Leaf 1))
encoding: [11,6,5,5,18,18,2]
decoding: Just (Node 5 (Leaf 3) (Node 2 (Node 2 (Leaf 9) (Leaf 9)) (Leaf 1)))
worked?   True

4

在C中

#include <stdlib.h>
#include <stdio.h>

/* Binary tree node; the typedef makes `Node` usable without `struct`. */
typedef struct Node
{
    int          data;   /* payload */
    struct Node* left;   /* left child, or NULL */
    struct Node* right;  /* right child, or NULL */
} Node;
/* Private Functions */
/* Writes one subtree into `store` and returns the next free slot. */
static int*  encodeNode(Node* tree, int* store);
/* Rebuilds one subtree from *store and advances *store past it. */
static Node* decodeNode(int** store);

/* Public Functions */
/* Allocates a node with the given payload and children. */
Node*   newNode(int data,Node* left,Node* right);
/* Frees a node and, recursively, both of its subtrees. */
void    deleteTree(Node* tree);
/* Returns the number of nodes in the tree (0 for NULL). */
int     countNodesTree(Node* tree);
/* Serializes a tree into a malloc'ed array: node count, then two ints per node. */
int*    encode(Node *tree);
/* Rebuilds a tree from an array produced by encode(). */
Node*   decode(int* store);
/* Prints the tree in pre-order to stdout; "- " marks an absent child. */
void    printTree(Node* tree);

/*
 * Allocates a node holding `data` with the given children (either may be
 * NULL). The result is heap-allocated; deleteTree() releases it.
 *
 * Terminates the program if the allocation fails, in the same way decode()
 * reports bad input, instead of writing through a NULL pointer.
 */
Node* newNode(int data,Node* left,Node* right)
{
    Node* result    = (Node*)malloc(sizeof(Node));
    if (result == NULL)
    {   fprintf(stderr, "Out of memory allocating tree node\n");
        exit(1);
    }

    result->data    = data;
    result->left    = left;
    result->right   = right;

    return result;
}

/* Frees `tree` and all of its descendants (post-order); NULL is a no-op. */
void deleteTree(Node* tree)
{
    if (tree != NULL)
    {
        Node* lhs = tree->left;
        Node* rhs = tree->right;

        deleteTree(lhs);
        deleteTree(rhs);
        free(tree);
    }
}

/* Returns the number of nodes in `tree`; an empty (NULL) tree has 0. */
int countNodesTree(Node* tree)
{
    int total = 0;

    if (tree != NULL)
    {
        total  = countNodesTree(tree->left);
        total += countNodesTree(tree->right);
        total += 1;
    }
    return total;
}

/*
 * Prints the tree to stdout in pre-order, each value followed by a space.
 * An absent child (NULL) is printed as "- ".
 */
void printTree(Node* tree)
{
    if (tree == NULL)
    {
        fprintf(stdout, "- ");
        return;
    }

    fprintf(stdout, "%d ", tree->data);
    printTree(tree->left);
    printTree(tree->right);
}

编码:

/*
 * Serializes `tree` into a malloc'ed int array that the caller must free().
 *
 * result[0] holds the node count; each node then occupies two ints (its
 * data and a child-presence flag), written in pre-order by encodeNode().
 *
 * Never returns NULL: decode() treats a NULL array as a fatal error, so an
 * allocation failure terminates the program here instead.
 */
int* encode(Node *tree)
{
    int     nodeCount   = countNodesTree(tree);
    /* Compute the size in size_t so a large tree cannot overflow int. */
    int*    result      = (int*)malloc(sizeof(int) * ((size_t)nodeCount * 2 + 1));

    if (result == NULL)
    {   fprintf(stderr, "Out of memory allocating encode buffer\n");
        exit(1);
    }

    // Put the node count in the first element.
    // This makes it easy to also serialize this object for transport
    // i.e. you can put it in a file or a stream (socket) and easily recover it.
    result[0]           = nodeCount;
    encodeNode(tree, result + 1);
    return result;
}

/*
 * Writes the subtree rooted at `tree` into `store` in pre-order.
 * Each node takes two ints: its data, then a mask with bit 0 set when a
 * left child exists and bit 1 set when a right child exists.
 * Returns the first slot after the written data (`store` itself for NULL).
 */
int* encodeNode(Node* tree, int* store)
{
    int childMask = 0;

    if (tree == NULL)
    {   return store;
    }

    /* Two flag bits are more than this problem needs, but leave room to grow. */
    if (tree->left  != NULL) { childMask += 1; }
    if (tree->right != NULL) { childMask += 2; }

    *store++ = tree->data;
    *store++ = childMask;

    store = encodeNode(tree->left, store);
    return encodeNode(tree->right, store);
}

解码:

/*
 * Rebuilds a tree from an array produced by encode().
 * A NULL array is a fatal error; a node count of 0 yields an empty tree.
 */
Node* decode(int* store)
{
    int* cursor;

    if (store == NULL)
    { fprintf(stderr, "Bad Input terminating: encode() always return non NULL\n");
      exit(1);
    }

    if (store[0] == 0)
    {
        return NULL;
    }

    /* Skip the node count; the node records start right after it. */
    cursor = store + 1;
    return decodeNode(&cursor);
}

/*
 * Reads one node record (data, child mask) at *store, advances *store past
 * it, then decodes the left and right subtrees in that order when the mask
 * says they exist.
 */
Node* decodeNode(int** store)
{
    int*    record      = *store;
    int     value       = record[0];
    int     childMask   = record[1];
    Node*   left        = NULL;
    Node*   right       = NULL;

    *store = record + 2;

    if (childMask & 1)
    {   left = decodeNode(store);
    }
    if (childMask & 2)
    {   right = decodeNode(store);
    }

    return newNode(value, left, right);
}

主要:

/*
 * Builds the example tree, prints it, round-trips it through
 * encode()/decode(), prints the decoded copy and releases everything.
 */
int main()
{
    Node*   leafNineA   = newNode(9, NULL, NULL);
    Node*   leafNineB   = newNode(9, NULL, NULL);
    Node*   innerTwo    = newNode(2, leafNineA, leafNineB);
    Node*   leafOne     = newNode(1, NULL, NULL);
    Node*   rightTwo    = newNode(2, innerTwo, leafOne);
    Node*   leafThree   = newNode(3, NULL, NULL);
    Node*   original    = newNode(5, leafThree, rightTwo);
    int*    encoded;
    Node*   decoded;

    printTree(original);
    fprintf(stdout,"\n");

    encoded = encode(original);
    decoded = decode(encoded);
    printTree(decoded);
    fprintf(stdout,"\n");

    free(encoded);
    deleteTree(decoded);
    deleteTree(original);
}

注意:每个节点被编码为两个整数(另外还有一个整数用于存储节点数)。
因此,提供的树编码如下:

 7, 5, 3, 3, 0, 2, 3, 2, 3, 9, 0, 9, 0, 1, 0
 ^  ^
 ^  ^ Node 1
 ^
 Count

3

在Ruby中,其编码与@MtnViewMark相同:

# Inner node of the tree. Encodes itself as a space-separated string in which
# its own value is shifted left by one with the low bit set, followed by the
# encodings of its left and right children.
class Node
  def initialize(data, left = nil, right = nil)
    @data = data
    @left = left
    @right = right
  end

  # Serializes this node followed by both children.
  def encode
    tagged = @data << 1 | 1
    "%d %s %s" % [tagged, @left.encode, @right.encode]
  end

  class << self
    # Parses a whitespace-separated list of integers back into a tree.
    def decode(str)
      _decode(str.split.map { |token| token.to_i })
    end

    private

    # Consumes values from the front of +a+: an odd value is an inner node
    # followed by its two subtrees, an even value is a leaf.
    def _decode(a)
      code = a.shift
      if code & 1 == 1
        Node.new(code >> 1, _decode(a), _decode(a))
      else
        Leaf.new(code >> 1)
      end
    end
  end
end

# Node without children: encodes as its value shifted left by one, leaving
# the low bit clear.
class Leaf < Node
  def encode
    shifted = @data << 1
    shifted.to_s
  end
end

# Round-trip demo: decode the example encoding and print it re-encoded.
tree = Node.decode("11 6 5 5 18 18 2")
reencoded = tree.encode
print reencoded

每个节点的成本是一个整数(data << 1 | has_childs):

11 6 5 5 18 18 2

哇-看起来很苗条优雅。但是,它不需要int数组,对吗?
用户未知

2

给定一个带有n节点的二叉树,它会将其编码为2n + 1整数列表。编码和解码算法都具有O(n)复杂性。

编码时,我使用0整数作为前哨标记,指示何时展开递归。然后,当我解码时,我将要创建的树节点放在(某种)堆栈上,并使用列表中的0来跟踪添加下一个节点的位置。我没有尝试过,但是我很确定如果树不完整,解码会中断。

#include <math.h>
#include <stdio.h>
#include <stdlib.h>

// Prototypes
struct BTnode;
// Create a node holding `data` and attach it as the left child of `node`
struct BTnode * bt_add_left(struct BTnode * node, int data);
// Create a node holding `data` and attach it as the right child of `node`
struct BTnode * bt_add_right(struct BTnode * node, int data);
// Number of levels in the tree (0 for NULL)
int             bt_depth(struct BTnode * tree);
// Write the tree into `list` starting at `index`; returns the next free index
int             bt_encode_preorder(int * list, struct BTnode * tree, int index);
// Allocate a childless node holding `data`
struct BTnode * bt_node_create(int data);
// Free a node and its subtrees; returns the node's data (0 for NULL)
int             bt_node_delete(struct BTnode * node);
// Print the values of the tree in pre-order
void            bt_print_preorder(struct BTnode * tree);
// Serialize a tree into a malloc'ed integer list
int *           encode(struct BTnode * tree);
// Rebuild a tree from a list produced by encode()
struct BTnode * decode(int * list);

// Binary tree node: an integer payload plus links to both children
struct BTnode
{
  int data;
  struct BTnode *left;
  struct BTnode *right;
};

// Create a node holding `data`, attach it as the left child of `node`
// (replacing whatever link was there) and return the new node
struct BTnode * bt_add_left(struct BTnode * node, int data)
{
  node->left = bt_node_create(data);
  return node->left;
}

// Create a node holding `data`, attach it as the right child of `node`
// (replacing whatever link was there) and return the new node
struct BTnode * bt_add_right(struct BTnode * node, int data)
{
  node->right = bt_node_create(data);
  return node->right;
}

// Return the number of levels in the tree: 0 for an empty tree, 1 for a
// single node, otherwise one more than the deeper of the two subtrees
int bt_depth(struct BTnode * tree)
{
  int below_left;
  int below_right;

  if( tree == NULL ) return 0;

  // An absent child contributes depth 0 through the NULL case above
  below_left = bt_depth(tree->left);
  below_right = bt_depth(tree->right);

  return 1 + ((below_right > below_left) ? below_right : below_left);
}

// Write the subtree rooted at `tree` into `list` starting at `index`:
// the node's value, then both subtrees, then a 0 sentinel that tells the
// decoder to step back up. Returns the index after the last value written.
int bt_encode_preorder(int * list, struct BTnode * tree, int index)
{
  int next = index;

  list[next] = tree->data;
  next += 1;

  // The tree is assumed complete: a node with a left child also has a
  // right child, so only the left link is tested
  if( tree->left != NULL )
  {
    next = bt_encode_preorder(list, tree->left, next);
    next = bt_encode_preorder(list, tree->right, next);
  }

  // Sentinel closing this node
  list[next] = 0;
  return next + 1;
}

// Allocate a node holding `data` with both child links set to NULL
struct BTnode * bt_node_create(int data)
{
  struct BTnode * fresh = (struct BTnode *) malloc(sizeof(struct BTnode));
  fresh->data = data;
  fresh->left = NULL;
  fresh->right = NULL;
  return fresh;
}

// Free `node` together with both of its subtrees.
// Returns the data the node held, or 0 when `node` is NULL.
int bt_node_delete(struct BTnode * node)
{
  int payload;

  if(node == NULL)
    return 0;

  payload = node->data;

  // Recursing into a NULL child is a harmless no-op
  bt_node_delete(node->left);
  bt_node_delete(node->right);

  free(node);
  return payload;
}

// Print the node's value followed by a space, then the left and the right
// subtree. `tree` itself must not be NULL; absent children are skipped.
void bt_print_preorder(struct BTnode * tree)
{
  struct BTnode * children[2];
  int k;

  printf("%d ", tree->data);

  children[0] = tree->left;
  children[1] = tree->right;
  for(k = 0; k < 2; k++)
  {
    if(children[k] != NULL)
      bt_print_preorder(children[k]);
  }
}

// Decode binary tree structure from a list of integers
//
// list[0] is the index one past the last used element, list[1] is the root
// value, and every following non-zero value is a node to attach below the
// node on top of the stack; a 0 pops the stack.
//
// Returns NULL for a NULL list, for a list too short to hold one node, or
// when the work stack cannot be allocated. The caller owns the tree.
struct BTnode * decode(int * list)
{
  struct BTnode * tree;
  struct BTnode ** nodestack;
  int i,j;

  // Handle trivial case before list[0] is read
  if( list == NULL ) return NULL;

  // The shortest valid list is { 3, value, 0 }
  if( list[0] < 3 ) return NULL;

  // Heap-allocated so a long list cannot overflow the call stack.
  // Slot 0 is unused; j never exceeds i, so list[0] slots are enough.
  nodestack = (struct BTnode **) malloc(list[0] * sizeof(struct BTnode *));
  if( nodestack == NULL ) return NULL;

  tree = bt_node_create( list[1] );
  nodestack[ 1 ] = tree;

  // Stop once the root has been popped (j == 0) so that a malformed list
  // cannot make the loop read the unused slot 0
  j = 1;
  for(i = 2; i < list[0] && j > 0; i++)
  {
    if( list[i] == 0 )
    {
      j--;
    }
    else
    {
      struct BTnode * parent = nodestack[j];
      struct BTnode * child;

      // The left link is filled first, then the right one
      if( parent->left == NULL )
        child = bt_add_left(parent, list[i]);
      else
        child = bt_add_right(parent, list[i]);

      nodestack[ ++j ] = child;
    }
  }

  free(nodestack);
  return tree;
}

// Encode binary tree structure as a list of integers
int * encode(struct BTnode * tree)
{
  int maxnodes, depth, length;
  int * list;
  int j;

  // Handle trivial case
  if(tree == NULL) return NULL;

  // Calculate maximum number of nodes in the tree from the tree depth
  maxnodes = 1;
  depth = bt_depth(tree);
  for(j = 0; j < depth; j++)
  {
    maxnodes += pow(2, j);
  }

  // Allocate memory for the list; we need two ints for each value plus the
  // first value in the list to indicate length
  list = (int *) malloc( ((maxnodes * 2)+1) * sizeof(int));
  length = bt_encode_preorder(list, tree, 1);
  list[ 0 ] = length;
  return list;
}

// Build the example tree, print it, encode it, print the encoded list,
// decode it again and print the result
int main()
{
  struct BTnode * original;
  struct BTnode * rebuilt;
  struct BTnode * inner;
  int * encoded;
  int pos;

  /* Provided example

        5
       / \
      3   2
         / \
        2   1
       / \
      9   9
  */
  original = bt_node_create(5);
  bt_add_left(original, 3);
  inner = bt_add_right(original, 2);
  bt_add_right(inner, 1);
  inner = bt_add_left(inner, 2);
  bt_add_left(inner, 9);
  bt_add_right(inner, 9);

  printf("T (traversed in pre-order):  ");
  bt_print_preorder(original);
  printf("\n");

  encoded = encode(original);
  printf("T (encoded as integer list): ");
  // encoded[0] is the index one past the last value
  pos = 1;
  while(pos < encoded[0])
  {
    printf("%d ", encoded[pos]);
    pos++;
  }
  printf("\n");

  rebuilt = decode(encoded);
  printf("T' (decoded from int list):  ");
  bt_print_preorder(rebuilt);
  printf("\n\n");

  // Free memory
  bt_node_delete(original);
  bt_node_delete(rebuilt);
  free(encoded);
  return 0;
}

将其保存为encode.c然后编译并执行。该程序使用您提供的示例树,并且我已经在其他一些树上成功进行了测试。

$ gcc -Wall -lm -o encode encode.c
$ ./encode 
T (traversed in pre-order):  5 3 2 2 9 9 1 
T (encoded as integer list): 5 3 0 2 2 9 0 9 0 0 1 0 0 0 
T' (decoded from int list):  5 3 2 2 9 9 1

这几乎是我的初衷:)。
亚历山德鲁

如果数据包含0,解码不会失败吗?
AShelly 2011年

@AShelly他明确表示树中将不包含0。如果是这样,那么它将失败。
丹尼尔·斯坦迪奇

2

我的代码以前序遍历对树进行编码:每个叶子编码为两个int(其数据后跟0),每个内部节点编码为一个int(其数据,后面依次跟它的左子树和右子树的编码)。对于具有n个节点的完整二叉树(根据您的定义),n必须是奇数,并且有(n + 1)/ 2个叶子和(n-1)/ 2个内部节点,因此编码共需要3n / 2 + 1 / 2个整数。

警告:未经测试,只需输入即可。

// One node of the tree being encoded.
struct node {
  int data;            // payload
  struct node *left;   // left child
  struct node *right;  // right child
};

void encodeInternal(struct node *root, vector<int> *buf) {
  buf->push_back(root->data);
  if (root->left) {
    encodeInternal(root->left, buf);
    encodeInternal(root->right, buf);
  } else {
    buf->push_back(0);
  }
}
int *encode(struct node *root) {
  vector<int> buf;
  encodeInternal(root, &buf);
  return &buf[0];
}

// Result of decoding one subtree.
struct decodeResult {
  int encoded_size;   // number of ints the subtree occupied in the array
  struct node *n;     // root of the decoded subtree
};
struct decodeResult decodeInternal(int *array) {
  struct node *n = (struct node*)malloc(sizeof(struct node));
  n->data = array[0];
  if (array[1] == 0) {
    n->left = n->right = NULL;
    return (decodeResult){2, n};
  } else {
    decodeResult L = decodeInternal(array + 1);
    decodeResult R = decodeInternal(array + 1 + L.encoded_size);
    n->left = L.n;
    n->right = R.n;
    return (decodeResult){1 + L.encoded_size + R.encoded_size, n};
  }
}
struct node *decode(int *array) {
  return decodeInternal(array).n;
}

1

这是我的尝试。它将树存储在大小为2 ** depth + 1的数组中。它用a[0]保存大小,用a[size]保存深度优先遍历中遇到的第一个“空节点”的索引。(空节点是指:如果父节点有该子节点,则该子节点本应存放的位置。)每个空节点保存下一个将遇到的空节点的索引。

这种方案避免了保留位来标记子节点是否存在,因此每个节点都可以使用完整的整数范围。它还允许不平衡的树——父节点可以只有一个子节点。

输出:

empty tree:  [0]
head node only:  [2,5,0]
example tree: [16,5,3,2,5,14,2,1,0,0, 0,0,9,9,15,0,4];

编码器:

//utility
 /* Number of levels in the tree: 0 for NULL, 1 for a single node. */
 int findDepth(Node* n) {
    int leftDepth, rightDepth;
    if (!n) return 0;
    leftDepth = 1 + findDepth(n->left);
    rightDepth = 1 + findDepth(n->right);
    return ( leftDepth > rightDepth ) ? leftDepth : rightDepth;
 }

//Encode Function
 /*
  * Encodes the tree into a calloc'ed array of size+1 ints, where size is
  * 2^depth (0 for an empty tree). out[0] holds size; the root is stored at
  * index 1 and the children of index i at 2i and 2i+1. out[size] is handed
  * to encodeNode() as the slot for the empty-node chain.
  */
 int* encodeTree(Node* head) {
    int depth = findDepth(head);
    int size = (depth > 0) ? 1 : 0;
    int level;
    int* out;

    for (level = 0; level < depth; level++) size *= 2;

    out = calloc(size+1,sizeof(int));
    out[0]=size;
    encodeNode(head, out,1, out+size);
    return out;
 }

 void encodeNode(Node* n, int* a, int idx, int* pEmpty) {
    if (n) {
       a[idx]=n->data;
       encodeNode(n->left,a,idx*2,pEmpty);
       encodeNode(n->right,a,idx*2+1,pEmpty);
    }
    else if (idx<a[0]) {
       *pEmpty = idx;
       pEmpty = a+idx;
    }
 }

解码器:

 //Decode Function
 Node* decodeArray(int* a) {
    return (a[0]) ?  decodeNode(a,1,a+a[0]) : NULL;
 }

 Node* decodeNode(int* a, int idx, int* pEmpty) {
    Node* n = NULL;
    if (idx== *pEmpty)
       *pEmpty=a[idx];
    else {
       n = calloc(1,sizeof(Node));
       n->data = a[idx];
       if (idx*2<a[0]) {
          n->left = decodeNode(a, idx*2, pEmpty);
          n->right = decodeNode(a, idx*2+1, pEmpty);
       }
    }
    return n;
 }

(感谢@daniel sobral修复格式)


1

Scala:

/** Common interface of all tree elements. */
trait Node {
  /** Serializes this element as an array of integers. */
  def encode (): Array[Int]
}

/** Companion holding the decoder. */
case object Node {
  /** Rebuilds a tree from an array: a single element is a childless node;
    * otherwise element 0 is the node, element 1 its right child and the
    * remaining elements encode the left subtree.
    */
  def decode (a: Array[Int]): InnerNode = a.length match {
    case 1 => InnerNode (a(0))
    case _ =>
      val right = InnerNode (a(1))
      val left  = decode (a.tail.tail)
      InnerNode (a(0), left, right)
  }
}

/** Placeholder for an absent child; contributes nothing to the encoding. */
case object Leaf extends Node {
  def encode (): Array[Int] = {
    Array.empty
  }
}

/** Node carrying a value; both children default to [[Leaf]]. */
case class InnerNode (val data: Int, var l: Node=Leaf, var r: Node=Leaf) extends Node {
  /** Own value first, then the right subtree, then the left subtree. */
  def encode (): Array[Int] = {
    val rightPart = r.encode ()
    val leftPart  = l.encode ()
    Array.concat (Array (data), rightPart, leftPart)
  }
}

/** Decodes a sample array and prints the result of encoding it again. */
object BinTreeTest extends App {
  val decoded = Node.decode (Array (1, 2, 3, 4, 5))
  val reencoded = decoded.encode
  println (reencoded.mkString (", "))
}

这是一种使用过时语法的方法,但是在Scala 2.9.1中编译时没有错误。它生成一个Tree并将每个编码的Tree解码为与编码使用的相同Array。也许我今天摆脱了过时的警告。

哇-那很简单。第一个想法立即生效。

By using our site, you acknowledge that you have read and understand our Cookie Policy and Privacy Policy.
Licensed under cc by-sa 3.0 with attribution required.