C ++中的base64解码代码段


Answers:


91

Google是你的朋友

这是该页面的实现:

/* 
   base64.cpp and base64.h

   Copyright (C) 2004-2008 René Nyffenegger

   This source code is provided 'as-is', without any express or implied
   warranty. In no event will the author be held liable for any damages
   arising from the use of this software.

   Permission is granted to anyone to use this software for any purpose,
   including commercial applications, and to alter it and redistribute it
   freely, subject to the following restrictions:

   1. The origin of this source code must not be misrepresented; you must not
      claim that you wrote the original source code. If you use this source code
      in a product, an acknowledgment in the product documentation would be
      appreciated but is not required.

   2. Altered source versions must be plainly marked as such, and must not be
      misrepresented as being the original source code.

   3. This notice may not be removed or altered from any source distribution.

   René Nyffenegger rene.nyffenegger@adp-gmbh.ch

*/

// The 64 symbols of the standard Base64 alphabet. A symbol's position in this
// string is the 6-bit value it stands for.
static const std::string base64_chars(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");


/**
 * Report whether `c` is one of the 64 symbols of the standard Base64 alphabet
 * (A-Z, a-z, 0-9, '+', '/'). The padding character '=' is not a symbol and
 * yields false.
 *
 * Explicit range checks replace isalnum(): isalnum() consults the current C
 * locale, so after a setlocale() call it may accept bytes above 0x7F that are
 * not in the alphabet, and it also needs <cctype>, which this file never
 * includes. The ranges assume an ASCII-compatible execution character set.
 */
static inline bool is_base64(unsigned char c) {
  return (c >= 'A' && c <= 'Z') ||
         (c >= 'a' && c <= 'z') ||
         (c >= '0' && c <= '9') ||
         (c == '+') || (c == '/');
}

/**
 * Encode a raw byte buffer as standard Base64 text with '=' padding.
 *
 * @param bytes_to_encode  pointer to the first input byte; it is not
 *                         dereferenced when in_len is 0
 * @param in_len           number of bytes to read from bytes_to_encode
 * @return the encoded text, always 4 * ceil(in_len / 3) characters long
 */
std::string base64_encode(unsigned char const* bytes_to_encode, unsigned int in_len) {
  // A plain character array is constant-initialised, so the function is safe
  // to call even from another translation unit's static initialisers.
  static const char alphabet[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz"
      "0123456789+/";

  std::string ret;
  // The final length is known up front; reserve once instead of growing
  // the string character by character.
  ret.reserve((static_cast<std::string::size_type>(in_len) + 2) / 3 * 4);

  // Complete 3-byte groups map to exactly 4 symbols each.
  unsigned int pos = 0;
  for (; in_len - pos >= 3; pos += 3) {
    const unsigned char b0 = bytes_to_encode[pos];
    const unsigned char b1 = bytes_to_encode[pos + 1];
    const unsigned char b2 = bytes_to_encode[pos + 2];
    ret += alphabet[b0 >> 2];
    ret += alphabet[((b0 & 0x03) << 4) | (b1 >> 4)];
    ret += alphabet[((b1 & 0x0f) << 2) | (b2 >> 6)];
    ret += alphabet[b2 & 0x3f];
  }

  // One or two leftover bytes: missing input bits are zero and the group is
  // completed with '=' so the output length stays a multiple of 4.
  const unsigned int rest = in_len - pos;
  if (rest != 0) {
    const unsigned char b0 = bytes_to_encode[pos];
    const unsigned char b1 = (rest == 2) ? bytes_to_encode[pos + 1] : 0;
    ret += alphabet[b0 >> 2];
    ret += alphabet[((b0 & 0x03) << 4) | (b1 >> 4)];
    ret += (rest == 2) ? alphabet[(b1 & 0x0f) << 2] : '=';
    ret += '=';
  }

  return ret;
}
/**
 * Map one character to its 6-bit Base64 value, or -1 when the character is
 * not part of the standard alphabet (this includes the padding '=').
 * Assumes an ASCII-compatible execution character set.
 */
static int base64_value_of(unsigned char c) {
  if (c >= 'A' && c <= 'Z') return c - 'A';
  if (c >= 'a' && c <= 'z') return c - 'a' + 26;
  if (c >= '0' && c <= '9') return c - '0' + 52;
  if (c == '+') return 62;
  if (c == '/') return 63;
  return -1;
}

/**
 * Decode standard Base64 text into the bytes it represents.
 *
 * Decoding stops at the first character that is not an alphabet symbol, so
 * '=' padding (or any other foreign character) ends the input. An unpadded
 * tail of two or three symbols yields one or two bytes; a single dangling
 * symbol yields nothing.
 *
 * @param encoded_string  Base64 text, padded or unpadded
 * @return the decoded bytes, stored in a std::string
 */
std::string base64_decode(std::string const& encoded_string) {
  std::string ret;
  ret.reserve(encoded_string.size() / 4 * 3 + 2);

  unsigned int acc = 0;  // bit accumulator; unsigned so shifting may wrap safely
  int bits = 0;          // number of not-yet-emitted bits held in acc

  // Index with size_type: an int would truncate very large inputs.
  for (std::string::size_type k = 0; k < encoded_string.size(); ++k) {
    const int value = base64_value_of(static_cast<unsigned char>(encoded_string[k]));
    if (value < 0)
      break;

    acc = (acc << 6) | static_cast<unsigned int>(value);
    bits += 6;
    if (bits >= 8) {
      bits -= 8;
      ret += static_cast<char>((acc >> bits) & 0xff);
    }
  }

  return ret;
}

18
出于好奇说一句:这是搜索“base64 decode c++”时的第一条结果。

1
是的,我以某种方式错过了这一点。我尝试了Google代码搜索,而不是普通的旧版Google。谢谢。
08年

12
现在这个问题有2500个视图,所以我想我不是唯一一个。:)
参加了2012年

2
我们应该避免所有这些不必要的字符串连接-因为我们知道in_len,我们知道的长度ret,为什么不在初始化时给它固定的长度?

3
Google并不总是您的朋友。此实现几乎是您可能会选择的最糟糕的实现。看到这个:stackoverflow.com/questions/342409/…–
DaedalusAlpha,

108

这是我对最初由 René Nyffenegger 编写的实现所做的修改。为什么要修改它?因为在我看来,用 std::string 对象来存放二进制数据并不合适 ;)

base64.h

// Guard renamed from _BASE64_H_: identifiers that begin with an underscore
// followed by an upper-case letter are reserved for the implementation.
#ifndef BASE64_H_
#define BASE64_H_

#include <vector>
#include <string>

/// One raw octet of binary data.
typedef unsigned char BYTE;

/// Encode `bufLen` bytes starting at `buf` as '='-padded standard Base64 text.
std::string base64_encode(BYTE const* buf, unsigned int bufLen);

/// Decode Base64 text into raw bytes. Decoding stops at the first character
/// that is not a Base64 alphabet symbol (which includes the '=' padding).
std::vector<BYTE> base64_decode(std::string const&);

#endif  // BASE64_H_

base64.cpp

#include "base64.h"
#include <iostream>

// Standard Base64 alphabet; the index of a symbol in this string is the
// 6-bit value that symbol encodes.
static const std::string base64_chars(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/");


/**
 * Report whether `c` is one of the 64 symbols of the standard Base64 alphabet
 * (A-Z, a-z, 0-9, '+', '/'). The padding character '=' yields false.
 *
 * Explicit range checks replace isalnum(), which depends on the current C
 * locale (it may accept bytes above 0x7F after setlocale()) and requires
 * <cctype>, which this file does not include. The ranges assume an
 * ASCII-compatible execution character set.
 */
static inline bool is_base64(BYTE c) {
  return (c >= 'A' && c <= 'Z') ||
         (c >= 'a' && c <= 'z') ||
         (c >= '0' && c <= '9') ||
         (c == '+') || (c == '/');
}

/**
 * Encode a raw byte buffer as standard Base64 text with '=' padding.
 *
 * @param buf     pointer to the first input byte; not dereferenced when
 *                bufLen is 0
 * @param bufLen  number of bytes to read from buf
 * @return the encoded text, always 4 * ceil(bufLen / 3) characters long
 */
std::string base64_encode(BYTE const* buf, unsigned int bufLen) {
  // A plain character array is constant-initialised, so the function does not
  // depend on the construction order of any global std::string.
  static const char alphabet[] =
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz"
      "0123456789+/";

  std::string ret;
  // The output length is known up front; reserve once.
  ret.reserve((static_cast<std::string::size_type>(bufLen) + 2) / 3 * 4);

  // Complete 3-byte groups map to exactly 4 symbols each.
  unsigned int pos = 0;
  for (; bufLen - pos >= 3; pos += 3) {
    const BYTE b0 = buf[pos];
    const BYTE b1 = buf[pos + 1];
    const BYTE b2 = buf[pos + 2];
    ret += alphabet[b0 >> 2];
    ret += alphabet[((b0 & 0x03) << 4) | (b1 >> 4)];
    ret += alphabet[((b1 & 0x0f) << 2) | (b2 >> 6)];
    ret += alphabet[b2 & 0x3f];
  }

  // One or two leftover bytes: missing bits are zero, the group is padded
  // with '=' to four characters.
  const unsigned int rest = bufLen - pos;
  if (rest != 0) {
    const BYTE b0 = buf[pos];
    const BYTE b1 = (rest == 2) ? buf[pos + 1] : 0;
    ret += alphabet[b0 >> 2];
    ret += alphabet[((b0 & 0x03) << 4) | (b1 >> 4)];
    ret += (rest == 2) ? alphabet[(b1 & 0x0f) << 2] : '=';
    ret += '=';
  }

  return ret;
}

std::vector<BYTE> base64_decode(std::string const& encoded_string) {
  int in_len = encoded_string.size();
  int i = 0;
  int j = 0;
  int in_ = 0;
  BYTE char_array_4[4], char_array_3[3];
  std::vector<BYTE> ret;

  while (in_len-- && ( encoded_string[in_] != '=') && is_base64(encoded_string[in_])) {
    char_array_4[i++] = encoded_string[in_]; in_++;
    if (i ==4) {
      for (i = 0; i <4; i++)
        char_array_4[i] = base64_chars.find(char_array_4[i]);

      char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
      char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
      char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];

      for (i = 0; (i < 3); i++)
          ret.push_back(char_array_3[i]);
      i = 0;
    }
  }

  if (i) {
    for (j = i; j <4; j++)
      char_array_4[j] = 0;

    for (j = 0; j <4; j++)
      char_array_4[j] = base64_chars.find(char_array_4[j]);

    char_array_3[0] = (char_array_4[0] << 2) + ((char_array_4[1] & 0x30) >> 4);
    char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
    char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];

    for (j = 0; (j < i - 1); j++) ret.push_back(char_array_3[j]);
  }

  return ret;
}

这是用法:

// Round trip: encode the bytes of myData to Base64 text, then decode the text
// back into bytes.
// NOTE: &myData[0] is only valid for a non-empty vector; the elided lines are
// presumably where myData gets filled.
std::vector<BYTE> myData;
...
std::string encodedData = base64_encode(&myData[0], myData.size());
std::vector<BYTE> decodedData = base64_decode(encodedData);

我希望这个答案能对某些人有所帮助 ^^


2
谢谢。使用代码有任何限制吗?(我的个人情况将是学术性的CFD代码)。
Azrael3000

1
我试图使用您的方法通过将整个矢量写入具有其大小的CFile来解码jpg文件,但是图像头损坏了也就不足为奇了。大小相等。有什么更好的主意如何还原图像文件?
masche

3
@masche:这是关于在字节级别上对任何类型的数据进行编码和解码。图像->原始数据(字节)->编码为base64字符串,然后回溯为base64字符串->解码为原始数据(字节)->建立一些输入流或对象或在其之上的任何东西以将其用作图像再次...
LihO

1
抱歉-您的代码可以正常运行,甚至可以还原图像。我只是简单地将整个向量压缩到一个CFile中是愚蠢的,这是行不通的!如果我对vecteor进行迭代并将每个字节写入文件,它将起作用。也许文件流是一个更好的解决方案。
masche

4
在std :: string中存储二进制数据有什么问题?
GaspardP

46

这里有几个片段。但是,这是紧凑,高效且对c ++ 11友好的:

/**
 * Encode the bytes of `in` as standard Base64 text with '=' padding.
 *
 * Bytes are shifted into an accumulator eight bits at a time and drained six
 * bits at a time. The accumulator is unsigned: it is never cleared, so a
 * signed int would eventually overflow on the left shift, which is undefined
 * behaviour. Only its low bits are ever read, so unsigned wrap-around is
 * harmless.
 *
 * @param in  bytes to encode (may contain embedded zero bytes)
 * @return the encoded text, 4 * ceil(in.size() / 3) characters long
 */
static std::string base64_encode(const std::string &in) {
    static const char kAlphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    std::string out;
    out.reserve((in.size() + 2) / 3 * 4);

    unsigned int val = 0;  // bit accumulator
    int valb = -6;         // (bits pending in val) - 6
    for (unsigned char c : in) {
        val = (val << 8) | c;
        valb += 8;
        while (valb >= 0) {
            out.push_back(kAlphabet[(val >> valb) & 0x3F]);
            valb -= 6;
        }
    }
    // Two or four bits are left over: left-align them in a final symbol.
    if (valb > -6) out.push_back(kAlphabet[((val << 8) >> (valb + 8)) & 0x3F]);
    while (out.size() % 4) out.push_back('=');
    return out;
}

/**
 * Decode standard Base64 text into the bytes it represents.
 *
 * Decoding stops at the first character outside the alphabet, so '=' padding
 * or any foreign character ends the input. The accumulator is unsigned
 * because it is never cleared: a signed int would eventually overflow on the
 * left shift, which is undefined behaviour.
 *
 * @param in  Base64 text, padded or unpadded
 * @return the decoded bytes, stored in a std::string
 */
static std::string base64_decode(const std::string &in) {
    static const char kAlphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    // Reverse table: character code -> 6-bit value, -1 for "not a symbol".
    // A stack array avoids the heap allocation a std::vector would cost on
    // every call.
    int lookup[256];
    for (int &slot : lookup) slot = -1;
    for (int i = 0; i < 64; i++) lookup[static_cast<unsigned char>(kAlphabet[i])] = i;

    std::string out;
    out.reserve(in.size() / 4 * 3 + 2);

    unsigned int val = 0;  // bit accumulator
    int valb = -8;         // (bits pending in val) - 8
    for (unsigned char c : in) {
        if (lookup[c] == -1) break;
        val = (val << 6) | static_cast<unsigned int>(lookup[c]);
        valb += 6;
        if (valb >= 0) {
            out.push_back(char((val >> valb) & 0xFF));
            valb -= 8;
        }
    }
    return out;
}

1
我应该收回这一点...对不起,我不应该在调试模式下进行性能测试。至少比公认的解决方案要快。
Marco Freudenberger

5
位偏移“ int val”超出其范围是UB。“ unsigned val = 0; int valb = ...”是正确的。
凯文·尹

22

我认为这更好用:

#include <string>

// Encoding table: the symbol at index v stands for the 6-bit value v.
static const char* B64chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Decoding table, indexed by character code, giving the 6-bit value.
//  - 'A'-'Z' -> 0-25, 'a'-'z' -> 26-51, '0'-'9' -> 52-61.
//  - '+', '-' and '.' all map to 62; ',', '/' and '_' all map to 63, so the
//    URL-safe alphabet (and two further variants) decode as well.
//  - Every other listed character maps to 0, i.e. it is indistinguishable
//    from 'A'; the table cannot be used to detect invalid input.
//  - Only 123 initialisers are given (up to 'z'); the remaining entries of
//    the 256-element array are zero-initialised.
static const int B64index[256] =
{
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  62, 63, 62, 62, 63,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 0,  0,  0,  0,  0,  0,
    0,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0,  0,  0,  0,  63,
    0,  26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51
};

/**
 * Encode `len` bytes starting at `data` as '='-padded standard Base64 text.
 *
 * The input is read through an `unsigned char` pointer. Reading it as plain
 * `char` sign-extends bytes >= 0x80 on platforms where char is signed, which
 * makes the packed value negative and produces out-of-range indices into
 * B64chars.
 *
 * @param data  pointer to the first input byte; not dereferenced when len is 0
 * @param len   number of bytes to encode
 * @return the encoded text, (len + 2) / 3 * 4 characters long
 */
const std::string b64encode(const void* data, const size_t &len)
{
    // Pre-fill with '=' so padding positions need no further writes.
    std::string result((len + 2) / 3 * 4, '=');
    const unsigned char *p = static_cast<const unsigned char*>(data);
    char *str = &result[0];
    size_t j = 0;
    const size_t pad = len % 3;       // bytes left over after the full groups
    const size_t last = len - pad;    // offset of the first leftover byte

    for (size_t i = 0; i < last; i += 3)
    {
        const unsigned int n = static_cast<unsigned int>(p[i]) << 16
                             | static_cast<unsigned int>(p[i + 1]) << 8
                             | p[i + 2];
        str[j++] = B64chars[n >> 18];
        str[j++] = B64chars[n >> 12 & 0x3F];
        str[j++] = B64chars[n >> 6 & 0x3F];
        str[j++] = B64chars[n & 0x3F];
    }
    if (pad == 2)
    {
        // 16 bits -> three symbols, the last one left-aligned; one '=' remains.
        const unsigned int n = static_cast<unsigned int>(p[last]) << 8 | p[last + 1];
        str[j++] = B64chars[n >> 10 & 0x3F];
        str[j++] = B64chars[n >> 4 & 0x3F];
        str[j++] = B64chars[n << 2 & 0x3F];
    }
    else if (pad == 1)
    {
        // 8 bits -> two symbols, the second left-aligned; two '=' remain.
        const unsigned int n = p[last];
        str[j++] = B64chars[n >> 2];
        str[j++] = B64chars[n << 4 & 0x3F];
    }
    return result;
}

/**
 * Decode `len` characters of Base64 text starting at `data`.
 *
 * Trailing '=' padding is optional. Characters that are not Base64 symbols
 * are not rejected; B64index maps them to the value 0.
 *
 * Trailing '=' characters are stripped first and the tail length is derived
 * from what remains. This fixes two defects of computing the tail from
 * `len % 4` and `p[len - 2]` directly: reads outside the buffer when
 * `len % 4` is 1 or 2 (including `p[len - 2]` for len == 1), and a spurious
 * extra output byte for an unpadded two-symbol tail. A single dangling symbol
 * cannot encode a byte and is ignored.
 *
 * @param data  pointer to the first character; not dereferenced when len is 0
 * @param len   number of characters to decode
 * @return the decoded bytes, stored in a std::string
 */
const std::string b64decode(const void* data, const size_t &len)
{
    if (len == 0) return "";

    const unsigned char *p = static_cast<const unsigned char*>(data);

    // Number of characters that carry data once the padding is ignored.
    size_t used = len;
    while (used > 0 && p[used - 1] == '=')
        --used;

    const size_t tail = used % 4;                       // symbols in the partial group
    const size_t last = used - tail;                    // end of the complete groups
    const size_t extra = (tail >= 2) ? tail - 1 : 0;    // bytes the partial group yields

    std::string result(last / 4 * 3 + extra, '\0');
    unsigned char *str = reinterpret_cast<unsigned char*>(&result[0]);
    size_t j = 0;

    for (size_t i = 0; i < last; i += 4)
    {
        const int n = B64index[p[i]] << 18 | B64index[p[i + 1]] << 12 | B64index[p[i + 2]] << 6 | B64index[p[i + 3]];
        str[j++] = static_cast<unsigned char>(n >> 16);
        str[j++] = static_cast<unsigned char>(n >> 8 & 0xFF);
        str[j++] = static_cast<unsigned char>(n & 0xFF);
    }
    if (extra > 0)
    {
        int n = B64index[p[last]] << 18 | B64index[p[last + 1]] << 12;
        str[j++] = static_cast<unsigned char>(n >> 16);
        if (extra == 2)
        {
            n |= B64index[p[last + 2]] << 6;
            str[j++] = static_cast<unsigned char>(n >> 8 & 0xFF);
        }
    }
    return result;
}

/// Convenience overload: Base64-encode the bytes held in `str` by forwarding
/// its character buffer and length to the pointer/length overload.
std::string b64encode(const std::string& str)
{
    const char* const bytes = str.c_str();
    const std::string::size_type count = str.size();
    return b64encode(bytes, count);
}

/// Convenience overload: decode the Base64 text held in `str64` by forwarding
/// its character buffer and length to the pointer/length overload.
std::string b64decode(const std::string& str64)
{
    const char* const text = str64.c_str();
    const std::string::size_type count = str64.size();
    return b64decode(text, count);
}

感谢@Jens Alfke指出了性能问题,我对此帖子进行了一些修改。这一工作比以前更快。它的另一个优点是也可以平稳地处理损坏的数据。

上一版:尽管在这类问题中,看来速度是一个过大的选择,但仅出于乐趣,我进行了一些其他修改,以使其成为AFAIK中最快的算法。特别感谢@GaspardP的宝贵建议和不错的基准。


1
所有这些调用strchr都会使解码器变慢-您解码的每个字节平均循环32次。大多数解决方案都使用256项查找表来避免这种情况,这要快得多。
詹斯·阿尔夫克

3
我在stackoverflow.com/questions/342409/…上发表了评论-尽管此代码并不是最快的编码,但它是最快的解码(与其他16个实现相比)。
GaspardP

3
在解码器中,可以用 char* 代替 std::string 来进一步提高性能(在我的测试中提升 10-15%):只需写 char* out = &str[0],然后使用 out[j++] 而不是 str[j++],这样可以跳过 std::string::operator[] 中不必要的检查。另外,为了避免最后一次代价可能很高的 push_back,可以多分配一些字节(std::string str; str.resize(3*((len+3)/4));),在所有地方都使用 out[j++],最后以 str.resize(j); 结尾。
GaspardP

1
您在上次编辑中添加了内存泄漏,更不用说缓冲区副本了。不要使用new没有delete。实际上,根本不使用new。@Gaspard,我不知道任何“不必要的检查std::string::operator[]”(实际上,我有把握地确定,至少在发行版中没有检查),但是您可以使用一个vector<char>非常绝望的方法-无论如何您的替换代码不是加斯帕德(Gaspard)提出的建议,尽管我认为这是不必要的,但仍然使用字符串来进行除元素访问之外的所有操作,并且是安全/快速的;)
Lightness Races in Orbit

5
@polfosolఠ_ఠ,谢谢你。请考虑使用unsigned char *p,而不是char *p在编码器中使用。如果输入中包含字节,则base64字符串损坏了>= 0x80。添加后unsigned,似乎一切正常。
丹尼斯·戈洛夫金

15

使用base-n mini lib,您可以执行以下操作:

some_data_t in[] { ... };
constexpr int len = sizeof(in)/sizeof(in[0]);

std::string encoded;
bn::encode_b64(in, in + len, std::back_inserter(encoded));

some_data_t out[len];
bn::decode_b64(encoded.begin(), encoded.end(), out);

该API是通用的,基于迭代器的。

披露:我是作者。


3
std::size 会比这种 sizeof 的 hack 写法更好。
亮度种族在轨道

1
确实,从 C++17 开始,这显然是正确的做法。
azawadzki

1
从一开始,这就是正确的方法。只是在 C++17 之前,您必须自己实现它(但实际上只需一行代码)。在 C++ 中,sizeof 这种 hack 从来都不可取。
Lightness Races in Orbit

13

根据 GaspardP 所做的这个精彩比较,我不会选择这种解决方案。它不是最差的,但也不是最好的。它唯一的优点是可能更容易理解。

我发现其他两个答案很难理解。它们还会在我的编译器中产生一些警告,并且在解码部分中使用find函数会导致非常差的效率。所以我决定自己动手。

标头:

// Guard renamed from _BASE64_H_: identifiers that begin with an underscore
// followed by an upper-case letter are reserved for the implementation.
#ifndef BASE64_H_
#define BASE64_H_

#include <vector>
#include <string>

/// One raw octet of binary data.
typedef unsigned char BYTE;

/// Stateless Base64 codec; all members are static.
class Base64
{
public:
    /// Encode the bytes of `buf` as '='-padded Base64 text.
    static std::string encode(const std::vector<BYTE>& buf);

    /// Encode `bufLen` bytes starting at `buf` as '='-padded Base64 text.
    static std::string encode(const BYTE* buf, unsigned int bufLen);

    /// Decode Base64 text into raw bytes. The argument is taken by value
    /// because the implementation appends '=' to it until its length is a
    /// multiple of 4.
    static std::vector<BYTE> decode(std::string encoded_string);
};

#endif  // BASE64_H_

实现:

// Decoding table for character codes 0-127 (8 rows of 16), giving the 6-bit
// value of each character; 255 marks "not a Base64 symbol".
//  - 'A'-'Z' -> 0-25, 'a'-'z' -> 26-51, '0'-'9' -> 52-61.
//  - '+' and '-' both map to 62, '/' and '_' both map to 63, so the URL-safe
//    alphabet is accepted as well.
//  - The padding character '=' is 255 like every other non-symbol.
static const BYTE from_base64[] = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  
                                    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
                                    255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  62, 255,  62, 255,  63, 
                                     52,  53,  54,  55,  56,  57,  58,  59,  60,  61, 255, 255, 255, 255, 255, 255, 
                                    255,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
                                     15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25, 255, 255, 255, 255,  63,
                                    255,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40, 
                                     41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51, 255, 255, 255, 255, 255};

// Encoding table: the symbol at index v stands for the 6-bit value v.
static const char to_base64[] = 
             "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
             "abcdefghijklmnopqrstuvwxyz"
             "0123456789+/";


// Vector convenience overload: forwards the vector's storage and size to the
// pointer/length overload.
std::string Base64::encode(const std::vector<BYTE>& buf)
{
    // &buf[0] must not be evaluated for an empty vector, so only the
    // non-empty case is forwarded.
    if (!buf.empty())
        return encode(&buf[0], (unsigned int)buf.size());
    return "";
}

std::string Base64::encode(const BYTE* buf, unsigned int bufLen)
{
    // Calculate how many bytes that needs to be added to get a multiple of 3
    size_t missing = 0;
    size_t ret_size = bufLen;
    while ((ret_size % 3) != 0)
    {
        ++ret_size;
        ++missing;
    }

    // Expand the return string size to a multiple of 4
    ret_size = 4*ret_size/3;

    std::string ret;
    ret.reserve(ret_size);

    for (unsigned int i=0; i<ret_size/4; ++i)
    {
        // Read a group of three bytes (avoid buffer overrun by replacing with 0)
        size_t index = i*3;
        BYTE b3[3];
        b3[0] = (index+0 < bufLen) ? buf[index+0] : 0;
        b3[1] = (index+1 < bufLen) ? buf[index+1] : 0;
        b3[2] = (index+2 < bufLen) ? buf[index+2] : 0;

        // Transform into four base 64 characters
        BYTE b4[4];
        b4[0] =                         ((b3[0] & 0xfc) >> 2);
        b4[1] = ((b3[0] & 0x03) << 4) + ((b3[1] & 0xf0) >> 4);
        b4[2] = ((b3[1] & 0x0f) << 2) + ((b3[2] & 0xc0) >> 6);
        b4[3] = ((b3[2] & 0x3f) << 0);

        // Add the base 64 characters to the return value
        ret.push_back(to_base64[b4[0]]);
        ret.push_back(to_base64[b4[1]]);
        ret.push_back(to_base64[b4[2]]);
        ret.push_back(to_base64[b4[3]]);
    }

    // Replace data that is invalid (always as many as there are missing bytes)
    for (size_t i=0; i<missing; ++i)
        ret[ret_size - i - 1] = '=';

    return ret;
}

std::vector<BYTE> Base64::decode(std::string encoded_string)
{
    // Make sure string length is a multiple of 4
    while ((encoded_string.size() % 4) != 0)
        encoded_string.push_back('=');

    size_t encoded_size = encoded_string.size();
    std::vector<BYTE> ret;
    ret.reserve(3*encoded_size/4);

    for (size_t i=0; i<encoded_size; i += 4)
    {
        // Get values for each group of four base 64 characters
        BYTE b4[4];
        b4[0] = (encoded_string[i+0] <= 'z') ? from_base64[encoded_string[i+0]] : 0xff;
        b4[1] = (encoded_string[i+1] <= 'z') ? from_base64[encoded_string[i+1]] : 0xff;
        b4[2] = (encoded_string[i+2] <= 'z') ? from_base64[encoded_string[i+2]] : 0xff;
        b4[3] = (encoded_string[i+3] <= 'z') ? from_base64[encoded_string[i+3]] : 0xff;

        // Transform into a group of three bytes
        BYTE b3[3];
        b3[0] = ((b4[0] & 0x3f) << 2) + ((b4[1] & 0x30) >> 4);
        b3[1] = ((b4[1] & 0x0f) << 4) + ((b4[2] & 0x3c) >> 2);
        b3[2] = ((b4[2] & 0x03) << 6) + ((b4[3] & 0x3f) >> 0);

        // Add the byte to the return value if it isn't part of an '=' character (indicated by 0xff)
        if (b4[1] != 0xff) ret.push_back(b3[0]);
        if (b4[2] != 0xff) ret.push_back(b3[1]);
        if (b4[3] != 0xff) ret.push_back(b3[2]);
    }

    return ret;
}

用法:

// buf has five elements (the four letters plus the literal's terminating
// zero); only the first four are encoded.
BYTE buf[] = "ABCD";
std::string encoded = Base64::encode(buf, 4);
// encoded = "QUJDRA=="
std::vector<BYTE> decoded = Base64::decode(encoded);

这样做的好处是,解码功能还可以解码base 64编码的url变体。


2
没有find()的奖励点,以及输出的reserve()的奖励点。一点点偏离,因为您将输入作为副本(因此,如果需要,可以在末尾添加=)。如果这是没有复制的事情,那会很好。
优雅的骰子

3
更长:我最喜欢这个答案……也很适合行数。没有find()的奖励点,以及输出的reserve()的奖励点。我要改进的事情(这会占用大量代码):您将输入作为副本(因此,如果需要,可以在末尾添加=)。如果这是没有复制的事情,那会很好。并且也可能被编写为普通函数-不需要该类。并且应在取消引用buf [0]之前检查空的buf向量。并添加接口以将数据写出到引用中(以便调用者可以重用内存)。
优雅的骰子

1
感谢您的反馈,我按值取字符串的原因是,如果我始终可以保证其长度有效,那么它将简化代码。我认为在大多数情况下,RVO无论如何都应防止字符串复制,因此这不是问题。至于取消对buf [0]的引用-很好的捕获,我将修复此问题:)
DaedalusAlpha

1
我完成了您自己的修改后的版本,为您的愉悦,我将在此处发布答案作为答案...我通过添加测试来消除了将字符串作为副本的需要(您已经进行了测试,因此实际上并没有添加任何性能问题)。
优雅的骰子

1
我可能是错的,但是不应该交换from_base64 []中索引60和61的值吗?我猜想这个想法是通过返回255来忽略“ <”(索引= 60),并通过返回0来识别填充字符“ =“(索引= 61)
。– pbyhistorian

5

我对@DaedalusAlpha答案的变化。它避免了以两次测试为代价来复制参数。

使用uint8_t代替BYTE。

输入数据通常是二进制的,内部可能包含零字节,因此一般不应把它当作字符串来处理(字符串往往意味着以空字符结尾的数据);尽管如此,这里仍添加了一些处理字符串的便捷函数。

还添加了一些强制转换来修复编译器警告(至少在GCC上,我还没有通过MSVC运行它)。

base64.hpp的一部分:

   // Encoders: each replaces the contents of `out` with the '='-padded Base64
   // text of the given bytes.
   void base64_encode(string & out, const vector<uint8_t>& buf);
   void base64_encode(string & out, const uint8_t* buf, size_t bufLen);
   void base64_encode(string & out, string const& buf);

   // Decoder: replaces the contents of `out` with the decoded bytes.
   void base64_decode(vector<uint8_t> & out, string const& encoded_string);

   // use this if you know the output should be a valid string
   void base64_decode(string & out, string const& encoded_string);

base64.cpp:

// Decoding table for character codes 0-127, giving the 6-bit value of each
// character; 255 marks "not a Base64 symbol".
//  - 'A'-'Z' -> 0-25, 'a'-'z' -> 26-51, '0'-'9' -> 52-61.
//  - '+' and '-' both map to 62, '/' and '_' both map to 63 (URL-safe variant).
//  - The padding character '=' (code 61) is 255, which is what makes the
//    decoder skip it.
// The entry for '<' (code 60) is 255 as well; it used to be 0, which made
// '<' decode as if it were 'A'.
static const uint8_t from_base64[128] = {
   // 8 rows of 16 = 128
   // note: only require 123 entries, as we only lookup for <= z , which z=122
               255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
               255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,
               255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255,  62, 255,  62, 255,  63,
                52,  53,  54,  55,  56,  57,  58,  59,  60,  61, 255, 255, 255, 255, 255, 255,
               255,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,
                15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25, 255, 255, 255, 255,  63,
               255,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  38,  39,  40,
                41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51, 255, 255, 255, 255, 255
            };

// Encoding table: the symbol at index v stands for the 6-bit value v.
// 64 symbols plus the terminating zero of the literal fill all 65 elements.
static const char to_base64[65] =
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";


// String convenience overload: encodes the bytes stored in `buf`.
// An empty string is forwarded as a null pointer with length 0 so that
// &buf[0] is never formed for it.
void base64_encode(string & out, string const& buf)
{
   if (!buf.empty())
   {
      uint8_t const* const bytes = reinterpret_cast<uint8_t const*>(&buf[0]);
      base64_encode(out, bytes, buf.size());
      return;
   }
   base64_encode(out, NULL, 0);
}


// Vector convenience overload: encodes the bytes stored in `buf`.
// An empty vector is forwarded as a null pointer with length 0 so that
// &buf[0] is never formed for it.
void base64_encode(string & out, std::vector<uint8_t> const& buf)
{
   if (!buf.empty())
   {
      uint8_t const* const bytes = &buf[0];
      base64_encode(out, bytes, buf.size());
      return;
   }
   base64_encode(out, NULL, 0);
}

// Replace the contents of `ret` with the '='-padded Base64 text of the
// bufLen bytes starting at buf. The input is treated as zero-extended to a
// whole number of 3-byte groups; symbols that stem only from the extension
// are overwritten with '=' at the end.
void base64_encode(string & ret, uint8_t const* buf, size_t bufLen)
{
   // Zero bytes needed to reach a multiple of 3 (0, 1 or 2).
   const size_t missing = (3 - bufLen % 3) % 3;

   // Every 3 (extended) input bytes become 4 output characters.
   const size_t ret_size = 4*(bufLen + missing)/3;

   ret.clear();
   ret.reserve(ret_size);

   const size_t groups = ret_size/4;
   for (size_t g = 0; g != groups; ++g)
   {
      // Pack three bytes into 24 bits; bytes past the end of the input count as 0.
      const size_t index = g*3;
      unsigned int triple = 0;
      for (size_t k = 0; k < 3; ++k)
         triple = (triple << 8) | ((index+k < bufLen) ? buf[index+k] : 0u);

      // Emit the four 6-bit slices, most significant first.
      ret.push_back(to_base64[(triple >> 18) & 0x3f]);
      ret.push_back(to_base64[(triple >> 12) & 0x3f]);
      ret.push_back(to_base64[(triple >> 6) & 0x3f]);
      ret.push_back(to_base64[triple & 0x3f]);
   }

   // The last `missing` symbols carry no input data.
   for (size_t k = 1; k <= missing; ++k)
      ret[ret_size - k] = '=';
}


// Decode Base64 text into any container that offers value_type, clear(),
// reserve() and push_back() (the file uses std::string and
// std::vector<uint8_t>).
//
// The input is processed as if it were extended with padding to a multiple of
// four characters, without copying it. Within each group a byte is emitted
// only when the symbol that completes it is valid (not 0xff).
//
// Characters are converted to uint8_t before the range check. Comparing the
// plain char against 'z' lets bytes >= 0x80 through where char is signed, and
// the converted index (128-255) then reads past the 128-entry table.
template <class Out>
void base64_decode_any( Out & ret, std::string const& in)
{
   typedef typename Out::value_type T;

   const size_t N = in.size();

   // The *intended* length: N rounded up to a multiple of 4.
   size_t encoded_size = N;
   while ((encoded_size % 4) != 0)
      ++encoded_size;

   ret.clear();
   ret.reserve(3*encoded_size/4);

   // 6-bit value of the character at `pos`; 0xff when `pos` is past the end
   // of the input or the character is not a Base64 symbol.
   auto sextet_at = [&in, N](size_t pos) -> uint8_t {
      if (pos >= N)
         return 0xff;
      const uint8_t code = static_cast<uint8_t>(in[pos]);
      return (code <= 'z') ? from_base64[code] : 0xff;
   };

   for (size_t i = 0; i < encoded_size; i += 4)
   {
      // Get values for each group of four base 64 characters
      const uint8_t b4_0 = sextet_at(i+0);
      const uint8_t b4_1 = sextet_at(i+1);
      const uint8_t b4_2 = sextet_at(i+2);
      const uint8_t b4_3 = sextet_at(i+3);

      // Transform into a group of three bytes
      const uint8_t b3_0 = ((b4_0 & 0x3f) << 2) + ((b4_1 & 0x30) >> 4);
      const uint8_t b3_1 = ((b4_1 & 0x0f) << 4) + ((b4_2 & 0x3c) >> 2);
      const uint8_t b3_2 = ((b4_2 & 0x03) << 6) + ((b4_3 & 0x3f) >> 0);

      // Add the byte to the return value if it isn't part of an '=' character (indicated by 0xff)
      if (b4_1 != 0xff) ret.push_back( static_cast<T>(b3_0) );
      if (b4_2 != 0xff) ret.push_back( static_cast<T>(b3_1) );
      if (b4_3 != 0xff) ret.push_back( static_cast<T>(b3_2) );
   }
}

// Decode Base64 text into a byte vector; thin wrapper over the generic decoder.
void base64_decode(vector<uint8_t> & out, string const& encoded_string)
{
   base64_decode_any< vector<uint8_t> >(out, encoded_string);
}

// Decode Base64 text into a string; thin wrapper over the generic decoder.
void base64_decode(string & out, string const& encoded_string)
{
   base64_decode_any< string >(out, encoded_string);
}

1
我不知道&&和|| 在C ++中有一个已定义的评估顺序,所以我今天学到了一些新东西。在这种情况下,如果要检查索引的条件,但同时要确保索引不超出范围,则它非常有用。
DaedalusAlpha '02

1
是的,我一直都在使用这种技术。
优雅的骰子

4

使用更紧凑的查找表并使用c ++ 17功能进行了一些改动:

/**
 * Decode Base64 text (standard or URL-safe alphabet) into the bytes it
 * represents.
 *
 * Decoding stops at the first character that is not a symbol, so '=' padding
 * or any foreign character ends the input.
 *
 * Two defects are fixed here: the accumulator is unsigned (it is never
 * cleared, so a signed int would overflow on the left shift, which is
 * undefined behaviour), and the table entry for '<' is 255 instead of 0, so
 * '<' no longer decodes as if it were 'A'.
 *
 * @param in  Base64 text, padded or unpadded
 * @return the decoded bytes, stored in a std::string
 */
std::string base64_decode(const std::string_view in) {
  // table from '+' to 'z'; 255 marks "not a symbol"
  static constexpr uint8_t lookup[] = {
      62,  255, 62,  255, 63,  52,  53, 54, 55, 56, 57, 58, 59, 60, 61, 255,
      255, 255, 255, 255, 255, 255, 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
      10,  11,  12,  13,  14,  15,  16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
      255, 255, 255, 255, 63,  255, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
      36,  37,  38,  39,  40,  41,  42, 43, 44, 45, 46, 47, 48, 49, 50, 51};
  static_assert(sizeof(lookup) == 'z' - '+' + 1);

  std::string out;
  out.reserve(in.size() / 4 * 3 + 2);

  unsigned int val = 0;  // bit accumulator; only its low bits are ever read
  int valb = -8;         // (bits pending in val) - 8
  for (const uint8_t c : in) {
    if (c < '+' || c > 'z')
      break;
    const uint8_t sextet = lookup[c - '+'];
    if (sextet >= 64)
      break;
    val = (val << 6) | sextet;
    valb += 6;
    if (valb >= 0) {
      out.push_back(char((val >> valb) & 0xFF));
      valb -= 8;
    }
  }
  return out;
}

如果您没有std :: string_view,请尝试改用std :: experimental :: string_view。


2

我的版本是C ++ Builder的base64的简单快速编码器(解码器)。

//---------------------------------------------------------------------------
// Encode `length` bytes starting at `data` as '='-padded Base64 text.
// Returns an empty string when length <= 0.
UnicodeString __fastcall TExample::Base64Encode(void *data,int length)
{
 if (length<=0) return L"";
 static const char set[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 unsigned char *in=(unsigned char*)data;
 // Temporary output buffer of 4*ceil(length/3) characters; `pos` is the write cursor.
 char *pos,*out=pos=new char[((length-1)/3+1)<<2];
 // Consume complete 3-byte groups. The loop exits with length equal to
 // -3 (nothing left), -2 (one byte left) or -1 (two bytes left).
 while ((length-=3)>=0)
 {
  pos[0]=set[in[0]>>2];
  pos[1]=set[((in[0]&0x03)<<4)|(in[1]>>4)];
  pos[2]=set[((in[1]&0x0F)<<2)|(in[2]>>6)];
  pos[3]=set[in[2]&0x3F];
  pos+=4;
  in+=3;
 };
 // On a two's-complement int, bit 1 is set for -2 and -1 but clear for -3,
 // so this tests "are there leftover bytes".
 if ((length&2)!=0)
 {
  pos[0]=set[in[0]>>2];
  // Bit 0 is set only for -1, i.e. two leftover bytes.
  if ((length&1)!=0)
  {
   pos[1]=set[((in[0]&0x03)<<4)|(in[1]>>4)];
   pos[2]=set[(in[1]&0x0F)<<2];
  }
  else
  {
   pos[1]=set[(in[0]&0x03)<<4];
   pos[2]='=';
  };
  pos[3]='=';
  pos+=4;
 };
 // Build the result from the pos-out characters written, then free the buffer.
 // NOTE(review): if the UnicodeString constructor can throw, `out` leaks - confirm.
 UnicodeString code=UnicodeString(out,pos-out);
 delete[] out;
 return code;
};
//---------------------------------------------------------------------------
// Decode the Base64 text in `code` into a buffer allocated with new[].
// On success the buffer is stored in *data and the number of decoded bytes is
// returned; nothing in this function frees it afterwards, so the caller
// presumably has to delete[] it. Returns 0 (leaving *data untouched) when the
// text is empty, its length is not a multiple of 4, or it contains a
// character that is neither a Base64 symbol nor '='.
int __fastcall TExample::Base64Decode(const UnicodeString &code,unsigned char **data)
{
 int length;
 if (((length=code.Length())==0)||((length&3)!=0)) return 0;
 wchar_t *str=code.c_str();
 // Three output bytes per four input characters is the upper bound.
 unsigned char *pos,*out=pos=new unsigned char[(length>>2)*3];
 // One iteration per 4-character group; stops at the terminating zero.
 while (*str!=0)
 {
  // `length` is reused as the index of the current character in the group.
  length=-1;
  int shift=18,bits=0;
  do
  {
   wchar_t s=str[++length];
   if ((s>=L'A')&&(s<=L'Z')) bits|=(s-L'A')<<shift;
   else if ((s>=L'a')&&(s<=L'z')) bits|=(s-(L'a'-26))<<shift;
   else if (((s>=L'0')&&(s<=L'9'))) bits|=(s-(L'0'-52))<<shift;
   else if (s==L'+') bits|=62<<shift;
   else if (s==L'/') bits|=63<<shift;
   else if (s==L'=')
   {
    // Padding ends the group; after the decrement, `length` is the number
    // of bytes this group contributes.
    // NOTE(review): when '=' is the first character of a group, `length`
    // becomes -1 and `pos` below moves backwards - confirm inputs exclude this.
    length--;
    break;
   }
   else
   {
    delete[] out;
    return 0;
   };
  }
  while ((shift-=6)>=0);
  // All three bytes are always stored; `pos` only advances past the valid ones
  // (a full group leaves length at 3).
  pos[0]=bits>>16;
  pos[1]=bits>>8;
  pos[2]=bits;
  pos+=length;
  str+=4;
 };
 *data=out;
 return pos-out;
};
//---------------------------------------------------------------------------

1

轮到我了。我用这个:

/* Growable sequence of bits packed into bytes, most significant bit first. */
class BinaryVector {
public:
    /* packed storage: bit k of the sequence lives in bytes[k / 8] */
    std::vector<char> bytes;

    /* number of bits appended so far */
    uint64_t bit_count = 0;

    /* append one bit after the current last bit */
    void push_back(bool bit);

    /* append the 6 bits of one Base64 character;
       returns false if the character is unrecognized */
    bool pushBase64Char(char b64_c);
};

/* Append one bit. Bits fill each byte from the most significant position
   downwards; a new byte is started whenever the previous one is full. */
void BinaryVector::push_back(bool bit)
{
    const uint64_t used_in_byte = bit_count % 8;

    if (used_in_byte == 0) {
        // First bit of a fresh byte goes into the top position.
        bytes.push_back(bit << 7);
    }
    else {
        const uint8_t next_bit = 8 - used_in_byte - 1;
        bytes[bit_count / 8] |= bit << next_bit;
    }

    ++bit_count;
}

/* converts one Base64 character to 6 bits */
/* Append the 6-bit value of one Base64 character, most significant bit
   first. The padding character '=' is accepted and appends six zero bits.
   Returns false, appending nothing, for any other character outside the
   alphabet. Letter and digit ranges assume ASCII character codes. */
bool BinaryVector::pushBase64Char(char c)
{
    uint8_t d;

    if (c >= 'A' && c <= 'Z') {
        d = c - 'A';            // 'A' has value 0
    }
    else if (c >= 'a' && c <= 'z') {
        d = c - 'a' + 26;       // 'a' has value 26
    }
    else if (c >= '0' && c <= '9') {
        d = c - '0' + 52;       // '0' has value 52
    }
    else {
        switch (c) {
        case '+': d = 62; break;
        case '/': d = 63; break;
        case '=': d = 0;  break;
        default:  return false;
        }
    }

    // Emit bits 5..0 of d in that order.
    for (int shift = 5; shift >= 0; --shift) {
        push_back(((d >> shift) & 1) != 0);
    }

    return true;
}

/* Feed every character of b64_bin into vec, in order. Stops and returns
   false at the first character vec rejects (bits appended before that point
   stay in vec); returns true when all characters were accepted. */
bool loadBase64(std::vector<char>& b64_bin, BinaryVector& vec)
{
    for (std::vector<char>::iterator it = b64_bin.begin(); it != b64_bin.end(); ++it) {
        const bool accepted = vec.pushBase64Char(*it);
        if (!accepted) {
            return false;
        }
    }
    return true;
}

使用vec.bytes访问转换后的数据。


0

我首先制作了自己的版本,然后找到了这个主题。

为什么我的版本比这里介绍的其他版本简单?难道我做错了什么?我没有测试速度。

// Base64 alphabet: the symbol at index v stands for the 6-bit value v.
inline const char* b64units =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Encode `b` bytes starting at `a` as '='-padded Base64 text.
//
// Returns a zero-terminated buffer allocated with new char[]; the caller
// owns it and must release it with delete[]. Returns nullptr when b <= 0.
//
// The input is walked as a bit stream: each output symbol takes the six bits
// starting at bit position `bp`, which may straddle two input bytes. The
// second byte is only read when it exists. Without that check the last
// complete symbol reads one byte past the end of the input (its bits are
// later discarded, but the read itself is out of bounds).
inline char* b64encode(void const* a, int64_t b) {
    ASSERT(a != nullptr);
    if (b <= 0) return nullptr;

    uint8_t const* aa = static_cast<uint8_t const*>(a);
    const int64_t nt = ((b + 2) / 3) * 4;        // total output characters
    const int64_t nd = (b * 8) / 6;              // symbols made of six real bits
    const int64_t tl = ((b * 8) % 6) ? 1 : 0;    // one more symbol for leftover bits
    const int64_t nf = nt - nd - tl;             // '=' characters to append

    char* r = new char[nt + 1]();                // value-initialised: zero-terminated
    int64_t ri = 0;                              // write position in r
    int64_t bp = 0;                              // bit position in the input

    for (int64_t i = 0; i < nd; i++, bp += 6) {
        const int64_t sb = bp / 8;                       // byte holding the first bit
        const int off = static_cast<int>(bp % 8);        // bit offset inside that byte
        const unsigned int hi = aa[sb];
        const unsigned int lo = (sb + 1 < b) ? aa[sb + 1] : 0u;
        // Align the six wanted bits to the top of a byte, then shift them down.
        const uint8_t v = static_cast<uint8_t>((hi << off) | (lo >> (8 - off)));
        r[ri++] = b64units[v >> 2];
    }
    if (tl > 0) {
        // Fewer than six bits remain; they all sit in the last input byte.
        const int64_t sb = bp / 8;
        const int off = static_cast<int>(bp % 8);
        const uint8_t v = static_cast<uint8_t>(aa[sb] << off);
        r[ri++] = b64units[v >> 2];
    }
    for (int64_t i = 0; i < nf; i++) {
        r[ri++] = '=';
    }
    return r;
}

PS我的方法很好用,我用Node.js进行了测试:

// Encode a string as Base64 with Node.js.
// Buffer.from() replaces the deprecated `new Buffer(string)` constructor.
let data = 'stackabuse.com';
let buff = Buffer.from(data);
let base64data = buff.toString('base64');
By using our site, you acknowledge that you have read and understand our Cookie Policy and Privacy Policy.
Licensed under cc by-sa 3.0 with attribution required.