MD5 VHDL管道


10

我正在尝试根据此链接实现3级MD5管道。特别是第31页的算法。还有另一个文档描述了数据转发。这是在FPGA(Terasic DE2-115)中完成的。该项目中没有原理图,只有VHDL代码。

library ieee;
use ieee.std_logic_1164.all;
use ieee.numeric_std.all;

entity md5core is
    port (
        CLOCK_50        : in std_logic;
        SW              : in std_logic_vector(17 downto 17)
    );
end entity md5core;

architecture md5core_rtl of md5core is
type r_array is array(0 to 64) of std_logic_vector(7 downto 0);
constant R        : r_array := ( x"07", x"0c", x"11", x"16", x"07", x"0c", x"11", x"16", x"07", x"0c", x"11", x"16", x"07", x"0c", x"11", 
                                 x"16", x"05", x"09", x"0e", x"14", x"05", x"09", x"0e", x"14", x"05", x"09", x"0e", x"14", x"05", x"09",
                                 x"0e", x"14", x"04", x"0b", x"10", x"17", x"04", x"0b", x"10", x"17", x"04", x"0b", x"10", x"17", x"04",
                                 x"0b", x"10", x"17", x"06", x"0a", x"0f", x"15", x"06", x"0a", x"0f", x"15", x"06", x"0a", x"0f", x"15",
                                 x"06", x"0a", x"0f", x"15", others => x"00");

type k_array is array(0 to 66) of std_logic_vector(31 downto 0);
constant K        : k_array := (x"d76aa478", x"e8c7b756", x"242070db", x"c1bdceee",
                                x"f57c0faf", x"4787c62a", x"a8304613", x"fd469501",
                                x"698098d8", x"8b44f7af", x"ffff5bb1", x"895cd7be",
                                x"6b901122", x"fd987193", x"a679438e", x"49b40821", 
                                x"f61e2562", x"c040b340", x"265e5a51", x"e9b6c7aa",
                                x"d62f105d", x"02441453", x"d8a1e681", x"e7d3fbc8",
                                x"21e1cde6", x"c33707d6", x"f4d50d87", x"455a14ed",
                                x"a9e3e905", x"fcefa3f8", x"676f02d9", x"8d2a4c8a",
                                x"fffa3942", x"8771f681", x"6d9d6122", x"fde5380c",
                                x"a4beea44", x"4bdecfa9", x"f6bb4b60", x"bebfbc70",
                                x"289b7ec6", x"eaa127fa", x"d4ef3085", x"04881d05",
                                x"d9d4d039", x"e6db99e5", x"1fa27cf8", x"c4ac5665",
                                x"f4292244", x"432aff97", x"ab9423a7", x"fc93a039",
                                x"655b59c3", x"8f0ccc92", x"ffeff47d", x"85845dd1",
                                x"6fa87e4f", x"fe2ce6e0", x"a3014314", x"4e0811a1",
                                x"f7537e82", x"bd3af235", x"2ad7d2bb", x"eb86d391", others => x"00000000");

type g_array is array(0 to 64) of integer range 0 to 15;
constant g_arr      : g_array := (0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                                          1, 6, 11, 0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12,
                                          5, 8, 11, 14, 1, 4, 7, 10, 13, 0, 3, 6, 9, 12, 15, 2,
                                          0, 7, 14, 5, 12, 3, 10, 1, 8, 15, 6, 13, 4, 11, 2, 9, 0);                                               

type w_array is array(0 to 15) of std_logic_vector(31 downto 0);
signal W            : w_array;

constant AA        : std_logic_vector(31 downto 0) := x"67452301";
constant BB        : std_logic_vector(31 downto 0) := x"EFCDAB89";
constant CC        : std_logic_vector(31 downto 0) := x"98BADCFE";
constant DD        : std_logic_vector(31 downto 0) := x"10325476";

signal res_A    : std_logic_vector(31 downto 0) := x"00000000";
signal res_B    : std_logic_vector(31 downto 0) := x"00000000";
signal res_C    : std_logic_vector(31 downto 0) := x"00000000";
signal res_D    : std_logic_vector(31 downto 0) := x"00000000";

type in_str_t is array(0 to 5) of std_logic_vector(7 downto 0);
constant in_str    : in_str_t := (x"68", x"65", x"6c", x"6c", x"6f", x"6f");

type pad_str_t    is array(0 to 63) of std_logic_vector(7 downto 0);
signal pad_str    : pad_str_t;

type state_t is (start, padding, init_w, state_1, state_2, state_3, state_4, done);
signal state    : state_t;

signal a, b, c, d, f    : std_logic_vector(31 downto 0) := x"00000000";
signal i                : integer range 0 to 64 := 0;
signal g                        : integer range 0 to 15 := 0;
--signal tmp_b              : std_logic_vector(31 downto 0);

signal akw                  : std_logic_vector(31 downto 0);
signal ak                   : std_logic_vector(31 downto 0);
signal b_tmp                : std_logic_vector(31 downto 0);
begin

    --tmp_b <= std_logic_vector(unsigned(b) + rotate_left(unsigned(a) + unsigned(f) + unsigned(K(i)) + unsigned(W(g)), to_integer(unsigned(R(i)))));

    pipe_p : process(CLOCK_50, SW, a, b, c, d, i)
    begin
        if SW(17) = '0' then
--          ak <= std_logic_vector(unsigned(K(2)) + unsigned(BB));
--          akw <= std_logic_vector(unsigned(W(0)) + 1 + unsigned(K(2)) + unsigned(BB));
            b_tmp <= BB;
        elsif rising_edge(CLOCK_50) and state = state_1 then
            if i = 0 then
                ak <= std_logic_vector(unsigned(K(0)) + unsigned(a));
            elsif i = 1 then
                ak <= std_logic_vector(unsigned(K(1)) + unsigned(a));
                akw <= std_logic_vector(unsigned(W(0)) + unsigned(ak));
            elsif i = 2 then
                ak <= std_logic_vector(unsigned(K(2)) + unsigned(a));
                akw <= std_logic_vector(unsigned(W(1)) + unsigned(ak));
                b_tmp <= std_logic_vector(unsigned(b) + (rotate_left(unsigned(akw) + unsigned(f), to_integer(unsigned(R(0))))));
            else
                ak <= std_logic_vector(unsigned(K(i)) + unsigned(a));
                akw <= std_logic_vector(unsigned(W(g_arr(i-1))) + unsigned(ak));
                b_tmp <= std_logic_vector(unsigned(b) + (rotate_left(unsigned(akw) + unsigned(f), to_integer(unsigned(R(i-2))))));
            end if;
        end if;
    end process pipe_p;


    md5_f_p : process(state, a, b, c, d, i)
    begin 
        case state is
            when state_1 =>
                if i = 0 or i > 4 then
                    f <= (b and c) or ((not b) and d);
                    g <= g_arr(i);
                end if;

            when state_2 =>
            f <= (d and b) or ((not d) and c);
                g <= g_arr(i);

            when state_3 =>
                f <= b xor c xor d;
            g <= g_arr(i);

            when state_4 =>
                f <= c xor (b or (not d));
            g <= g_arr(i);

            when others =>
                f <= x"00000000";
                g <= 0;             

        end case;
    end process md5_f_p;

     md5_p : process(CLOCK_50, SW, a, b, c, d, f, g)
     begin
        if SW(17) = '0' then
            state <= start;
                i <= 0;
                a <= AA;
            b <= BB;
            c <= CC;
            d <= DD;                
            W <= (others => x"00000000");
                pad_str <= (others => x"00");
                --tmp_b := BB;
        elsif rising_edge(CLOCK_50) then
            case state is            
                when start =>

                    pad_str(0) <= in_str(0);
                    pad_str(1) <= in_str(1);
                    pad_str(2) <= in_str(2);
                    pad_str(3) <= in_str(3);
                    pad_str(4) <= in_str(4);
                    pad_str(5) <= in_str(5);
                    state <= padding;

                when padding =>
                    pad_str(6) <= "10000000";
                    pad_str(56) <= std_logic_vector(to_unsigned(in_str'length*8, 8));
                          state <= init_w;

                when init_w =>                
                    W(0) <= pad_str(3) & pad_str(2) & pad_str(1) & pad_str(0);
                    W(1) <= pad_str(7) & pad_str(6) & pad_str(5) & pad_str(4);
                    W(14) <= pad_str(59) & pad_str(58) & pad_str(57) & pad_str(56);
                          state <= state_1;

                when state_1 =>
                          if i = 16 then
                              state <= state_2;
                          else 
                        if i > 2 then
                                    --tmp_b := b;
                                    a <= d;
                                    c <= b;
                                    d <= c;
                                    b <= b_tmp;

--                                  d <= c;
--                                  b <= b_tmp;
--                                  c <= b;
--                                  a <= d;
                                end if;
                                i <= i + 1;
                    end if;

                when state_2 =>
                    if i = 32 then
                        state <= state_3;
                          else                  
                        d <= c;
                        b <= b_tmp;
                                c <= b;
                        a <= d;
                        i <= i + 1;
                    end if;

                when state_3 =>
                    if i = 48 then
                                state <= state_4;
                          else
                        d <= c;
                        b <= b_tmp;
                                c <= b;
                        a <= d;
                        i <= i + 1;
                    end if;

                when state_4 =>
                    if i = 64 then
                                res_A <= std_logic_vector(unsigned(AA) + unsigned(a));
                                res_B <= std_logic_vector(unsigned(BB) + unsigned(b));
                                res_C <= std_logic_vector(unsigned(CC) + unsigned(c));
                                res_D <= std_logic_vector(unsigned(DD) + unsigned(d));
                                state <= done;
                    else
                        d <= c;
                        c <= b;
                                b <= b_tmp;
                        a <= d;
                        i <= i + 1;
                    end if;

                when done =>
                    state <= done;

                when others =>
                    state <= done;

            end case;
        end if;
    end process md5_p;
end architecture md5core_rtl;

使用此代码,我b在第0轮的第一阶段获得了正确的值,但此后似乎没有任何合适的值。从该模拟中可以看出,第0轮的第一阶段是正确的,但此后是不正确的。这是a在此表达式中使用时:

ak <= std_logic_vector(unsigned(K(0)) + unsigned(a)); -- using a

模拟

但是...如果我正确理解了第二个文档,我应该使用它c而不是a(数据转发),但是第0轮的第一阶段也不起作用。也就是说,当我这样做时,第0轮的第一阶段也会得到错误的数字。

ak <= std_logic_vector(unsigned(K(0)) + unsigned(c)); -- changed to c

对于代码(helloo)中的特定字符串,以下值是正确的(第0轮的前3个阶段)。

i:0 => a:271733878, b:3679623978, c:4023233417, d:2562383102, f:2562383102, g:0
i:1 => a:2562383102, b:268703616, c:3679623978, d:4023233417, f:3421032412, g:1
i:2 => a:4023233417, b:566857930, c:268703616, d:3679623978, f:4291410697, g:2

顺便说一下,AKM文件中的是akw代码。

任何有关使我朝正确方向发展的提示或建议,将不胜感激。代码将是理想的。如果不清楚,我将编辑问题并尝试纠正该问题。


您正在使用微控制器吗?哪一种?你的设置是什么?示意图?请提供更多信息:-)

我添加了更多信息,如果可以进一步改进,请告诉我。
jgr 2013年

我认为您不完全了解您的要求。您似乎并不真正熟悉构建管道和相关问题,例如处理各阶段之间的数据依赖关系以及正确初始化各阶段。教您需要一本书,而不是网页。而且我怀疑已经完成该算法的流水线实现的任何人都将免费将其免费提供给您。这样的知识产权才具有真正的价值。
Dave Tweed

很公平。你当然是对的。我不太熟悉构建管道,只有6个月的VHDL经验。太老实了,我也不希望太多,但是当您陷入困境时,寻求帮助似乎总是一个不错的选择。
jgr

Answers:


5

我认为您误解了本文作者对流水线算法的评论。您不仅可以流水线化B的计算,而且还不流水线化其余的过程。

为了入门,我建议您完全不要使用流水线方法,而只需让算法与B计算的非流水线实现一起工作即可。

一旦获得正确的结果,并且如果需要更高的性能,则可以考虑对其进行流水线化。然后,您将能够看到中间结果在每个时钟周期如何排列,以及如何使它们保持同步。


感谢您的回答。我已经有一个有效的非流水线版本,关于f(max)的保留条件是b的计算。据我了解,对b的计算进行流水线化就足够了,但是我想我必须再次阅读它。
jgr

Bi1Ai+2Mi+1Mi+1

谢谢!我再次通读了一下,当您提到我不能只是管道时b,您的意思是他们在论文中称其为“数据转发” A吗?
jgr

Ai+2Ci

好的,AK真的是CK。这变得越来越复杂:/
jgr 2013年
By using our site, you acknowledge that you have read and understand our Cookie Policy and Privacy Policy.
Licensed under cc by-sa 3.0 with attribution required.