# Project help

Discussion in 'VHDL' started by Rejin James, Jan 25, 2011.

1. ### Rejin JamesGuest

Hi Friends I am currently doin my university project on the topic
Low Power AES algorithm using VHDL

I am having problems understanding the logic of the MixColumns operation
in the Galois field, and other parts of the algorithm such as Galois field
multiplication and key expansion.
Can anyone help me out?

this is the base paper im following
www.martes-itea.org/.../Hamalainen-Design_and_Implementation_2.pdf

actually i got the cores from their website and was having a problem
in understanding it .
They are using 8- bit data paths and i was having problems in
understanding their architecture and implementation in VHDl.

The following is the code for the MixColumns operation. Can somebody help
me out with it?
I do not understand the Galois field multiplication concept.

library ieee;
use ieee.std_logic_1164.all;

-- Byte-serial AES MixColumns unit.
--
-- One byte is presented on data_in per rising clock edge.  The byte that
-- is clocked in while start_in = '1' restarts the accumulation; bytes
-- clocked in while start_in = '0' are folded into four accumulator
-- registers, whose contents are driven on data0_out .. data3_out.
entity mixcolumns is
  port(
    clk        : in  std_logic;         -- registers update on the rising edge
    start_in   : in  std_logic;         -- '1' = (re)load accumulators from data_in
    inverse_in : in  std_logic;         -- '1' = inverse transformation

    data_in    : in  std_logic_vector (7 downto 0);  -- input data
    data0_out  : out std_logic_vector (7 downto 0);  -- output data (accumulator 0)
    data1_out  : out std_logic_vector (7 downto 0);  -- output data (accumulator 1)
    data2_out  : out std_logic_vector (7 downto 0);  -- output data (accumulator 2)
    data3_out  : out std_logic_vector (7 downto 0)   -- output data (accumulator 3)
    );
end mixcolumns;

-- fwd_rtl = forward only
architecture fwd_rtl of mixcolumns is

  -- GF(2^8) multiplication with constant: x  (i.e. multiplication by 02)
  -- reduction polynomial is x^8 + x^4 + x^3 + x + 1
  --
  -- Equivalent to shifting a left by one bit and, when the bit that is
  -- shifted out (a(7)) is set, XOR-ing in 0x1B (bits 0, 1, 3 and 4).
  function gf256_mul2 (a : std_logic_vector(7 downto 0))
    return std_logic_vector is
    variable b : std_logic_vector(7 downto 0);
  begin
    b(0) := a(7);
    b(1) := a(0) xor a(7);
    b(2) := a(1);
    b(3) := a(2) xor a(7);
    b(4) := a(3) xor a(7);
    b(5) := a(4);
    b(6) := a(5);
    b(7) := a(6);
    return b;
  end;

  -- Four byte-wide accumulator registers, one per output byte.
  type   accum_array_t is array (0 to 3) of std_logic_vector(7 downto 0);
  signal accum_r : accum_array_t;

  signal prod2, prod3 : std_logic_vector(7 downto 0);  -- 02*x and 03*x
  signal x            : std_logic_vector(7 downto 0);  -- current input byte

begin  -- fwd_rtl

  -- Concurrent check: this architecture must never be asked for the
  -- inverse transform.  (The message has to be a single-line string
  -- literal; VHDL string literals cannot span lines.)
  assert (inverse_in /= '1')
    report "this architecture supports only forward operation"
    severity failure;

  x <= data_in;

  -- 03*x = 02*x xor x (addition in GF(2^8) is XOR).
  prod2 <= gf256_mul2(x);
  prod3 <= prod2 xor x;

  -- forward transform:
  --
  -- x0   |02 03 01 01| y0
  -- x1 = |01 02 03 01|*y1
  -- x2   |01 01 02 03| y2
  -- x3   |03 01 01 02| y3

  -- inverse transform
  -- y0   |0e 0b 0d 09| x0
  -- y1 = |09 0e 0b 0d|*x1
  -- y2   |0d 09 0e 0b| x2
  -- y3   |0b 0d 09 0e| x3

  -- Accumulation.  Each register adds the new byte (scaled by 01, 01,
  -- 03 or 02) to the previous value of its neighbour, so the partial
  -- sums rotate through the four registers.  With b0 clocked in while
  -- start_in = '1' and b1, b2, b3 clocked in on the next three edges
  -- (start_in = '0'), the registers hold
  --   accum_r(0) = 02*b0 xor 03*b1 xor    b2 xor    b3
  --   accum_r(1) =    b0 xor 02*b1 xor 03*b2 xor    b3
  --   accum_r(2) =    b0 xor    b1 xor 02*b2 xor 03*b3
  --   accum_r(3) = 03*b0 xor    b1 xor    b2 xor 02*b3
  -- i.e. the four rows of the forward matrix above.
  -- There is no reset: the registers are undefined until the first
  -- edge with start_in = '1'.
  clocked : process (clk)
  begin  -- process clocked
    if rising_edge(clk) then            -- rising clock edge
      if (start_in = '1') then
        -- first byte of a column: overwrite the old contents
        accum_r(0) <= x;
        accum_r(1) <= x;
        accum_r(2) <= prod3;
        accum_r(3) <= prod2;
      else
        -- later bytes: add to the neighbour's partial sum
        accum_r(0) <= x xor accum_r(1);
        accum_r(1) <= x xor accum_r(2);
        accum_r(2) <= prod3 xor accum_r(3);
        accum_r(3) <= prod2 xor accum_r(0);
      end if;
    end if;
  end process clocked;

  data0_out <= accum_r(0);
  data1_out <= accum_r(1);
  data2_out <= accum_r(2);
  data3_out <= accum_r(3);

end fwd_rtl;

ANY HELP WOULD BE APPRECIATED .. thanks

Rejin James, Jan 25, 2011

2. ### backhusGuest

On 25 Jan., 06:49, Rejin James <> wrote:
> Hi Friends I am currently doin my university project on the topic
> Low Power AES algorithm using VHDL
>
> I was having problems understanding the logic of Mixcolumns operation
> in GALIOS FIELD and other parts of the algorithm like galios field
> multiplication and key expansion.
> Can anyone help me out >??
>
> this is the base paper im followingwww.martes-itea.org/.../Hamalainen-Design_and_Implementation_2.pdf
>
> actually i got the cores from their website and was having a problem
> in understanding it .
> They are using 8- bit data paths and i was having problems in
> understanding their architecture and implementation in VHDl.
>
> The following is the code for mixcolumns operation . can somebody help
> me out with it ??
> i was not understanding the GALIOS FIELD multiplication concept.
>
> library ieee;
> use ieee.std_logic_1164.all;
>
> entity mixcolumns is
>   port(
>     clk        : in  std_logic;
>     start_in   : in  std_logic;
>     inverse_in : in  std_logic;         -- '1' = inverse
> transformation
>
>     data_in    : in  std_logic_vector (7 downto 0);  -- input data
>     data0_out  : out std_logic_vector (7 downto 0);  -- output data
>     data1_out  : out std_logic_vector (7 downto 0);  -- output data
>     data2_out  : out std_logic_vector (7 downto 0);  -- output data
>     data3_out  : out std_logic_vector (7 downto 0)   -- output data
>     );
> end mixcolumns;
>
> -- fwd_rtl = forward only
> architecture fwd_rtl of mixcolumns is
>
>   -- GF(2^8) multiplication with constant: x
>   -- reduction polynomial is x^8 + x^4 + x^3 + x + 1
>   function gf256_mul2 (a : std_logic_vector(7 downto 0))
>     return std_logic_vector is
>     variable b : std_logic_vector(7 downto 0);
>   begin
>     b(0) := a(7);
>     b(1) := a(0) xor a(7);
>     b(2) := a(1);
>     b(3) := a(2) xor a(7);
>     b(4) := a(3) xor a(7);
>     b(5) := a(4);
>     b(6) := a(5);
>     b(7) := a(6);
>     return b;
>   end;
>
>   type   accum_array_t is array (0 to 3) of std_logic_vector(7 downto
> 0);
>   signal accum_r : accum_array_t;
>
>   signal prod2, prod3 : std_logic_vector(7 downto 0);
>   signal x           : std_logic_vector(7 downto 0);
>
> begin  -- rtl
>
>   assert (inverse_in /= '1') report "this architecture supports only
> forward operation"
>     severity failure;
>   x <= data_in;
>
>   prod2 <= gf256_mul2(x);
>   prod3 <= prod2 xor x;
>
>   -- forward transform:
>   --
>   -- x0   |02 03 01 01| y0
>   -- x1 = |01 02 03 01|*y1
>   -- x2   |01 01 02 03| y2
>   -- x3   |03 01 01 02| y3
>
>   -- inverse transform
>   -- y0   |0e 0b 0d 09| x0
>   -- y1 = |09 0e 0b 0d|*x1
>   -- y2   |0d 09 0e 0b| x2
>   -- y3   |0b 0d 09 0e| x3
>
>   clocked : process (clk)
>   begin  -- process clocked
>     if rising_edge(clk) then            -- rising clock edge
>       if (start_in = '1') then
>         accum_r(0) <= x;
>         accum_r(1) <= x;
>         accum_r(2) <= prod3;
>         accum_r(3) <= prod2;
>       else
>         accum_r(0) <= x xor accum_r(1);
>         accum_r(1) <= x xor accum_r(2);
>         accum_r(2) <= prod3 xor accum_r(3);
>         accum_r(3) <= prod2 xor accum_r(0);
>       end if;
>     end if;
>   end process clocked;
>
>   data0_out <= accum_r(0);
>   data1_out <= accum_r(1);
>   data2_out <= accum_r(2);
>   data3_out <= accum_r(3);
>
> end fwd_rtl;
>
> ANY HELP WOULD BE APPRECIATED .. thanks

Hi,
that's a lot of questions at once.
Galois Field math is a topic for 10th semester math students.
So don't be bothered when it seems complicated to understand.
In some VERY simple words: a Galois field is a finite set of
numbers that obeys defined mathematical rules.
This is only possible because the operations can always be seen as
modulo operations, which keeps the number space constant.
When you chop the field size down to two, you can work with simple gate
functions (AND and XOR) for multiplication and addition.

MixColumns is an ordinary matrix/vector multiplication (with the
arithmetic done in GF(2^8)), where you multiply a column of the input
(state) matrix by a given transformation matrix.
The result is then written to the corresponding column of the result matrix.

The code you provided has one unusual property:
it takes a single stream of data (data_in) and creates four result
values.
You need to find out in which order the input stream has to provide the
data of the input matrix (and when to apply start_in),
and what to do with the four result values.

Maybe you should take a look at this book:
The Design of Rijndael: AES - The Advanced Encryption Standard,
written by the designers of the algorithm.
There you will find many examples and calculations that you can compare
with your simulations, in order to understand how the code that
you have works.

Have a nice simulation
Eilert

backhus, Jan 26, 2011

3. ### Rejin JamesGuest

On Jan 26, 1:10 pm, backhus <> wrote:
> On 25 Jan., 06:49, Rejin James <> wrote:
>
>
>
> > Hi Friends I am currently doin my university project on the topic
> > Low Power AES algorithm using VHDL

>
> > I was having problems understanding the logic of Mixcolumns operation
> > in GALIOS FIELD and other parts of the algorithm like galios field
> > multiplication and key expansion.
> > Can anyone help me out >??

>
> > this is the base paper im followingwww.martes-itea.org/.../Hamalainen-Design_and_Implementation_2.pdf

>
> > actually i got the cores from their website and was having a problem
> > in understanding it .
> > They are using 8- bit data paths and i was having problems in
> > understanding their architecture and implementation in VHDl.

>
> > The following is the code for mixcolumns operation . can somebody help
> > me out with it ??
> > i was not understanding the GALIOS FIELD multiplication concept.

>
> > library ieee;
> > use ieee.std_logic_1164.all;

>
> > entity mixcolumns is
> >   port(
> >     clk        : in  std_logic;
> >     start_in   : in  std_logic;
> >     inverse_in : in  std_logic;         -- '1' = inverse
> > transformation

>
> >     data_in    : in  std_logic_vector (7 downto 0);  -- input data
> >     data0_out  : out std_logic_vector (7 downto 0);  -- output data
> >     data1_out  : out std_logic_vector (7 downto 0);  -- output data
> >     data2_out  : out std_logic_vector (7 downto 0);  -- output data
> >     data3_out  : out std_logic_vector (7 downto 0)   -- output data
> >     );
> > end mixcolumns;

>
> > -- fwd_rtl = forward only
> > architecture fwd_rtl of mixcolumns is

>
> >   -- GF(2^8) multiplication with constant: x
> >   -- reduction polynomial is x^8 + x^4 + x^3 + x + 1
> >   function gf256_mul2 (a : std_logic_vector(7 downto 0))
> >     return std_logic_vector is
> >     variable b : std_logic_vector(7 downto 0);
> >   begin
> >     b(0) := a(7);
> >     b(1) := a(0) xor a(7);
> >     b(2) := a(1);
> >     b(3) := a(2) xor a(7);
> >     b(4) := a(3) xor a(7);
> >     b(5) := a(4);
> >     b(6) := a(5);
> >     b(7) := a(6);
> >     return b;
> >   end;

>
> >   type   accum_array_t is array (0 to 3) of std_logic_vector(7 downto
> > 0);
> >   signal accum_r : accum_array_t;

>
> >   signal prod2, prod3 : std_logic_vector(7 downto 0);
> >   signal x           : std_logic_vector(7 downto 0);

>
> > begin  -- rtl

>
> >   assert (inverse_in /= '1') report "this architecture supports only
> > forward operation"
> >     severity failure;
> >   x <= data_in;

>
> >   prod2 <= gf256_mul2(x);
> >   prod3 <= prod2 xor x;

>
> >   -- forward transform:
> >   --
> >   -- x0   |02 03 01 01| y0
> >   -- x1 = |01 02 03 01|*y1
> >   -- x2   |01 01 02 03| y2
> >   -- x3   |03 01 01 02| y3

>
> >   -- inverse transform
> >   -- y0   |0e 0b 0d 09| x0
> >   -- y1 = |09 0e 0b 0d|*x1
> >   -- y2   |0d 09 0e 0b| x2
> >   -- y3   |0b 0d 09 0e| x3

>
> >   clocked : process (clk)
> >   begin  -- process clocked
> >     if rising_edge(clk) then            -- rising clock edge
> >       if (start_in = '1') then
> >         accum_r(0) <= x;
> >         accum_r(1) <= x;
> >         accum_r(2) <= prod3;
> >         accum_r(3) <= prod2;
> >       else
> >         accum_r(0) <= x xor accum_r(1);
> >         accum_r(1) <= x xor accum_r(2);
> >         accum_r(2) <= prod3 xor accum_r(3);
> >         accum_r(3) <= prod2 xor accum_r(0);
> >       end if;
> >     end if;
> >   end process clocked;

>
> >   data0_out <= accum_r(0);
> >   data1_out <= accum_r(1);
> >   data2_out <= accum_r(2);
> >   data3_out <= accum_r(3);

>
> > end fwd_rtl;

>
> > ANY HELP WOULD BE APPRECIATED .. thanks

>
> Hi,
> that's a lot of questions at once.
> Galois Field math is a topic for 10th semester math students.
> So don't be bothered when it seems complicated to understand.
> In some VERY simple words: The galois field is a limited bunch of
> numbers, that obeys defined mathematical rules.
> This is only possible because he operations can always be seen as
> modulo operations in order to keep the number space constant.
> When you chop down the field size to tw0 you can work with simple gate
> functions (AND and XOR) for multiplication and addition.
>
> Mix columns is an ordinary vector/matrix multiplication, where you
> multiply a row of the input matrix with a given transformation
> matrix.
> The result is then written to a column of the result matrix.
>
> The code you provided has one strange property.
> It takes a single stream of data (data_in) and creates four result
> values.
> You need to find out in which order the input stram has to provide the
> data of the input matrix (and when to apply start_in)
> and what to do with the four result values.
>
> Maybe you should take a look ath this book:
> The Design of Rijndael: AES. TheAdvanced Encryption Standard
> Written by the designers of the algorithm.
> There you find many examples and calculations that you can compare
> with your simulations, in order to understand how the code works that
> you have.
>
> Have a nice simulation
>   Eilert

Hey Eilert,
Thanks a lot for the reply.

i went through the galois Field multiplication once more from a book
on cryptography and got a basic idea as to wat happens in that.

The reason that the code only takes one input is that I am
implementing the AES algorithm with an 8-bit data path (data input).
According to the paper that I am using, the MixColumns multiplier
unit takes in the 8-bit data and produces a 32-bit output, which is then
given to a parallel-to-serial converter.

If you could take a look at the base paper I was referring to, I guess you
would understand it better, because I was not able to understand much.

And i also understood when to apply start_in. it helps us to control
operation of mixcolumns for encryption or decryption.
when start_in is not one it signifies we want to do the inverse
operation i.e. decryption.

Rejin James, Jan 26, 2011
4. ### backhusGuest

On 26 Jan., 13:32, Rejin James <> wrote:
> On Jan 26, 1:10 pm, backhus <> wrote:
>
>
>
> > On 25 Jan., 06:49, Rejin James <> wrote:

>
> > > Hi Friends I am currently doin my university project on the topic
> > > Low Power AES algorithm using VHDL

>
> > > I was having problems understanding the logic of Mixcolumns operation
> > > in GALIOS FIELD and other parts of the algorithm like galios field
> > > multiplication and key expansion.
> > > Can anyone help me out >??

>
> > > this is the base paper im followingwww.martes-itea.org/.../Hamalainen-Design_and_Implementation_2.pdf

>
> > > actually i got the cores from their website and was having a problem
> > > in understanding it .
> > > They are using 8- bit data paths and i was having problems in
> > > understanding their architecture and implementation in VHDl.

>
> > > The following is the code for mixcolumns operation . can somebody help
> > > me out with it ??
> > > i was not understanding the GALIOS FIELD multiplication concept.

>
> > > library ieee;
> > > use ieee.std_logic_1164.all;

>
> > > entity mixcolumns is
> > >   port(
> > >     clk        : in  std_logic;
> > >     start_in   : in  std_logic;
> > >     inverse_in : in  std_logic;         -- '1' = inverse
> > > transformation

>
> > >     data_in    : in  std_logic_vector (7 downto 0);  -- input data
> > >     data0_out  : out std_logic_vector (7 downto 0);  -- output data
> > >     data1_out  : out std_logic_vector (7 downto 0);  -- output data
> > >     data2_out  : out std_logic_vector (7 downto 0);  -- output data
> > >     data3_out  : out std_logic_vector (7 downto 0)   -- output data
> > >     );
> > > end mixcolumns;

>
> > > -- fwd_rtl = forward only
> > > architecture fwd_rtl of mixcolumns is

>
> > >   -- GF(2^8) multiplication with constant: x
> > >   -- reduction polynomial is x^8 + x^4 + x^3 + x + 1
> > >   function gf256_mul2 (a : std_logic_vector(7 downto 0))
> > >     return std_logic_vector is
> > >     variable b : std_logic_vector(7 downto 0);
> > >   begin
> > >     b(0) := a(7);
> > >     b(1) := a(0) xor a(7);
> > >     b(2) := a(1);
> > >     b(3) := a(2) xor a(7);
> > >     b(4) := a(3) xor a(7);
> > >     b(5) := a(4);
> > >     b(6) := a(5);
> > >     b(7) := a(6);
> > >     return b;
> > >   end;

>
> > >   type   accum_array_t is array (0 to 3) of std_logic_vector(7 downto
> > > 0);
> > >   signal accum_r : accum_array_t;

>
> > >   signal prod2, prod3 : std_logic_vector(7 downto 0);
> > >   signal x           : std_logic_vector(7 downto 0);

>
> > > begin  -- rtl

>
> > >   assert (inverse_in /= '1') report "this architecture supports only
> > > forward operation"
> > >     severity failure;
> > >   x <= data_in;

>
> > >   prod2 <= gf256_mul2(x);
> > >   prod3 <= prod2 xor x;

>
> > >   -- forward transform:
> > >   --
> > >   -- x0   |02 03 01 01| y0
> > >   -- x1 = |01 02 03 01|*y1
> > >   -- x2   |01 01 02 03| y2
> > >   -- x3   |03 01 01 02| y3

>
> > >   -- inverse transform
> > >   -- y0   |0e 0b 0d 09| x0
> > >   -- y1 = |09 0e 0b 0d|*x1
> > >   -- y2   |0d 09 0e 0b| x2
> > >   -- y3   |0b 0d 09 0e| x3

>
> > >   clocked : process (clk)
> > >   begin  -- process clocked
> > >     if rising_edge(clk) then            -- rising clock edge
> > >       if (start_in = '1') then
> > >         accum_r(0) <= x;
> > >         accum_r(1) <= x;
> > >         accum_r(2) <= prod3;
> > >         accum_r(3) <= prod2;
> > >       else
> > >         accum_r(0) <= x xor accum_r(1);
> > >         accum_r(1) <= x xor accum_r(2);
> > >         accum_r(2) <= prod3 xor accum_r(3);
> > >         accum_r(3) <= prod2 xor accum_r(0);
> > >       end if;
> > >     end if;
> > >   end process clocked;

>
> > >   data0_out <= accum_r(0);
> > >   data1_out <= accum_r(1);
> > >   data2_out <= accum_r(2);
> > >   data3_out <= accum_r(3);

>
> > > end fwd_rtl;

>
> > > ANY HELP WOULD BE APPRECIATED .. thanks

>
> > Hi,
> > that's a lot of questions at once.
> > Galois Field math is a topic for 10th semester math students.
> > So don't be bothered when it seems complicated to understand.
> > In some VERY simple words: The galois field is a limited bunch of
> > numbers, that obeys defined mathematical rules.
> > This is only possible because he operations can always be seen as
> > modulo operations in order to keep the number space constant.
> > When you chop down the field size to tw0 you can work with simple gate
> > functions (AND and XOR) for multiplication and addition.

>
> > Mix columns is an ordinary vector/matrix multiplication, where you
> > multiply a row of the input matrix with a given transformation
> > matrix.
> > The result is then written to a column of the result matrix.

>
> > The code you provided has one strange property.
> > It takes a single stream of data (data_in) and creates four result
> > values.
> > You need to find out in which order the input stram has to provide the
> > data of the input matrix (and when to apply start_in)
> > and what to do with the four result values.

>
> > Maybe you should take a look ath this book:
> > The Design of Rijndael: AES. TheAdvanced Encryption Standard
> > Written by the designers of the algorithm.
> > There you find many examples and calculations that you can compare
> > with your simulations, in order to understand how the code works that
> > you have.

>
> > Have a nice simulation
> >   Eilert

>
> Hey Eilert,
> Thanks a lot for the reply.
>
> i went through the galois Field multiplication once more from a book
> on cryptography and got a basic idea as to wat happens in that.
>
> The reason dat the code only takes one input is because i am
> implementing Aes algorithm with 8 bit DAta path (data input)
> And according to the paper that i am using the mix columns multiplier
> unit takes in the 8 bit data and produces 32 bit output which is then
> given to a parallel to serial converter.
>
> If u cud see the base paper which i was referring once then i guess u
> wud understand it better. Coz i was not able to understand much.
>
> And i also understood when to apply start_in. it helps us to control
> operation of mixcolumns for encryption or decryption.
> when start_in is not one it signifies we want to do the inverse
> operation i.e. decryption.

Hi,
you confused start_in with inverse_in.
Also, inverse_in has to be '0' all the time, since this architecture doesn't
support the inverse transformation
(look at the assert statement: it fails when inverse_in = '1').

In the mentioned paper there is a signal mentioned called "en".
That's probably called start_in in the provided source now.

It has to be applied "During inputting the first byte of a column
(bytes 0, 4, 8, and 12 in Fig. 1)"
as mentioned in the paper.
It seems like the results have to be taken after every four clock
cycles.
So start_in can also be used by the following stage to take over the
results from the MixColumns stage.

If you have the full sources of that project, there should be some
design unit that controls the datapath,
some kind of FSM. By analysing that you will probably gain more detailed
insight into how the whole thing works.

Have a nice synthesis
Eilert

backhus, Jan 27, 2011
5. ### Rejin JamesGuest

On Jan 27, 12:29 pm, backhus <> wrote:
> On 26 Jan., 13:32, Rejin James <> wrote:
>
>
>
> > On Jan 26, 1:10 pm, backhus <> wrote:

>
> > > On 25 Jan., 06:49, Rejin James <> wrote:

>
> > > > Hi Friends I am currently doin my university project on the topic
> > > > Low Power AES algorithm using VHDL

>
> > > > I was having problems understanding the logic of Mixcolumns operation
> > > > in GALIOS FIELD and other parts of the algorithm like galios field
> > > > multiplication and key expansion.
> > > > Can anyone help me out >??

>
> > > > this is the base paper im followingwww.martes-itea.org/.../Hamalainen-Design_and_Implementation_2.pdf

>
> > > > actually i got the cores from their website and was having a problem
> > > > in understanding it .
> > > > They are using 8- bit data paths and i was having problems in
> > > > understanding their architecture and implementation in VHDl.

>
> > > > The following is the code for mixcolumns operation . can somebody help
> > > > me out with it ??
> > > > i was not understanding the GALIOS FIELD multiplication concept.

>
> > > > library ieee;
> > > > use ieee.std_logic_1164.all;

>
> > > > entity mixcolumns is
> > > >   port(
> > > >     clk        : in  std_logic;
> > > >     start_in   : in  std_logic;
> > > >     inverse_in : in  std_logic;         -- '1' = inverse
> > > > transformation

>
> > > >     data_in    : in  std_logic_vector (7 downto 0);  -- input data
> > > >     data0_out  : out std_logic_vector (7 downto 0);  -- output data
> > > >     data1_out  : out std_logic_vector (7 downto 0);  -- output data
> > > >     data2_out  : out std_logic_vector (7 downto 0);  -- output data
> > > >     data3_out  : out std_logic_vector (7 downto 0)   -- output data
> > > >     );
> > > > end mixcolumns;

>
> > > > -- fwd_rtl = forward only
> > > > architecture fwd_rtl of mixcolumns is

>
> > > >   -- GF(2^8) multiplication with constant: x
> > > >   -- reduction polynomial is x^8 + x^4 + x^3 + x + 1
> > > >   function gf256_mul2 (a : std_logic_vector(7 downto 0))
> > > >     return std_logic_vector is
> > > >     variable b : std_logic_vector(7 downto 0);
> > > >   begin
> > > >     b(0) := a(7);
> > > >     b(1) := a(0) xor a(7);
> > > >     b(2) := a(1);
> > > >     b(3) := a(2) xor a(7);
> > > >     b(4) := a(3) xor a(7);
> > > >     b(5) := a(4);
> > > >     b(6) := a(5);
> > > >     b(7) := a(6);
> > > >     return b;
> > > >   end;

>
> > > >   type   accum_array_t is array (0 to 3) of std_logic_vector(7 downto
> > > > 0);
> > > >   signal accum_r : accum_array_t;

>
> > > >   signal prod2, prod3 : std_logic_vector(7 downto 0);
> > > >   signal x           : std_logic_vector(7 downto 0);

>
> > > > begin  -- rtl

>
> > > >   assert (inverse_in /= '1') report "this architecture supports only
> > > > forward operation"
> > > >     severity failure;
> > > >   x <= data_in;

>
> > > >   prod2 <= gf256_mul2(x);
> > > >   prod3 <= prod2 xor x;

>
> > > >   -- forward transform:
> > > >   --
> > > >   -- x0   |02 03 01 01| y0
> > > >   -- x1 = |01 02 03 01|*y1
> > > >   -- x2   |01 01 02 03| y2
> > > >   -- x3   |03 01 01 02| y3

>
> > > >   -- inverse transform
> > > >   -- y0   |0e 0b 0d 09| x0
> > > >   -- y1 = |09 0e 0b 0d|*x1
> > > >   -- y2   |0d 09 0e 0b| x2
> > > >   -- y3   |0b 0d 09 0e| x3

>
> > > >   clocked : process (clk)
> > > >   begin  -- process clocked
> > > >     if rising_edge(clk) then            -- rising clock edge
> > > >       if (start_in = '1') then
> > > >         accum_r(0) <= x;
> > > >         accum_r(1) <= x;
> > > >         accum_r(2) <= prod3;
> > > >         accum_r(3) <= prod2;
> > > >       else
> > > >         accum_r(0) <= x xor accum_r(1);
> > > >         accum_r(1) <= x xor accum_r(2);
> > > >         accum_r(2) <= prod3 xor accum_r(3);
> > > >         accum_r(3) <= prod2 xor accum_r(0);
> > > >       end if;
> > > >     end if;
> > > >   end process clocked;

>
> > > >   data0_out <= accum_r(0);
> > > >   data1_out <= accum_r(1);
> > > >   data2_out <= accum_r(2);
> > > >   data3_out <= accum_r(3);

>
> > > > end fwd_rtl;

>
> > > > ANY HELP WOULD BE APPRECIATED .. thanks

>
> > > Hi,
> > > that's a lot of questions at once.
> > > Galois Field math is a topic for 10th semester math students.
> > > So don't be bothered when it seems complicated to understand.
> > > In some VERY simple words: The galois field is a limited bunch of
> > > numbers, that obeys defined mathematical rules.
> > > This is only possible because he operations can always be seen as
> > > modulo operations in order to keep the number space constant.
> > > When you chop down the field size to tw0 you can work with simple gate
> > > functions (AND and XOR) for multiplication and addition.

>
> > > Mix columns is an ordinary vector/matrix multiplication, where you
> > > multiply a row of the input matrix with a given transformation
> > > matrix.
> > > The result is then written to a column of the result matrix.

>
> > > The code you provided has one strange property.
> > > It takes a single stream of data (data_in) and creates four result
> > > values.
> > > You need to find out in which order the input stram has to provide the
> > > data of the input matrix (and when to apply start_in)
> > > and what to do with the four result values.

>
> > > Maybe you should take a look ath this book:
> > > The Design of Rijndael: AES. TheAdvanced Encryption Standard
> > > Written by the designers of the algorithm.
> > > There you find many examples and calculations that you can compare
> > > with your simulations, in order to understand how the code works that
> > > you have.

>
> > > Have a nice simulation
> > >   Eilert

>
> > Hey Eilert,
> > Thanks a lot for the reply.

>
> > i went through the galois Field multiplication once more from a book
> > on cryptography and got a basic idea as to wat happens in that.

>
> > The reason dat the code only takes one input is because i am
> > implementing Aes algorithm with 8 bit DAta path (data input)
> > And according to the paper that i am using the mix columns multiplier
> > unit takes in the 8 bit data and produces 32 bit output which is then
> > given to a parallel to serial converter.

>
> > If u cud see the base paper which i was referring once then i guess u
> > wud understand it better. Coz i was not able to understand much.

>
> > And i also understood when to apply start_in. it helps us to control
> > operation of mixcolumns for encryption or decryption.
> > when start_in is not one it signifies we want to do the inverse
> > operation i.e. decryption.

>
> Hi,
> you confused start_in with inverse_in.
> Also inverse_in has to be '1' all the time since the code doesn't
> suport the inverse transformation.
> (Look at the assert statement)
>
> In the mentioned paper there is a signal mentioned called "en".
> That's probably called start_in in the provided source now.
>
> It has to be applied "During inputting the first byte of a column
> (bytes 0, 4, 8, and 12 in Fig. 1)"
> as mentioned in the paper.
> It seems like the results have to be taken after every four clock
> cycles.
> So, start in can be also used for the following stage to take over the
> results from the mix column stage.
>
> If you have the full sources of that project, there should be some
> design unit that controlls the datapath.
> Some kind of FSM. By analysing that you probably gain more detailed
> insights how the whole thing works.
>
> Have a nice synthesis
>   Eilert

Hi,
I think when inverse_in is 1 it means the reverse process, i.e. decryption;
this was given as a comment somewhere in the full sources.
As for start_in, I think you are right. But the part about
taking the data byte by byte is not making sense to me: the input that
we feed is 8 bits (i.e. 1 byte), so where do the rest of the bytes come from?
And how can one build the state when the input is only 8 bits?

Here is the link for the full source.
http://www.tkt.cs.tut.fi/research/daci/ra_security_8bit_aes_hw.html

I did not go through the top level. Ill try to go through it and
understand.
If u could also just take a look it would be a big help.
Thanks Eilert
Rejin

Rejin James, Jan 28, 2011
6. ### backhusGuest

On 28 Jan., 05:31, Rejin James <> wrote:
> On Jan 27, 12:29 pm, backhus <> wrote:
>
>
>
> > On 26 Jan., 13:32, Rejin James <> wrote:

>
> > > On Jan 26, 1:10 pm, backhus <> wrote:

>
> > > > On 25 Jan., 06:49, Rejin James <> wrote:

>
> > > > > Hi Friends I am currently doin my university project on the topic
> > > > > Low Power AES algorithm using VHDL

>
> > > > > I was having problems understanding the logic of Mixcolumns operation
> > > > > in GALIOS FIELD and other parts of the algorithm like galios field
> > > > > multiplication and key expansion.
> > > > > Can anyone help me out >??

>
> > > > > this is the base paper im followingwww.martes-itea.org/.../Hamalainen-Design_and_Implementation_2.pdf

>
> > > > > actually i got the cores from their website and was having a problem
> > > > > in understanding it .
> > > > > They are using 8- bit data paths and i was having problems in
> > > > > understanding their architecture and implementation in VHDl.

>
> > > > > The following is the code for mixcolumns operation . can somebody help
> > > > > me out with it ??
> > > > > i was not understanding the GALIOS FIELD multiplication concept.

>
> > > > > library ieee;
> > > > > use ieee.std_logic_1164.all;

>
> > > > > entity mixcolumns is
> > > > >   port(
> > > > >     clk        : in  std_logic;
> > > > >     start_in   : in  std_logic;
> > > > >     inverse_in : in  std_logic;         -- '1' = inverse
> > > > > transformation

>
> > > > >     data_in    : in  std_logic_vector (7 downto 0);  -- input data
> > > > >     data0_out  : out std_logic_vector (7 downto 0);  -- output data
> > > > >     data1_out  : out std_logic_vector (7 downto 0);  -- output data
> > > > >     data2_out  : out std_logic_vector (7 downto 0);  -- output data
> > > > >     data3_out  : out std_logic_vector (7 downto 0)   -- output data
> > > > >     );
> > > > > end mixcolumns;

>
> > > > > -- fwd_rtl = forward only
> > > > > architecture fwd_rtl of mixcolumns is

>
> > > > >   -- GF(2^8) multiplication with constant: x
> > > > >   -- reduction polynomial is x^8 + x^4 + x^3 + x + 1
> > > > >   function gf256_mul2 (a : std_logic_vector(7 downto 0))
> > > > >     return std_logic_vector is
> > > > >     variable b : std_logic_vector(7 downto 0);
> > > > >   begin
> > > > >     b(0) := a(7);
> > > > >     b(1) := a(0) xor a(7);
> > > > >     b(2) := a(1);
> > > > >     b(3) := a(2) xor a(7);
> > > > >     b(4) := a(3) xor a(7);
> > > > >     b(5) := a(4);
> > > > >     b(6) := a(5);
> > > > >     b(7) := a(6);
> > > > >     return b;
> > > > >   end;

>
> > > > >   type   accum_array_t is array (0 to 3) of std_logic_vector(7 downto
> > > > > 0);
> > > > >   signal accum_r : accum_array_t;

>
> > > > >   signal prod2, prod3 : std_logic_vector(7 downto 0);
> > > > >   signal x           : std_logic_vector(7 downto 0);

>
> > > > > begin  -- rtl

>
> > > > >   assert (inverse_in /= '1') report "this architecture supports only
> > > > > forward operation"
> > > > >     severity failure;
> > > > >   x <= data_in;

>
> > > > >   prod2 <= gf256_mul2(x);
> > > > >   prod3 <= prod2 xor x;

>
> > > > >   -- forward transform:
> > > > >   --
> > > > >   -- x0   |02 03 01 01| y0
> > > > >   -- x1 = |01 02 03 01|*y1
> > > > >   -- x2   |01 01 02 03| y2
> > > > >   -- x3   |03 01 01 02| y3

>
> > > > >   -- inverse transform
> > > > >   -- y0   |0e 0b 0d 09| x0
> > > > >   -- y1 = |09 0e 0b 0d|*x1
> > > > >   -- y2   |0d 09 0e 0b| x2
> > > > >   -- y3   |0b 0d 09 0e| x3

>
> > > > >   clocked : process (clk)
> > > > >   begin  -- process clocked
> > > > >     if rising_edge(clk) then            -- rising clock edge
> > > > >       if (start_in = '1') then
> > > > >         accum_r(0) <= x;
> > > > >         accum_r(1) <= x;
> > > > >         accum_r(2) <= prod3;
> > > > >         accum_r(3) <= prod2;
> > > > >       else
> > > > >         accum_r(0) <= x xor accum_r(1);
> > > > >         accum_r(1) <= x xor accum_r(2);
> > > > >         accum_r(2) <= prod3 xor accum_r(3);
> > > > >         accum_r(3) <= prod2 xor accum_r(0);
> > > > >       end if;
> > > > >     end if;
> > > > >   end process clocked;

>
> > > > >   data0_out <= accum_r(0);
> > > > >   data1_out <= accum_r(1);
> > > > >   data2_out <= accum_r(2);
> > > > >   data3_out <= accum_r(3);

>
> > > > > end fwd_rtl;

>
> > > > > ANY HELP WOULD BE APPRECIATED .. thanks

>
> > > > Hi,
> > > > that's a lot of questions at once.
> > > > Galois Field math is a topic for 10th semester math students.
> > > > So don't be bothered when it seems complicated to understand.
> > > > In some VERY simple words: The galois field is a limited bunch of
> > > > numbers, that obeys defined mathematical rules.
> > > > This is only possible because he operations can always be seen as
> > > > modulo operations in order to keep the number space constant.
> > > > When you chop down the field size to tw0 you can work with simple gate
> > > > functions (AND and XOR) for multiplication and addition.

>
> > > > Mix columns is an ordinary vector/matrix multiplication, where you
> > > > multiply a row of the input matrix with a given transformation
> > > > matrix.
> > > > The result is then written to a column of the result matrix.

>
> > > > The code you provided has one strange property.
> > > > It takes a single stream of data (data_in) and creates four result
> > > > values.
> > > > You need to find out in which order the input stram has to provide the
> > > > data of the input matrix (and when to apply start_in)
> > > > and what to do with the four result values.

>
> > > > Maybe you should take a look ath this book:
> > > > The Design of Rijndael: AES. TheAdvanced Encryption Standard
> > > > Written by the designers of the algorithm.
> > > > There you find many examples and calculations that you can compare
> > > > with your simulations, in order to understand how the code works that
> > > > you have.

>
> > > > Have a nice simulation
> > > >   Eilert

>
> > > Hey Eilert,
> > > Thanks a lot for the reply.

>
> > > i went through the galois Field multiplication once more from a book
> > > on cryptography and got a basic idea as to wat happens in that.

>
> > > The reason dat the code only takes one input is because i am
> > > implementing Aes algorithm with 8 bit DAta path (data input)
> > > And according to the paper that i am using the mix columns multiplier
> > > unit takes in the 8 bit data and produces 32 bit output which is then
> > > given to a parallel to serial converter.

>
> > > If u cud see the base paper which i was referring once then i guess u
> > > wud understand it better. Coz i was not able to understand much.

>
> > > And i also understood when to apply start_in. it helps us to control
> > > operation of mixcolumns for encryption or decryption.
> > > when start_in is not one it signifies we want to do the inverse
> > > operation i.e. decryption.

>
> > Hi,
> > you confused start_in with inverse_in.
> > Also inverse_in has to be '1' all the time since the code doesn't
> > suport the inverse transformation.
> > (Look at the assert statement)

>
> > In the mentioned paper there is a signal mentioned called "en".
> > That's probably called start_in in the provided source now.

>
> > It has to be applied "During inputting the first byte of a column
> > (bytes 0, 4, 8, and 12 in Fig. 1)"
> > as mentioned in the paper.
> > It seems like the results have to be taken after every four clock
> > cycles.
> > So, start in can be also used for the following stage to take over the
> > results from the mix column stage.

>
> > If you have the full sources of that project, there should be some
> > design unit that controlls the datapath.
> > Some kind of FSM. By analysing that you probably gain more detailed
> > insights how the whole thing works.

>
> > Have a nice synthesis
> >   Eilert

>
> Hi,
> i think when inverse_in is 1 it means reverse process i.e decryption.
> it was given as comment somewhere in the full sources.
> As for start _in i think u are right. But the things mentioned about
> taking data byte by byte is not making sense to me as the input that
> we feed is 8-bit(i.e 1 byte) so how come rest of the bytes are coming.
> And how can one make The state when input is only 8-bit ?
>
> Here is the link for the full source.http://www.tkt.cs.tut.fi/research/daci/ra_security_8bit_aes_hw.html
>
> Thanks for your help.
> I did not go through the top level. Ill try to go through it and
> understand.
> If u could also just take a look it would be a big help.
> Thanks Eilert
> Rejin

Hi Rejin,
the README file in the sources says it's only an encryption core, and
also the paper mentions only encryption in the tables.
And even the sources say that the inverse algorithm (needed for
decryption) is not implemented.
So no chance for generating the inverse algorithm.

The AES state matrix consists of 16 bytes.
So for each operation, 16 bytes have to be provided.
This implementation works in a "byteserial" manner, that means you
have to provide all the bytes on 16 consecutive clock cycles.
(The same applies to the key matrix)

Look at the main file aes.vhd.
there you find the processes
control_clocked
and
control_comb.

These are a 2-process implementation of the controlling FSM.
The signal sequence_r there counts the state matrix position that's
been worked on.
Furthermore there's a signal round, that counts the rounds of the AES
algorithm.
Depending on the data- and keywidth the input data has to iterate 8 to
14 times through the algorithm.
(and the first or last round is treated specially, depending on whether you
do en- or decryption)

So, by further analysing this part of the code you can learn how to
feed data into the core, and when to read out the results.
Maybe you could start with reconstructing the state diagram from the
vhdl source, so you have a graphical point of orientation.

Also there are three testbenches.
I think you can see how to controll the input data pretty well by
looking at this part of the code from aes_tb_test1.vhd:

-------------------------------------------------------------------------

-- encrypt

-------------------------------------------------------------------------

-- Phase 1: present 16 data bytes and 16 key bytes, one pair per clock
-- cycle. Stimuli are changed right after each falling edge of clk.
-- NOTE(review): "t" is presumably the index of the current test vector;
-- it is declared outside this excerpt -- confirm in aes_tb_test1.vhd.
for i in 0 to 15 loop

wait until falling_edge(clk);

-- forward operation requested (inverse_in is not asserted)
inverse_in <= '0';

-- integer table entries are converted to 8-bit vectors
data_in <= std_logic_vector(to_unsigned(ptext(t)(i), 8));

key_in <= std_logic_vector(to_unsigned(key_fwd(t)(i), 8));

end loop; -- i

-- Phase 2: pulse start_in high for one clock period
-- (from one falling edge of clk to the next).
wait until falling_edge(clk);

start_in <= '1';

wait until falling_edge(clk);

start_in <= '0';

-- compute

-- block until a rising edge of clk at which busy_out is '0'
wait until rising_edge(clk) and busy_out = '0';

-- read output & check result

wait until falling_edge(clk);

-- Phase 3: on each of the next 16 rising edges compare data_out with the
-- expected byte ctext(t)(i); a mismatch is reported with severity error.
for i in 0 to 15 loop

wait until rising_edge(clk);

assert ctext(t)(i) = to_integer(unsigned(data_out))

report "forward failed" severity error;

end loop; -- i

wait until rising_edge(clk);

Have a nice simulation
Eilert

backhus, Jan 28, 2011
7. ### Rejin JamesGuest

On Jan 28, 12:57 pm, backhus <> wrote:
> On 28 Jan., 05:31, Rejin James <> wrote:
>
> > On Jan 27, 12:29 pm, backhus <> wrote:

>
> > > On 26 Jan., 13:32, Rejin James <> wrote:

>
> > > > On Jan 26, 1:10 pm, backhus <> wrote:

>
> > > > > On 25 Jan., 06:49, Rejin James <> wrote:

>
> > > > > > Hi Friends I am currently doin my university project on the topic
> > > > > > Low Power AES algorithm using VHDL

>
> > > > > > I was having problems understanding the logic of Mixcolumns operation
> > > > > > in GALIOS FIELD and other parts of the algorithm like galios field
> > > > > > multiplication and key expansion.
> > > > > > Can anyone help me out >??

>
> > > > > > this is the base paper im followingwww.martes-itea.org/.../Hamalainen-Design_and_Implementation_2.pdf

>
> > > > > > actually i got the cores from their website and was having a problem
> > > > > > in understanding it .
> > > > > > They are using 8- bit data paths and i was having problems in
> > > > > > understanding their architecture and implementation in VHDl.

>
> > > > > > The following is the code for mixcolumns operation . can somebody help
> > > > > > me out with it ??
> > > > > > i was not understanding the GALIOS FIELD multiplication concept..

>
> > > > > > library ieee;
> > > > > > use ieee.std_logic_1164.all;

>
> > > > > > entity mixcolumns is
> > > > > >   port(
> > > > > >     clk        : in  std_logic;
> > > > > >     start_in   : in  std_logic;
> > > > > >     inverse_in : in  std_logic;         -- '1' = inverse
> > > > > > transformation

>
> > > > > >     data_in    : in  std_logic_vector (7 downto 0);  -- input data
> > > > > >     data0_out  : out std_logic_vector (7 downto 0);  -- output data
> > > > > >     data1_out  : out std_logic_vector (7 downto 0);  -- output data
> > > > > >     data2_out  : out std_logic_vector (7 downto 0);  -- output data
> > > > > >     data3_out  : out std_logic_vector (7 downto 0)   -- output data
> > > > > >     );
> > > > > > end mixcolumns;

>
> > > > > > -- fwd_rtl = forward only
> > > > > > architecture fwd_rtl of mixcolumns is

>
> > > > > >   -- GF(2^8) multiplication with constant: x
> > > > > >   -- reduction polynomial is x^8 + x^4 + x^3 + x + 1
> > > > > >   function gf256_mul2 (a : std_logic_vector(7 downto 0))
> > > > > >     return std_logic_vector is
> > > > > >     variable b : std_logic_vector(7 downto 0);
> > > > > >   begin
> > > > > >     b(0) := a(7);
> > > > > >     b(1) := a(0) xor a(7);
> > > > > >     b(2) := a(1);
> > > > > >     b(3) := a(2) xor a(7);
> > > > > >     b(4) := a(3) xor a(7);
> > > > > >     b(5) := a(4);
> > > > > >     b(6) := a(5);
> > > > > >     b(7) := a(6);
> > > > > >     return b;
> > > > > >   end;

>
> > > > > >   type   accum_array_t is array (0 to 3) of std_logic_vector(7 downto
> > > > > > 0);
> > > > > >   signal accum_r : accum_array_t;

>
> > > > > >   signal prod2, prod3 : std_logic_vector(7 downto 0);
> > > > > >   signal x           : std_logic_vector(7 downto 0);

>
> > > > > > begin  -- rtl

>
> > > > > >   assert (inverse_in /= '1') report "this architecture supports only
> > > > > > forward operation"
> > > > > >     severity failure;
> > > > > >   x <= data_in;

>
> > > > > >   prod2 <= gf256_mul2(x);
> > > > > >   prod3 <= prod2 xor x;

>
> > > > > >   -- forward transform:
> > > > > >   --
> > > > > >   -- x0   |02 03 01 01| y0
> > > > > >   -- x1 = |01 02 03 01|*y1
> > > > > >   -- x2   |01 01 02 03| y2
> > > > > >   -- x3   |03 01 01 02| y3

>
> > > > > >   -- inverse transform
> > > > > >   -- y0   |0e 0b 0d 09| x0
> > > > > >   -- y1 = |09 0e 0b 0d|*x1
> > > > > >   -- y2   |0d 09 0e 0b| x2
> > > > > >   -- y3   |0b 0d 09 0e| x3

>
> > > > > >   clocked : process (clk)
> > > > > >   begin  -- process clocked
> > > > > >     if rising_edge(clk) then            -- rising clock edge
> > > > > >       if (start_in = '1') then
> > > > > >         accum_r(0) <= x;
> > > > > >         accum_r(1) <= x;
> > > > > >         accum_r(2) <= prod3;
> > > > > >         accum_r(3) <= prod2;
> > > > > >       else
> > > > > >         accum_r(0) <= x xor accum_r(1);
> > > > > >         accum_r(1) <= x xor accum_r(2);
> > > > > >         accum_r(2) <= prod3 xor accum_r(3);
> > > > > >         accum_r(3) <= prod2 xor accum_r(0);
> > > > > >       end if;
> > > > > >     end if;
> > > > > >   end process clocked;

>
> > > > > >   data0_out <= accum_r(0);
> > > > > >   data1_out <= accum_r(1);
> > > > > >   data2_out <= accum_r(2);
> > > > > >   data3_out <= accum_r(3);

>
> > > > > > end fwd_rtl;

>
> > > > > > ANY HELP WOULD BE APPRECIATED .. thanks

>
> > > > > Hi,
> > > > > that's a lot of questions at once.
> > > > > Galois Field math is a topic for 10th semester math students.
> > > > > So don't be bothered when it seems complicated to understand.
> > > > > In some VERY simple words: The galois field is a limited bunch of
> > > > > numbers, that obeys defined mathematical rules.
> > > > > This is only possible because he operations can always be seen as
> > > > > modulo operations in order to keep the number space constant.
> > > > > When you chop down the field size to tw0 you can work with simple gate
> > > > > functions (AND and XOR) for multiplication and addition.

>
> > > > > Mix columns is an ordinary vector/matrix multiplication, where you
> > > > > multiply a row of the input matrix with a given transformation
> > > > > matrix.
> > > > > The result is then written to a column of the result matrix.

>
> > > > > The code you provided has one strange property.
> > > > > It takes a single stream of data (data_in) and creates four result
> > > > > values.
> > > > > You need to find out in which order the input stram has to provide the
> > > > > data of the input matrix (and when to apply start_in)
> > > > > and what to do with the four result values.

>
> > > > > Maybe you should take a look ath this book:
> > > > > The Design of Rijndael: AES. TheAdvanced Encryption Standard
> > > > > Written by the designers of the algorithm.
> > > > > There you find many examples and calculations that you can compare
> > > > > with your simulations, in order to understand how the code works that
> > > > > you have.

>
> > > > > Have a nice simulation
> > > > >   Eilert

>
> > > > Hey Eilert,
> > > > Thanks a lot for the reply.

>
> > > > i went through the galois Field multiplication once more from a book
> > > > on cryptography and got a basic idea as to wat happens in that.

>
> > > > The reason dat the code only takes one input is because i am
> > > > implementing Aes algorithm with 8 bit DAta path (data input)
> > > > And according to the paper that i am using the mix columns multiplier
> > > > unit takes in the 8 bit data and produces 32 bit output which is then
> > > > given to a parallel to serial converter.

>
> > > > If u cud see the base paper which i was referring once then i guess u
> > > > wud understand it better. Coz i was not able to understand much.

>
> > > > And i also understood when to apply start_in. it helps us to control
> > > > operation of mixcolumns for encryption or decryption.
> > > > when start_in is not one it signifies we want to do the inverse
> > > > operation i.e. decryption.

>
> > > Hi,
> > > you confused start_in with inverse_in.
> > > Also inverse_in has to be '1' all the time since the code doesn't
> > > suport the inverse transformation.
> > > (Look at the assert statement)

>
> > > In the mentioned paper there is a signal mentioned called "en".
> > > That's probably called start_in in the provided source now.

>
> > > It has to be applied "During inputting the first byte of a column
> > > (bytes 0, 4, 8, and 12 in Fig. 1)"
> > > as mentioned in the paper.
> > > It seems like the results have to be taken after every four clock
> > > cycles.
> > > So, start in can be also used for the following stage to take over the
> > > results from the mix column stage.

>
> > > If you have the full sources of that project, there should be some
> > > design unit that controlls the datapath.
> > > Some kind of FSM. By analysing that you probably gain more detailed
> > > insights how the whole thing works.

>
> > > Have a nice synthesis
> > >   Eilert

>
> > Hi,
> > i think when inverse_in is 1 it means reverse process i.e decryption.
> > it was given as comment somewhere in the full sources.
> > As for start _in i think u are right. But the things mentioned about
> > taking data byte by byte is not making sense to me as the input that
> > we feed is 8-bit(i.e 1 byte) so how come rest of the bytes are coming.
> > And how can one make The state when input is only 8-bit ?

>
> > Here is the link for the full source.http://www.tkt.cs.tut.fi/research/daci/ra_security_8bit_aes_hw.html

>
> > Thanks for your help.
> > I did not go through the top level. Ill try to go through it and
> > understand.
> > If u could also just take a look it would be a big help.
> > Thanks Eilert
> > Rejin

>
> Hi Rejin,
> the README file in the sources sais it's only an encryption core, and
> also the paper mentiones only encryption in the tables.
> And even the sources say that the inverse algorithm (needed for
> decryption) is not implemented.
> So no chance for generating the inverse algorithm.
>
> The AES state matrix consists of 16 bytes.
> So for each operation, 16 bytes have to be provided.
> This implementation works in a "byteserial" manner, that mewans you
> have to provide all the bytes on 16 consecutive clock cycles.
> (The same applies to the key matrix)
>
> Look at the main file aes.vhd.
> there you find the processes
>   control_clocked
> and
>   control_comb.
>
> These are a 2-process implementation of the controlling FSM.
> The signal sequence_r there counts the state matrix position that's
> been worked on.
> Furthermore there's a signal round, that counts the rounds of the AES
> algorithm.
> Depending on the data- and keywidth the input data has to iterate 8 to
> 14 times through the algorithm.
> (and the fist or last round is treated special, depending wether you
> do en- or decryption)
>
> So, by further analysing this part of the code you can learn how to
> feed data into the core, and when to read out the results.
> Maybe you could start with reconstructing the state diagramm from the
> vhdl source, so you have a graphical point of orientation.
>
> Also there are three testbenches.
> I think you can see how to controll the input data pretty well by
> looking at this part of the code from aes_tb_test1.vhd:
>
> -------------------------------------------------------------------------
>
>       -- encrypt
>
> -------------------------------------------------------------------------
>
>
>       for i in 0 to 15 loop
>
>         wait until falling_edge(clk);
>
>         inverse_in <= '0';
>
>
>         data_in <= std_logic_vector(to_unsigned(ptext(t)(i), 8));
>
>         key_in  <= std_logic_vector(to_unsigned(key_fwd(t)(i), 8));
>
>       end loop;  -- i
>
>       wait until falling_edge(clk);
>
>
>       start_in <= '1';
>
>       wait until falling_edge(clk);
>
>       start_in <= '0';
>
>       -- compute
>
>       wait until rising_edge(clk) and busy_out = '0';
>
>       -- read output & check result
>
>       wait until falling_edge(clk);
>
>
>       for i in 0 to 15 loop
>
>         wait until rising_edge(clk);
>
>         assert ctext(t)(i) = to_integer(unsigned(data_out))
>
>           report "forward failed" severity error;
>
>       end loop;  -- i
>
>
>       wait until rising_edge(clk);
>
> Have a nice simulation
>   Eilert

Hi eilert..
so sorry for replying so late..
had taken a mini vacation ..

Ya i got the inputting part..
im not sure whether ill be able to reconstruct the FSM ., ill try..
Currently i was doing the key expansion unit and was facing problems
understanding this part of the core..
im gettin confused as to wen and during wich clock pulse we have 2 send
key_in to the key expansion unit and during which clock pulse we send
data to sbox and retrieve from it..
a further thing i noticed is that according to the table given for key
expansion the data is supposed 2 shift and replaced by new data, but
somehow i dint notice it actually happening,,
if u cud just explain the code a lil...

-- fwd_rtl: forward-only architecture of the key expansion unit.
--
-- Structure visible in this architecture:
--   * a 16 x 8-bit shift register (shift_r) that is loaded from key_in and
--     shifted one byte per clock while shift_in or load_in is '1';
--   * a combinational output byte (key_out_int) that is shift_r(0),
--     optionally XORed with the S-box result and the round constant;
--   * a per-position control decoder (muxcontrol) driven by seq_in;
--   * a round-constant lookup (rcon) driven by round_in.
-- The inverse operation is not implemented; asserting inverse_in only
-- raises a warning.
architecture fwd_rtl of keyexpansion is

  -- Control word (0..4) for each of the 16 positions selected by seq_in.
  -- The control word is decoded into d0..d3 by process "muxcontrol".
  type     control_seq_t is array (0 to 15) of integer range 0 to 4;
  constant control_seq : control_seq_t := (1, 2, 2, 3, 0, 0, 0, 0,
                                           0, 0, 0, 0, 4, 4, 4, 4);
  signal   sequence    : integer range 0 to 15;

  type shiftreg_t is array (0 to 15) of std_logic_vector(7 downto 0);

  signal shift_r    : shiftreg_t;
  signal rcon_value : std_logic_vector(7 downto 0);

  -- storage for key byte required for RotWord() -operation.
  signal rotword_r : std_logic_vector(7 downto 0);

  signal d0, d1, d2 : std_logic;        -- mux control signals
  signal d3         : std_logic;        -- '1' = signal "rcon_value" is zeroed

  -- key byte output (also fed back to shift register)
  signal key_out_int : std_logic_vector(7 downto 0);

  -- Shift register command: bit 1 = load_in, bit 0 = shift_in.
  --   "00" stall, "01" shift, "10" load, "11" load and shift
  signal ext_control : std_logic_vector(1 downto 0);

begin  -- architecture rtl

  key_out    <= key_out_int;
  key_d4_out <= shift_r(12);

  --mux0: byte sent to the S-box
  data_to_sbox_out <= shift_r(13) when d0 = '1' else
                      rotword_r;

  --mux1 and rcon xor: d1 enables the S-box term, d3 = '0' additionally
  --enables the round constant; otherwise the oldest byte passes unchanged.
  key_out_int <= data_from_sbox_in xor rcon_value xor shift_r(0)
                 when (d1 = '1' and d3 = '0') else
                 data_from_sbox_in xor shift_r(0)
                 when (d1 = '1' and d3 = '1') else
                 shift_r(0);

  -- Both bits of ext_control must be driven: with bit 1 left undriven it
  -- stays 'U' and the case statement in "shifter" can only ever take its
  -- "others" branch.
  ext_control(0) <= shift_in;
  ext_control(1) <= load_in;

  sequence <= to_integer(unsigned(seq_in));

  -- inverse is not implemented in this architecture
  assert (inverse_in /= '1') report "inverse operation not supported"
    severity warning;

  ---------------------------------------------+
  --             |          control            |
  --             |  0  |  1  |  2  |  3  |  4  |
  ---------------+-----+-----+-----+-----+-----|
  -- d0          |  0  |  1  |  1  |  0  |  0  |
  -- d1          |  0  |  1  |  1  |  1  |  0  |
  -- d2          |  0  |  0  |  0  |  0  |  1  |
  -- d3          |  1  |  0  |  1  |  1  |  1  |
  ---------------------------------------------+
  -- Decodes the control word of the current position into d0..d3
  -- according to the table above.
  muxcontrol : process (sequence) is
  begin  -- process muxcontrol
    case control_seq(sequence) is
      when 0 =>
        d0 <= '0';
        d1 <= '0';
        d2 <= '0';
        d3 <= '1';

      when 1 =>
        d0 <= '1';
        d1 <= '1';
        d2 <= '0';
        d3 <= '0';

      when 2 =>
        d0 <= '1';
        d1 <= '1';
        d2 <= '0';
        d3 <= '1';

      when 3 =>
        d0 <= '0';
        d1 <= '1';
        d2 <= '0';
        d3 <= '1';

      when others =>                    -- when 4 =>
        d0 <= '0';
        d1 <= '0';
        d2 <= '1';
        d3 <= '1';

    end case;
  end process muxcontrol;

  --shift register
  -- Registers 15 and 3 have dedicated inputs chosen by ext_control; all
  -- other registers take the value of their upper neighbour whenever
  -- shift_in or load_in is '1'.
  shifter : process (clk) is
  begin
    if rising_edge(clk) then            -- rising clock edge
      case ext_control is
        when "00" =>                    -- stall
          null;

        when "01" =>                    -- shift
          -- feed the generated key byte back into the register; unless
          -- d2 = '1', it is also XORed into the byte entering position 3
          shift_r(15) <= key_out_int;
          if (d2 = '0') then
            shift_r(3) <= key_out_int xor shift_r(4);
          else
            shift_r(3) <= shift_r(4);
          end if;

        -- NOTE(review): this branch label was missing in the posted text,
        -- which left its two assignments overriding the "01" branch above.
        -- Reconstructed as "10" (load_in only) -- confirm against the
        -- upstream keyexpansion.vhd.
        when "10" =>                    -- load
          shift_r(15) <= key_in;
          shift_r(3)  <= shift_r(4);

        -- load and shift (the original comment continued "(used during"
        -- and is truncated in the posted text)
        when others =>
          shift_r(15) <= key_in;
          if (d2 = '0') then
            shift_r(3) <= key_out_int xor shift_r(4);
          else
            shift_r(3) <= shift_r(4);
          end if;

      end case;

      if (shift_in = '1' or load_in = '1') then
        for i in 4 to 14 loop
          shift_r(i) <= shift_r(i+1);
        end loop;  -- i

        for i in 0 to 2 loop
          shift_r(i) <= shift_r(i+1);
        end loop;  -- i

      end if;

      -- capture the byte at position 12 while position 0 is selected,
      -- for later use by mux0
      if (sequence = 0) then
        rotword_r <= shift_r(12);
      end if;

    end if;
  end process shifter;

  -- round constant "calculation"
  -- Table lookup on round_in ("0000" .. "1001"); any other value yields
  -- don't-care.
  rcon : process (round_in) is
  begin  -- process rcon
    case round_in is
      when "0000" =>
        rcon_value <= std_logic_vector(to_unsigned(16#01#, 8));
      when "0001" =>
        rcon_value <= std_logic_vector(to_unsigned(16#02#, 8));
      when "0010" =>
        rcon_value <= std_logic_vector(to_unsigned(16#04#, 8));
      when "0011" =>
        rcon_value <= std_logic_vector(to_unsigned(16#08#, 8));
      when "0100" =>
        rcon_value <= std_logic_vector(to_unsigned(16#10#, 8));
      when "0101" =>
        rcon_value <= std_logic_vector(to_unsigned(16#20#, 8));
      when "0110" =>
        rcon_value <= std_logic_vector(to_unsigned(16#40#, 8));
      when "0111" =>
        rcon_value <= std_logic_vector(to_unsigned(16#80#, 8));
      when "1000" =>
        rcon_value <= std_logic_vector(to_unsigned(16#1b#, 8));
      when "1001" =>
        rcon_value <= std_logic_vector(to_unsigned(16#36#, 8));
      when others =>
        rcon_value <= (others => '-');
    end case;
  end process rcon;

end architecture fwd_rtl;

Rejin James, Feb 15, 2011
8. ### Rejin JamesGuest

On Feb 15, 10:31 pm, Rejin James <> wrote:
> On Jan 28, 12:57 pm, backhus <> wrote:
>
> > On 28 Jan., 05:31, Rejin James <> wrote:

>
> > > On Jan 27, 12:29 pm, backhus <> wrote:

>
> > > > On 26 Jan., 13:32, Rejin James <> wrote:

>
> > > > > On Jan 26, 1:10 pm, backhus <> wrote:

>
> > > > > > On 25 Jan., 06:49, Rejin James <> wrote:

>
> > > > > > > Hi Friends I am currently doin my university project on the topic
> > > > > > > Low Power AES algorithm using VHDL

>
> > > > > > > I was having problems understanding the logic of Mixcolumns operation
> > > > > > > in GALIOS FIELD and other parts of the algorithm like galios field
> > > > > > > multiplication and key expansion.
> > > > > > > Can anyone help me out >??

>
> > > > > > > this is the base paper im followingwww.martes-itea.org/.../Hamalainen-Design_and_Implementation_2.pdf

>
> > > > > > > actually i got the cores from their website and was having a problem
> > > > > > > in understanding it .
> > > > > > > They are using 8- bit data paths and i was having problems in
> > > > > > > understanding their architecture and implementation in VHDl.

>
> > > > > > > The following is the code for mixcolumns operation . can somebody help
> > > > > > > me out with it ??
> > > > > > > i was not understanding the GALIOS FIELD multiplication concept.

>
> > > > > > > library ieee;
> > > > > > > use ieee.std_logic_1164.all;

>
> > > > > > > entity mixcolumns is
> > > > > > >   port(
> > > > > > >     clk        : in  std_logic;
> > > > > > >     start_in   : in  std_logic;
> > > > > > >     inverse_in : in  std_logic;         -- '1' = inverse
> > > > > > > transformation

>
> > > > > > >     data_in    : in  std_logic_vector (7 downto 0);  -- input data
> > > > > > >     data0_out  : out std_logic_vector (7 downto 0);  -- output data
> > > > > > >     data1_out  : out std_logic_vector (7 downto 0);  -- output data
> > > > > > >     data2_out  : out std_logic_vector (7 downto 0);  -- output data
> > > > > > >     data3_out  : out std_logic_vector (7 downto 0)   -- output data
> > > > > > >     );
> > > > > > > end mixcolumns;

>
> > > > > > > -- fwd_rtl = forward only
> > > > > > > architecture fwd_rtl of mixcolumns is

>
> > > > > > >   -- GF(2^8) multiplication with constant: x
> > > > > > >   -- reduction polynomial is x^8 + x^4 + x^3 + x + 1
> > > > > > >   function gf256_mul2 (a : std_logic_vector(7 downto 0))
> > > > > > >     return std_logic_vector is
> > > > > > >     variable b : std_logic_vector(7 downto 0);
> > > > > > >   begin
> > > > > > >     b(0) := a(7);
> > > > > > >     b(1) := a(0) xor a(7);
> > > > > > >     b(2) := a(1);
> > > > > > >     b(3) := a(2) xor a(7);
> > > > > > >     b(4) := a(3) xor a(7);
> > > > > > >     b(5) := a(4);
> > > > > > >     b(6) := a(5);
> > > > > > >     b(7) := a(6);
> > > > > > >     return b;
> > > > > > >   end;

>
> > > > > > >   type   accum_array_t is array (0 to 3) of std_logic_vector(7 downto
> > > > > > > 0);
> > > > > > >   signal accum_r : accum_array_t;

>
> > > > > > >   signal prod2, prod3 : std_logic_vector(7 downto 0);
> > > > > > >   signal x           : std_logic_vector(7 downto 0);

>
> > > > > > > begin  -- rtl

>
> > > > > > >   assert (inverse_in /= '1') report "this architecture supports only
> > > > > > > forward operation"
> > > > > > >     severity failure;
> > > > > > >   x <= data_in;

>
> > > > > > >   prod2 <= gf256_mul2(x);
> > > > > > >   prod3 <= prod2 xor x;

>
> > > > > > >   -- forward transform:
> > > > > > >   --
> > > > > > >   -- x0   |02 03 01 01| y0
> > > > > > >   -- x1 = |01 02 03 01|*y1
> > > > > > >   -- x2   |01 01 02 03| y2
> > > > > > >   -- x3   |03 01 01 02| y3

>
> > > > > > >   -- inverse transform
> > > > > > >   -- y0   |0e 0b 0d 09| x0
> > > > > > >   -- y1 = |09 0e 0b 0d|*x1
> > > > > > >   -- y2   |0d 09 0e 0b| x2
> > > > > > >   -- y3   |0b 0d 09 0e| x3

>
> > > > > > >   clocked : process (clk)
> > > > > > >   begin  -- process clocked
> > > > > > >     if rising_edge(clk) then            -- rising clock edge
> > > > > > >       if (start_in = '1') then
> > > > > > >         accum_r(0) <= x;
> > > > > > >         accum_r(1) <= x;
> > > > > > >         accum_r(2) <= prod3;
> > > > > > >         accum_r(3) <= prod2;
> > > > > > >       else
> > > > > > >         accum_r(0) <= x xor accum_r(1);
> > > > > > >         accum_r(1) <= x xor accum_r(2);
> > > > > > >         accum_r(2) <= prod3 xor accum_r(3);
> > > > > > >         accum_r(3) <= prod2 xor accum_r(0);
> > > > > > >       end if;
> > > > > > >     end if;
> > > > > > >   end process clocked;

>
> > > > > > >   data0_out <= accum_r(0);
> > > > > > >   data1_out <= accum_r(1);
> > > > > > >   data2_out <= accum_r(2);
> > > > > > >   data3_out <= accum_r(3);

>
> > > > > > > end fwd_rtl;

>
> > > > > > > ANY HELP WOULD BE APPRECIATED .. thanks

>
> > > > > > Hi,
> > > > > > that's a lot of questions at once.
> > > > > > Galois Field math is a topic for 10th semester math students.
> > > > > > So don't be bothered when it seems complicated to understand.
> > > > > > In some VERY simple words: The galois field is a limited bunch of
> > > > > > numbers, that obeys defined mathematical rules.
> > > > > > This is only possible because he operations can always be seen as
> > > > > > modulo operations in order to keep the number space constant.
> > > > > > When you chop down the field size to tw0 you can work with simple gate
> > > > > > functions (AND and XOR) for multiplication and addition.

>
> > > > > > Mix columns is an ordinary vector/matrix multiplication, where you
> > > > > > multiply a row of the input matrix with a given transformation
> > > > > > matrix.
> > > > > > The result is then written to a column of the result matrix.

>
> > > > > > The code you provided has one strange property.
> > > > > > It takes a single stream of data (data_in) and creates four result
> > > > > > values.
> > > > > > You need to find out in which order the input stram has to provide the
> > > > > > data of the input matrix (and when to apply start_in)
> > > > > > and what to do with the four result values.

>
> > > > > > Maybe you should take a look ath this book:
> > > > > > The Design of Rijndael: AES. TheAdvanced Encryption Standard
> > > > > > Written by the designers of the algorithm.
> > > > > > There you find many examples and calculations that you can compare
> > > > > > with your simulations, in order to understand how the code works that
> > > > > > you have.

>
> > > > > > Have a nice simulation
> > > > > >   Eilert

>
> > > > > Hey Eilert,
> > > > > Thanks a lot for the reply.

>
> > > > > i went through the galois Field multiplication once more from a book
> > > > > on cryptography and got a basic idea as to wat happens in that.

>
> > > > > The reason dat the code only takes one input is because i am
> > > > > implementing Aes algorithm with 8 bit DAta path (data input)
> > > > > And according to the paper that i am using the mix columns multiplier
> > > > > unit takes in the 8 bit data and produces 32 bit output which is then
> > > > > given to a parallel to serial converter.

>
> > > > > If u cud see the base paper which i was referring once then i guess u
> > > > > wud understand it better. Coz i was not able to understand much.

>
> > > > > And i also understood when to apply start_in. it helps us to control
> > > > > operation of mixcolumns for encryption or decryption.
> > > > > when start_in is not one it signifies we want to do the inverse
> > > > > operation i.e. decryption.

>
> > > > Hi,
> > > > you confused start_in with inverse_in.
> > > > Also inverse_in has to be '1' all the time since the code doesn't
> > > > suport the inverse transformation.
> > > > (Look at the assert statement)

>
> > > > In the mentioned paper there is a signal mentioned called "en".
> > > > That's probably called start_in in the provided source now.

>
> > > > It has to be applied "During inputting the first byte of a column
> > > > (bytes 0, 4, 8, and 12 in Fig. 1)"
> > > > as mentioned in the paper.
> > > > It seems like the results have to be taken after every four clock
> > > > cycles.
> > > > So, start in can be also used for the following stage to take over the
> > > > results from the mix column stage.

>
> > > > If you have the full sources of that project, there should be some
> > > > design unit that controls the datapath.
> > > > Some kind of FSM. By analysing that you probably gain more detailed
> > > > insights how the whole thing works.

>
> > > > Have a nice synthesis
> > > >   Eilert

>
> > > Hi,
> > > i think when inverse_in is 1 it means reverse process i.e decryption.
> > > it was given as comment somewhere in the full sources.
> > > As for start _in i think u are right. But the things mentioned about
> > > taking data byte by byte is not making sense to me as the input that
> > > we feed is 8-bit(i.e 1 byte) so how come rest of the bytes are coming..
> > > And how can one make The state when input is only 8-bit ?

>
> > > Here is the link for the full source: http://www.tkt.cs.tut.fi/research/daci/ra_security_8bit_aes_hw.html

>
> > > Thanks for your help.
> > > I did not go through the top level. Ill try to go through it and
> > > understand.
> > > If u could also just take a look it would be a big help.
> > > Thanks Eilert
> > > Rejin

>
> > Hi Rejin,
> > the README file in the sources says it's only an encryption core, and
> > also the paper mentions only encryption in the tables.
> > And even the sources say that the inverse algorithm (needed for
> > decryption) is not implemented.
> > So no chance for generating the inverse algorithm.

>
> > The AES state matrix consists of 16 bytes.
> > So for each operation, 16 bytes have to be provided.
> > This implementation works in a "byteserial" manner, that means you
> > have to provide all the bytes on 16 consecutive clock cycles.
> > (The same applies to the key matrix)

>
> > Look at the main file aes.vhd.
> > there you find the processes
> >   control_clocked
> > and
> >   control_comb.

>
> > These are a 2-process implementation of the controlling FSM.
> > The signal sequence_r there counts the state matrix position that's
> > been worked on.
> > Furthermore there's a signal round, that counts the rounds of the AES
> > algorithm.
> > Depending on the data- and keywidth the input data has to iterate 8 to
> > 14 times through the algorithm.
> > (and the first or last round is treated special, depending on whether you
> > do en- or decryption)

>
> > So, by further analysing this part of the code you can learn how to
> > feed data into the core, and when to read out the results.
> > Maybe you could start with reconstructing the state diagram from the
> > vhdl source, so you have a graphical point of orientation.

>
> > Also there are three testbenches.
> > I think you can see how to control the input data pretty well by
> > looking at this part of the code from aes_tb_test1.vhd:

>
> > -------------------------------------------------------------------------

>
> >       -- encrypt

>
> > -------------------------------------------------------------------------

>

>
> >       for i in 0 to 15 loop

>
> >         wait until falling_edge(clk);

>
> >         inverse_in <= '0';

>

>
> >         data_in <= std_logic_vector(to_unsigned(ptext(t)(i), 8));

>
> >         key_in  <= std_logic_vector(to_unsigned(key_fwd(t)(i), 8));

>
> >       end loop;  -- i

>
> >       wait until falling_edge(clk);

>

>
> >       start_in <= '1';

>
> >       wait until falling_edge(clk);

>
> >       start_in <= '0';

>
> >       -- compute

>
> >       wait until rising_edge(clk) and busy_out = '0';

>
> >       -- read output & check result

>
> >       wait until falling_edge(clk);

>

>
> >       for i in 0 to 15 loop

>
> >         wait until rising_edge(clk);

>
> >         assert ctext(t)(i) = to_integer(unsigned(data_out))

>
> >           report "forward failed" severity error;

>
> >       end loop;  -- i

>

>
> >       wait until rising_edge(clk);

>
> > Have a nice simulation
> >   Eilert

>
> Hi eilert..
> so sorry for replying so late..
> had taken a mini vacation ..
>
> Ya i got the inputting part..
> im not sure whether ill be able to reconstruct the FSM ., ill try..
> Currently i was doing the key expansion unit and was facing problems
> understanding this part of the core..
> im gettin confused as to when and during which clock pulse we have 2 send
> key_in to the key expansion unit and during which clock pulse we send
> data to sbox and retrieve from it..
> a further thing i noticed is that according to the table given for key
> expansion the data is supposed 2 shift and replaced by new data, but
> somehow i dint notice it actually happening,,
> if u cud just explain the code a lil...
>
> architecture fwd_rtl of keyexpansion is
>
>   type control_seq_t is array (0 to 15) of integer range 0 to 4;
>   constant control_seq : control_seq_t := (1, 2, 2, 3, 0, 0, 0, 0,
>                                            0, 0, 0, 0, 4, 4, 4, 4);
>   signal sequence : integer range 0 to 15;
>
>   type shiftreg_t is array (0 to 15) of std_logic_vector(7 downto 0);
>
>   signal shift_r    : shiftreg_t;
>   signal rcon_value : std_logic_vector(7 downto 0);
>
>   -- storage for key byte required for RotWord() -operation.
>   signal rotword_r : std_logic_vector(7 downto 0);
>
>   signal d0, d1, d2 : std_logic;        -- mux control signals
>   signal d3         : std_logic;        -- '1' = signal "rcon_value"
> is zeroed
>
>   -- key byte output (also fed back to shift register)
>   signal key_out_int : std_logic_vector(7 downto 0);
>
>   signal ext_control : std_logic_vector(1 downto 0);
>
> begin  -- architecture rtl
>
>   key_out    <= key_out_int;
>   key_d4_out <= shift_r(12);
>
>   --mux0
>   data_to_sbox_out <= shift_r(13) when d0 = '1' else
>                       rotword_r;
>   --mux1 and rcon xor
>   key_out_int <= data_from_sbox_in xor rcon_value xor shift_r(0)
>                  when (d1 = '1' and d3 = '0') else
>                  data_from_sbox_in xor shift_r(0)
>                  when (d1 = '1' and d3 = '1') else
>                  shift_r(0);
>
>   ext_control(0) <= shift_in;
>
>   sequence <= to_integer(unsigned(seq_in));
>
>   -- inverse is not implemented in this architecture
>   assert (inverse_in /= '1') report "inverse operation not supported"
> severity warning;
>
>   ---------------------------------------------+
>   --             |          control            |
>   --             |  0  |  1  |  2  |  3  |  4  |
>   ---------------+-----+-----+-----+-----+-----|
>   --    d0       |  0  |  1  |  1  |  0  |  0  |
>   --    d1       |  0  |  1  |  1  |  1  |  0  |
>   --    d2       |  0  |  0  |  0  |  0  |  1  |
>   --    d3       |  1  |  0  |  1  |  1  |  1  |
>   ---------------------------------------------+
>   muxcontrol : process (sequence) is
>   begin  -- process muxcontrol
>     case control_seq(sequence) is
>       when 0 =>
>         d0 <= '0';
>         d1 <= '0';
>         d2 <= '0';
>         d3 <= '1';
>
>       when 1 =>
>         d0 <= '1';
>         d1 <= '1';
>         d2 <= '0';
>         d3 <= '0';
>
>       when 2 =>
>         d0 <= '1';
>         d1 <= '1';
>         d2 <= '0';
>         d3 <= '1';
>
>       when 3 =>
>         d0 <= '0';
>         d1 <= '1';
>         d2 <= '0';
>         d3 <= '1';
>
>       when others =>                    -- when 4 =>
>         d0 <= '0';
>         d1 <= '0';
>         d2 <= '1';
>         d3 <= '1';
>
>     end case;
>   end process muxcontrol;
>
>   --shift register
>   shifter : process (clk) is
>   begin
>     if rising_edge(clk) then            -- rising clock edge
>       case ext_control is
>         when "00" =>                    -- stall
>
>         when "01" =>                    -- shift
>           shift_r(15) <= key_out_int;
>           if (d2 = '0') then
>             shift_r(3) <= key_out_int xor shift_r(4);
>           else
>             shift_r(3) <= shift_r(4);
>           end if;
>
>         when "10" =>                    -- load
>           shift_r(15) <= key_in;
>           shift_r(3)  <= shift_r(4);
>
>         when others =>                  -- load and shift, (used
> during
>           shift_r(15) <= key_in;
>           if (d2 = '0') then
>             shift_r(3) <= key_out_int xor shift_r(4);
>           else
>             shift_r(3) <= shift_r(4);
>           end if;
>
>       end case;
>
>       if (shift_in = '1' or load_in = '1') then
>         for i in 4 to 14 loop
>           shift_r(i) <= shift_r(i+1);
>         end loop;  -- i
>
>         for i in 0 to 2 loop
>           shift_r(i) <= shift_r(i+1);
>         end loop;  -- i
>
>       end if;
>
>       if (sequence = 0) then
>         rotword_r <= shift_r(12);
>       end if;
>
>     end if;
>   end process shifter;
>
> -- round constant "calculation"
>   rcon : process (round_in) is
>   begin  -- process rcon
>     case round_in is
>       when "0000" =>
>         rcon_value <= std_logic_vector(to_unsigned(16#01#, 8));
>       when "0001" =>
>         rcon_value <= std_logic_vector(to_unsigned(16#02#, 8));
>       when "0010" =>
>         rcon_value <= std_logic_vector(to_unsigned(16#04#, 8));
>       when "0011" =>
>         rcon_value <= std_logic_vector(to_unsigned(16#08#, 8));
>       when "0100" =>
>         rcon_value <= std_logic_vector(to_unsigned(16#10#, 8));
>       when "0101" =>
>         rcon_value <= std_logic_vector(to_unsigned(16#20#, 8));
>       when "0110" =>
>         rcon_value <= std_logic_vector(to_unsigned(16#40#, 8));
>       when "0111" =>
>         rcon_value <= std_logic_vector(to_unsigned(16#80#, 8));
>       when "1000" =>
>         rcon_value <= std_logic_vector(to_unsigned(16#1b#, 8));
>       when "1001" =>
>         rcon_value <= std_logic_vector(to_unsigned(16#36#, 8));
>       when others =>
>         rcon_value <= (others => '-');
>     end case;
>   end process rcon;
>
> end architecture fwd_rtl;

Hi
I figured out the operation of the mux controls but im having trouble
understanding the shift register process in the key expansion unit.