utf8 to utf16

Discussion in 'VHDL' started by jmgeu, Mar 9, 2007.

  1. jmgeu

    jmgeu Guest

    I have written a UTF-8 to UTF-16 converter in VHDL.

    It appears to work with the GHDL simulator.

    Would this be of any interest to the open-source community or OpenCores?

    The code follows.

    -- test component to convert utf-8 to utf-16
    -- simulation with ghdl.
    -- author: jmg
    -- date: 2006.03.01

    -- This works with two data buses: an 8-bit UTF-8 input and a 16-bit UTF-16 output.

    library IEEE;
    use IEEE.std_logic_1164.all;


    entity utf8ToUtf16_test is
    end entity;

    -- Self-checking testbench for the utf8ToUtf16 converter.
    --
    -- A single process drives the clock by hand and feeds one UTF-8 byte per
    -- clock cycle from a constant pattern table.  After each rising edge it
    -- checks the 'done' flag against the expected value, checks that 'err' is
    -- low, and, on the last byte of every sequence, compares the 'utf16'
    -- output with the expected 16-bit word.
    architecture jmg of utf8ToUtf16_test is

        component utf8ToUtf16 is
            port (
                clock : in  std_logic;
                utf8  : in  std_logic_vector(7 downto 0);
                utf16 : out std_logic_vector(15 downto 0);
                done  : out std_logic;
                err   : out std_logic
            );
        end component;

        signal clock : std_logic;
        signal utf8  : std_logic_vector(7 downto 0);
        signal utf16 : std_logic_vector(15 downto 0);
        signal done  : std_logic;
        signal err   : std_logic;

    begin

        -- Device under test.
        utf8ToUtf16_inst : utf8ToUtf16
            port map (
                clock => clock,
                utf8  => utf8,
                utf16 => utf16,
                done  => done,
                err   => err
            );

        p_main : process
            type pattern_type is record
                -- Stimulus: the UTF-8 byte presented to the converter.
                utf8  : std_logic_vector(7 downto 0);
                -- Expected value of 'done' after this byte has been clocked in
                -- ('1' on the last byte of a sequence).
                last  : std_logic;
                -- Expected 'utf16' word; only compared when last = '1'.
                utf16 : std_logic_vector(15 downto 0);
            end record;

            type pattern_array is array (natural range <>) of pattern_type;

            -- The patterns to apply, one byte per clock cycle.
            constant patterns : pattern_array := (
                -- U+0041 'A' (one byte).
                ("01000001", '1', "0000000001000001"),
                -- U+00E9 (two bytes: C3 A9).
                ("11000011", '0', "0000000000000000"),
                ("10101001", '1', "0000000011101001"),
                -- U+20AC (three bytes: E2 82 AC).
                ("11100010", '0', "0000000000000000"),
                ("10000010", '0', "0000000000000000"),
                ("10101100", '1', "0010000010101100"),
                -- U+1D11E (four bytes: F0 9D 84 9E).
                -- FIXME: the full code point "11101000100011110" needs 17 bits,
                -- i.e. a UTF-16 surrogate pair (D834 DD1E).  The converter has a
                -- single 16-bit output, so the expected value below is the
                -- code point truncated to its low 16 bits (0xD11E).
                ("11110000", '0', "0000000000000000"),
                ("10011101", '0', "0000000000000000"),
                ("10000100", '0', "0000000000000000"),
                ("10011110", '1', "1101000100011110"),
                -- U+0000 (one byte).
                ("00000000", '1', "0000000000000000")
            );
        begin
            clock <= '0';
            -- Let the initial values settle before the first stimulus.
            wait for 1 ns;

            -- Apply and check each pattern.
            for i in patterns'range loop
                -- Present the byte while the clock is low ...
                utf8  <= patterns(i).utf8;
                clock <= '0';
                wait for 1 ns;
                -- ... then clock it into the converter.
                clock <= '1';
                wait for 1 ns;

                -- Check the outputs produced by this rising edge.
                assert done = patterns(i).last
                    report "bad result timing" severity error;
                assert '0' = err
                    report "error detected " severity warning;
                if patterns(i).last = '1' then
                    assert utf16 = patterns(i).utf16
                        report "bad utf16 data" severity warning;
                    -- FIXME: should a data mismatch be severity error?
                end if;

                wait for 1 ns;
            end loop;

            report "end of test" severity note;
            -- Wait forever; with no further events the simulation finishes.
            wait;
        end process;

    end architecture;
    -- component to convert utf-8 to utf-16
    -- simulation with ghdl.
    -- author: jmg
    -- date: 2006.03.01

    -- This works with two data buses: an 8-bit UTF-8 input and a 16-bit UTF-16 output.

    library IEEE;
    use IEEE.std_logic_1164.all;


    -- Streaming UTF-8 to UTF-16 converter.
    --
    -- Ports:
    --   clock : one UTF-8 byte is consumed on every rising edge.
    --   utf8  : the UTF-8 byte to consume.
    --   utf16 : the decoded 16-bit word; updated when a sequence completes.
    --   done  : '1' after the byte that completes a sequence (or after an
    --           error), '0' while a multi-byte sequence is still in progress.
    --   err   : '1' when a continuation byte arrives with no sequence in
    --           progress, or when the byte is of the form 11111xxx.
    --
    -- Known limitations (behaviour kept as-is):
    --   * Four-byte sequences encode code points above U+FFFF, which need a
    --     UTF-16 surrogate pair.  Only one 16-bit word is produced, holding
    --     the low 16 bits of the code point, and 'err' is not raised.
    --   * A lead byte or ASCII byte that arrives in the middle of a sequence
    --     silently restarts decoding; the truncated sequence is not reported.
    --   * Overlong encodings are not rejected.
    entity utf8ToUtf16 is
        port (
            clock : in  std_logic;
            utf8  : in  std_logic_vector(7 downto 0);
            utf16 : out std_logic_vector(15 downto 0);
            done  : out std_logic;
            err   : out std_logic
        );
    end entity;

    architecture jmg of utf8ToUtf16 is
        -- Number of continuation bytes still expected.  Initialised to "00" so
        -- that a stray continuation byte presented before any lead byte is
        -- flagged as an error instead of being compared against "UU".
        signal count        : std_logic_vector(1 downto 0) := "00";
        -- Code point bits accumulated so far for the sequence in progress.
        signal buffer_utf16 : std_logic_vector(15 downto 0);
    begin

        process (clock)
        begin
            if rising_edge(clock) then
                if utf8(7) = '0' then
                    -- ASCII: 0xxxxxxx maps directly to the low seven bits.
                    utf16(15 downto 7) <= "000000000";
                    utf16(6 downto 0)  <= utf8(6 downto 0);
                    err   <= '0';
                    done  <= '1';
                    count <= "00";

                elsif utf8(7 downto 6) = "10" then
                    -- Continuation byte: 10xxxxxx.
                    if count /= "00" then
                        -- Shift the accumulated bits up by six and append the
                        -- six payload bits of this byte.
                        buffer_utf16(15 downto 6) <= buffer_utf16(9 downto 0);
                        buffer_utf16(5 downto 0)  <= utf8(5 downto 0);
                        err <= '0';
                        if count = "01" then
                            -- Last byte of the sequence: publish the result.
                            -- buffer_utf16 is a signal and still holds the old
                            -- value here, so the same shift is applied again.
                            done <= '1';
                            utf16(15 downto 6) <= buffer_utf16(9 downto 0);
                            utf16(5 downto 0)  <= utf8(5 downto 0);
                        else
                            -- More continuation bytes are still expected.
                            done <= '0';
                        end if;
                        -- count <= count - 1, spelled out so that only
                        -- std_logic_1164 is needed.
                        case count is
                            when "11"   => count <= "10";
                            when "10"   => count <= "01";
                            when others => count <= "00";
                        end case;
                    else
                        -- Continuation byte with no sequence in progress.
                        -- utf16 is left unchanged.
                        err  <= '1';
                        done <= '1';
                    end if;

                elsif utf8(7 downto 5) = "110" then
                    -- Two-byte sequence: 110xxxxx 10xxxxxx
                    count <= "01";
                    done  <= '0';
                    buffer_utf16(15 downto 5) <= "00000000000";
                    buffer_utf16(4 downto 0)  <= utf8(4 downto 0);
                    err   <= '0';

                elsif utf8(7 downto 4) = "1110" then
                    -- Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
                    count <= "10";
                    done  <= '0';
                    buffer_utf16(15 downto 4) <= "000000000000";
                    buffer_utf16(3 downto 0)  <= utf8(3 downto 0);
                    err   <= '0';

                elsif utf8(7 downto 3) = "11110" then
                    -- Four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                    -- The result will be truncated to 16 bits (see header).
                    count <= "11";
                    done  <= '0';
                    buffer_utf16(15 downto 3) <= "0000000000000";
                    buffer_utf16(2 downto 0)  <= utf8(2 downto 0);
                    err   <= '0';

                else
                    -- Unknown lead byte (11111xxx): pass the byte through
                    -- unchanged in the low eight bits, as if it were
                    -- ISO-8859-1, and flag the error.
                    utf16(15 downto 8) <= "00000000";
                    utf16(7 downto 0)  <= utf8(7 downto 0);
                    err   <= '1';
                    done  <= '1';
                    count <= "00";
                end if;
            end if;
        end process;

    end architecture;
     
    jmgeu, Mar 9, 2007
    #1
    1. Advertising

Want to reply to this thread or ask your own question?

It takes just 2 minutes to sign up (and it's free!). Just click the sign up button to choose a username and then you can ask your own questions on the forum.
Similar Threads
  1. Xah Lee

    convert gb18030 to utf16

    Xah Lee, Mar 6, 2005, in forum: Python
    Replies:
    2
    Views:
    1,557
    Xah Lee
    Mar 7, 2005
  2. John Perks and Sarah Mount

    UTF16 codec doesn't round-trip?

    John Perks and Sarah Mount, May 28, 2005, in forum: Python
    Replies:
    1
    Views:
    479
    =?ISO-8859-1?Q?=22Martin_v=2E_L=F6wis=22?=
    May 28, 2005
  3. Fuzzyman
    Replies:
    4
    Views:
    588
    Fuzzyman
    Feb 7, 2006
  4. news.fe.internet.bosch.com

    Regarding UTF16

    news.fe.internet.bosch.com, Feb 2, 2006, in forum: C Programming
    Replies:
    5
    Views:
    368
    those who know me have no need of my name
    Feb 12, 2006
  5. gry
    Replies:
    2
    Views:
    823
    Alf P. Steinbach
    Mar 13, 2012
Loading...

Share This Page