utf8 to utf16

J

jmgeu

I have written a UTF-8 to UTF-16 converter in VHDL.

It appears to work in the GHDL simulator.

Would this be of any interest to the open source community / OpenCores?

The code follows.

-- Test bench for the UTF-8 to UTF-16 converter component.
-- Simulated with GHDL.
-- author : jmg
-- date: 2006.03.01

-- This works with two data buses: an 8-bit UTF-8 input and a 16-bit UTF-16 output.

library IEEE;
use IEEE.std_logic_1164.all;


-- Self-checking test bench for the utf8ToUtf16 component.
-- It has no ports: the process below generates the clock, feeds one UTF-8
-- byte per clock cycle and compares the component outputs with a table of
-- expected values.
entity utf8ToUtf16_test is
end entity;

architecture jmg of utf8ToUtf16_test is

  -- Declaration of the unit under test; must match the utf8ToUtf16 entity.
  component utf8ToUtf16 is
    port (
      clock : in  std_logic;
      utf8  : in  std_logic_vector (7 downto 0);
      utf16 : out std_logic_vector (15 downto 0);
      done  : out std_logic;
      err   : out std_logic
    );
  end component;

  -- Signals wired one-to-one to the ports of the unit under test.
  signal clock : std_logic;
  signal utf8  : std_logic_vector (7 downto 0);
  signal utf16 : std_logic_vector (15 downto 0);
  signal done  : std_logic;
  signal err   : std_logic;

begin

  utf8ToUtf16_inst : utf8ToUtf16
    port map (
      clock => clock,
      utf8  => utf8,
      utf16 => utf16,
      done  => done,
      err   => err
    );

  -- Stimulus and checking process. Each table entry takes 3 ns:
  -- 1 ns with clock low, rising edge, 1 ns settle, check, 1 ns idle.
  p_main : process
    type pattern_type is record
      -- Input: the UTF-8 byte presented to the converter on this cycle.
      utf8  : std_logic_vector (7 downto 0);
      -- Expected value of 'done' after the rising edge
      -- ('1' on the last byte of a character).
      last  : std_logic;
      -- Expected value of 'utf16'; only compared when last = '1'.
      utf16 : std_logic_vector (15 downto 0);
    end record;
    -- The patterns to apply, in order.
    type pattern_array is array (natural range <>) of pattern_type;
    constant patterns : pattern_array :=
      (-- 0x41 -> U+0041 (one-byte / ASCII)
       ("01000001", '1', "0000000001000001"),
       -- 0xC3 0xA9 -> U+00E9 (two-byte sequence)
       ("11000011", '0', "0000000000000000"),
       ("10101001", '1', "0000000011101001"),
       -- 0xE2 0x82 0xAC -> U+20AC (three-byte sequence)
       ("11100010", '0', "0000000000000000"),
       ("10000010", '0', "0000000000000000"),
       ("10101100", '1', "0010000010101100"),
       -- 0xF0 0x9D 0x84 0x9E -> U+1D11E (four-byte sequence).
       -- FIXME: the full code point needs 17 bits ("11101000100011110"),
       -- i.e. UTF-32 or a UTF-16 surrogate pair. The value expected here is
       -- only its low 16 bits, which is what the converter currently outputs.
       ("11110000", '0', "0000000000000000"),
       ("10011101", '0', "0000000000000000"),
       ("10000100", '0', "0000000000000000"),
       ("10011110", '1', "1101000100011110"),
       -- 0x00 -> U+0000
       ("00000000", '1', "0000000000000000"));
  begin
    -- Start with the clock low and let the initial values settle.
    clock <= '0';
    wait for 1 ns;
    -- Apply and check each pattern.
    for i in patterns'range loop
      -- Present the input byte while the clock is low.
      utf8  <= patterns(i).utf8;
      clock <= '0';
      wait for 1 ns;
      -- Rising edge: the converter samples utf8 and updates its outputs.
      clock <= '1';
      wait for 1 ns;
      -- Check the outputs.
      assert done = patterns(i).last
        report "bad result timing" severity error;
      assert '0' = err
        report "error detected " severity warning;
      -- utf16 is only meaningful once a complete character has been decoded.
      if patterns(i).last = '1' then
        assert utf16 = patterns(i).utf16
          report "bad utf16 data" severity warning;
        --FIXME: error ?
      end if;
      wait for 1 ns;
    end loop;
    assert false report "end of test" severity note;
    -- Wait forever; this will finish the simulation.
    wait;

  end process;

end architecture;
-- Component to convert UTF-8 to UTF-16.
-- Simulated with GHDL.
-- author : jmg
-- date: 2006.03.01

-- This works with two data buses: an 8-bit UTF-8 input and a 16-bit UTF-16 output.

library IEEE;
use IEEE.std_logic_1164.all;


-- Streaming UTF-8 decoder with a 16-bit output.
--
-- One UTF-8 byte is sampled from 'utf8' on every rising edge of 'clock'.
--   utf16 : decoded value; updated when an ASCII byte, the last byte of a
--           multi-byte sequence, or an unrecognised byte (11111xxx) is seen.
--   done  : '1' after a byte that completes a character or reports an error,
--           '0' after a lead byte; left unchanged by intermediate
--           continuation bytes.
--   err   : '1' after a continuation byte that arrives when none is expected
--           and after an unrecognised byte; '0' after every other byte.
--
-- Known limitations (behaviour kept as in the original design):
--   * Only 16 bits are output. A four-byte sequence (code point above U+FFFF)
--     yields the low 16 bits of the code point, not a surrogate pair.
--   * A lead or ASCII byte arriving in the middle of an unfinished sequence
--     silently discards that sequence; 'err' is not raised.
--   * Overlong encodings are not checked for.
entity utf8ToUtf16 is
  port (
    clock : in  std_logic;
    utf8  : in  std_logic_vector (7 downto 0);
    utf16 : out std_logic_vector (15 downto 0);
    done  : out std_logic;
    err   : out std_logic
  );
end entity;

architecture jmg of utf8ToUtf16 is
  -- Number of continuation bytes still expected for the current character.
  -- There is no reset input, so the counter is given a defined start value:
  -- without it the counter starts as "UU", the test count /= "00" is true,
  -- and a stray continuation byte received first would not be reported.
  signal count        : std_logic_vector (1 downto 0) := "00";
  -- Bits of the code point collected so far (most recent bits in the LSBs).
  signal buffer_utf16 : std_logic_vector (15 downto 0) := (others => '0');
begin

  process (clock)
  begin
    if rising_edge(clock) then
      if utf8(7) = '0' then
        -- ASCII: 0xxxxxxx maps directly to the low 7 bits.
        utf16 (15 downto 7) <= "000000000";
        utf16 (6 downto 0)  <= utf8 (6 downto 0);
        err   <= '0';
        done  <= '1';
        count <= "00";
      elsif utf8(7 downto 6) = "10" then
        -- Continuation byte: 10xxxxxx carries 6 payload bits.
        if (count /= "00") then
          -- Shift the collected bits left by 6 and append the new ones.
          buffer_utf16 (15 downto 6) <= buffer_utf16 (9 downto 0);
          buffer_utf16 (5 downto 0)  <= utf8 (5 downto 0);
          err <= '0';
          if count = "01" then
            -- Last byte of the sequence: publish the result. The output is
            -- built from the old buffer value because the buffer assignment
            -- above only takes effect after this clock edge.
            done <= '1';
            utf16 (15 downto 6) <= buffer_utf16 (9 downto 0);
            utf16 (5 downto 0)  <= utf8 (5 downto 0);
          end if;
          -- count <= count - 1, written with gates so that only
          -- std_logic_1164 is needed (11 -> 10 -> 01 -> 00).
          count(1) <= not (count(1) xor count(0));
          count(0) <= not count(0);
        else
          -- Continuation byte with no sequence in progress.
          err  <= '1';
          done <= '1';
        end if;
      elsif utf8(7 downto 5) = "110" then
        -- Two-byte sequence: 110xxxxx 10xxxxxx
        count <= "01";
        done  <= '0';
        buffer_utf16 (15 downto 5) <= "00000000000";
        buffer_utf16 (4 downto 0)  <= utf8 (4 downto 0);
        err   <= '0';
      elsif utf8(7 downto 4) = "1110" then
        -- Three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
        count <= "10";
        done  <= '0';
        buffer_utf16 (15 downto 4) <= "000000000000";
        buffer_utf16 (3 downto 0)  <= utf8 (3 downto 0);
        err   <= '0';
      elsif utf8(7 downto 3) = "11110" then
        -- Four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        -- (result is truncated to 16 bits, see the header comment).
        count <= "11";
        done  <= '0';
        buffer_utf16 (15 downto 3) <= "0000000000000";
        buffer_utf16 (2 downto 0)  <= utf8 (2 downto 0);
        err   <= '0';
      else
        -- Unknown lead byte (11111xxx): pass the byte through unchanged,
        -- as if it were ISO-8859-1, and flag the error.
        utf16 (15 downto 8) <= "00000000";
        utf16 (7 downto 0)  <= utf8 (7 downto 0);
        err   <= '1';
        done  <= '1';
        count <= "00";
      end if;
    end if;

  end process;

end architecture;
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,744
Messages
2,569,484
Members
44,903
Latest member
orderPeak8CBDGummies

Latest Threads

Top