J
Jesse Merriman
I based my solution on the algorithm described here:
http://www.siggraph.org/education/materials/HyperGraph/video/mpeg/mpegfaq/huffman_tutorial.html
I use the rubytree gem for the tree, and subclassed its TreeNode into the class
FreqNode, which holds a frequency, possibly a letter (just the leaves), and a
side (either :left or :right). I made FreqNodes compare primarily based on
their frequencies so I could build a SortedSet of them and repeatedly take the
two lowest to combine. The actual tree is built in the build_tree method.
It just takes an input string, builds the tree, and outputs the compressed
data (which is slightly different from the example given in the original quiz
message because we treat ties differently. I'm pretty sure this doesn't
matter though). No extra credit, though it shouldn't be too hard to add on some
tree serialization.
irb> tree = build_tree('ABC')
irb> tree.printTree
Node ID: 16 Content: 3 Parent: ROOT Children: 2 Total Nodes: 5 Side:
Node ID: 14 Content: 1C Parent: 16 Children: 0 Total Nodes: 1 Side: left
Node ID: 15 Content: 2 Parent: 16 Children: 2 Total Nodes: 3 Side: right
Node ID: 12 Content: 1A Parent: 15 Children: 0 Total Nodes: 1 Side: left
Node ID: 13 Content: 1B Parent: 15 Children: 0 Total Nodes: 1 Side: right
irb> encode 'ABC', tree
=> "10110"
$ ./huffman_encode.rb ABRRKBAARAA
Encoded:
01011111 10010100 1100
Encoded bytes: 3
Original:
ABRRKBAARAA
Original bytes: 11
Compressed: 73%
#!/usr/bin/env ruby
require 'rubygems'
require 'tree'
require 'set'
class Tree::TreeNode
# Yield once for each leaf node.
def each_leaf
self.each { |node| yield(node) if not node.hasChildren? }
end
# Both assume exactly 2 children
def left_child; children.first; end
def right_child; children.last; end
end
# FreqNodes are TreeNodes that:
# 1) Have generated, numeric names to ensure they're all unique.
# 2) Compare based primarily on their contents.
# 3) Have as contents an array containing a frequency, and possibly a letter
# (for the leaves).
# 4) Have a @side attribute that should be set to either :left or :right for
# non-root nodes.
# 5) Have a few other nicities, like letter().
class FreqNode < Tree::TreeNode
attr_accessor :side
def initialize(content = nil)
super(FreqNode.next_name, content)
end
def <=>(node)
content_diff = @content <=> node.content
return content_diff if not content_diff.zero?
super(node)
end
# Assumes this is a leaf.
def letter; @content[1]; end
def to_s; super() + " Side: #{@side}"; end
# Generate unique names because we can't add two nodes with the same name as
# children of the same parent node.
@next_name_num = -1
def FreqNode.next_name
(@next_name_num += 1).to_s
end
end
class Set
def shift
elem = self.min
self.delete elem
elem
end
end
# Build a SortedSet containing pairs of [freq, letter] for the given string.
def build_freqs str
# Build hash of byte => count
counts = Hash.new 0
str.each_byte { |byte| counts[byte.chr] += 1 }
# Build SortedSet of [freq, byte] pairs (lower freqs first).
freqs = SortedSet[]
counts.each { |bc| freqs << bc.reverse }
freqs
end
# Build the tree for the given input string, and return the root node.
def build_tree str
nodes = build_freqs(str).map! { |pair| FreqNode.new(pair) }
while nodes.size > 1
child1, child2 = nodes.shift, nodes.shift
parent = FreqNode.new([child1.content.first + child2.content.first])
child1.side, child2.side = :left, :right
parent << child1
parent << child2
nodes << parent
end
nodes.min
end
# Encode the given letter using the given tree of FreqNodes.
def encode_letter letter, tree
enc = ''
# Find leaf with the right byte value
node = nil
tree.each_leaf do |leaf|
(node = leaf; break) if leaf.letter == letter
end
while not node.isRoot?
node.side == :left ? enc << '0' : enc << '1'
node = node.parent
end
enc.reverse
end
# Build a tree for the given string, and encode it using that tree.
def encode str, tree = build_tree(str)
#tree = build_tree(str)
encoded = ''
str.each_byte { |byte| encoded << encode_letter(byte.chr, tree) }
encoded
end
# Decode the given string (which should consist of '0's and '1's) using the
# given tree.
def decode str, tree
dec = ''
i = 0
node = tree
str.each_byte do |byte|
node = (byte.chr == '0' ? node.left_child : node.right_child)
if not node.hasChildren?
dec << node.byte_value.chr
node = tree
end
end
dec
end
class String
# Split up the string by inserting sep between len-length chunks.
def split_up! len, sep
(((length / len.to_f).ceil - 1) * len).step(len, -len) do |i|
insert i, sep
end
self
end
end
# Output a binary string, splitting it up for easier reading.
def puts_binary str
str = str.dup
str.split_up! 40, "\n"
str.each { |line| puts line.split_up!(8, ' ') }
end
if __FILE__ == $0
input = ARGV.join ' '
input_bytes = input.length
enc = encode(input)
enc_bytes = (enc.length / 8.0).ceil
compressed = ((1 - enc_bytes.to_f/input_bytes) * 100).round
puts 'Encoded:'
puts_binary(enc)
puts "Encoded bytes: #{enc_bytes}\n\n"
puts "Original:\n#{input}"
puts "Original bytes: #{input_bytes}\n\n"
puts "Compressed: #{compressed}%"
end
http://www.siggraph.org/education/materials/HyperGraph/video/mpeg/mpegfaq/huffman_tutorial.html
I use the rubytree gem for the tree, and subclassed its TreeNode into the class
FreqNode, which holds a frequency, possibly a letter (just the leaves), and a
side (either :left or :right). I made FreqNodes compare primarily based on
their frequencies so I could build a SortedSet of them and repeatedly take the
two lowest to combine. The actual tree is built in the build_tree method.
It just takes an input string, builds the tree, and outputs the compressed
data (which is slightly different from the example given in the original quiz
message because we treat ties differently. I'm pretty sure this doesn't
matter though). No extra credit, though it shouldn't be too hard to add on some
tree serialization.
irb> tree = build_tree('ABC')
irb> tree.printTree
Node ID: 16 Content: 3 Parent: ROOT Children: 2 Total Nodes: 5 Side:
Node ID: 14 Content: 1C Parent: 16 Children: 0 Total Nodes: 1 Side: left
Node ID: 15 Content: 2 Parent: 16 Children: 2 Total Nodes: 3 Side: right
Node ID: 12 Content: 1A Parent: 15 Children: 0 Total Nodes: 1 Side: left
Node ID: 13 Content: 1B Parent: 15 Children: 0 Total Nodes: 1 Side: right
irb> encode 'ABC', tree
=> "10110"
$ ./huffman_encode.rb ABRRKBAARAA
Encoded:
01011111 10010100 1100
Encoded bytes: 3
Original:
ABRRKBAARAA
Original bytes: 11
Compressed: 73%
#!/usr/bin/env ruby
require 'rubygems'
require 'tree'
require 'set'
class Tree::TreeNode
# Yield once for each leaf node.
def each_leaf
self.each { |node| yield(node) if not node.hasChildren? }
end
# Both assume exactly 2 children
def left_child; children.first; end
def right_child; children.last; end
end
# FreqNodes are TreeNodes that:
# 1) Have generated, numeric names to ensure they're all unique.
# 2) Compare based primarily on their contents.
# 3) Have as contents an array containing a frequency, and possibly a letter
# (for the leaves).
# 4) Have a @side attribute that should be set to either :left or :right for
# non-root nodes.
# 5) Have a few other nicities, like letter().
class FreqNode < Tree::TreeNode
attr_accessor :side
def initialize(content = nil)
super(FreqNode.next_name, content)
end
def <=>(node)
content_diff = @content <=> node.content
return content_diff if not content_diff.zero?
super(node)
end
# Assumes this is a leaf.
def letter; @content[1]; end
def to_s; super() + " Side: #{@side}"; end
# Generate unique names because we can't add two nodes with the same name as
# children of the same parent node.
@next_name_num = -1
def FreqNode.next_name
(@next_name_num += 1).to_s
end
end
class Set
def shift
elem = self.min
self.delete elem
elem
end
end
# Build a SortedSet containing pairs of [freq, letter] for the given string.
def build_freqs str
# Build hash of byte => count
counts = Hash.new 0
str.each_byte { |byte| counts[byte.chr] += 1 }
# Build SortedSet of [freq, byte] pairs (lower freqs first).
freqs = SortedSet[]
counts.each { |bc| freqs << bc.reverse }
freqs
end
# Build the tree for the given input string, and return the root node.
def build_tree str
nodes = build_freqs(str).map! { |pair| FreqNode.new(pair) }
while nodes.size > 1
child1, child2 = nodes.shift, nodes.shift
parent = FreqNode.new([child1.content.first + child2.content.first])
child1.side, child2.side = :left, :right
parent << child1
parent << child2
nodes << parent
end
nodes.min
end
# Encode the given letter using the given tree of FreqNodes.
def encode_letter letter, tree
enc = ''
# Find leaf with the right byte value
node = nil
tree.each_leaf do |leaf|
(node = leaf; break) if leaf.letter == letter
end
while not node.isRoot?
node.side == :left ? enc << '0' : enc << '1'
node = node.parent
end
enc.reverse
end
# Build a tree for the given string, and encode it using that tree.
def encode str, tree = build_tree(str)
#tree = build_tree(str)
encoded = ''
str.each_byte { |byte| encoded << encode_letter(byte.chr, tree) }
encoded
end
# Decode the given string (which should consist of '0's and '1's) using the
# given tree.
def decode str, tree
dec = ''
i = 0
node = tree
str.each_byte do |byte|
node = (byte.chr == '0' ? node.left_child : node.right_child)
if not node.hasChildren?
dec << node.byte_value.chr
node = tree
end
end
dec
end
class String
# Split up the string by inserting sep between len-length chunks.
def split_up! len, sep
(((length / len.to_f).ceil - 1) * len).step(len, -len) do |i|
insert i, sep
end
self
end
end
# Output a binary string, splitting it up for easier reading.
def puts_binary str
str = str.dup
str.split_up! 40, "\n"
str.each { |line| puts line.split_up!(8, ' ') }
end
if __FILE__ == $0
input = ARGV.join ' '
input_bytes = input.length
enc = encode(input)
enc_bytes = (enc.length / 8.0).ceil
compressed = ((1 - enc_bytes.to_f/input_bytes) * 100).round
puts 'Encoded:'
puts_binary(enc)
puts "Encoded bytes: #{enc_bytes}\n\n"
puts "Original:\n#{input}"
puts "Original bytes: #{input_bytes}\n\n"
puts "Compressed: #{compressed}%"
end