--------------070106000601070906010503
Content-Type: text/plain; charset=ISO-8859-1; format=flowed
Content-Transfer-Encoding: 7bit
Dick said:
long shot but what the hell - don't suppose any of you good
people are sitting on a parser for Mozilla/Firefox bookmarks.html
files, by any chance?
Funny you should ask.
I've had this for a while, and I can't even
remember why I wrote it. It's pretty hacked together, and it's not a
true "parser" (I just search for certain patterns in the bookmark file)
and it is hardcoded (currently) for my own (obsolete) Phoenix bookmarks
file, but it should be fairly straightforward to modify for your own
purposes.
Hope this is at least close to what you are looking for...
- Jamis
--
Jamis Buck
(e-mail address removed)
http://www.jamisbuck.org/jamis
"I use octal until I get to 8, and then I switch to decimal."
--------------070106000601070906010503
Content-Type: text/plain;
name="bookmarks.rb"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="bookmarks.rb"
#!/usr/bin/ruby
# Common base for the entries of a bookmark tree. Carries the fields that
# every entry has, whether it is a folder or a single bookmark.
class Item
  attr_accessor :last_modified, :id, :title, :remarks

  # Builds the HTML attribute text for the fields defined on this class.
  #
  # Each attribute that is set contributes a fragment with a leading space,
  # so the result can be spliced directly after a tag name. Returns a new,
  # mutable String (empty when neither field is set) that callers may
  # append further attributes to.
  def to_html_attr_list
    html = +""
    html << %( LAST_MODIFIED="#{@last_modified}") if @last_modified
    html << %( ID="#{@id}") if @id
    html
  end
end
# A bookmark folder: an Item that owns an ordered list of child items
# (bookmarks and nested folders).
class Folder < Item
  # The folder's children, in their current order.
  attr_reader :items

  def initialize
    @items = []
  end

  # Prints the folder title (when it has one) followed by every child,
  # recursively, to standard output. Each child is dumped one level deeper
  # than its parent.
  #
  # level - nesting depth used to compute the indentation (default 0).
  def dump(level = 0)
    puts "#{' ' * level * 2}#{title}" if @title
    @items.each { |item| item.dump(level + 1) }
  end

  # Sorts the children in place, then sorts every sub-folder recursively.
  #
  # Items of the same class are ordered case-insensitively by title; when
  # the classes differ, folders are placed before everything else.
  #
  # Raises RuntimeError ("wrong type in folder") when two items of
  # different classes are compared and neither is a Folder.
  def sort!
    @items.sort! do |a, b|
      # Compare with #class: Object#type, which this code originally
      # relied on, no longer exists in current Ruby versions.
      if a.class == b.class
        # to_s keeps an item without a title from raising on nil.
        a.title.to_s.downcase <=> b.title.to_s.downcase
      elsif a.is_a?(Folder)
        -1
      elsif b.is_a?(Folder)
        1
      else
        raise "wrong type in folder"
      end
    end
    @items.each { |item| item.sort! if item.is_a?(Folder) }
  end

  # Writes the folder and all of its children to +file+ as bookmark HTML.
  #
  # level - nesting depth; each level indents the output further.
  # file  - any object responding to #puts.
  def to_html(level, file)
    indent = " " * level * 4
    file.puts indent + "<DT><H3#{to_html_attr_list}>#{@title}</H3>" if @title
    file.puts indent + "<DD>#{@remarks}" if @remarks
    file.puts indent + "<HR>" if !@title # hack for top-level folder
    file.puts indent + "<DL><p>"
    @items.each { |item| item.to_html(level + 1, file) }
    file.puts indent + "</DL><p>"
  end
end
# A single bookmark: an Item that additionally records a link target and
# the visit/icon/charset attributes stored alongside it.
class Bookmark < Item
  attr_accessor :last_visit, :icon, :last_charset, :href

  # Prints one line of the form 'title' => href to standard output,
  # preceded by indentation derived from +level+.
  def dump(level)
    padding = " " * level * 2
    print padding
    label = "'" + @title + "' => "
    print label
    puts @href
  end

  # Returns the attribute text of the base class followed by the
  # bookmark-specific attributes that are set, in a fixed order.
  def to_html_attr_list
    own_attributes = [
      ["LAST_VISIT", @last_visit],
      ["ICON", @icon],
      ["LAST_CHARSET", @last_charset],
      ["HREF", @href]
    ]
    own_attributes.each_with_object(super) do |(name, value), out|
      out << " #{name}=\"#{value}\"" if value
    end
  end

  # Writes the bookmark as a <DT><A ...> line to +file+, followed by a
  # <DD> line when remarks are present.
  #
  # level - nesting depth; each level indents the output further.
  # file  - any object responding to #puts.
  def to_html(level, file)
    pad = " " * level * 4
    file.puts "#{pad}<DT><A#{to_html_attr_list}>#{@title}</A>"
    return unless @remarks

    file.puts "#{pad}<DD>#{@remarks}"
  end
end
# Loads Netscape-style bookmarks.html files into a tree of Folder and
# Bookmark objects and writes that tree back out in the same format.
#
# The reader is a line-oriented pattern matcher, not a real HTML parser:
# every element it recognises has to sit on a single input line.
class BookmarkManager
  def initialize
    @top_folder = Folder.new
  end

  # Extracts NAME="value" pairs from +str+.
  #
  # Only names made of upper-case letters and underscores are recognised,
  # and values must be enclosed in double quotes. Returns a Hash mapping
  # each name to its value; when a name occurs twice the last one wins.
  def build_attribute_hash(str)
    str.scan(/([_A-Z]+)="([^"]*)"/).to_h
  end

  # Reads +bookmarks_file+ and adds its folders and bookmarks beneath the
  # top-level folder, then sorts the whole tree.
  #
  # Everything before the first line consisting of "<DL><p>" is skipped.
  # Reading stops when the list opened by that line is closed or when the
  # file ends, whichever comes first, so a truncated file yields the
  # entries read so far and a file without the marker adds nothing.
  #
  # Returns the result of sorting the top-level folder.
  # Raises the usual system errors if the file cannot be opened.
  def append(bookmarks_file)
    folder_stack = [@top_folder]
    File.open(bookmarks_file, "r") do |file|
      skip_to_bookmark_data(file)
      last_item = nil
      until folder_stack.empty?
        raw = file.gets
        break if raw.nil? # end of file before the list was closed

        # Lines matching none of the patterns (including the "<DL><p>"
        # that opens a folder's list) are ignored.
        case raw.strip
        when /<HR>/
          # separator: a following <DD> has nothing to attach to
          last_item = nil
        when /<DT><H3 (.*)>(.*)<\/H3>/
          folder = new_folder(Regexp.last_match(1), Regexp.last_match(2))
          last_item = folder
          folder_stack.last.items.push(folder)
          folder_stack.push(folder)
        when /<DT><A (.*)>(.*)<\/A>/
          bookmark = new_bookmark(Regexp.last_match(1), Regexp.last_match(2))
          last_item = bookmark
          folder_stack.last.items.push(bookmark)
        when /<\/DL><p>/
          folder_stack.pop
          last_item = nil
        when /<DD>(.*)/
          # A remark with no preceding item is dropped rather than
          # crashing on nil.
          last_item.remarks = Regexp.last_match(1) if last_item
        end
      end
    end
    @top_folder.sort!
  end

  # Prints a plain-text outline of the bookmark tree to standard output.
  def dump
    puts "Bookmarks:"
    @top_folder.dump
  end

  # Writes the complete bookmark tree, preceded by the bookmark-file
  # header, to +file+ (any object responding to #puts).
  def to_html(file)
    file.puts "<!DOCTYPE NETSCAPE-Bookmark-file-1>"
    file.puts "<!-- This is an automatically generated file."
    file.puts " It will be read and overwritten."
    file.puts " DO NOT EDIT! -->"
    file.puts "<META HTTP-EQUIV=\"Content-Type\" CONTENT=\"text/html; charset=UTF-8\">"
    file.puts "<TITLE>Bookmarks</TITLE>"
    file.puts "<H1>Bookmarks</H1>"
    file.puts
    @top_folder.to_html(0, file)
  end

  private

  # Consumes lines from +file+ up to and including the first "<DL><p>"
  # line, or up to end of file when there is no such line.
  def skip_to_bookmark_data(file)
    while (line = file.gets)
      break if line.strip == "<DL><p>"
    end
  end

  # Builds a Folder from the attribute text and title of an <H3> line.
  def new_folder(attr_list, title)
    attrs = build_attribute_hash(attr_list)
    folder = Folder.new
    folder.title = title
    folder.last_modified = attrs["LAST_MODIFIED"]
    folder.id = attrs["ID"]
    folder
  end

  # Builds a Bookmark from the attribute text and title of an <A> line.
  def new_bookmark(attr_list, title)
    attrs = build_attribute_hash(attr_list)
    bookmark = Bookmark.new
    bookmark.title = title
    bookmark.last_modified = attrs["LAST_MODIFIED"]
    bookmark.id = attrs["ID"]
    bookmark.last_visit = attrs["LAST_VISIT"]
    bookmark.icon = attrs["ICON"]
    bookmark.last_charset = attrs["LAST_CHARSET"]
    bookmark.href = attrs["HREF"]
    bookmark
  end
end
# Script entry point, run only when this file is executed directly.
#
# Loads every bookmarks file named on the command line into one manager
# and writes the resulting tree to standard output as bookmark HTML. With
# no arguments it falls back to the path the script was originally
# hard-coded for.
if __FILE__ == $0
  default_bookmarks_file = "/home/jgb3/.phoenix/default/d2isamzz.slt/bookmarks.html"
  bookmark_files = ARGV.empty? ? [default_bookmarks_file] : ARGV

  mgr = BookmarkManager.new
  bookmark_files.each { |path| mgr.append(path) }
  mgr.to_html($stdout)
end
--------------070106000601070906010503--