# Convert Tango documentation to XML
# Public domain
require 'hpricot'
require 'open-uri'
# Walks one nesting level of a documentation node and prints one line per
# declaration, then recurses into each declaration's description.
#
# Every `dt` found under a direct `dl` child of +doc+ is paired, by position,
# with the `dd` found the same way. A header whose cleaned-up text starts with
# "class" or "struct" is treated as a container: a line is printed before and
# after its nested declarations. Anything else is treated as a method and gets
# a single line.
#
# source - value handed through unchanged to every recursive call; this method
#          does not otherwise read it.
# doc    - any object answering `/` with a selector string (the caller passes
#          the result of an Hpricot search).
#
# NOTE(review): both `%Q{}` templates below are empty, so only blank lines are
# printed and neither `text` nor `source` reaches the output — presumably the
# markup inside them was lost; confirm against the original file.
def parseLevel(source, doc)
  headers = doc / "> dl > dt"
  descs = doc / "> dl > dd"

  headers.zip(descs).each do |header, desc|
    # Flatten the header to a single line and drop the inline
    # `explorer.outline.addDecl(...);` call embedded in its text.
    text = header.innerText.gsub(/[\r\n]/, ' ')
    text = text.gsub(/explorer\.outline\.addDecl\([^)]*\);/, '')
    # NOTE(review): this replaces a double quote with itself (a no-op) —
    # presumably it was meant to escape quotes; confirm the replacement.
    text = text.gsub('"', '"')
    text = text.squeeze(" ").strip

    type = case text
           when /^class/ then "class"
           when /^struct/ then "struct"
           else "method"
           end

    puts %Q{}
    # zip pads with nil when there are fewer `dd` than `dt` elements; skip the
    # recursion then instead of failing on nil.
    parseLevel(source, desc) if desc
    # Containers get a closing line after their nested declarations.
    puts "" unless type == "method"
  end
end
# Driver: download the documentation index, then visit every page linked from
# its "#searchable" list and print that page's declarations via parseLevel.
index = "http://dsource.org/projects/tango/docs/current/"
indexDoc = Hpricot(open(index))

# Skip: there are errors in the html of those files
unparsable = [/tango\.core\.Variant/, /tango\.util\.Convert/]

puts ""
(indexDoc / "#searchable ul li a").each do |link|
  page_url = index + link.attributes["href"]
  # Progress goes to stderr so it stays separate from the generated output.
  $stderr.puts page_url

  if unparsable.any? { |pattern| page_url =~ pattern }
    $stderr.puts "Skipping #{page_url}..."
    next
  end

  begin
    page = Hpricot(open(page_url))
    body = page / "#docbody"
    puts %Q{}
    parseLevel(page_url, body)
    puts ""
  rescue OpenURI::HTTPError => err
    # An HTTP failure is reported and the loop moves on to the next page.
    $stderr.puts "Error with #{page_url}: #{err.message}"
  end
end
puts ""