Here is a script that scrapes quotes from bash.org and creates a BotTalker data file (a Tcl script that gets sourced) for our eggdrop bot. If you want to chat with it, come to #oaktoncc on freenode.
require 'rubygems'
require 'open-uri'
require 'hpricot'
require 'cgi'
# Scrape a run of bash.org browse pages and collect, into `lines`, the text
# spoken on every chat line whose speaker prefix we recognise.

# Speaker prefixes we know how to strip; capture group 1 is the spoken text.
# Lines in any other format are ignored.
SPEAKER_PATTERNS = [
  /^\s*<.*?>(.*)/,    # <name> blah
  /^\s*\(.*?\)(.*)/,  # (@name:#channel) blah
  /^\s*.+?:(.*)/,     # name: blah
  /^\s*\[.+?\](.*)/   # [name] blah
].freeze

# Returns the text spoken on one raw line of quote HTML (speaker prefix
# removed, surrounding whitespace stripped), or nil when the line matches
# none of SPEAKER_PATTERNS. The first matching pattern wins.
def spoken_text(raw_line)
  # Unescape first so that "&lt;name&gt;" becomes "<name>" before matching,
  # then drop the <br> tags and turn the &nbsp; entities (which
  # CGI.unescapeHTML leaves alone) into plain spaces.
  line = CGI.unescapeHTML(raw_line).gsub(%r{<br\s*/?>}i, '').gsub('&nbsp;', ' ')
  SPEAKER_PATTERNS.each do |pattern|
    match = pattern.match(line)
    return match[1].strip if match
  end
  nil
end

count = 10       # number of browse pages to fetch
last_page = 407  # first page fetched; we walk backwards: 407, 406, ...
lines = []

count.times do |i|
  page = last_page - i
  uri = "http://bash.org/?browse&p=#{page}"
  puts uri
  # URI#open is provided by open-uri and, unlike Kernel#open, still opens URLs
  # on Ruby 3.0+. The block form closes the handle once the body is read.
  html = URI.parse(uri).open(&:read)
  Hpricot(html).search("//p[@class='qt']").each do |element|
    # Each quote is one <p class="qt">; split it into its individual chat lines.
    element.inner_html.split("\n").each do |raw_line|
      text = spoken_text(raw_line)
      lines << text if text
    end
  end
end
# Write the collected lines to BotTalker_data_bash_org.tcl as a Tcl script
# that sets TalkzStrArray to a braced list with one {entry} per line.
File.open('BotTalker_data_bash_org.tcl', 'w') do |f|
  f.puts '# Bot Talker Data file.'
  f.puts 'set TalkzStrArray {'
  lines.each do |line|
    # Braces and backslashes are special inside a Tcl braced word and would
    # break the generated file when it is sourced, so skip those lines.
    next if line =~ /[{}\\]/

    # Square brackets are replaced with parentheses before writing.
    f.puts " {#{line.tr('[]', '()')}}"
  end
  f.puts '}'
end


