Module | ::Utils |
In: |
lib/rbot/core/utils/httputil.rb
lib/rbot/core/utils/utils.rb lib/rbot/core/utils/parse_time.rb |
Miscellaneous useful functions
UNESCAPE_TABLE | = | { 'laquo' => '«', 'raquo' => '»', 'quot' => '"', 'apos' => '\'', 'micro' => 'µ', 'copy' => '©', 'trade' => '™', 'reg' => '®', 'amp' => '&', 'lt' => '<', 'gt' => '>', 'hellip' => '…', 'nbsp' => ' ', 'Agrave' => 'À', 'Aacute' => 'Á', 'Acirc' => 'Â', 'Atilde' => 'Ã', 'Auml' => 'Ä', 'Aring' => 'Å', 'AElig' => 'Æ', 'OElig' => 'Œ', 'Ccedil' => 'Ç', 'Egrave' => 'È', 'Eacute' => 'É', 'Ecirc' => 'Ê', 'Euml' => 'Ë', 'Igrave' => 'Ì', 'Iacute' => 'Í', 'Icirc' => 'Î', 'Iuml' => 'Ï', 'ETH' => 'Ð', 'Ntilde' => 'Ñ', 'Ograve' => 'Ò', 'Oacute' => 'Ó', 'Ocirc' => 'Ô', 'Otilde' => 'Õ', 'Ouml' => 'Ö', 'Oslash' => 'Ø', 'Ugrave' => 'Ù', 'Uacute' => 'Ú', 'Ucirc' => 'Û', 'Uuml' => 'Ü', 'Yacute' => 'Ý', 'THORN' => 'Þ', 'szlig' => 'ß', 'agrave' => 'à', 'aacute' => 'á', 'acirc' => 'â', 'atilde' => 'ã', 'auml' => 'ä', 'aring' => 'å', 'aelig' => 'æ', 'oelig' => 'œ', 'ccedil' => 'ç', 'egrave' => 'è', 'eacute' => 'é', 'ecirc' => 'ê', 'euml' => 'ë', 'igrave' => 'ì', 'iacute' => 'í', 'icirc' => 'î', 'iuml' => 'ï', 'eth' => 'ð', 'ntilde' => 'ñ', 'ograve' => 'ò', 'oacute' => 'ó', 'ocirc' => 'ô', 'otilde' => 'õ', 'ouml' => 'ö', 'oslash' => 'ø', 'ugrave' => 'ù', 'uacute' => 'ú', 'ucirc' => 'û', 'uuml' => 'ü', 'yacute' => 'ý', 'thorn' => 'þ', 'yuml' => 'ÿ' | ||
AFTER_PAR_PATH | = | /^(?:div|span)$/ | ||
AFTER_PAR_EX | = | /^(?:td|tr|tbody|table)$/ | ||
AFTER_PAR_CLASS | = | /body|message|text/i | ||
TITLE_REGEX | = | /<\s*?title\s*?>(.+?)<\s*?\/title\s*?>/im | Title | |
HX_REGEX | = | /<h(\d)(?:\s+[^>]*)?>(.*?)<\/h\1>/im | H1, H2, etc | |
PAR_REGEX | = | /<p(?:\s+[^>]*)?>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im | A paragraph | |
AFTER_PAR1_REGEX | = | /<\w+\s+[^>]*(?:body|message|text)[^>]*>.*?<\/?(?:p|div|html|body|table|td|tr)(?:\s+[^>]*)?>/im | Some blogging and forum platforms use spans or divs with a ‘body’ or ‘message’ or ‘text’ in their class to mark actual text | |
AFTER_PAR2_REGEX | = | /<br(?:\s+[^>]*)?\/?>.*?<\/?(?:br|p|div|html|body|table|td|tr)(?:\s+[^>]*)?\/?>/im | At worst, we can try stuff which is comprised between two <br> | |
SEC_PER_MIN | = | 60 | Seconds per minute | |
SEC_PER_HR | = | SEC_PER_MIN * 60 | Seconds per hour | |
SEC_PER_DAY | = | SEC_PER_HR * 24 | Seconds per day | |
SEC_PER_WK | = | SEC_PER_DAY * 7 | Seconds per week | |
SEC_PER_MNTH | = | SEC_PER_DAY * 30 | Seconds per (30-day) month | |
SEC_PER_YR | = | SEC_PER_DAY * 365 | Seconds per (non-leap) year |
Converts an age in seconds to "nn units". Inspired by previous attempts but also by gitweb's age_string() sub.
# File lib/rbot/core/utils/utils.rb, line 245
#
# Describe an age given in seconds as a coarse "nn units" string.
#
# The largest unit of which the age holds more than two is used (years,
# months, weeks, days, hours, minutes, seconds). Ages around 30, 60 and
# 90 minutes get a dedicated wording. Negative ages are described by
# their absolute value. All strings go through the _() translation hook.
#
# secs:: the age, in seconds
#
# Returns the translated description as a String.
def Utils.age_string(secs)
  # Only the magnitude matters here
  return Utils.age_string(-secs) if secs < 0

  return _("%{m} years") % { :m => secs/SEC_PER_YR } if secs > 2*SEC_PER_YR
  return _("%{m} months") % { :m => secs/SEC_PER_MNTH } if secs > 2*SEC_PER_MNTH
  return _("%{m} weeks") % { :m => secs/SEC_PER_WK } if secs > 2*SEC_PER_WK
  return _("%{m} days") % { :m => secs/SEC_PER_DAY } if secs > 2*SEC_PER_DAY
  return _("%{m} hours") % { :m => secs/SEC_PER_HR } if secs > 2*SEC_PER_HR

  # Special wordings for (roughly) half an hour, one hour, one hour and a half
  return _("half an hour") if secs.between?(20*SEC_PER_MIN, 40*SEC_PER_MIN)
  return _("an hour") if secs.between?(50*SEC_PER_MIN, 70*SEC_PER_MIN)
  return _("an hour and a half") if secs.between?(80*SEC_PER_MIN, 100*SEC_PER_MIN)

  return _("%{m} minutes") % { :m => secs/SEC_PER_MIN } if secs > 2*SEC_PER_MIN
  return _("%{m} seconds") % { :m => secs } if secs > 1

  _("one second")
end
HTML info filters often need to check if the webpage location of a passed DataStream ds matches a given Regexp.
# File lib/rbot/core/utils/utils.rb, line 653
#
# Check whether the location recorded in the headers of the DataStream
# +ds+ matches the Regexp +rx+.
#
# The 'x-rbot-location' and 'location' entries of ds[:headers] are
# flattened into a single list and filtered with +rx+.
#
# Returns the Array of matching locations, or nil when there are no
# headers or nothing matches.
def Utils.check_location(ds, rx)
  headers = ds[:headers]
  debug headers

  matches = []
  if headers
    candidates = [headers['x-rbot-location'], headers['location']].flatten
    matches = candidates.grep(rx)
  end
  debug matches

  matches.empty? ? nil : matches
end
Returns a comma separated list except for the last element which is joined in with specified conjunction
# File lib/rbot/core/utils/utils.rb, line 728
#
# Join +words+ into a single list, using a different separator before
# the last element.
#
# words::   the elements to join
# options:: :join_with (default ", ") separates all elements but the
#           last; :join_last_with (default: translated " and ")
#           introduces the last element
#
# With fewer than two elements the last element itself is returned
# (nil for an empty list).
def Utils.comma_list(words, options={})
  opts = { :join_with => ", ", :join_last_with => _(" and ") }.merge(options)

  return words.last if words.size < 2

  leading = words[0..-2].join(opts[:join_with])
  [leading, words.last].join(opts[:join_last_with])
end
Decode HTML entities in the String str, using HTMLEntities if the package was found, or UNESCAPE_TABLE otherwise.
# File lib/rbot/core/utils/utils.rb, line 335
#
# Decode the HTML entities found in the String +str+.
#
# When the HTMLEntities package is available it does all the work.
# Otherwise a built-in fallback is used:
# * decimal (&#233;) and hexadecimal (&#xE9;) character references are
#   turned into the corresponding UTF-8 character;
# * named entities are looked up in UNESCAPE_TABLE;
# * well-formed entities that cannot be resolved become '*'.
#
# Only text that actually looks like an entity is touched: an isolated
# '&' followed, somewhere later, by an unrelated ';' is left alone.
#
# Returns a new String.
def Utils.decode_html_entities(str)
  return HTMLEntities.decode_entities(str) if defined? ::HTMLEntities

  str.gsub(/&(#[xX][0-9a-fA-F]+|#?\d+|\w+);/) {
    entity = $1
    case entity
    when /\A#[xX]([0-9a-fA-F]+)\z/
      codepoint = $1.hex
    when /\A#?(\d+)\z/
      # to_i also takes care of 0-padded numbers; the bare "&123;" form
      # (without '#') is accepted for backward compatibility
      codepoint = $1.to_i
    else
      codepoint = nil
    end

    if codepoint
      begin
        [codepoint].pack("U")
      rescue RangeError, ArgumentError
        # not a representable character
        '*'
      end
    else
      # output the symbol's irc-translated character, or a * if it's unknown
      UNESCAPE_TABLE[entity] || '*'
    end
  }
end
Get the first pars of the first count urls. The pages are downloaded using the bot httputil service. Returns an array of the first paragraphs fetched. If (optional) opts :message is specified, those paragraphs are echoed as replies to the IRC message passed as opts :message
# File lib/rbot/core/utils/utils.rb, line 699
#
# Fetch the first paragraph of the pages listed in +urls+, stopping once
# +count+ pages have produced a paragraph.
#
# urls::  list of URL strings; it is only read, never modified
# count:: number of successful paragraphs wanted
# opts::  passed on to Utils.get_html_info; if opts[:message] is set,
#         each paragraph found is also sent as a reply to that message,
#         prefixed by the 1-based position of its URL
#
# URLs that cannot be retrieved are logged and skipped.
#
# Returns the Array of the :content values obtained, in order; an entry
# is nil when a page was retrieved but no paragraph was found in it.
def Utils.get_first_pars(urls, count, opts={})
  msg = opts[:message]
  retval = []

  urls.each_with_index do |url, i|
    break unless count > 0
    idx = i + 1

    begin
      info = Utils.get_html_info(URI.parse(url), opts)

      par = info[:content]
      retval.push(par)

      if par
        msg.reply "[#{idx}] #{par}", :overlong => :truncate if msg
        count -= 1
      end
    rescue => e
      debug "Unable to retrieve #{url}: #{e}"
    end
  end

  retval
end
This method extracts title, content (first par) and extra information from the given document doc.
doc can be an URI, a Net::HTTPResponse or a String.
If doc is a String, only title and content information are retrieved (if possible), using standard methods.
If doc is an URI or a Net::HTTPResponse, additional information is retrieved, and special title/summary extraction routines are used if possible.
# File lib/rbot/core/utils/utils.rb, line 568
#
# Extract title, content (first par) and extra information from +doc+,
# dispatching on its type:
#
# String::            handled by Utils.get_string_html_info
# Net::HTTPResponse:: handled by Utils.get_resp_html_info
# URI::               fetched through the bot httputil service; the
#                     response is then handled by
#                     Utils.get_resp_html_info and the result is
#                     returned in a DataStream
#
# +opts+ is passed through unchanged to the helper that is called.
#
# Raises RuntimeError when +doc+ is of any other type.
def Utils.get_html_info(doc, opts={})
  case doc
  when String
    Utils.get_string_html_info(doc, opts)
  when Net::HTTPResponse
    Utils.get_resp_html_info(doc, opts)
  when URI
    ret = DataStream.new
    @@bot.httputil.get_response(doc) { |resp|
      ret.replace Utils.get_resp_html_info(resp, opts)
    }
    return ret
  else
    # Still a RuntimeError, as with the former bare raise, but now it
    # tells what went wrong
    raise "cannot get HTML info from a #{doc.class} (expected String, Net::HTTPResponse or URI)"
  end
end
This method extracts title, content (first par) and extra information from the given Net::HTTPResponse resp.
Currently, the only accepted options (in opts) are
uri_fragment: | the URI fragment of the original request |
full_body: | get the whole body instead of only the first @@bot.config['http.info_bytes'] bytes |
Returns a DataStream with the following keys:
text: | the (partial) body |
title: | the title of the document (if any) |
content: | the first paragraph of the document (if any) |
headers: | the headers of the Net::HTTPResponse. The value is a Hash whose keys are lowercase forms of the HTTP header fields, and whose values are Arrays. |
# File lib/rbot/core/utils/utils.rb, line 605
#
# Extract title, content (first par) and extra information from the
# Net::HTTPResponse +resp+.
#
# opts:: :uri_fragment is the URI fragment of the original request; when
#        missing, it is taken from the 'x-rbot-location' or 'location'
#        header of the response, if that holds a non-empty fragment.
#        :full_body makes the whole body be read instead of the first
#        @@bot.config['http.info_bytes'] bytes only.
#        The Hash passed by the caller is never modified.
#
# Returns a DataStream built from the options, with :headers set to
# resp.to_hash and :text to the (partial) body. If an :htmlinfo filter
# handles the page, its result is returned instead; otherwise, for
# text, XML or HTML content types, the result of
# Utils.get_string_html_info is merged in.
#
# Raises UrlLinkError if +resp+ is not a Net::HTTPSuccess.
def Utils.get_resp_html_info(resp, opts={})
  case resp
  when Net::HTTPSuccess
    # Work on a private copy: the fragment of this page must not leak
    # into the caller's options, which may be reused for other pages
    opts = opts.dup

    loc = URI.parse(resp['x-rbot-location'] || resp['location']) rescue nil
    if loc and loc.fragment and not loc.fragment.empty?
      opts[:uri_fragment] ||= loc.fragment
    end
    ret = DataStream.new(opts.dup)
    ret[:headers] = resp.to_hash
    ret[:text] = partial = opts[:full_body] ? resp.body : resp.partial_body(@@bot.config['http.info_bytes'])

    filtered = Utils.try_htmlinfo_filters(ret)

    if filtered
      return filtered
    elsif resp['content-type'] =~ /^text\/|(?:x|ht)ml/
      ret.merge!(Utils.get_string_html_info(partial, opts))
    end
    return ret
  else
    raise UrlLinkError, "getting link (#{resp.code} - #{resp.message})"
  end
end
This method extracts title and content (first par) from the given HTML or XML document text, using standard methods (String#ircify_html_title, Utils.ircify_first_html_par)
Currently, the only accepted option (in opts) is
uri_fragment: | the URI fragment of the original request |
# File lib/rbot/core/utils/utils.rb, line 671
#
# Extract title and content (first par) from the HTML or XML document
# +text+, using String#ircify_html_title and
# Utils.ircify_first_html_par.
#
# opts:: :uri_fragment is the URI fragment of the original request.
#        When given and non-empty, the paragraph is looked for after
#        the first <a> tag whose name or id is that fragment. The
#        fragment is matched literally. The options are also passed on
#        to Utils.ircify_first_html_par, with :strip defaulting to the
#        title.
#
# Returns a Hash with keys :title and :content; :content is nil when no
# paragraph text was found.
def Utils.get_string_html_info(text, opts={})
  debug "getting string html info"
  txt = text.dup
  title = txt.ircify_html_title
  debug opts
  frag = opts[:uri_fragment]
  if frag and not frag.empty?
    # The fragment comes from a URL: escape it so that characters such
    # as '.', '(' or '+' cannot alter (or break) the pattern
    fragreg = /<a\s+(?:[^>]+\s+)?(?:name|id)=["']?#{Regexp.escape(frag)}["']?[^>]*>/im
    debug fragreg
    debug txt
    anchor = txt.match(fragreg)
    # grab the post-match
    txt = anchor.post_match if anchor
    debug txt
  end
  c_opts = opts.dup
  c_opts[:strip] ||= title
  content = Utils.ircify_first_html_par(txt, c_opts)
  content = nil if content.nil? or content.empty?
  return {:title => title, :content => content}
end
Try to grab and IRCify the first HTML par (<p> tag) in the given string. If possible, grab the one after the first heading
It is possible to pass some options to determine how the stripping occurs. Currently supported options are
strip: | Regex or String to strip at the beginning of the obtained text |
min_spaces: | minimum number of spaces a paragraph should have |
# File lib/rbot/core/utils/utils.rb, line 361
#
# Grab and IRCify the first paragraph of the HTML in +xml_org+.
#
# This is only a dispatcher: the Hpricot-based implementation
# (Utils.ircify_first_html_par_wh) is used when Hpricot is defined,
# the regexp-based one (Utils.ircify_first_html_par_woh) otherwise.
# +opts+ is handed over untouched.
def Utils.ircify_first_html_par(xml_org, opts={})
  return Utils.ircify_first_html_par_wh(xml_org, opts) if defined? ::Hpricot

  Utils.ircify_first_html_par_woh(xml_org, opts)
end
HTML first par grabber using hpricot
# File lib/rbot/core/utils/utils.rb, line 370
#
# HTML first par grabber using hpricot.
#
# xml_org:: the HTML text to inspect
# opts::    :strip is a Regexp or String to strip at the beginning of
#           the obtained text; :min_spaces is the minimum number of
#           spaces a paragraph should have (default 8, negative values
#           count as 0)
#
# Candidates are tried in this order: <p> elements following a heading,
# any <p> element, then div/span elements (or, failing those, table
# elements) whose class or id contains 'body', 'message' or 'text'.
# If no candidate has enough spaces, the requirement is halved and the
# search is repeated.
#
# Returns the IRCified text of the first acceptable candidate.
def Utils.ircify_first_html_par_wh(xml_org, opts={})
  doc = Hpricot(xml_org)

  # Strip styles and scripts
  (doc/"style|script").remove

  debug doc

  strip = opts[:strip]
  strip = Regexp.new(/^#{Regexp.escape(strip)}/) if strip.kind_of?(String)

  min_spaces = opts[:min_spaces] || 8
  min_spaces = 0 if min_spaces < 0

  txt = String.new

  # True while the current candidate is not good enough
  too_short = lambda { txt.empty? || txt.count(" ") < min_spaces }

  # Walk through the given elements, leaving in txt the first acceptable
  # candidate (or the last one tried, if none is acceptable)
  scan = lambda do |elements, label|
    elements.each do |el|
      debug el
      txt = el.to_html.ircify_html
      txt.sub!(strip, '') if strip
      debug "(#{label}) #{txt.inspect} has #{txt.count(" ")} spaces"
      break unless too_short.call
    end
  end

  # True if the class or the id of the element hints at actual text
  texty = lambda { |el| el[:class] =~ AFTER_PAR_CLASS or el[:id] =~ AFTER_PAR_CLASS }

  # The element lists are collected once, on the first pass only
  pre_h = pars = by_span = nil

  while true
    debug "Minimum number of spaces: #{min_spaces}"

    # Initial attempt: <p> that follows <h\d>
    if pre_h.nil?
      pre_h = Hpricot::Elements[]
      found_h = false
      doc.search("*") do |e|
        next if e.bogusetag?
        case e.pathname
        when /^h\d/
          found_h = true
        when 'p'
          pre_h << e if found_h
        end
      end
      debug "Hx: found: #{pre_h.pretty_inspect}"
    end

    scan.call(pre_h, 'Hx attempt')
    return txt unless too_short.call

    # Second natural attempt: just get any <p>
    pars = doc/"p" if pars.nil?
    debug "par: found: #{pars.pretty_inspect}"

    scan.call(pars, 'par attempt')
    return txt unless too_short.call

    # Nothing yet ... let's get drastic: we look for non-par elements too,
    # but only for those that match something that we know is likely to
    # contain text

    # Some blogging and forum platforms use spans or divs with a 'body' or
    # 'message' or 'text' in their class to mark actual text. Since we want
    # the class match to be partial and case insensitive, we collect
    # the common elements that may have this class and then filter out those
    # we don't need. If no divs or spans are found, we'll accept additional
    # elements too (td, tr, tbody, table).
    if by_span.nil?
      by_span = Hpricot::Elements[]
      extra = Hpricot::Elements[]
      doc.search("*") do |el|
        next if el.bogusetag?
        case el.pathname
        when AFTER_PAR_PATH
          by_span.push el if texty.call(el)
        when AFTER_PAR_EX
          extra.push el if texty.call(el)
        end
      end
      if by_span.empty? and not extra.empty?
        by_span.concat extra
      end
      debug "other \#1: found: #{by_span.pretty_inspect}"
    end

    scan.call(by_span, 'other attempt #1')
    return txt unless too_short.call

    # At worst, we can try stuff which is comprised between two <br>
    # TODO

    debug "Last candidate #{txt.inspect} has #{txt.count(" ")} spaces"
    return txt unless txt.count(" ") < min_spaces
    break if min_spaces == 0
    min_spaces /= 2
  end
end
HTML first par grabber without hpricot
# File lib/rbot/core/utils/utils.rb, line 479
#
# HTML first par grabber without hpricot.
#
# xml_org:: the HTML text to inspect
# opts::    :strip is a Regexp or String to strip at the beginning of
#           the obtained text; :min_spaces is the minimum number of
#           spaces a paragraph should have (default 8, negative values
#           count as 0)
#
# After dropping comments, scripts and styles, candidates are looked
# for with, in order: PAR_REGEX in the text following the first
# heading, PAR_REGEX in the whole document, AFTER_PAR1_REGEX and
# AFTER_PAR2_REGEX. If no candidate has enough spaces, the requirement
# is halved and the search is repeated.
#
# Returns the IRCified text of the first acceptable candidate.
def Utils.ircify_first_html_par_woh(xml_org, opts={})
  xml = xml_org.gsub(/<!--.*?-->/m, '')
  xml = xml.gsub(/<script(?:\s+[^>]*)?>.*?<\/script>/im, "")
  xml = xml.gsub(/<style(?:\s+[^>]*)?>.*?<\/style>/im, "")

  strip = opts[:strip]
  strip = Regexp.new(/^#{Regexp.escape(strip)}/) if strip.kind_of?(String)

  min_spaces = opts[:min_spaces] || 8
  min_spaces = 0 if min_spaces < 0

  txt = String.new

  # True while the current candidate is not good enough
  too_short = lambda { txt.empty? || txt.count(" ") < min_spaces }

  # Try, one after the other, the matches of regex in source, for as long
  # as the current candidate (possibly left over from a previous attempt)
  # is not good enough
  scan = lambda do |source, regex, label|
    rest = source
    while too_short.call
      found = regex.match(rest)
      break unless found
      txt = found[0].ircify_html
      rest = found.post_match
      txt.sub!(strip, '') if strip
      debug "(#{label}) #{txt.inspect} has #{txt.count(" ")} spaces"
    end
  end

  while true
    debug "Minimum number of spaces: #{min_spaces}"

    # First attempt: paragraphs following the first heading
    heading = xml.match(HX_REGEX)
    scan.call(heading.post_match, PAR_REGEX, 'Hx attempt') if heading

    return txt unless too_short.call

    # If we haven't found a first par yet, try to get it from the whole
    # document
    scan.call(xml, PAR_REGEX, 'par attempt')

    return txt unless too_short.call

    # Nothing yet ... let's get drastic: we look for non-par elements too,
    # but only for those that match something that we know is likely to
    # contain text

    # Attempt #1
    scan.call(xml, AFTER_PAR1_REGEX, 'other attempt #1')

    return txt unless too_short.call

    # Attempt #2
    scan.call(xml, AFTER_PAR2_REGEX, 'other attempt #2')

    debug "Last candidate #{txt.inspect} has #{txt.count(" ")} spaces"
    return txt unless txt.count(" ") < min_spaces
    break if min_spaces == 0
    min_spaces /= 2
  end
end
# File lib/rbot/core/utils/parse_time.rb, line 160
#
# Turn a time specification into an offset, in seconds, from now.
#
# str:: one of
#       * "HH:MM" or "HH:MM:SS" (24-hour clock);
#       * "HH:MMam" or "HH:MMpm" (12-hour clock; 12:xxam is just after
#         midnight, 12:xxpm just after noon);
#       * anything else, which is handed to ParseTime.parse_period.
#
# For the clock forms, a time of day whose hour and minute are earlier
# than the current ones is taken to mean tomorrow.
#
# Returns the number of seconds between now and the given time (for the
# clock forms), or whatever ParseTime.parse_period returns.
def Utils.parse_time_offset(str)
  # Offset from now to the next occurrence of the given time of day.
  # When no seconds are given, the current ones are used.
  offset_to = lambda do |hour, min, sec|
    now = Time.now
    later = Time.mktime(now.year, now.month, now.day, hour, min, sec || now.sec)

    # if the given hour is earlier than current hour, given timestr
    # must have been meant to be in the future
    if hour < now.hour || (hour == now.hour && min < now.min)
      later += 60*60*24
    end

    later - now
  end

  case str
  when /^(\d+):(\d+)(?:\:(\d+))?$/
    return offset_to.call($1.to_i, $2.to_i, $3.to_i)
  when /^(\d+):(\d+)(am|pm)$/
    hour = $1.to_i
    min = $2.to_i
    ampm = $3
    # On a 12-hour clock, 12 is the first hour of each half day
    hour = 0 if hour == 12
    hour += 12 if ampm == "pm"
    return offset_to.call(hour, min, nil)
  else
    ParseTime.parse_period(str)
  end
end
Execute an external program, returning a String obtained by redirecting the program's standard error and standard output
# File lib/rbot/core/utils/utils.rb, line 278
#
# Execute an external program and return what it wrote.
#
# The process is forked with IO.popen("-"); in the child, standard error
# is redirected onto standard output before +command+ is exec'ed with
# +args+ (no shell is involved, since the arguments are passed
# separately). If the exec fails, the child prints a diagnostic, which
# ends up in the returned text, and exits with status 1.
#
# command:: the program to run
# args::    its arguments
#
# Returns the combined output of the program as a String, exactly as it
# was written.
def Utils.safe_exec(command, *args)
  IO.popen("-") { |p|
    if p
      # Parent: the lines already end with a newline, so the output is
      # read as is rather than re-joined with extra "\n"
      return p.read
    else
      # Child
      begin
        $stderr.reopen($stdout)
        exec(command, *args)
      rescue Exception => e
        puts "exception #{e.pretty_inspect} trying to run #{command}"
        Kernel::exit! 1
      end
      puts "exec of #{command} failed"
      Kernel::exit! 1
    end
  }
end
Safely (atomically) save to file, by passing a tempfile to the block and then moving the tempfile to its final location when done.
# File lib/rbot/core/utils/utils.rb, line 321
#
# Safely save to +file+: a tempfile created in the safe save directory
# is passed (in binary mode) to the block, and it is renamed to +file+
# once the block is done.
#
# If the block or the rename fails, the tempfile is closed and removed,
# +file+ is left as it was, and the error is propagated.
#
# file:: path of the file to (over)write
#
# Raises RuntimeError if no safe save directory is defined.
def Utils.safe_save(file)
  raise 'No safe save directory defined!' if @@safe_save_dir.nil?
  basename = File.basename(file)
  temp = Tempfile.new(basename, @@safe_save_dir)
  saved = false
  begin
    temp.binmode
    yield temp if block_given?
    temp.close
    File.rename(temp.path, file)
    saved = true
  ensure
    # Do not leave a stale, half-written tempfile behind on failure
    temp.close! unless saved
  end
end
Turn a number of seconds into a short hours:minutes:seconds form, e.g. 3:18:10 or 5'12" or 7s
# File lib/rbot/core/utils/utils.rb, line 208
#
# Turn a number of seconds into a compact string:
# * h:mm:ss  when there is at least one hour (e.g. 3:18:10, 1:05:07);
# * m'ss"    when there is at least one minute (e.g. 5'12", 5'03");
# * Ns       otherwise (e.g. 7s).
#
# Every field but the leading one is zero-padded to two digits, so that
# the result reads like a clock.
#
# seconds:: the duration; it is converted with to_i first
#
# Returns the formatted String.
def Utils.secs_to_short(seconds)
  secs = seconds.to_i # make sure it's an integer
  mins, secs = secs.divmod 60
  hours, mins = mins.divmod 60
  if hours > 0
    "%d:%02d:%02d" % [hours, mins, secs]
  elsif mins > 0
    "%d'%02d\"" % [mins, secs]
  else
    "%ds" % [secs]
  end
end
Turn a number of seconds into a human readable string, e.g 2 days, 3 hours, 18 minutes and 10 seconds
# File lib/rbot/core/utils/utils.rb, line 181
#
# Turn a number of seconds into a human readable string, e.g.
# "2 days, 3 hours, 18 minutes and 10 seconds".
#
# The duration is split into years (365 days), months (30 days), days,
# hours, minutes and seconds; units whose amount is zero are left out,
# except that a duration with no other unit gives "0 seconds".
#
# secs:: the duration, in seconds
#
# Returns the (translated) description as a String.
def Utils.secs_to_string(secs)
  units = [
    [SEC_PER_YR,   _("year"),   _("years")],
    [SEC_PER_MNTH, _("month"),  _("months")],
    [SEC_PER_DAY,  _("day"),    _("days")],
    [SEC_PER_HR,   _("hour"),   _("hours")],
    [SEC_PER_MIN,  _("minute"), _("minutes")]
  ]

  ret = []
  units.each do |span, singular, plural|
    amount, secs = secs.divmod(span)
    secs_to_string_case(ret, amount, singular, plural) if amount > 0
  end

  secs = secs.to_i
  # The seconds are always mentioned when there is nothing else to say,
  # so ret holds at least one element from here on
  secs_to_string_case(ret, secs, _("second"), _("seconds")) if secs > 0 or ret.empty?

  # Array#to_s must not be used for the single-element case: since
  # Ruby 1.9 it returns the inspect form (["1 second"])
  return ret.first if ret.length == 1

  [ret[0..-2].join(", "), ret.last].join(_(" and "))
end
Auxiliary method needed by Utils.secs_to_string
# File lib/rbot/core/utils/utils.rb, line 170
#
# Auxiliary method needed by Utils.secs_to_string: append to +array+ the
# description of +var+ units, using +string+ as the unit name when +var+
# is 1 and +plural+ otherwise.
#
# Returns +array+.
def Utils.secs_to_string_case(array, var, string, plural)
  array << (1 == var ? "1 #{string}" : "#{var} #{plural}")
end
Returns human readable time. Like: 5 days ago
about one hour ago
Options: :start_date sets the time to measure against (defaults to now); :date_format, used with <tt>to_formatted_s</tt>, defaults to :default.
# File lib/rbot/core/utils/utils.rb, line 227
#
# Describe how far +time+ is from a reference time, in human readable
# form, e.g. "5 days ago" or "an hour from now".
#
# time::    the Time to describe
# options:: :start_date is the time to measure against (defaults to
#           now). The Hash is only read; it is not modified. Other keys
#           (such as :date_format) are currently ignored.
#
# Differences of less than two seconds are reported as "right now".
# The wording of the distance comes from Utils.age_string.
#
# Returns the (translated) description as a String.
def Utils.timeago(time, options = {})
  start_date = options[:start_date] || Time.new
  delta = (start_date - time).round

  return _("right now") if delta.abs < 2

  distance = Utils.age_string(delta)
  if delta < 0
    _("%{d} from now") % {:d => distance}
  else
    _("%{d} ago") % {:d => distance}
  end
end
Try executing an external program, returning true if the run was successful and false otherwise
# File lib/rbot/core/utils/utils.rb, line 298
#
# Try executing an external program.
#
# The process is forked with IO.popen("-"); the child redirects its
# standard error onto standard output and exec's +command+ with +args+,
# exiting with status 1 if that fails. The parent reads the output of
# the child and only logs it.
#
# Returns true if the run was successful, false otherwise, according to
# the exit status found in $?.
def Utils.try_exec(command, *args)
  IO.popen("-") do |pipe|
    if pipe
      # Parent: consume (and log) whatever the program prints
      debug pipe.readlines
    else
      # Child: never return into the bot code
      begin
        $stderr.reopen($stdout)
        exec(command, *args)
      rescue Exception
        Kernel::exit! 1
      end
      Kernel::exit! 1
    end
  end
  debug $?
  return $?.success?
end
This method runs an appropriately-crafted DataStream ds through the filters in the :htmlinfo filter group, in order. If one of the filters returns non-nil, its results are merged in ds and returned. Otherwise nil is returned.
The input DataStream should have the downloaded HTML as primary key (:text) and possibly a :headers key holding the response headers.
# File lib/rbot/core/utils/utils.rb, line 637
#
# Run the DataStream +ds+ through the filters of the :htmlinfo filter
# group, in the order given by @@bot.filter_names, stopping at the
# first one that returns a non-nil result.
#
# Returns +ds+ merged with that result, or nil if there are no filters
# or none of them returned anything.
def Utils.try_htmlinfo_filters(ds)
  names = @@bot.filter_names(:htmlinfo)
  return nil if names.empty?

  result = nil
  # TODO filter priority
  names.each do |name|
    debug "testing htmlinfo filter #{name}"
    result = @@bot.filter(@@bot.global_filter_name(name, :htmlinfo), ds)
    debug "returned #{result.pretty_inspect}"
    break if result
  end

  result ? ds.merge(result) : nil
end