ただ作ってみたくなったので実装してみました。発言されたURLのタイトルとContent-Typeを返すボットです。
http://github.com/punytan/mobijiro
名前はアレを真似てつけました。
レスポンスの文字コード
AnyEvent::HTTP::http_get で取得すると、 UTF-8 だったり、 EUC-JP であったりと、 body の文字コードが何か検討がつかない。なので、 HTTP::Message の decoded_content を使うことでそのあたりの処理を楽にしています。その役割を担うのが Tatsumaki::HTTPClient。
ソース
use strict; use warnings; use feature qw/say/; use Data::Dumper; use Encode; use Data::Validate::URI qw/is_uri/; use AnyEvent; use AnyEvent::HTTP; use AnyEvent::IRC; use AnyEvent::IRC::Connection; use AnyEvent::IRC::Client; use Tatsumaki::HTTPClient; use Web::Scraper; our $CONFIG = { ch => '#', server => 'irc.freenode.net', port => 6667, info => { nick => '', user => '', real => 'the bot', }, }; #----- our $CONNECTION = 0; our $LT = scalar localtime; my @conf = ( $CONFIG->{server}, $CONFIG->{port}, $CONFIG->{info} ); my $cv = AnyEvent->condvar; my $ua = Tatsumaki::HTTPClient->new; my $cl; my $ltw; $ltw = AnyEvent->timer( after => 1, interval => 1, cb => sub { $LT = scalar localtime; }, ); my $t; $t = AnyEvent->timer (after => 5, interval=> 30, cb => sub { say "[ $LT ] connection status : $CONNECTION"; connect_to_server() unless $CONNECTION; }); my $scraper = scraper { process '/html/head/title', 'title' => 'TEXT'; }; $cv->recv; sub connect_to_server { say "[ $LT ] start connection"; $cl = AnyEvent::IRC::Client->new; $cl->reg_cb( 'connect' => sub { my ( $cl, $err ) = @_; if ( defined $err ) { print "[ $LT ] Connect ERROR! => $err\n"; $CONNECTION = 0; $cv->broadcast; } else { print "[ $LT ] Connected! Yay!\n"; $CONNECTION = 1; } }, registered => sub { my ($self) = @_; print "[ $LT ] registered!\n"; $cl->enable_ping (60); $cl->send_srv("JOIN", $CONFIG->{ch}); $cl->send_chan($CONFIG->{ch}, "NOTICE", $CONFIG->{ch}, "hi, i'm a bot!"); }, irc_001 => sub { say "[ $LT ] irc_001"; }, irc_privmsg => sub { my ($self, $msg) = @_; my $message = decode_utf8 $msg->{params}[1]; process_msg($message); }, disconnect => sub { $CONNECTION = 0; print "[ $LT ] Oh, got a disconnect: $_[1], exiting...\n"; } ); $cl->connect(@conf); } sub process_msg { my $msg = shift; my @url_list = $msg =~ m{(http://[\S]+)}g; for my $url (@url_list) { say "[ $LT ] $url"; if (is_uri($url)) { $ua->get($url, timeout => 3, sub { my $res = shift; my $info = {}; my $decoded_content = $res->decoded_content; $info->{content_type} = $res->headers->content_type; if ($res->is_success) { my $data = $scraper->scrape($res->decoded_content); $info->{title} = $data->{title}; } else { $info->{title} = 'NO TITLE'; } my $msg = encode_utf8(" $info->{title} [ Content-Type: $info->{content_type} ] "); $cl->send_chan($CONFIG->{ch}, "NOTICE", $CONFIG->{ch}, "$msg"); } ); } else { $cl->send_chan($CONFIG->{ch}, "NOTICE", $CONFIG->{ch}, "$url is not valid URL"); } } } __END__