AnyEvent で HTTP::Response::Parser をつかってみる。
AnyEvent で HTTP::Response::Parser(picohttpparser の xs wrapper) をつかうとどんなかんじかなーということで、かいてみた。SSL, リダイレクト, keep-alive, Cookie-jar などのサポートをはしょってるけど、それなりにうごくはず。
で、ベンチ結果からいうと、AnyEvent::HTTP にくらべるとだいぶいいけど、AnyEvent::Curl にくらべると、まだまだといったかんじ。あとは、nytprof をみるかんじだと、AnyEvent::Socket あたりのアドレス解決部分とかで時間くってるっぽい。
まあ結論としては、全部Cでやる Curl を素直につかえという話か。
Benchmark: timing 5 iterations of anyevent, anyevent_fast, curl, curl_http_response, lwp... anyevent: 0 wallclock secs ( 0.19 usr + 0.03 sys = 0.22 CPU) @ 22.73/s (n=5) (warning: too few iterations for a reliable count) anyevent_fast: 0 wallclock secs ( 0.14 usr + 0.02 sys = 0.16 CPU) @ 31.25/s (n=5) (warning: too few iterations for a reliable count) curl: 0 wallclock secs ( 0.06 usr + 0.02 sys = 0.08 CPU) @ 62.50/s (n=5) (warning: too few iterations for a reliable count) curl_http_response: 0 wallclock secs ( 0.05 usr + 0.00 sys = 0.05 CPU) @ 100.00/s (n=5) (warning: too few iterations for a reliable count) lwp: 1 wallclock secs ( 0.54 usr + 0.05 sys = 0.59 CPU) @ 8.47/s (n=5)
package AnyEvent::HTTP::HRP; # does not supports: # redirection # ssl # keep-alive use strict; use warnings; use HTTP::Response::Parser qw/parse_http_response/; our $VERSION = '0.01'; my $TIMEOUT = 300; my $USERAGENT = __PACKAGE__ . "/$VERSION"; my $CRLF = "\015\012"; sub http_request { my $cb = pop; my ($method, $uri, %arg) = @_; $method = uc $method; my $timeout = $arg{timeout} || $TIMEOUT; my %hdr; if ( my $hdr = $arg{headers} ) { while ( my ( $k, $v ) = each %$hdr ) { $hdr{ lc $k } = $v; } } # parse uri my ($uscheme, $uauthority, $upath, $query, $fragment) = $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|; $uscheme = lc $uscheme; my $uport = $uscheme eq "http" ? 80 : $uscheme eq "https" ? 443 : return $cb->( undef, { Status => 599, Reason => "Only http and https URL schemes supported", URL => $url } ); $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x or return $cb->(undef, { Status => 599, Reason => "Unparsable URL", URL => $url }); my $uhost = $1; $uport = $2 if defined $2; $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost" unless exists $hdr{host}; $uhost =~ s/^\[(.*)\]$/$1/; $upath .= "?$query" if length $query; $upath =~ s%^/?%/%; my ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath); $hdr{"user-agent"} ||= $USERAGENT unless exists $hdr{"user-agent"}; $hdr{"content-length"} = length $arg{body} if length $arg{body} || $method ne "GET"; $hdr{'connection'} ||= 'close'; my %state; $state{sock} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub { my $fh = shift or die "cannot open socket"; my $handle = AnyEvent::Handle->new(fh => $fh, timeout => 1, peername => '127.0.0.1'); $handle->push_write( join('', "$method $rpath HTTP/1.0$CRLF", (map "\u$_: $hdr{$_}$CRLF", grep defined $hdr{$_}, keys %hdr), "$CRLF", '') ); %hdr = (); # reduce memory usage, save a kitten $handle->on_error( sub { $cb->( Status => 599, Reason => "Various socket error: $!", URL => $url ); } ); $handle->on_eof( sub { warn "EOF" }, ); $handle->on_read( sub { my $res = {}; my $ret = parse_http_response($_[0]->{rbuf}, $res); if ($ret > 0) { $_[0]->{rbuf} = substr($_[0]->{rbuf}, $ret); # remove header part my $len = $res->{_headers}->{'content-length'}; my $finish = sub { $cb->($_[0], $_[1] || $res); $handle->on_error(undef); $handle->on_eof(undef); $handle->on_read(undef); %state = (); }; if ( $res->{_rc} =~ /^(?:1..|[23]04)$/ or $method eq "HEAD" or ( defined $len && !$len ) ) { $finish->(""); # no body } elsif ( $len <= length( $_[0]->{rbuf} ) ) { $finish->((substr delete $_[0]->{rbuf}, 0, $len, "")); } else { $_[0]->on_eof(undef); if ($len) { $_[0]->on_error( sub { $finish->( undef, { Status => 599, Reason => $_[2], URL => $url } ); } ); $_[0]->on_read( sub { $finish->( ( substr delete $_[0]{rbuf}, 0, $len, "" ), ) if $len <= length $_[0]{rbuf}; } ); } else { $_[0]->on_error( sub { $! == Errno::EPIPE || !$! ? $finish->( delete $_[0]{rbuf}) : $finish->( undef, { Status => 599, Reason => $_[2], URL => $url } ); } ); $_[0]->on_read( sub { } ); } } } elsif ($ret == -1) { $cb->( Status => 599, Reason => "HTTP response header parsing error", URL => $url ); } elsif ($ret == -2) { # parsed correctly, but incomplete response. # (may not reach here in the real world...) } } ); $state{handle} = $handle; }; defined wantarray && AnyEvent::Util::guard { %state = () } } 1;