AnyEvent で HTTP::Response::Parser をつかってみる。
AnyEvent で HTTP::Response::Parser(picohttpparser の xs wrapper) をつかうとどんなかんじかなーということで、かいてみた。SSL, リダイレクト, keep-alive, Cookie-jar などのサポートをはしょってるけど、それなりにうごくはず。
で、ベンチ結果からいうと、AnyEvent::HTTP にくらべるとだいぶいいけど、AnyEvent::Curl にくらべると、まだまだといったかんじ。あとは、nytprof をみるかんじだと、AnyEvent::Socket あたりのアドレス解決部分とかで時間くってるっぽい。
まあ結論としては、全部Cでやる Curl を素直につかえという話か。
Benchmark: timing 5 iterations of anyevent, anyevent_fast, curl, curl_http_response, lwp...
anyevent: 0 wallclock secs ( 0.19 usr + 0.03 sys = 0.22 CPU) @ 22.73/s (n=5)
(warning: too few iterations for a reliable count)
anyevent_fast: 0 wallclock secs ( 0.14 usr + 0.02 sys = 0.16 CPU) @ 31.25/s (n=5)
(warning: too few iterations for a reliable count)
curl: 0 wallclock secs ( 0.06 usr + 0.02 sys = 0.08 CPU) @ 62.50/s (n=5)
(warning: too few iterations for a reliable count)
curl_http_response: 0 wallclock secs ( 0.05 usr + 0.00 sys = 0.05 CPU) @ 100.00/s (n=5)
(warning: too few iterations for a reliable count)
lwp: 1 wallclock secs ( 0.54 usr + 0.05 sys = 0.59 CPU) @ 8.47/s (n=5)
package AnyEvent::HTTP::HRP;
# does not supports:
# redirection
# ssl
# keep-alive
use strict;
use warnings;
use HTTP::Response::Parser qw/parse_http_response/;
our $VERSION = '0.01';
my $TIMEOUT = 300;
my $USERAGENT = __PACKAGE__ . "/$VERSION";
my $CRLF = "\015\012";
sub http_request {
my $cb = pop;
my ($method, $uri, %arg) = @_;
$method = uc $method;
my $timeout = $arg{timeout} || $TIMEOUT;
my %hdr;
if ( my $hdr = $arg{headers} ) {
while ( my ( $k, $v ) = each %$hdr ) {
$hdr{ lc $k } = $v;
}
}
# parse uri
my ($uscheme, $uauthority, $upath, $query, $fragment) =
$url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
$uscheme = lc $uscheme;
my $uport =
$uscheme eq "http" ? 80
: $uscheme eq "https" ? 443
: return $cb->(
undef,
{
Status => 599,
Reason => "Only http and https URL schemes supported",
URL => $url
}
);
$uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
or return $cb->(undef, { Status => 599, Reason => "Unparsable URL", URL => $url });
my $uhost = $1;
$uport = $2 if defined $2;
$hdr{host} = defined $2 ? "$uhost:$2" : "$uhost"
unless exists $hdr{host};
$uhost =~ s/^\[(.*)\]$/$1/;
$upath .= "?$query" if length $query;
$upath =~ s%^/?%/%;
my ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath);
$hdr{"user-agent"} ||= $USERAGENT unless exists $hdr{"user-agent"};
$hdr{"content-length"} = length $arg{body}
if length $arg{body} || $method ne "GET";
$hdr{'connection'} ||= 'close';
my %state;
$state{sock} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub {
my $fh = shift or die "cannot open socket";
my $handle = AnyEvent::Handle->new(fh => $fh, timeout => 1, peername => '127.0.0.1');
$handle->push_write(
join('', "$method $rpath HTTP/1.0$CRLF",
(map "\u$_: $hdr{$_}$CRLF", grep defined $hdr{$_}, keys %hdr),
"$CRLF",
'')
);
%hdr = (); # reduce memory usage, save a kitten
$handle->on_error(
sub {
$cb->(
Status => 599,
Reason => "Various socket error: $!",
URL => $url
);
}
);
$handle->on_eof(
sub { warn "EOF" },
);
$handle->on_read(
sub {
my $res = {};
my $ret = parse_http_response($_[0]->{rbuf}, $res);
if ($ret > 0) {
$_[0]->{rbuf} = substr($_[0]->{rbuf}, $ret); # remove header part
my $len = $res->{_headers}->{'content-length'};
my $finish = sub {
$cb->($_[0], $_[1] || $res);
$handle->on_error(undef);
$handle->on_eof(undef);
$handle->on_read(undef);
%state = ();
};
if ( $res->{_rc} =~ /^(?:1..|[23]04)$/
or $method eq "HEAD"
or ( defined $len && !$len ) )
{
$finish->(""); # no body
}
elsif ( $len <= length( $_[0]->{rbuf} ) ) {
$finish->((substr delete $_[0]->{rbuf}, 0, $len, ""));
}
else {
$_[0]->on_eof(undef);
if ($len) {
$_[0]->on_error(
sub {
$finish->(
undef,
{
Status => 599,
Reason => $_[2],
URL => $url
}
);
}
);
$_[0]->on_read(
sub {
$finish->(
(
substr delete $_[0]{rbuf}, 0,
$len, ""
),
) if $len <= length $_[0]{rbuf};
}
);
} else {
$_[0]->on_error(
sub {
$! == Errno::EPIPE || !$!
? $finish->( delete $_[0]{rbuf})
: $finish->(
undef,
{
Status => 599,
Reason => $_[2],
URL => $url
}
);
}
);
$_[0]->on_read( sub { } );
}
}
} elsif ($ret == -1) {
$cb->(
Status => 599,
Reason => "HTTP response header parsing error",
URL => $url
);
} elsif ($ret == -2) {
# parsed correctly, but incomplete response.
# (may not reach here in the real world...)
}
}
);
$state{handle} = $handle;
};
defined wantarray && AnyEvent::Util::guard { %state = () }
}
1;
Published: 2010-06-02(Wed) 00:49