tokuhirom's Blog

AnyEvent で HTTP::Response::Parser をつかってみる。

AnyEvent で HTTP::Response::Parser(picohttpparser の xs wrapper) をつかうとどんなかんじかなーということで、かいてみた。SSL, リダイレクト, keep-alive, Cookie-jar などのサポートをはしょってるけど、それなりにうごくはず。

で、ベンチ結果からいうと、AnyEvent::HTTP にくらべるとだいぶいいけど、AnyEvent::Curl にくらべると、まだまだといったかんじ。あとは、nytprof をみるかんじだと、AnyEvent::Socket あたりのアドレス解決部分とかで時間くってるっぽい。

まあ結論としては、全部Cでやる Curl を素直につかえという話か。

Benchmark: timing 5 iterations of anyevent, anyevent_fast, curl, curl_http_response, lwp...
  anyevent:  0 wallclock secs ( 0.19 usr +  0.03 sys =  0.22 CPU) @ 22.73/s (n=5)
            (warning: too few iterations for a reliable count)
anyevent_fast:  0 wallclock secs ( 0.14 usr +  0.02 sys =  0.16 CPU) @ 31.25/s (n=5)
            (warning: too few iterations for a reliable count)
      curl:  0 wallclock secs ( 0.06 usr +  0.02 sys =  0.08 CPU) @ 62.50/s (n=5)
            (warning: too few iterations for a reliable count)
curl_http_response:  0 wallclock secs ( 0.05 usr +  0.00 sys =  0.05 CPU) @ 100.00/s (n=5)
            (warning: too few iterations for a reliable count)
       lwp:  1 wallclock secs ( 0.54 usr +  0.05 sys =  0.59 CPU) @  8.47/s (n=5)
package AnyEvent::HTTP::HRP;
# does not supports:
#   redirection
#   ssl
#   keep-alive
use strict;
use warnings;

use HTTP::Response::Parser qw/parse_http_response/;
our $VERSION = '0.01';
my $TIMEOUT = 300;
my $USERAGENT = __PACKAGE__ . "/$VERSION";
my $CRLF = "\015\012";

sub http_request {
    my $cb = pop;
    my ($method, $uri, %arg) = @_;
    $method = uc $method;

    my $timeout = $arg{timeout} || $TIMEOUT;

    my %hdr;
    if ( my $hdr = $arg{headers} ) {
        while ( my ( $k, $v ) = each %$hdr ) {
            $hdr{ lc $k } = $v;
        }
    }

    # parse uri
    my ($uscheme, $uauthority, $upath, $query, $fragment) =
        $url =~ m|(?:([^:/?#]+):)?(?://([^/?#]*))?([^?#]*)(?:\?([^#]*))?(?:#(.*))?|;
    $uscheme = lc $uscheme;
    my $uport =
        $uscheme eq "http"  ? 80
        : $uscheme eq "https" ? 443
        : return $cb->(
        undef,
        {
            Status => 599,
            Reason => "Only http and https URL schemes supported",
            URL    => $url
        }
        );
    $uauthority =~ /^(?: .*\@ )? ([^\@:]+) (?: : (\d+) )?$/x
            or return $cb->(undef, { Status => 599, Reason => "Unparsable URL", URL => $url });
    my $uhost = $1;
        $uport = $2 if defined $2;
    $hdr{host} = defined $2 ? "$uhost:$2" : "$uhost"
            unless exists $hdr{host};
    $uhost =~ s/^\[(.*)\]$/$1/;
        $upath .= "?$query" if length $query;
    $upath =~ s%^/?%/%;
    my ($rhost, $rport, $rscheme, $rpath) = ($uhost, $uport, $uscheme, $upath);
    $hdr{"user-agent"} ||= $USERAGENT                     unless exists $hdr{"user-agent"};
    $hdr{"content-length"} = length $arg{body}
            if length $arg{body} || $method ne "GET";
    $hdr{'connection'} ||= 'close';

    my %state;
    $state{sock} = AnyEvent::Socket::tcp_connect $rhost, $rport, sub {
        my $fh = shift or die "cannot open socket";
        my $handle = AnyEvent::Handle->new(fh => $fh, timeout => 1, peername => '127.0.0.1');
        $handle->push_write(
            join('', "$method $rpath HTTP/1.0$CRLF",
                    (map "\u$_: $hdr{$_}$CRLF", grep defined $hdr{$_}, keys %hdr),
                        "$CRLF",
                        '')
        );
        %hdr = (); # reduce memory usage, save a kitten
        $handle->on_error(
            sub {
                $cb->(
                    Status => 599,
                    Reason => "Various socket error: $!",
                    URL    => $url
                );
            }
        );
        $handle->on_eof(
            sub { warn "EOF" },
        );
        $handle->on_read(
            sub {
                my $res = {};
                my $ret = parse_http_response($_[0]->{rbuf}, $res);
                if ($ret > 0) {
                    $_[0]->{rbuf} = substr($_[0]->{rbuf}, $ret); # remove header part
                    my $len = $res->{_headers}->{'content-length'};
                    my $finish = sub {
                        $cb->($_[0], $_[1] || $res);
                        $handle->on_error(undef);
                        $handle->on_eof(undef);
                        $handle->on_read(undef);
                        %state = ();
                    };
                    if (   $res->{_rc} =~ /^(?:1..|[23]04)$/
                        or $method eq "HEAD"
                        or ( defined $len && !$len ) )
                    {
                        $finish->(""); # no body
                    }
                    elsif ( $len <= length( $_[0]->{rbuf} ) ) {
                        $finish->((substr delete $_[0]->{rbuf}, 0, $len, ""));
                    }
                    else {
                        $_[0]->on_eof(undef);
                        if ($len) {
                            $_[0]->on_error(
                                sub {
                                    $finish->(
                                        undef,
                                        {
                                            Status => 599,
                                            Reason => $_[2],
                                            URL    => $url
                                        }
                                    );
                                }
                            );
                            $_[0]->on_read(
                                sub {
                                    $finish->(
                                        (
                                            substr delete $_[0]{rbuf}, 0,
                                            $len,                      ""
                                        ),
                                    ) if $len <= length $_[0]{rbuf};
                                }
                            );
                        } else {
                            $_[0]->on_error(
                                sub {
                                    $! == Errno::EPIPE || !$!
                                        ? $finish->( delete $_[0]{rbuf})
                                        : $finish->(
                                        undef,
                                        {
                                            Status => 599,
                                            Reason => $_[2],
                                            URL    => $url
                                        }
                                        );
                                }
                            );
                            $_[0]->on_read( sub { } );
                        }
                    }
                } elsif ($ret == -1) {
                    $cb->(
                        Status => 599,
                        Reason => "HTTP response header parsing error",
                        URL    => $url
                    );
                } elsif ($ret == -2) {
                    # parsed correctly, but incomplete response.
                    # (may not reach here in the real world...)
                }
            }
        );
        $state{handle} = $handle;
    };

    defined wantarray && AnyEvent::Util::guard { %state = () }
}

1;