tokuhirom's Blog

EBook::EPUB is awesome.

At first I used pandoc to generate an EPUB from Markdown text, but I couldn't get it to produce output in a format I was satisfied with.
So I tried generating the EPUB with Perl 5 instead. There is an EBook::EPUB module on CPAN.

EBook::EPUB is a great module. It generates good EPUB files.

Here is the source code that generates perl-testing-handbook-ja.epub.

#!/usr/bin/env perl
use strict;
use warnings;
use utf8;
use 5.010000;
use autodie;

use EBook::EPUB;
use Text::Markdown 'markdown';
use File::Basename;
use Time::Piece;
use HTML::TreeBuilder::XPath;
use Text::MicroTemplate qw/render_mt/;

binmode *STDOUT, ':utf8';

# XHTML 1.0 Strict preamble placed in front of every generated page body.
# It links styles.css and leaves <title> empty.
my $HEADER = <<'...';
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE html
     PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">

<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title></title>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<link rel="stylesheet" href="styles.css" type="text/css" />
</head>

<body>
...

# Closing tags appended after every generated page body.
my $FOOTER = "</body>\n" . "</html>\n";

# Markdown chapter sources under ja/, ordered numerically by the digits that
# lead each file name.
my @source_files = do {
    my @paths = glob 'ja/*.mkdn';

    # Sort key per path: the path with its "ja/<digits>..." part reduced to
    # just the digits (paths that do not match keep their full text).
    my %number_of;
    for my $path (@paths) {
        (my $number = $path) =~ s!^ja/([0-9]+).+!$1!;
        $number_of{$path} = $number;
    }

    sort { $number_of{$a} <=> $number_of{$b} } @paths;
};

my $play_order = 1;

# One [html file name, title, html body] triple per chapter, in source order.
my @contents = map {
    my $path = $_;
    say "add $path";

    # The page keeps the source's base name, with .mkdn swapped for .html.
    (my $fname = basename($path)) =~ s/\.mkdn$/\.html/g;

    my ($html, $title) = mkdn($path);
    [$fname, $title, $html];
} @source_files;

# Create the book and fill in its metadata; the date is today's local date.
my $epub = EBook::EPUB->new();
$epub->add_title('Perl Testing Handbook');
$epub->add_author('Tokuhiro Matsuno');
$epub->add_language('ja');
$epub->add_date(localtime->strftime('%Y-%m-%d'));

# Table-of-contents page: one link per chapter, registered as the first
# navigation point.
{
    # Each template argument is a [file name, title, html] triple; only the
    # first two fields are used here.
    my $toc_template = <<'...';
<h1>目次</h1>
<ul>
? for (@_) {
    <li><a href="<?= $_->[0] ?>"><?= $_->[1] ?></a></li>
? }
</ul>
...
    my $toc_page = $HEADER . render_mt($toc_template, @contents) . $FOOTER;
    my $toc_id   = $epub->add_xhtml('toc.html', $toc_page);

    $epub->add_navpoint(
        label      => '目次',
        id         => $toc_id,
        content    => 'toc.html',
        play_order => 1,                # should always start with 1
    );
}

# Add every chapter page to the book and register a navigation point for it.
# Each row of @contents is [html file name, title, html body].
for my $row (@contents) {
    my ($fname, $title, $html) = @$row;
    my $chapter_id = $epub->add_xhtml($fname, $HEADER . $html . $FOOTER);
    say $title;
    $epub->add_navpoint(
        label      => $title,
        id         => $chapter_id,
        content    => $fname,
        # The table of contents already occupies play_order 1 and
        # $play_order starts at 1, so pre-increment: chapters get 2, 3, ...
        # and no two navigation points share a play order.
        play_order => ++$play_order,
    );
}

# Copy every PNG from ja/img into the book under img/, keeping its base name.
for my $image (glob 'ja/img/*.png') {
    say "add $image";
    $epub->copy_image($image, 'img/' . basename($image));
}

# Bundle the stylesheet the page header links to, then write the archive.
$epub->copy_stylesheet('styles.css', 'styles.css');
$epub->pack_zip('perl-testing-handbook-ja.epub');

# Stop here; only subroutine definitions follow.
exit;

# Read the whole file named $fname and return its contents as a single
# character string decoded from UTF-8. Dies if the file cannot be opened.
sub slurp {
    my ($fname) = @_;

    # :encoding(UTF-8) validates the bytes while decoding; the bare :utf8
    # layer would accept malformed input without checking it.
    open my $fh, '<:encoding(UTF-8)', $fname
        or die "Cannot open $fname: $!";

    # Undefine the record separator locally so one read returns everything.
    my $content = do { local $/; <$fh> };
    close $fh;

    return $content;
}

# Convert the Markdown file $fname to HTML.
# Returns a two-element list: the HTML text, and the title taken from the
# value of the XPath query //h1 over that HTML.
sub mkdn {
    my ($fname) = @_;

    my $html = markdown(slurp($fname));

    my $tree = HTML::TreeBuilder::XPath->new();
    $tree->parse($html);
    # parse() only feeds the parser; eof() signals end of input so the tree
    # is complete before it is queried.
    $tree->eof;
    my ($title) = $tree->findvalue('//h1');
    # Free the tree explicitly once the title has been extracted.
    $tree->delete;

    return ($html, $title);
}

Using it is very easy: set the metadata on the EBook::EPUB object, add the HTML files, images, and CSS, and then pack it.