Revision: 11730
Updated Code
at November 11, 2010 02:29 by iblis
Updated Code
#!/usr/bin/env perl
use strict;
use warnings;
use Encoding "utf8";
use Text::BibTeX;
use WebService::ISBNDB::API::Books;
use Getopt::Long;
use Pod::Usage;
# Command-line handling: -? / --usage prints the synopsis, -h / --help
# prints the full POD.
my %options;
GetOptions(
    'usage|?' => \$options{usage},
    'h|help'  => \$options{help},
) or pod2usage(2);    # unknown or malformed option: show usage, exit non-zero
pod2usage(1) if $options{usage};
pod2usage( -verbose => 2 ) if $options{help};

# The API key comes from the environment when set; otherwise the value
# pasted below is used.
# NOTE(review): a key committed in source is visible to anyone who can read
# this file -- prefer setting ISBNDB_KEY.
my $api_key = $ENV{ISBNDB_KEY} || 'TMDKWJSX';

# Positional arguments: directory to scan (default: current directory) and
# output file (default: a duplicate of STDOUT).
my $dir  = shift || '.';
my $file = shift || '&STDOUT';

# Validate and read the directory before opening the output, so that a bad
# directory argument does not leave a truncated output file behind.
opendir my $dh, $dir
    or die "Cannot open $dir: $!\n";

# Keep plain files named <ISBN-10>.pdf: nine digits followed by a digit or
# an "X" check character (matched case-insensitively).
my @files = grep { -f $_ && m{/\d{9}[\dx]\.pdf$}i }
    map {"$dir/$_"}
    readdir $dh;
closedir $dh;

# The leading '>' selects write mode; the constructor's result is checked so
# an output file that cannot be opened is reported here.
my $bib = Text::BibTeX::File->new( '>' . $file )
    or die "Cannot open $file for writing: $!\n";
# Build one BibTeX "book" entry per ISBN-named PDF and write it to $bib.
foreach my $file (@files) {

    # Extract the ISBN-10 from the file name: nine digits followed by a
    # digit or an "X" check character. A name that does not match is skipped.
    my ($isbn) = $file =~ /(\d{9}[\dx])\.pdf$/i
        or next;

    # Look the ISBN up on isbndb.com; skip the file when no record comes back.
    my $book = WebService::ISBNDB::API::Books->find(
        { api_key => $api_key, isbn => $isbn } );
    next unless $book;

    # Start a new regular entry of type "book", keyed by the ISBN.
    my $entry = Text::BibTeX::Entry->new;
    $entry->set_metatype(BTE_REGULAR);
    $entry->set_type('book');
    $entry->set_key($isbn);

    # Title: prefer the long title, fall back to the short one.
    $entry->set( 'title', $book->get_longtitle || $book->get_title );

    # Author / editor field. Normalise the free-text author string first:
    # drop a leading "by ", drop a trailing comma, and turn comma or
    # semicolon separators into BibTeX's " and ".
    my $authors = $book->get_authors_text;
    $authors = '' unless defined $authors;
    $authors =~ s/^by //;
    $authors =~ s/,$//;
    $authors =~ s/,\s+/ and /g;
    $authors =~ s/;\s+/ and /g;

    if ( $authors =~ /^\s*\[?edited by\s+\]?(.*)$/i ) {

        # "edited by A with B" -> editors "A and B". The word boundaries keep
        # a name that merely contains the letters "with" intact.
        ( my $editors = $1 ) =~ s/\bwith\b/and/;
        $entry->set( 'editor', $editors ) if length $editors;
    }
    elsif ( $authors =~ /\(Editor\)/i ) {

        # Names tagged "(Editor)": strip the tag and record them as editors.
        $authors =~ s/\s*\(Editor\)//gi;
        $entry->set( 'editor', $authors ) if length $authors;
    }
    elsif ( length $authors ) {
        $entry->set( 'author', $authors );
    }

    # Publisher and year. The publisher text sometimes ends in ", 1999" or
    # ", c1999."; split that into the two fields. Captures are copied into
    # lexicals before being handed to set(), so a regex run inside the method
    # cannot change them.
    my $publisher_text = $book->get_publisher_text;
    $publisher_text = '' unless defined $publisher_text;
    if ( my ( $publisher, $year )
        = $publisher_text =~ m/^(.*?),\s+c?(\d{4}).?$/ )
    {
        $entry->set( 'publisher', $publisher );
        $entry->set( 'year',      $year );
    }
    else {
        $entry->set( 'publisher', $publisher_text ) if length $publisher_text;

        # No year in the publisher text: take the first four-digit run from
        # the edition info, if there is one.
        my $edition_info = $book->get_edition_info;
        if ( defined $edition_info
            and my ($year) = $edition_info =~ m/(\d{4})/ )
        {
            $entry->set( 'year', $year );
        }
    }

    # Miscellaneous fields, set only when the record provides them.
    my $notes = $book->get_notes;
    $entry->set( 'notes', $notes ) if $notes;
    my $abstract = $book->get_summary;
    $entry->set( 'abstract', $abstract ) if $abstract;

    # Path of the PDF this entry was built from.
    $entry->set( 'local-url', $file );

    $entry->write($bib);

    # sleep 2;
}
__END__
=head1 NAME
isbn2bibtex.pl - Convert ISBN file names to BibTeX records
=head1 SYNOPSIS
isbn2bibtex.pl [-? | --help] | [directory] [outfile.bib]
=head1 DESCRIPTION
Scans a directory for PDF files whose names are ISBN-10 identifiers,
fetches the corresponding book's data from isbndb.com, parses data
fields to get rid of inconsistencies, and finally, outputs a bibtex
file with all fields set accordingly.
-? print usage
-h --help verbose help message
If no directory is given, scans the current directory. Outputs result
to STDOUT, unless a second argument is given.
An API key is required to access isbndb.com services. You can either
paste it in the source code or set the environment variable ISBNDB_KEY.
=head1 LICENSE
Free to use and modify, same terms as Perl itself.
=head1 AUTHOR
i-blis, I<i-blis yandex ru>.
=cut
Revision: 11729
Initial Code
Initial URL
Initial Description
Initial Title
Initial Tags
Initial Language
at February 16, 2009 21:19 by iblis
Initial Code
#!/usr/bin/env perl
use strict;
use warnings;
use Encoding "utf8";
use Text::BibTeX;
use WebService::ISBNDB::API::Books;
use Getopt::Long;
use Pod::Usage;
# Command-line handling: -? / --usage prints the synopsis, -h / --help
# prints the full POD.
my %options;
GetOptions(
    'usage|?' => \$options{usage},
    'h|help'  => \$options{help},
) or pod2usage(2);    # unknown or malformed option: show usage, exit non-zero
pod2usage(1) if $options{usage};
pod2usage( -verbose => 2 ) if $options{help};

# The API key comes from the environment when set; otherwise the value
# pasted below is used.
# NOTE(review): a key committed in source is visible to anyone who can read
# this file -- prefer setting ISBNDB_KEY.
my $api_key = $ENV{ISBNDB_KEY} || 'TMDKWJSX';

# Positional arguments: directory to scan (default: current directory) and
# output file (default: a duplicate of STDOUT).
my $dir  = shift || '.';
my $file = shift || '&STDOUT';

# Validate and read the directory before opening the output, so that a bad
# directory argument does not leave a truncated output file behind.
opendir my $dh, $dir
    or die "Cannot open $dir: $!\n";

# Keep plain files named <ISBN-10>.pdf: nine digits followed by a digit or
# an "X" check character (matched case-insensitively).
my @files = grep { -f $_ && m{/\d{9}[\dx]\.pdf$}i }
    map {"$dir/$_"}
    readdir $dh;
closedir $dh;

# The leading '>' selects write mode; the constructor's result is checked so
# an output file that cannot be opened is reported here.
my $bib = Text::BibTeX::File->new( '>' . $file )
    or die "Cannot open $file for writing: $!\n";
# Build one BibTeX "book" entry per ISBN-named PDF and write it to $bib.
foreach my $file (@files) {

    # Extract the ISBN-10 from the file name: nine digits followed by a
    # digit or an "X" check character. A name that does not match is skipped.
    my ($isbn) = $file =~ /(\d{9}[\dx])\.pdf$/i
        or next;

    # Look the ISBN up on isbndb.com; skip the file when no record comes back.
    my $book = WebService::ISBNDB::API::Books->find(
        { api_key => $api_key, isbn => $isbn } );
    next unless $book;

    # Start a new regular entry of type "book", keyed by the ISBN.
    my $entry = Text::BibTeX::Entry->new;
    $entry->set_metatype(BTE_REGULAR);
    $entry->set_type('book');
    $entry->set_key($isbn);

    # Title: prefer the long title, fall back to the short one.
    $entry->set( 'title', $book->get_longtitle || $book->get_title );

    # Author / editor field. Normalise the free-text author string first:
    # drop a leading "by ", drop a trailing comma, and turn comma or
    # semicolon separators into BibTeX's " and ".
    my $authors = $book->get_authors_text;
    $authors = '' unless defined $authors;
    $authors =~ s/^by //;
    $authors =~ s/,$//;
    $authors =~ s/,\s+/ and /g;
    $authors =~ s/;\s+/ and /g;

    if ( $authors =~ /^\s*\[?edited by\s+\]?(.*)$/i ) {

        # "edited by A with B" -> editors "A and B". The word boundaries keep
        # a name that merely contains the letters "with" intact.
        ( my $editors = $1 ) =~ s/\bwith\b/and/;
        $entry->set( 'editor', $editors ) if length $editors;
    }
    elsif ( $authors =~ /\(Editor\)/i ) {

        # Names tagged "(Editor)": strip the tag and record them as editors.
        $authors =~ s/\s*\(Editor\)//gi;
        $entry->set( 'editor', $authors ) if length $authors;
    }
    elsif ( length $authors ) {
        $entry->set( 'author', $authors );
    }

    # Publisher and year. The publisher text sometimes ends in ", 1999" or
    # ", c1999."; split that into the two fields. Captures are copied into
    # lexicals before being handed to set(), so a regex run inside the method
    # cannot change them.
    my $publisher_text = $book->get_publisher_text;
    $publisher_text = '' unless defined $publisher_text;
    if ( my ( $publisher, $year )
        = $publisher_text =~ m/^(.*?),\s+c?(\d{4}).?$/ )
    {
        $entry->set( 'publisher', $publisher );
        $entry->set( 'year',      $year );
    }
    else {
        $entry->set( 'publisher', $publisher_text ) if length $publisher_text;

        # No year in the publisher text: take the first four-digit run from
        # the edition info, if there is one.
        my $edition_info = $book->get_edition_info;
        if ( defined $edition_info
            and my ($year) = $edition_info =~ m/(\d{4})/ )
        {
            $entry->set( 'year', $year );
        }
    }

    # Miscellaneous fields, set only when the record provides them.
    my $notes = $book->get_notes;
    $entry->set( 'notes', $notes ) if $notes;
    my $abstract = $book->get_summary;
    $entry->set( 'abstract', $abstract ) if $abstract;

    # Path of the PDF this entry was built from.
    $entry->set( 'local-url', $file );

    $entry->write($bib);

    # sleep 2;
}
__END__
=head1 NAME
isbn2bibtex.pl - Convert ISBN file names to BibTeX records
=head1 SYNOPSIS
isbn2bibtex.pl [-? | --help] | [directory] [outfile.bib]
=head1 DESCRIPTION
Scans a directory for PDF files whose names are ISBN-10 identifiers,
fetches the corresponding book's data from isbndb.com, parses data
fields to get rid of inconsistencies, and finally, outputs a bibtex
file with all fields set accordingly.
-? print usage
-h --help verbose help message
If no directory is given, scans the current directory. Outputs result
to STDOUT, unless a second argument is given.
An API key is required to access isbndb.com services. You can either
paste it in the source code or set the environment variable ISBNDB_KEY.
=head1 LICENSE
Free to use and modify, same terms as Perl itself.
=head1 AUTHOR
i-blis, I<[email protected]>.
=cut
Initial URL
Initial Description
Initial Title
Convert ISBN file names to BibTeX records
Initial Tags
web
Initial Language
Perl