#!/usr/bin/perl
###-----------------------------------------------------------
### opac2rss.pl
###
### version 1.2a - Jan 27, 2007 - Sacha Chua
### Modified to work with TPL, changed output format slightly
###
### version 1.2 - August 29, 2004 - Peter Rukavina
###
### Connects to a Dynix(tm) web-based library OPAC and
### retrieves the current Items Out.
###
### Thanks to the Alibi project for providing information
### on how to authenticate with a session id:
###
###   http://alibi.nslug.ns.ca/download.html
###
### Thanks to O'Reilly's "Spidering Hacks" book for help with
### HTML::TreeBuilder.
###
### Attribution-NonCommercial-ShareAlike 2.0
### http://creativecommons.org/licenses/by-nc-sa/2.0/
### Reinvented Inc.
###-----------------------------------------------------------

use strict;
use warnings;
use LWP::Simple;
use HTML::TreeBuilder;
use XML::RSS;

###-----------------------------------------------------------
### Set up some initial values that we'll use later...
###-----------------------------------------------------------

## This is usually your library card number.
my $username = "00000000000000";

## This is your PIN - on PEI it's the last four digits of your phone number.
my $password = "0000";

## Your name - used to title the RSS feed.
my $name = "Your Name";

## The main URL of your Dynix OPAC -- on PEI it's
## http://142.176.41.205/ipac20/ipac.jsp
my $opacurl = "http://hip.tpl.toronto.on.ca/ipac20/ipac.jsp";

## Where the resulting RSS file should be saved -- logically this
## would be web-accessible.
my $rssfile = "library.rdf";

###-----------------------------------------------------------
### Helpers
###-----------------------------------------------------------

## Percent-encode a value so it can be embedded in a URL query string.
## Everything except RFC 3986 "unreserved" characters is escaped.
sub _uri_escape {
    my ($text) = @_;
    $text =~ s/([^A-Za-z0-9\-._~])/sprintf('%%%02X', ord $1)/ge;
    return $text;
}

## Fetch $url and return its content, or die with a message naming $what.
## The URL itself is deliberately kept out of the message because the
## Items Out URL carries the card number and PIN.  LWP::Simple::get()
## signals failure by returning undef and does not set $!.
sub _fetch_page {
    my ( $url, $what ) = @_;
    my $content = get($url);
    die "Could not fetch $what\n"
        unless defined $content && length $content;
    return $content;
}

## Walk $levels parents up from $element.  Returns the ancestor, or an
## empty list (undef in scalar context) if the tree is shallower than that.
sub _ancestor {
    my ( $element, $levels ) = @_;
    for ( 1 .. $levels ) {
        return unless defined $element;
        $element = $element->parent;
    }
    return unless defined $element;
    return $element;
}

###-----------------------------------------------------------
### Start a new RSS file
###-----------------------------------------------------------

my $rss = XML::RSS->new( version => '0.91' );
$rss->channel(
    title       => $name . "'s Library Books",
    link        => "$opacurl?profile=pac&menu=account&submenu=itemsout",
    description => 'Books I have checked out of the library',
);

###-----------------------------------------------------------
### Get the authentication request page, which will give us
### a session variable called 'session' that we'll use later.
###-----------------------------------------------------------

my $url = "$opacurl?profile=pac&menu=account&submenu=itemsout&sortby=duedate";
my $page = _fetch_page( $url, "the login page at $opacurl" );

my $login_tree    = HTML::TreeBuilder->new_from_content($page);
my $session_input = $login_tree->look_down( _tag => 'input', name => 'session' );
my $session = defined $session_input ? $session_input->attr('value') : undef;
$login_tree->delete;    # don't need it anymore

die "No 'session' field found on the login page at $opacurl\n"
    unless defined $session && length $session;

###-----------------------------------------------------------
### Grab the Items Out page and parse it
###-----------------------------------------------------------

# Every user-supplied or scraped value is percent-encoded so that
# characters such as '&', '=' or spaces cannot corrupt the query string.
$url = sprintf
    '%s?profile=pac&menu=account&submenu=itemsout&session=%s&sec1=%s&sec2=%s&sortby=duedate',
    $opacurl,
    _uri_escape($session),
    _uri_escape($username),
    _uri_escape($password);
$page = _fetch_page( $url, "the Items Out page at $opacurl" );

my $items_tree = HTML::TreeBuilder->new_from_content($page);

# The titles of the items out are styled with a 'mediumBoldAnchor' class.
my @links = $items_tree->look_down( _tag => 'a', class => 'mediumBoldAnchor' );

# The element holding all the cells of one item sits five levels above its
# title link; links without that many ancestors are dropped.
my @rows = map { _ancestor( $_, 5 ) } @links;

ROW:
for my $row (@rows) {
    my @columns = $row->look_down( _tag => 'td' );

    # Cells 1 (title), 3 (author) and 9 (due date) are read below.
    if ( @columns < 10 ) {
        warn "Skipping an item row with only " . scalar(@columns) . " cells\n";
        next ROW;
    }

    my $bookcell
        = $columns[1]->look_down( _tag => 'a', class => 'mediumBoldAnchor' );
    my $href = defined $bookcell ? $bookcell->attr('href') : undef;
    if ( !defined $href ) {
        warn "Skipping an item row without a title link\n";
        next ROW;
    }

    my %book;

    # Drop the session id from the link; [^&]* stops at the end of that one
    # parameter instead of running on to the last '&profile' in the URL.
    ( $book{url} = $href ) =~ s/\?session=[^&]*&profile/?profile/;

    # The title text is taken from the link's parent element, minus the
    # first ' /' it contains.
    $book{title} = $bookcell->parent->as_trimmed_text;
    $book{title} =~ s{ /}{};

    $book{due}    = $columns[9]->as_trimmed_text;
    $book{author} = $columns[3]->as_trimmed_text;

    $book{description} = 'Due: ' . $book{due};
    if ( length( $book{author} ) > 2 ) {
        $book{description} = $book{author} . ' - ' . $book{description};
    }

    $rss->add_item(
        title       => $book{title},
        link        => $book{url},
        description => $book{description},
    );
}

$items_tree->delete;    # don't need it anymore

###-----------------------------------------------------------
### Save the RSS file and exit.
###-----------------------------------------------------------

$rss->save($rssfile);

###---- The End.