לא היה לי ערוץ RSS להודעות באתר העירייה, אז התוצאה לפניכם. יכול להיות שזה יעבוד גם לאחרים, אם גם העירייה שלהם משתמשת במערכת הודעות דומה.
#!/usr/bin/perl
# Scrape the messages page of a municipal web site and write the entries
# found there to an RSS 0.9 file named "<this script>.rss".
#
# Usage: <this script> http://url.muni.tld
#
# The single argument is the site's base URL; every request and every item
# link is built by appending a path to it.
use strict;
use warnings;

use LWP 5.64;         # Loads all important LWP classes, and requires LWP >= 5.64
use HTTP::Cookies;    # Allow work with cookies
use XML::RSS;
use Encode qw(decode encode);

if ( @ARGV != 1 ) {
    die "incorrect number of params, you should provide only basepath example : $0 http://url.muni.tld\n";
}
my $basepath = $ARGV[0];

my $browser = LWP::UserAgent->new;
$browser->cookie_jar(
    HTTP::Cookies->new(
        'file'     => '/tmp/headers',    # where to read/write cookies
        'autosave' => 1,                 # save it to disk when done
    )
);
$browser->agent('Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)');

# Hit the landing page first and keep whatever cookies it hands out.
# A failure here is only reported: the messages request below decides
# whether the run can continue.
my $url      = "$basepath/openning.asp?Lang=1";
my $response = $browser->get($url);
warn "GET $url failed: " . $response->status_line . "\n"
    unless $response->is_success;

# for each cookie we got, add it to the current jar
$browser->cookie_jar->extract_cookies($response);

my $messages_url =
    "$basepath/apps/hebrew/resulttest.asp?AppId=4&TableName=MESSAGES&Categories=65&From=";
$response = $browser->get($messages_url);

# Stop before touching the output file, so a failed fetch does not replace
# an existing feed with an empty one.
die "GET $messages_url failed: " . $response->status_line . "\n"
    unless $response->is_success;

my $rss = XML::RSS->new( version => '0.9' );
$rss->channel(
    title       => "MUNI for $basepath",
    link        => "$basepath",
    description => "MUNI RSS",
);

# The interesting stuff is inside Parse_Html_Template(...) calls in the page
# source.  The original author chose to pick it apart with regexes rather
# than a proper HTML/JS parser.
# NOTE(review): the capture stops at the first ")" and does not span
# newlines - confirm the site never emits either inside a call.
for my $item ( $response->content =~ /Parse_Html_Template\((.*?)\)/g ) {

    # first line always have garbage data (just skip it)
    next unless ( $item =~ /.*,.*/ );

    # Format: #### , "title","date"   (#### may be a version number)
    $item =~ s/[^,]*,[^,]*,[^,]*,//;    # remove useless headers

    # What remains comes as pairs in the format:
    #   "<a href='relative_path'>text</a>","<a href='relative path'>date</a>"
    # Each pass removes the first remaining pair, so the loop ends once no
    # pair is left.  The second (date) field is consumed but not used.
    while ( $item =~ s/"([^"]*)","([^"]*)"// ) {
        my $anchor = $1;

        # List-context matches yield undef on failure instead of leaving a
        # stale $1 from the enclosing match to be mistaken for a result.
        my ($link) = $anchor =~ /<a.*href=["']([\s\S]+?)['"].*>/;
        my ($text) = $anchor =~ /<a.*href.*>([\s\S]+?)<\/a>/;
        next unless defined $link && defined $text;

        # Titles are decoded from ISO-8859-8 bytes into Perl characters.
        $text = decode( "iso-8859-8", $text );

        $rss->add_item(
            title => $text,
            link  => $basepath . $link,
        );
    }
}

$rss->save("$0.rss");

__END__

=head1 NAME

muni RSS - a script to get an RSS from old municipal websites which are lacking an RSS/Atom features.

=head1 SYNOPSIS

Create an RSS 0.9 file based on the messages page.

=head1 DESCRIPTION

This script provides a basic example on how one could get the RSS from the rusty pages without the RSS interface.

=head1 BUGS

=head1 AUTHOR

Original code:Boris Shtrasman

=head1 COPYRIGHT

Copyright (c) 2015 Boris Shtrasman

=head1 LICENSE

this script is free software. You can redistribute it and/or modify it under the same terms as Perl itself.

=head1 CREDITS

 Rael Dornfest <rael@oreilly.com>
 Jonathan Eisenzopf <eisen@pobox.com>
 Wojciech Zwiefka <wojtekz@cnt.pl>
 Chris Nandor <pudge@pobox.com>

=head1 SEE ALSO

perl(1), XML::Parser(3), LWP(3), XML::RSS(3),HTTP::Cookies(3)

=cut
אין תגובות:
הוסף רשומת תגובה