#!/usr/bin/perl
use strict;
use LWP::UserAgent;
use XML::RSS;
my $ua = LWP::UserAgent->new;
# Given a base pipermail URL (including the YYYY-Month), return the threads.
#
# Fetches "$url/thread.html" through the file-level $ua and scrapes the
# message links out of the page.
#
# Parameters:
#   $url - archive URL for one month, without a trailing slash
# Returns:
#   a list of [$msg_url, $topic] array refs, one per distinct topic, sorted
#   by message URL in descending string order
# Dies:
#   with the HTTP status line when the page cannot be fetched
sub get_threads {
    my ($url) = @_;

    my $response = $ua->get("$url/thread.html");

    # Well, since we didn't get the page correctly let's just DIE!
    die $response->status_line unless $response->is_success;

    my $content = $response->content;

    # All the links we're looking for are inside of capitalized LI tags, and
    # the link text runs to the end of the line. Each match yields the
    # relative message URL and the raw subject.
    # NOTE(review): the markup in this pattern was reconstructed from the
    # surrounding comments -- confirm against a live thread.html page.
    #
    # The topics go into a hash so that we get a unique list of subjects;
    # the first message seen for a topic supplies that topic's URL.
    my %thread_for;
    while ($content =~ m{<LI><A HREF="([^"]*)">(.*)}g) {
        my ($href, $topic) = ($1, $2);

        # Clean up: drop the list subject indicator and any trailing
        # whitespace (e.g. a stray carriage return).
        $topic =~ s/\[Phoenix-pm\] //;
        $topic =~ s/\s+\z//;

        # Key the "already seen" check on the topic itself, so later
        # messages with the same subject never replace the first one.
        next if exists $thread_for{$topic};
        $thread_for{$topic} = ["$url/$href", $topic];
    }

    # The threads are all mixed up from keeping them in a hash. Sort them by
    # URL, descending, which happens to also give us the most-recent-first
    # order we like.
    my @threads = sort { $b->[0] cmp $a->[0] } values %thread_for;
    return @threads;
}
# Month names exactly as they appear in the archive URLs, indexed by the
# zero-based month number that localtime returns.
my @months = qw( January February March April May June July
                 August September October November December );
my $base_url = 'http://mail.pm.org/pipermail/phoenix-pm';

# Only the month and year are needed. localtime's year counts from 1900, so
# 1900 is added back when the URL is built.
my ($mon, $year) = (localtime)[4, 5];

# How many threads we would like in the feed, and how many months we are
# willing to walk back looking for them. The cap keeps a quiet list or an
# unreachable server from sending us back through the calendar forever.
my $wanted_threads = 15;
my $max_months     = 24;

# Find the threads for the current month, and if there aren't enough go back
# to last month, and so on.
my @threads;
my $last_error;
for my $attempt (1 .. $max_months) {
    last if @threads >= $wanted_threads;

    my $month = ($year + 1900) . '-' . $months[$mon];

    # get_threads dies when a month's page can't be fetched. One missing
    # month shouldn't cost us the whole feed, so note the error and move on.
    my @found;
    if (eval { @found = get_threads("$base_url/$month"); 1 }) {
        push @threads, @found;
    }
    else {
        $last_error = $@;
        warn "Skipping $month: $last_error";
    }

    # Step back one month, wrapping from January to the previous December.
    $mon--;
    if ($mon < 0) {
        $mon = 11;
        $year--;
    }
}

# If nothing at all came back and at least one fetch failed, that is a real
# failure rather than a quiet list -- report it instead of an empty feed.
die "No threads could be fetched: $last_error"
    if !@threads && defined $last_error;
# Now that we've got all of our threads, we just spit out some RSS.
my $rss = XML::RSS->new;

# XML::RSS documents title, link and description as the channel fields;
# the text is passed as 'description' so that it is the documented key.
$rss->channel(
    title       => 'Phoenix.PM Mailing List Topics',
    link        => $base_url,
    description => 'Scraped topic list for Phoenix.PM',
);

# Each thread is a [$url, $topic] pair; emit one feed item per thread, in
# the order the threads were collected.
foreach my $thread (@threads) {
    my ($url, $topic) = @$thread;
    $rss->add_item(
        link  => $url,
        title => $topic,
    );
}

print $rss->as_string;