| 1 |
package Plagger::Plugin::CustomFeed::Simple; |
|---|
| 2 |
use strict; |
|---|
| 3 |
use base qw( Plagger::Plugin ); |
|---|
| 4 |
|
|---|
| 5 |
use Encode; |
|---|
| 6 |
use HTML::TokeParser; |
|---|
| 7 |
use HTML::ResolveLink; |
|---|
| 8 |
use HTML::TreeBuilder::XPath; |
|---|
| 9 |
use Plagger::UserAgent; |
|---|
| 10 |
use Plagger::Util qw( decode_content extract_title ); |
|---|
| 11 |
|
|---|
| 12 |
# Plugin registration: installs handle() as the callback for the
# 'customfeed.handle' hook on the given context.
sub register {
    my ($plugin, $ctx) = @_;

    my %hooks = ( 'customfeed.handle' => \&handle );
    $ctx->register_hook($plugin, %hooks);
}
|---|
| 19 |
|
|---|
| 20 |
# Callback for the 'customfeed.handle' hook.
#
# Claims the feed when its meta hash carries a true 'follow_link' or 'xpath'
# value (follow_link wins when both are set): the value is stored in
# $args->{match} and the work is delegated to aggregate(), whose result is
# returned. Otherwise returns nothing (bare return) and leaves $args untouched.
sub handle {
    my ($plugin, $ctx, $args) = @_;

    my $feed    = $args->{feed};
    my $pattern = $feed->meta->{follow_link} || $feed->meta->{xpath};
    return unless $pattern;

    $args->{match} = $pattern;
    return $plugin->aggregate($ctx, $args);
}
|---|
| 30 |
|
|---|
| 31 |
# Fetches the subscription's page and turns the anchors found in it into a
# title-and-link-only feed.
#
# Arguments:
#   $self    - this plugin; handed to the user agent's fetch()
#   $context - context object; used for logging and receives the built feed
#   $args    - hashref with
#                feed  => subscription feed (its url and meta are read)
#                match => regex applied to each href when the feed's meta has
#                         follow_link set, otherwise an XPath expression
#                         selecting the link elements (defaults to '//a')
#
# Returns 1 after adding the feed to $context->update, or nothing (bare
# return) when the HTTP response is an error.
sub aggregate {
    my($self, $context, $args) = @_;

    my $url = $args->{feed}->url;
    $context->log(info => "GET $url");

    my $agent = Plagger::UserAgent->new;
    my $res   = $agent->fetch($url, $self);

    if ($res->http_response->is_error) {
        $context->log(error => "GET $url failed: " . $res->status);
        return;
    }

    my $content = decode_content($res);
    my $title   = extract_title($content);

    my $feed = Plagger::Feed->new;
    $feed->title($title);
    $feed->link($url);

    # Rewrite every link in the page against the page URL up front, so that
    # both the follow_link and the XPath mode see (and emit) absolute links.
    # Previously only follow_link mode did this and XPath mode could produce
    # entries with relative links.
    my $resolver = HTML::ResolveLink->new(base => $url);
    $content = $resolver->resolve($content);

    my $re = $args->{match};

    if ( $args->{feed}->meta->{follow_link} ) {
        my %seen;
        my $parser = HTML::TokeParser->new(\$content);
        while (my $token = $parser->get_tag('a')) {
            next unless ($token->[1]->{href} || '') =~ /$re/;

            # Skip anchors with no usable text; '[IMG]' is what TokeParser
            # yields for an image-only link.
            my $text = $parser->get_trimmed_text('/a');
            next if !$text || $text eq '[IMG]';

            # Emit each distinct target only once.
            my $link = URI->new_abs($token->[1]->{href}, $url);
            next if $seen{$link->as_string}++;

            my $entry = Plagger::Entry->new;
            $entry->title($text);
            $entry->link($link);
            $feed->add_entry($entry);

            $context->log(debug => "Add $token->[1]->{href} ($text)");
        }
    }
    else {
        my $tree = HTML::TreeBuilder::XPath->new;
        $tree->parse($content);
        $tree->eof;

        for my $node ( $tree->findnodes($re || '//a') ) {
            my $href       = $node->attr('href') or next;
            my $link_title = $node->attr('title') || $node->as_text;

            my $entry = Plagger::Entry->new;
            $entry->title($link_title);
            $entry->link($href);
            $feed->add_entry($entry);

            $context->log(debug => "Add $href ($link_title)");
        }

        # HTML::TreeBuilder trees contain circular parent/child references and
        # are not reclaimed when $tree goes out of scope; delete explicitly so
        # repeated runs do not leak one parsed document per feed.
        $tree->delete;
    }

    $context->update->add($feed);

    return 1;
}
|---|
| 99 |
|
|---|
| 100 |
1; |
|---|
| 101 |
|
|---|
| 102 |
__END__ |
|---|
| 103 |
|
|---|
| 104 |
=head1 NAME |
|---|
| 105 |
|
|---|
| 106 |
Plagger::Plugin::CustomFeed::Simple - Simple way to create title and link only custom feeds |
|---|
| 107 |
|
|---|
| 108 |
=head1 SYNOPSIS |
|---|
| 109 |
|
|---|
| 110 |
- module: Subscription::Config |
|---|
| 111 |
config: |
|---|
| 112 |
feed: |
|---|
| 113 |
- url: http://sportsnavi.yahoo.co.jp/index.html |
|---|
| 114 |
meta: |
|---|
| 115 |
follow_link: /headlines/ |
|---|
| 116 |
|
|---|
| 117 |
- module: CustomFeed::Simple |
|---|
| 118 |
|
|---|
| 119 |
=head1 DESCRIPTION |
|---|
| 120 |
|
|---|
| 121 |
|
|---|
| 122 |
=head1 AUTHOR |
|---|
| 123 |
|
|---|
| 124 |
Tatsuhiko Miyagawa |
|---|
| 125 |
|
|---|
| 126 |
=head1 SEE ALSO |
|---|
| 127 |
|
|---|
| 128 |
L<Plagger> |
|---|
| 129 |
|
|---|
| 130 |
=cut |
|---|
| 131 |
|
|---|
| 132 |
|
|---|
| 133 |
|
|---|
| 134 |
1; |
|---|
| 135 |
|
|---|