エントリ本文からスクリプトを取り除くPlagger用のFilterプラグインです。
以下を削除します: scriptタグ、scriptタグ内のスクリプト、onClick等のイベント属性。
Filter::EntryFullTextで取得する全文HTML内や、フィードのcontent内にスクリプトが含まれている時に有用でしょう。
package Plagger::Plugin::Filter::StripScript;
use strict;
use base qw( Plagger::Plugin );
use HTML::TokeParser::Simple;
# Plugin entry point: subscribes this plugin's filter() callback to the
# 'update.entry.fixup' hook on the given context.
sub register {
    my $self    = shift;
    my $context = shift;

    my %hooks = ( 'update.entry.fixup' => \&filter );

    return $context->register_hook( $self, %hooks );
}
# Hook callback for 'update.entry.fixup': strips scripting from an HTML
# entry body.
#
# Removes <script> start/end tags, the text between them, and the
# event-handler attributes listed in @events from every other start tag.
# Entries with no body, or whose body is not HTML, are left untouched.
#
# Arguments:
#   $self    - the plugin instance
#   $context - the Plagger context (not used here)
#   $args    - hashref; $args->{entry} is the entry whose body is rewritten
sub filter {
    my ($self, $context, $args) = @_;

    my $body = $args->{entry}->body;
    return unless $body && $body->is_html;

    my $old_body = $body->data;
    my $new_body = '';
    my $parser   = HTML::TokeParser::Simple->new(\$old_body);

    # The parser normalizes attribute names to lower case, so the names are
    # lower-cased here. Looking them up in their mixed-case spelling never
    # matches a key of the attribute hash, which would leave every handler
    # in place.
    my @events = map { lc } qw(onClick onDblClick onKeyDown onKeyPress onKeyUp
        onMouseDown onMouseUp onMouseOver onMouseOut onMouseMove
        onLoad onUnload onFocus onBlur onSubmit onReset onChange
        onResize onMove onDragDrop onAbort onError onSelect);

    my $is_script = 0;
    while ( my $token = $parser->get_token ) {
        # Drop the <script> / </script> tags themselves and remember
        # whether we are currently inside a script element.
        if ( $token->is_tag('script') ) {
            $is_script = $token->is_start_tag;
            next;
        }

        if ( $token->is_start_tag ) {
            my $attr = $token->return_attr();
            for my $event (@events) {
                next unless exists $attr->{$event};
                $token->delete_attr($event);
            }
        }

        # Text between <script> and </script> is the script source: skip it.
        next if $token->is_text and $is_script;

        $new_body .= $token->as_is;
    }

    $args->{entry}->body($new_body);
}
1;
__END__
=head1 NAME
Plagger::Plugin::Filter::StripScript - Strip scripts from entry body HTML
=head1 SYNOPSIS
- module: Filter::StripScript
=head1 DESCRIPTION
This plugin deletes script tags, the script code inside them, and
on* event attributes (onClick, onLoad, and so on) from the entry body.
=head1 AUTHOR
Makio Tsukamoto
=head1 SEE ALSO
L<Plagger>, L<HTML::TokeParser::Simple>
=cut