So, here is the module HtmlTableParser. Is there any way to condense it (I don't care if it stays as a module or is just done directly in HTML::Parser)
into as few characters as possible (newlines don't count), regardless of readability, as long as the functionality stays the same?
There really is no goal in this other than fun.. (plus the forum has been kind of boring lately).
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
![[noevil] [noevil] [noevil]](/data/assets/smilies/noevil.gif)
Travis - Those who say it cannot be done are usually interrupted by someone else doing it; Give the wrong symptoms, get the wrong solutions;
There really is no goal in this other than fun.. (plus the forum has been kind of boring lately).
Code:
package HTML::TableContentParser;

# Strictures and warnings come first so that every statement in the package,
# including the @ISA assignment below, is checked.
use strict;
use warnings;

use HTML::Parser;

# Inherit from HTML::Parser; parse() below delegates to SUPER::parse.
our @ISA = qw(HTML::Parser);

our $VERSION = 0.13;

# When true, the handlers report what they see through debug().
our $DEBUG = 0;

# The tags we're interested in.
# NOTE(review): not referenced by the handlers visible in this file.
my @tag_names = qw(table tr td th caption);
# Start-tag handler (presumably called by HTML::Parser as its start callback).
#
# Arguments: the parser object, the tag name, a hash ref of the tag's
# attributes, the attribute sequence (unused) and the original tag text.
#
# For table/tr/td/th/caption the attribute hash itself is stored as the
# record for that element and linked into its parent; tr, td, th and caption
# additionally become the "current element" that collects content. Any
# other tag is appended verbatim to the current element's {data}, if there
# is a current element.
sub start
{
    my ($self, $tag, $attr, $attrseq, $origtext) = @_;
    my $name = lc $tag;

    # All parse state lives under $self->{STORE}; make sure it exists so the
    # shortcut below refers to the hash held by the object.
    defined $self->{STORE} or $self->{STORE} = {};
    my $store = $self->{STORE};

    if ($name eq 'table') {
        # A new table joins the result list and becomes the current table.
        push @{ $store->{tables} }, $attr;
        $store->{current_table} = $attr;
    }
    elsif ($name eq 'th') {
        # Headers are collected per table, not per row.
        push @{ $store->{current_table}{headers} }, $attr;
        @{$store}{qw(current_header current_element)} = ($attr, $attr);
    }
    elsif ($name eq 'tr') {
        push @{ $store->{current_table}{rows} }, $attr;
        @{$store}{qw(current_row current_element)} = ($attr, $attr);
    }
    elsif ($name eq 'td') {
        push @{ $store->{current_row}{cells} }, $attr;
        @{$store}{qw(current_data_cell current_element)} = ($attr, $attr);
    }
    elsif ($name eq 'caption') {
        $store->{current_table}{caption} = $attr;
        $store->{current_element} = $attr;
    }
    elsif (my $target = $store->{current_element}) {
        # Non-table markup inside a cell/header/caption is kept as raw text.
        $self->debug('TEXT(tag) = ', $origtext) if $DEBUG;
        $target->{data} .= $origtext;
    }

    $self->debug($origtext) if $DEBUG;
}
# Text handler: appends a run of text to the element currently collecting
# content (the one recorded as current_element in the store).
#
# Returns undef when there is no current element; otherwise returns the
# element's {data} after the append.
sub text
{
    my ($self, $text) = @_;

    my $target = $self->{STORE}->{current_element};
    return undef unless $target;

    $self->debug('TEXT = ', $text) if $DEBUG;
    return $target->{data} .= $text;
}
# End-tag handler (presumably called by HTML::Parser as its end callback).
#
# Arguments: the parser object, the tag name and the original tag text.
#
# Closing a table-related tag switches off the matching "current" slots in
# the store so that later content is no longer attributed to that element:
#   </table>   clears table, row, data cell, header and element
#   </tr>      clears row, data cell, header and element
#   </th>,</td> clear data cell, header and element
#   </caption> clears element
# Any other closing tag is appended verbatim to the current element's
# {data}, if there is a current element.
sub end
{
    my ($self, $tag, $origtext) = @_;
    $tag = lc($tag);

    # All parse state lives under $self->{STORE}; make sure it exists so the
    # shortcut below refers to the hash held by the object.
    defined $self->{STORE} or $self->{STORE} = {};
    my $store = $self->{STORE};

    # Assigning the empty list to a hash slice sets every named slot to undef.
    if ($tag eq 'table') {
        @{$store}{qw(current_table current_row current_data_cell
                     current_header current_element)} = ();
    }
    elsif ($tag eq 'tr') {
        @{$store}{qw(current_row current_data_cell
                     current_header current_element)} = ();
    }
    elsif ($tag eq 'th' or $tag eq 'td') {
        # current_row is deliberately left alone here: the row stays open
        # until </tr>. Clearing it on </th> made start() push any following
        # <td> of the same row onto a fresh, unlinked row, losing the cell.
        @{$store}{qw(current_data_cell current_header current_element)} = ();
    }
    elsif ($tag eq 'caption') {
        $store->{current_element} = undef;
    }
    elsif (my $elem = $store->{current_element}) {
        ## Found a non-table related close tag. Push it into the
        ## currently-defined td or th.
        $self->debug('TEXT(tag) = ', $origtext) if $DEBUG;
        $elem->{data} .= $origtext;
    }

    $self->debug($origtext) if $DEBUG;
}
# Parses $data and returns the collected tables.
#
# Any state left from an earlier call is discarded first, then the input is
# handed to the parent class's parse(). The return value is whatever the
# handlers stored under {tables} (an array ref of table records), or undef
# if nothing was stored there.
sub parse
{
    my ($self, $data) = @_;

    # Fresh store with the "current" slots present but empty.
    $self->{STORE} = {
        current_data_cell => undef,
        current_row       => undef,
        current_table     => undef,
    };

    $self->SUPER::parse($data);

    return $self->{STORE}{tables};
}
# Emits a diagnostic via warn(): the invocant's class name (as reported by
# ref), a colon and space, all remaining arguments concatenated, and a
# trailing newline.
sub debug
{
    my $self = shift;

    my $message = join '', ref($self), ': ', @_, "\n";
    warn $message;
}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
![[noevil] [noevil] [noevil]](/data/assets/smilies/noevil.gif)
Travis - Those who say it cannot be done are usually interrupted by someone else doing it; Give the wrong symptoms, get the wrong solutions;