Can anyone help me increase the speed of my code snippet? I use the code to parse a web log that is formatted in any variation of the Common Log Format (CLF). It breaks every record into fields that I will use for analysis later.
Your help is appreciated.
Thank you.
Michael Libeson
Code:
### Split one log record into its fields.
###
### Reads the record text from $main::readline and stores the fields, in
### order, in @main::line.  $main::currentField is the index of the field
### being filled; it is advanced each time a field is closed.
###
### A field is read in one of five modes, chosen by its first character.
### The matching *Test variable is non-zero while that mode is active:
###   spaceTest    bare field, closed by whitespace
###   quoteTest    "..." field, closed by a " that is followed by
###                whitespace or by the end of the record
###   squoteTest   '...' field, same rule with '
###   bracketTest  [...] field, closed by the first unescaped ]
###   parenTest    (...) field; the value is the nesting depth and the
###                field is closed by the ) that brings the depth to zero
### Delimiters are stored as part of the field.  Whitespace directly after
### an opening delimiter is discarded.  Inside a field, a backslash in front
### of the character that would close it (whitespace for a bare field)
### keeps the field open; the backslash is stored as well.
###
### $main::tlp is the not-yet-consumed remainder of the record and
### $main::c is the character most recently taken from it one at a time.
$main::tlp          = $main::readline;
$main::currentField = 0;
@main::line         = ();
$main::spaceTest    = 0;
$main::quoteTest    = 0;
$main::squoteTest   = 0;
$main::bracketTest  = 0;
$main::parenTest    = 0;

while (length($main::tlp) > 0) {
    ### Four-argument substr removes the first character from the buffer
    ### and returns it in one operation, instead of building a fresh copy
    ### of the whole remainder for every character.
    $main::c = substr($main::tlp, 0, 1, '');

    if ($main::spaceTest > 0) {
        ### Bare (unquoted) field.
        if ($main::c =~ /^\s/) {
            ### End of field.
            $main::spaceTest = 0;
            $main::tlp =~ s/^\s+//;
            $main::currentField++;
        }
        elsif ($main::c eq "\\") {
            ### Next character does not terminate field if whitespace.
            ### The escaped character has to be removed from the buffer
            ### here; otherwise the next pass sees it and closes the field
            ### anyway.  It is recorded as a single space.  Note that this
            ### also applies when the whitespace is the last character of
            ### the record.
            if ($main::tlp =~ s/^\s//) {
                $main::line[$main::currentField] .= $main::c . ' ';
            }
            else {
                $main::line[$main::currentField] .= $main::c;
            }
        }
        else {
            ### Still in current field.  Every following character that
            ### is neither whitespace nor a backslash would take this same
            ### branch, so append the whole run in one step.
            $main::line[$main::currentField] .= $main::c;
            if ($main::tlp =~ s/^([^\s\\]+)//) {
                $main::line[$main::currentField] .= $1;
            }
        }
    }
    elsif ($main::quoteTest > 0) {
        ### Double-quoted field.
        if (($main::c eq '"') && ($main::tlp =~ /^(\s+|$)/)) {
            ### End of field.
            $main::line[$main::currentField] .= '"';
            $main::quoteTest = 0;
            $main::tlp =~ s/^\s+//;
            $main::currentField++;
        }
        elsif ($main::c eq "\\") {
            ### Next character does not terminate field if '"'.
            if ($main::tlp =~ s/^\"//) {
                $main::line[$main::currentField] .= $main::c . '"';
            }
            else {
                $main::line[$main::currentField] .= $main::c;
            }
        }
        else {
            ### Still in current field (this includes a '"' that is not
            ### followed by whitespace).  Append the run of characters up
            ### to the next '"' or backslash in one step.
            $main::line[$main::currentField] .= $main::c;
            if ($main::tlp =~ s/^([^"\\]+)//) {
                $main::line[$main::currentField] .= $1;
            }
        }
    }
    elsif ($main::squoteTest > 0) {
        ### Single-quoted field.
        if (($main::c eq '\'') && ($main::tlp =~ /^(\s+|$)/)) {
            ### End of field.
            $main::line[$main::currentField] .= '\'';
            $main::squoteTest = 0;
            $main::tlp =~ s/^\s+//;
            $main::currentField++;
        }
        elsif ($main::c eq "\\") {
            ### Next character does not terminate field if '\''.
            if ($main::tlp =~ s/^\'//) {
                $main::line[$main::currentField] .= $main::c . '\'';
            }
            else {
                $main::line[$main::currentField] .= $main::c;
            }
        }
        else {
            ### Still in current field; append the run up to the next
            ### '\'' or backslash in one step.
            $main::line[$main::currentField] .= $main::c;
            if ($main::tlp =~ s/^([^'\\]+)//) {
                $main::line[$main::currentField] .= $1;
            }
        }
    }
    elsif ($main::bracketTest > 0) {
        ### Bracketed field.
        if ($main::c eq ']') {
            ### End of field.
            $main::line[$main::currentField] .= ']';
            $main::bracketTest = 0;
            $main::tlp =~ s/^\s+//;
            $main::currentField++;
        }
        elsif ($main::c eq "\\") {
            ### Next character does not terminate field if ']'.
            if ($main::tlp =~ s/^\]//) {
                $main::line[$main::currentField] .= $main::c . ']';
            }
            else {
                $main::line[$main::currentField] .= $main::c;
            }
        }
        else {
            ### Still in current field; append the run up to the next
            ### ']' or backslash in one step.
            $main::line[$main::currentField] .= $main::c;
            if ($main::tlp =~ s/^([^\]\\]+)//) {
                $main::line[$main::currentField] .= $1;
            }
        }
    }
    elsif ($main::parenTest > 0) {
        ### Parenthesised field; $main::parenTest is the nesting depth.
        if ($main::c eq '(') {
            ### Next ')' character does not terminate field if '('.
            $main::line[$main::currentField] .= '(';
            $main::parenTest++;
        }
        elsif ($main::c eq ')') {
            ### Check End of field.
            $main::line[$main::currentField] .= ')';
            if ($main::parenTest > 1) {
                $main::parenTest--;
            }
            else {
                $main::parenTest = 0;
                $main::tlp =~ s/^\s+//;
                $main::currentField++;
            }
        }
        elsif ($main::c eq "\\") {
            ### Next character does not terminate field if ')'.
            ### NOTE(review): an escaped '(' is not treated specially; it
            ### still raises the nesting depth on the next pass.
            if ($main::tlp =~ s/^\)//) {
                $main::line[$main::currentField] .= $main::c . ')';
            }
            else {
                $main::line[$main::currentField] .= $main::c;
            }
        }
        else {
            ### Still in current field; append the run up to the next
            ### parenthesis or backslash in one step.
            $main::line[$main::currentField] .= $main::c;
            if ($main::tlp =~ s/^([^()\\]+)//) {
                $main::line[$main::currentField] .= $1;
            }
        }
    }
    else {
        ### No field is open: this character decides the mode of the next
        ### field.  $main::spaceTest is known to be zero here because the
        ### first branch of this chain handles every non-zero value.
        if ($main::c =~ /^\s/) {
            ### Whitespace while no field is open: skip it and move on to
            ### the next field slot.  Unless an opening delimiter follows,
            ### go straight into bare-field mode.
            if ($main::tlp !~ /^\s*[\"\'\[\(]/) { $main::spaceTest++; }
            $main::tlp =~ s/^\s+//;
            $main::currentField++;
        }
        elsif ($main::c eq '"') {
            $main::quoteTest++;
            $main::line[$main::currentField] = '"';
            $main::tlp =~ s/^\s+//;
        }
        elsif ($main::c eq '\'') {
            $main::squoteTest++;
            $main::line[$main::currentField] = '\'';
            $main::tlp =~ s/^\s+//;
        }
        elsif ($main::c eq '[') {
            $main::bracketTest++;
            $main::line[$main::currentField] = '[';
            $main::tlp =~ s/^\s+//;
        }
        elsif ($main::c eq '(') {
            $main::parenTest++;
            $main::line[$main::currentField] = '(';
            $main::tlp =~ s/^\s+//;
        }
        else {
            ### First character of a bare field.  A backslash in this
            ### position is stored literally; escapes are only recognised
            ### once the field is open.
            $main::spaceTest++;
            $main::line[$main::currentField] .= $main::c;
        }
    }
}
Your help is appreciated.
Thank you.
Michael Libeson