#!/usr/bin/perl
#
#Utilitaire de parse de fichier HTML
#
# balise = % {
#			   texte => string,
#			   args => \%Args = { names => values }
#			   name => string
#			   type => [start | alone | end ]
#			   statut => [ open | closed ]
#			  }
#
#parseHTML = % {
#			   file => nom fichier,
#			   listeTag = \@ [ \%balise ]
#			   }


package parseHTML;

my $file;
my @parent;
my %alone;
my $debug;

%alone=qw{IMG 1 INPUT 1 OPTION 1 BR 1 HR 1 META 1 FRAME 1 PARAM 1 LINK 1};

#=============================
#
#	   SUB's

sub new {
	my $type = shift;
	my $debug=shift;
	my %params = @_;
	my $self = {};
	$self->{file}=$params{'file'} if ($params{'file'});
	$self->{listeTag}[0] = "";
	$self->{debug}=1 if (defined $debug);
	bless $self, $type;
} 

sub getBaliseType {
	# [ start | alone | end ]
	my $self=shift;
	my $str=shift;

	return 'alone' if (substr($str,-1) eq '/');
	return 'end' if (substr($str,0,1) eq '/');
	return 'comment' if (substr($str,0,3) eq '!--');
	return 'start';
}

sub getBaliseName_Args {
	# get Name
	my $self=shift;
	my $str=shift;
	my $name,$strArgs;

	($name,$strArgs)=($str=~/^[ |\/]*([^ ]*) *(.*)/);
	return (uc($name),$strArgs);
}

sub parseArgs {
    my $self=shift;
    my $strArgs=shift;
    my $elt,$value,$rightString;
    my %lArgs;

    print STDERR "parse args...$strArgs\n" if ($self->{debug});
    $strArgs=~s/^ *//;
    $strArgs=~s/\/*$//;
    $rightString=$strArgs;
    while ($rightString ne "") {
	print STDERR "Still parsing...($rightString)\n" if ($self->{debug});
	$rightString =~s/^ *//;
	$rightString =~s/ *$//;
	($elt,$rightString)=$self->readUntil($rightString,'[ |=]');
	if (! $elt) {print STDERR "Error while parsing ! (rightString=$rightString, elt=$elt)\n"; exit ;}
	($sep,$elt)=(substr($elt,-1),substr($elt,0,-1));
	if ($sep eq '=' ) {
	    my $del;
	    $del=substr($rightString,0,1);
	    if ($del =~ /[\'\"]/) {
		my $lastValue;
		($value,$rightString)=$self->readOne($rightString);
		($lastValue,$rightString)=$self->readUntil($rightString,$del);
		$value.=$lastValue;
	    } else {
		($value,$rightString)=$self->readUntil($rightString,'[ |>]');
		$value=~s/^ *//; $value=~s/ *$//; # Sans quote, se mefier des espaces
		$value='"'.$value.'"'; # on force les quotes !
	    }
	} else {
	    $elt.=$sep;
	    $value="";
	}
	$lArgs{$elt}=$value;
    }
    print STDERR "End parsing !\n" if ($self->{debug});
    return \%lArgs;
}

sub readOne {
	my $self=shift;
	return (substr($_[0],0,1),substr($_[0],1));
}

sub readUntil {
	my $self=shift;
	my $str=shift;
	my $char=shift;
	my $strInit;
	my $charRead;

	while (($charRead !~ /$char/) && ($str ne "")) {
		($charRead,$str)=$self->readOne($str);
		$strInit.=$charRead;
	}
	return ($strInit,$str);
}
  
sub getBalise {
	# construit une balise
	my $self=shift;
	my $str=shift;
	# On analyse la balise seulement :
	my %balise;
	my $strBal,$strArgs,$i;
	print STDERR "Ananlysing > $str <... \n" if ($self->{debug});
	($strBal,$str)=($str=~/<([^>]*)[\/]*>(.*)/);

	print STDERR "First result : balise=$strBal and text=$str \n" if ($self->{debug});

	$balise{type}=$self->getBaliseType($strBal);
	($balise{name},$strArgs)=$self->getBaliseName_Args($strBal);
	$balise{type}='alone' if ($alone{$balise{name}});

	print STDERR $balise{name}.":\n" if ($self->{debug});
	
	if ( ($balise{type} ne 'end') && ($balise{type} ne 'comment')) {
		$balise{args}=$self->parseArgs($strArgs);
	} 
	if ($balise{type} eq 'comment') {
		$balise{args}={substr($strArgs,0,-2) => "0"};
	}
	$str =~ s/   *//g;
	$balise{texte}=$str;
	return \%balise;
}

sub setFile {
	my $self=shift;
	my $file=shift;
	if (-e $file) {
		$self->{'file'}=$file;
	} else {
		return 0;
	}
	return 1;
}


sub parseFile {
	# construit la liste des balises
	my $self=shift;
	my $file,$longstr,@totfile;
	my $i=0;
	my @parent;
	my $curDepth=0;

	if ($_[0]) {
		$file=$_[0] if ($self->setFile($_[0]));
	} else {
		$file=$self->{'file'};
	}
		
	return 0 unless $file;

	open (F,$file);
	@totfile=<F>;
	close F;
	chomp(@totfile);
	$longstr=join('',@totfile);

	# Remplacement Chars genants :
	my $n=chr(10);
	$longstr =~ s/^J//g;
	$longstr =~ s/\t//g;
	
	@bal=($longstr=~/(<[^>]*>[^<]*)/g);
	

	for $elt (@bal) {
		my $ref_bal=$self->getBalise($elt);
		$self->{listeTag}[$i++]=$ref_bal;

		if ($ref_bal->{type} eq 'start') {
			$curDepth++;
			push (@{$self->{parent}},$ref_bal);
			$ref_bal->{statut}='open';
			$ref_bal->{depth}=$curDepth;
		}
		else {
			if (($ref_bal->{type} ne 'alone') && ($ref_bal->{type} ne 'comment')) { # $ref_bal->{type} eq 'end'
				my $ref_bal_parent;
				$ref_bal_parent=$self->getNearestParent($ref_bal);

				if ($ref_bal_parent->{name}) {
					$ref_bal_parent->{statut}='closed';
					$curDepth=$ref_bal_parent->{depth};
					$ref_bal->{depth}=$curDepth;
					$curDepth--;
				}
				else {
					$curDepth--;
				}
			}
			else {
				$ref_bal->{depth}=$curDepth+1;
			}
		}
	}
	return 1;
}



sub getNearestParent {
	my $self=shift;
	my $ref_bal=shift;
	my $ref_bal_parent;
	my $bal_name;
	my @localParent;

	#print STDERR "Looking for parents...\n";

	@localParent=@{$self->{parent}};
	$bal_name=$ref_bal->{name};
	
	do {
		$ref_bal_parent=pop(@localParent);
	} 
	until ((($ref_bal_parent->{name} eq $bal_name) && $ref_bal_parent->{statut} eq 'open') || scalar(@localParent)==0);
	return $ref_bal_parent;
}

sub printOneBal {
	# options :
	# notexte noindent nooption
	my $self=shift;
	my $ref_bal=shift;
	my $ecart=shift;
	my $ref_opt=shift;
	my $end;
	my @args;
	return unless $ref_bal;
	$ecart="" unless $ecart;

	print STDERR "printing Bal : name=".$ref_bal->{name}.", text : ".$ref_bal->{texte}."\n" if ($self->{debug});

	for $key (keys(%{$ref_bal->{args}})) {
		if ($ref_bal->{args}->{$key}) {
			push(@args,"$key=".$ref_bal->{args}->{$key});
		} else {
			push(@args,"$key");
		}
	}

	# Starting Bal
	if ($ref_bal->{type} eq 'start') {
		print "$ecart<".$ref_bal->{name};
		if (@args && ($ref_opt->{nooption}!=1)) {
			print " ".join(' ',@args);
		}
		print ">";
		print "".$ref_bal->{texte} unless ($ref_opt->{notexte}==1);
	}

	# Alone Bal
	if ($ref_bal->{type} eq 'alone') {
		print "$ecart<".$ref_bal->{name};
		if (@args && (! $ref_opt->{nooption})) {
			print " ".join(' ',@args);
		}
		print "/>";
		print "".$ref_bal->{texte} unless ($ref_opt->{notexte}==1);
	}

	# Comments 
	print $ecart."<".$ref_bal->{name}." ".$args[0]." -->" if ($ref_bal->{type} eq 'comment');

	# Ending Bal
	if ($ref_bal->{type} eq 'end') {
	    print $ecart."</".$ref_bal->{name}.">" ;
	    print "".$ref_bal->{texte} unless ($ref_opt->{notexte}==1);
	}
	
	return 1;
}

sub print {
	my $self=shift;
	my $ecart="	 ";
	for $ref_bal (@{$self->{listeTag}}) {
	$self->printOneBal($ref_bal,$ecart x $ref_bal->{depth});
	print "\n";
	}
	return 1;
}


sub extractBalName {
	my $self=shift;
	my $balName=shift;
	my @result;
	for $ref_bal (@{$self->{listeTag}}) {
	push (@result,$ref_bal) if ($ref_bal->{name} eq $balName);
	}
	return \@result;
}
	
sub extractBalBlock {
	my $self=shift;
	my $ref_lTag=shift;
	my @result;
	for $ref_bal (@{$self->{listeTag}}) {
		push (@result,$ref_bal) if (exists $ref_lTag->{$ref_bal->{name}});
	}
	return \@result;
}
	

sub printHash {
	my $self=shift;
	my $ref_h=shift;

	for $key (keys(%{$ref_h})) {
	print "$key => ".$ref_h->{$key}."\n";
	}
}

1;


