#!/usr/bin/perl -w

use strict;
use Spreadsheet::ParseExcel;
use Data::Dumper;
use File::Basename;

sub usage {
    print "usage:\n
xls2csv.pl -i <input> -o <output> [-s 'separator'] [-d] [-n 'new newline'] [-m] [-n N]
convert a xls file to a csv file using ';' as separator (or 'sep' if specified)
Arg 'd' specifies if debug info should be sent to STDERR.
Arg 'm' specifies if multiple sheets should be written to different files.
Arg 'n' discards the first N rows of each sheet.
";
} 

sub getArgs {
	my @args=@_;
	my $oldKey="";
	my %opts=();
	my $el;
	for $el (@args) {
		if ($el=~/^-/) {
			$el=~s/-+//g;
			$opts{$el}="";
			$oldKey=$el;
		}
		else {
			$opts{$oldKey}=$el;
		}
	}
	return %opts;
}

my $sourcename;
my $destname;
my $nl=" ";
my $sep=";";
my %opts;
my $debug=0;
my $multiple=0;
my $discard_first_rows=0;

%opts=getArgs(@ARGV);

if (exists $opts{help}) {
	&usage();
	exit;
}
if (exists $opts{d}) {
	$debug=1;
}
if (exists $opts{n}) {
	$nl=$opts{n};
}
if ((!exists $opts{i}) || (!exists $opts{o})) {
	print STDERR "Missing 'i' option !\n" if (!exists $opts{i});
	print STDERR "Missing 'o' option !\n" if (!exists $opts{o});
	&usage();
	exit;
}
if (exists $opts{s}) {
	$sep=$opts{s};
}
if (exists $opts{m}) {
	$multiple=1;
}
if (exists $opts{n}) {
	$discard_first_rows=$opts{n} || 1;
}


$sourcename=$opts{i};
$destname=$opts{o};

my $source_excel = new Spreadsheet::ParseExcel;

my $source_book = $source_excel->Parse($sourcename)
	or die "Could not open source Excel file $sourcename: $!";

	my $storage_book;

foreach my $source_sheet_number (0 .. $source_book->{SheetCount}-1)
{
	my $source_sheet = $source_book->{Worksheet}[$source_sheet_number];

	print STDERR "--------- SHEET:", $source_sheet->{Name}, "\n" if ($debug);

# sanity checking on the source file: rows and columns should be sensible
	next unless defined $source_sheet->{MaxRow};
	next unless $source_sheet->{MinRow} <= $source_sheet->{MaxRow};
	next unless defined $source_sheet->{MaxCol};
	next unless $source_sheet->{MinCol} <= $source_sheet->{MaxCol};

        my $flag_discard_rows = $discard_first_rows;

	foreach my $row_index ($source_sheet->{MinRow} .. $source_sheet->{MaxRow})
	{
		if ($flag_discard_rows && $discard_first_rows)
		{
                    $flag_discard_rows--;
                    next;
                }

		foreach my $col_index ($source_sheet->{MinCol} .. $source_sheet->{MaxCol})
		{
			my $source_cell = $source_sheet->{Cells}[$row_index][$col_index];
			if ($source_cell)
			{
				my $Value = $source_cell->Value;
				if ($Value eq 'GENERAL')
				  {
					$Value = $source_cell->{Val};
				  }
				print STDERR "( $row_index , $col_index ) =>", $Value, "\n" if ($debug);
				$storage_book->{$source_sheet->{Name}}->{$row_index}->{$col_index} = $Value;
			} else {
				$storage_book->{$source_sheet->{Name}}->{$row_index}->{$col_index} = '';
			}
		} # foreach col_index
	} # foreach row_index
} # foreach source_sheet_number

print STDERR "Data format (sheet/row/column order):\n" if ($debug);
print Dumper $storage_book if ($debug);

print STDERR "\n\nSaving recognized data in $destname..." if ($debug);

open(DEST,">$destname") || die "Can't open $destname!" unless ($multiple);
foreach my $sheet (keys %$storage_book)
{
	if ($multiple)
	{
                my ($name,$path,$suffix) = fileparse($destname,'\..*');
		my $sheet_name = $sheet;
		$sheet_name =~ tr/A-Z /a-z_/;
		my $file = "${path}${name}_${sheet_name}${suffix}";
		open(DEST,">$file") || die "Can't open $file!";
	}

	foreach my $row (sort {$a <=> $b} (keys %{$storage_book->{$sheet}}))
	{
		my @lrow;
		foreach my $col (sort {$a <=> $b} (keys %{$storage_book->{$sheet}->{$row}}))
		{
			my $msg = $storage_book->{$sheet}->{$row}->{$col};
			$msg =~ s/\n/$nl/g;
			push @lrow,$msg;
		} # foreach column
		print DEST join($sep,@lrow);
		print DEST "\n";
	} # foreach row
	close(DEST) if ($multiple);
} # foreach sheet

print STDERR "done!\n" if ($debug);


