#!/usr/bin/perl -w

# Author: lh3

use strict;
use warnings;
use Getopt::Std;
use Cwd qw/abs_path/;
use Sys::Hostname qw/hostname/;

my $version = "0.1.8";

# asub works in two modes, selected by the options it is given:
#
#   * prepare mode (default): read commands (one per line) from the files on
#     the command line or from STDIN, save them to "<job>.sh" and submit an
#     array job (bsub for LSF, qsub for SGE) whose tasks re-invoke this
#     script with "-j <job>.sh -k <task index>".
#   * run mode (-k > 0 and -j given): execute line number -k of the file
#     named by -j and exit with that command's exit code.

my %opts = (R=>'', j=>'', k=>0, q=>'', c=>'', w=>'', M=>'', n=>'', m=>'', W=>'');
getopts('R:j:k:q:c:pw:M:n:m:W:', \%opts);
usage() if (-t STDIN && @ARGV == 0);
my $is_run = ($opts{k} > 0 && $opts{j})? 1 : 0;
if (!$is_run) { # prepare to run
  # LSF is assumed whenever bsub is found in PATH; otherwise fall back to SGE.
  my $is_sge = which('bsub')? 0 : 1;
  $opts{j} ||= sprintf("asub_$$%.3d", rand(1000));
  my $job = $opts{j};
  my $K = 0; # number of commands == number of array tasks
  open(my $fh, '>', "$job.sh") or die "[asub] cannot write '$job.sh': $!\n";
  while (my $line = <>) {
    # Strip the newline first: otherwise the \s* below would swallow it and
    # glue this command onto the next line of the .sh file.
    chomp($line);
    $line =~ s/&\s*$//; # remove "&" at the end of the command
    next if ($line =~ /^\s*$/);
    print $fh "$line\n";
    ++$K;
  }
  close($fh) or die "[asub] cannot write '$job.sh': $!\n";
  die "[asub] no commands found in the input\n" if ($K == 0);

  my $cmd;
  # The concurrency limit cannot exceed the number of tasks.
  $opts{c} = $K if ($opts{c} && $opts{c} > $K);
  $opts{q} = qq/-q $opts{q}/ if ($opts{q});
  if (!$is_sge) { # LSF
    $opts{R} = qq/-R "$opts{R}"/ if ($opts{R});
    $opts{c} = qq/\%$opts{c}/ if ($opts{c});
    $opts{w} = qq/-w "$opts{w}"/ if ($opts{w});
    $opts{n} = qq/-n $opts{n}/ if ($opts{n});
    $opts{m} = qq/-m $opts{m}/ if ($opts{m});
    $opts{W} = qq/-W $opts{W}/ if ($opts{W});
    if ($opts{n}) {
      # A multi-CPU task must get all of its CPUs on a single host.
      if ($opts{R}) {
        chop($opts{R}); # chop the trailing "
        $opts{R} .= qq/ span[hosts=1]"/;
      } else {
        $opts{R} = qq/-R "span[hosts=1]"/;
      }
    }
    if ($opts{M}) {
      if ($opts{R}) {
        chop($opts{R}); # chop the trailing "
        $opts{R} .= qq/ select[mem>$opts{M}] rusage[mem=$opts{M}]"/;
      } else {
        $opts{R} = qq/-R "select[mem>$opts{M}] rusage[mem=$opts{M}]"/;
      }
      # -M is given in MB on the asub command line and multiplied by 1000 here.
      $opts{R} .= ' -M ' . ($opts{M}*1000);
    }
    $cmd = qq(mkdir -p $job.out $job.err && echo '$0 -j $job.sh -k \${LSB_JOBINDEX}' | bsub -J$job)
      . qq("[1-$K]$opts{c}" -o $job.out/\%I.out -e $job.err/\%I.err $opts{n} $opts{R} $opts{q} $opts{w} $opts{m} $opts{W});
  } else { # SGE
    $opts{R} = qq/-l "$opts{R}"/ if ($opts{R});
    $opts{w} = qq/-hold_jid $opts{w}/ if ($opts{w});
    my $asub = abs_path($0);
    $cmd = qq(mkdir -p $job.out $job.err && echo '$asub -j $job.sh -k \${SGE_TASK_ID}' | qsub -N $job -cwd )
      . qq(-t 1-$K -o '$job.out/\$TASK_ID.out' -e '$job.err/\$TASK_ID.err' $opts{R} $opts{q} $opts{w});
  }
  if (defined($opts{p})) {
    print "$cmd\n";
  } else {
    my $status = system($cmd);
    if ($status != 0) {
      warn("[asub] job submission failed (wait status $status)\n");
      exit(wait_status_to_exit_code($status));
    }
  }
} else { # run the command
  warn("[asub] Hostname: ".hostname().", Arch-OS: ".get_cpu_sys()."\n");
  # Three-argument open: the file name can never be taken for a mode or a pipe.
  open(my $fh, '<', $opts{j}) or die "[asub] cannot read '$opts{j}': $!\n";
  my $k = 0;
  my $cmd;
  while (my $line = <$fh>) {
    next if (++$k != $opts{k});
    chomp($line);
    $cmd = $line;
    last;
  }
  close($fh);
  die "[asub] task $opts{k} not found: '$opts{j}' has only $k line(s)\n" if (!defined($cmd));
  warn("[asub] Command: $cmd\n");
  warn("[asub] --- BEGIN OF COMMAND STDERR ---\n");
  my $ret = system($cmd);
  warn("[asub] --- END OF COMMAND STDERR ---\n");
  warn("[asub] Return value: $ret\n");
  # $ret is a wait status, not an exit code: passing it to exit() directly
  # would turn e.g. 256 (child exited with 1) into exit code 0.
  exit(wait_status_to_exit_code($ret));
}

# Convert the wait status returned by system() into a value that is safe to
# hand to exit(): 127 if the command could not be started, 128+N if it was
# killed by signal N, otherwise the command's own exit code.
sub wait_status_to_exit_code {
  my ($status) = @_;
  return 127 if ($status == -1);
  my $signal = $status & 127;
  return 128 + $signal if ($signal);
  return $status >> 8;
}

# Build a lower-case "<cpu>-<os>" tag for the current host from uname output,
# e.g. the processor type (or the machine name when the processor type is
# empty or reported as unknown) followed by the kernel name. All whitespace
# is removed from the result.
sub get_cpu_sys {
  my $tag = `uname -p 2>/dev/null`;
  if (!$tag || $tag =~ /unknown/) {
    # "uname -p" is not informative on this system; use the machine name.
    $tag = `uname -m 2>/dev/null`;
  }
  my $kernel = `uname -s`;
  $tag .= '-' . $kernel;
  $tag = lc($tag);
  $tag =~ s/\s+//g;
  return $tag;
}

# Locate a program by searching a colon-separated list of directories.
#   $file - program name to look for
#   $path - optional directory list; defaults to $ENV{PATH}
# Returns "<dir>/<file>" for the first directory that holds an executable
# regular file of that name, or nothing (undef in scalar context) when the
# search fails or no path is available.
sub which {
  my $file = shift;
  my $path = (@_)? shift : $ENV{PATH};
  return if (!defined($path));
  foreach my $dir (split(":", $path)) {
    # An empty PATH element stands for the current directory.
    $dir = '.' if ($dir eq '');
    $dir =~ s/\/$//;
    my $candidate = "$dir/$file";
    # -x alone is also true for searchable directories, so insist on a
    # regular file; "_" reuses the stat data from the -f test.
    return $candidate if (-f $candidate && -x _);
  }
  return;
}

# Print the help text to STDERR and terminate the script via die().
# The message ends with a newline, so die() does not append the script name
# and line number to it.
sub usage {
  my $help = <<"END_OF_USAGE";

Program: asub (Array Job submission with bsub/qsub)
Version: $version
Contact: Heng Li <lh3\@sanger.ac.uk>\n
Usage:   asub [options] <cmd_file>\n
Options: -R STR    resources string [null]
         -w STR    job dependency [null]
         -q STR    queue name [default queue]
         -j STR    job name [auto]
         -c INT    number of processors (LSF only) [max]
         -M INT    maximum memory in MB (Sanger LSF only) [null]
         -W INT    runtime limit (LSF only) [none]
         -n INT    # CPUs for a parallel job (LSF only) [null]
         -m STR    host group (LSF only) [null]
         -p        print the submission command only

Note: For option -R and -w, SGE and LSF have different syntax. Please
      check SGE/LSF manual page for details. Here are some examples for
      LSF:

        -R "select[type==X86_64&&mem>800] rusage[mem=800]"
        -w "done(my_job_001)"

      And for SGE:

        -R h_cpu=86400,h_data=1000000000
        -w my_job_001

END_OF_USAGE
  die $help;
}