#!/usr/local/bin/perl # BioMirror.pm # Bio-Mirror perl packages # d.gilbert # package main; ## tests # $debug= 1; # BioMirror::initEnv(); # BioMirror::usage(1); # BioMirror::SRS::install(); # BioMirror::perldoc(); # BioMirror::printData(); # BioMirror::printMirrors(); # BioMirror::printMirrorPacks(); # BioMirror::printMirrorPacks('ftp://bio-mirror.jp.apan.net/'); =head1 NAME BioMirror - manage biology databanks for Bio-Mirror use =head1 DESCRIPTION Manages biology databanks, including transport from Internet source or Bio-Mirror archive, and databank search indexing via calls to SRS software. See {http,ftp}://bio-mirror.net/ for BioMirror datasets. Uses these modules BioMirror::Data; BioMirror::DataBanks; BioMirror::Mirror; BioMirror::Mirrors; BioMirror::SRS; URI::URL; Local configuration BioMirror packages are loaded by loadPackages(). Put your BioMirror package files in ~/biomirror or ~/.biomirror to have them auto-loaded. =head1 AUTHOR D.G. Gilbert, Nov. 1999, software@bio.indiana.edu =head1 SOURCE http://bio-mirror.net/biomirror/software/biomirror/ =head1 METHODS =cut package BioMirror; # BEGIN { eval{ use lib '/Users/gilbertd/bio/perlib';}; } use lib '/Users/gilbertd/bio/perlib'; use BioMirror::Data; use BioMirror::DataBanks; use BioMirror::Mirror; use BioMirror::Mirrors; use BioMirror::SRS; use URI::URL; use Carp (); sub Version { $VERSION; } =head1 BioMirror::loadPackages() Utility method to load user-defined BioMirror packages. Requires any .pm or .pl file in a 'biomirror' directory. Called without option to initialize from @INC directory list. =cut # perly - put before BEGIN block to ensure we can call here sub loadPackages { my( $refdirlist, $level, $atdir)= @_; return if (!$level && $packsAreLoaded); $packsAreLoaded= 1; my @dirlist; if ($refdirlist) { @dirlist= @{$refdirlist}; } else { @dirlist= @INC; } foreach my $dir ( @dirlist ) { ## how to tell user dirs from system ? $dir ="$atdir/$dir" if ($atdir); $dir =~ s|/|:|g if ($^O =~ /Mac/i); next unless( -d $dir); # print STDERR "loadPackages $level '$dir'\n"; # if $debug; if ($dir =~ m/biomirror/i) { loadPacksFrom($dir); } elsif ($level<2) { local(*D); if (opendir(D,$dir)) { my @dlist= grep( !/^\./, readdir(D)); close(D); loadPackages( \@dlist, $level+1, $dir); ## limit recursion } } } } sub loadPacksFrom($) { my($dir)= @_; local(*DD); if (opendir(DD,$dir)) { my @dlist= grep( /\.(pl|pm)$/, readdir(DD)); close(DD); PMOD: foreach my $pl (@dlist) { ## ? assume it is a BioMirror:: package? test w/ grep of contents? ## ? can we get perl to tell us if $pl is already loaded ? foreach my $pm (@perlmods) { next PMOD if ( $pl eq $pm ); } # print STDERR "requiring package $pl\n"; # if $debug; ## need to put $dir into @INC then require $pl w/o dir ! ## -- make simpler assumption that @INC already contains any user folder? eval { # print "require $dir/$pl; \n"; # if $debug require "$dir/$pl"; push( @perlmods, $pl); }; warn $@ if ($@); } } } =head1 BioMirror::BEGIN() Default settings and unix system commands. Locally configurable via the BioMirror/local-env.pm file. =cut BEGIN { eval{ use lib '/Users/gilbertd/bio/perlib';}; $VERSION = "1.0"; @perlmods= qw( BioMirror.pm Data.pm DataBanks.pm Mirror.pm Mirrors.pm SRS.pm ); $SourceUrl= 'http://bio-mirror.net/biomirror/software/biomirror/'; $AgentID= "BioMirror/$VERSION (bio-mirror.net)"; ## these are site-configs by loading local config BioMirror-env.pl/BioMirror:: $debug= 0; $debug= $main::debug if ($main::debug); $SRSRoot= undef; $zpath = '/bio/biomir-pub/biomirror'; $dpath = '/bio/data'; $doffpath = '/bio/data-offline'; $Logfile= "/tmp/biomirror$$.log"; $ArchiveDbInfo = '$zpath/docs/about-databanks.txt'; # auto-update this doc, if exists $ArchiveDbSummary = '$zpath/docs/databanks-table.html'; # auto-update this doc, if exists ## define Unix system methods $Zcat= 'zcat'; # 'gzcat' can do also? $Gzcat= 'gzcat'; # 'zcat' sometimes $CopyFileDate= 'touch -r'; ## 'touch -r oldfile newfile' ## when archive file is uncompressed $CopyUncompressedArchiveFile= '/bin/cp -fp'; ## or '/bin/ln -s' ?? $Diskusage= '/usr/bin/du -sk '; $FtpMirror= '/usr/local/lib/mirror/mirror.pl ';##?? ## this is default install for version 2.9 ## $FtpMirror= '/usr/local/sbin/mirror'; $doMakedirs= 1; # make archive_dir, expanded_dir if needed ? site-config $Makedirs= 'mkdir -p'; $logout = 1; $view= 0; $forceindex= 0; $noupdate= 0; $doSrsCheck= 1; if ($ENV{BIOMIRROR_HOME}) { $appdir= $ENV{BIOMIRROR_HOME}; } else { $appdir= $ENV{'PWD'}; } ## this can be bad if we call from outside of BIOMIRROR_HOME ! unshift(@INC, $appdir); ## before chdir, need to $appdir in @INC for requires unshift(@INC,"$appdir/BioMirror") if (-d "$appdir/BioMirror"); unshift(@INC, "$ENV{'HOME'}/.biomirror") if (-d "$ENV{'HOME'}/.biomirror"); unshift(@INC, "$ENV{'HOME'}/biomirror") if (-d "$ENV{'HOME'}/biomirror"); loadPackages(); ## need to define before BEGIN block ! } #-- data sub getDataclass( $) { my $classname= shift; my %hash= datahash(); my $classob= $hash{$classname}; if (!defined $classob) { ## try caseless foreach (keys %hash) { if (/$classname/i) { return $hash{$_}; } } } return $classob; } sub datakeys() { my %hash= datahash(); return (sort keys %hash); } sub datalist() { if (!defined @datalist) { @datalist = BioMirror::Data::elements(); } return @datalist; } sub datahash() { if (!defined %datahash) { my @list= datalist(); foreach $obj (@list) { my $classname= ref($obj); $datahash{$classname}= $obj; } } return %datahash; } sub getDataFromSrsname( $) { my $srsname = uc(shift); my @list= datalist(); foreach $obj (@list) { my $srsdb= uc($obj->getSrsdb()); return $obj if ($srsdb eq $srsname); } return undef; } #-- mirrors sub getMirrorclass( $) { my $classname= shift; my %hash= mirrorhash(); my $classob= $hash{$classname}; if (!defined $classob) { ## try caseless foreach (keys %hash) { if (/$classname/i) { return $hash{$_}; } } } return $classob; } sub mirrorkeys() { my %hash= mirrorhash(); return (sort keys %hash); } sub mirrorlist() { if (!defined @mirrorlist) { @mirrorlist = BioMirror::Mirror::elements(); } return @mirrorlist; } sub mirrorhash() { if (!defined %mirrorhash) { my @list= mirrorlist(); foreach $obj (@list) { my $classname= ref($obj); $mirrorhash{$classname}= $obj; } } return %mirrorhash; } sub getMirrorFromName( $) { my $name = uc(shift); my @list= mirrorlist(); foreach $obj (@list) { my $id= uc($obj->id()); return $obj if ($id =~ m/$name/); $id= uc($obj->country()); return $obj if ($id =~ m/$name/); } return undef; } #---------- sub getSrsSections() { if (!defined %srssections) { my @list= datalist(); foreach $obj (@list) { ## my $dosrs= (($obj->makeflags & $BioMirror::Data::kDoSrsIndex) != 0); ## ^^ we are overloading this srssections to mean any data sections now !! ## can't screen here on whether to make srs or not my $srsdb= uc( $obj->getSrsdb() ); ## or ->get_srsdb() or ->srsdb ## keep uc() to hash right? $srssections{$srsdb}= $obj if ($srsdb); push( @srssections, $srsdb); } } return %srssections; } =head1 BioMirror::main() main entry for updating databanks. Call from other scripts, or for usage help C run with arguments C =cut sub main { ##? local @ARGV = @_; my($doArchiveSummary,$doArchiveInfo,$useMirror,$doFetchIcarus, $doEditdbi,$doMakesrsdb,$doAllSrsDbs,$doSrsServers,$doSRSfiles, $doSrsCheck,$doSrssection,$doSRSinfo,$doSetrelease, $doLDIF, $doMirrorPacks, $dodoc,$dohelp, ); my @argenv= (); # my @ARGV= @_ || @main::ARGV; if (scalar(@_)) { @saveargs= @ARGV; push(@ARGV,@_); } my $callline= "# BioMirror::main( ".join (",",@ARGV)." )\n"; usage() if ($#ARGV < 0); require Getopt::Long; # but dang, this doens't allow opt packing: -DNRfhsv $Getopt::Long::autoabbrev= 1; $Getopt::Long::ignorecase= 0; $Getopt::Long::order= $Getopt::Long::PERMUTE; $optokay= Getopt::Long::GetOptions( D => \$debug, 'debug!' => \$debug, 'E=s@' => \@argenv, 'env=s@' => \@argenv, N => \$noupdate, R => \$doSetrelease, 'release!' => \$doSetrelease, 'force!' => \$forceindex, 'help!' => \$dohelp, 'doc' => \$dodoc, 'package:s' => \$doMirrorPacks, 'ldif:s' => \$doLDIF, 'info!' => \$doSRSinfo, 'view!' => \$view, 'logout!' => \$logout, 'srssection!' => \$doSrssection, 'srscheck!' => \$doSrsCheck, 'srsfiles!' => \$doSRSfiles, 'srsservers' => \$doSrsServers, 'srsalldbs' => \$doAllSrsDbs, 'makesrsdb:s' => \$doMakesrsdb, 'editdbi:s' => \$doEditdbi, 'geticarus' => \$doFetchIcarus, 'domirrordb' => \$doMirrorDb, 'mirror=s' => \$useMirror, 'noarcdoc!' => \$noarcdoc, 'archinfo:s' => \$doArchiveInfo, 'archsum:s' => \$doArchiveSummary, ); usage($dodoc) if ($dohelp || $dodoc || !$optokay); $view= 1 if ($doSRSinfo||$doSRSfiles); $logout = 0 if ($view); foreach $ae (@argenv) { my ($key,$val)= split(/=/,$ae); $ENV{$key}= $val; } initEnv(); BioMirror::SRS::init($SRSRoot); getSrsSections(); $useMirror= getMirrorFromName($useMirror) if ($useMirror); $doupdate= !(defined($doMirrorPacks) || $doSRSinfo || $doSRSfiles); $didsrscheck= 0; ## flag to run checkAll this way? ## change these if/else to %cmd list as per meow/flybase datagen ? if (defined($doMirrorPacks)) { if ($useMirror) { printMirrorPacks($useMirror->ftp); } else { printMirrorPacks($doMirrorPacks); } return $error; } if (defined($doLDIF)) { getPackageLDIF($doLDIF); return $error; } if (defined($doMakesrsdb)) { return makeSrsdb($doMakesrsdb); } if (defined($doFetchIcarus)) { return fetchSrsIcarus(@ARGV); } if (defined($doSrsServers)) { return printSrsServers(); } if (defined($doAllSrsDbs)) { return printAllSrsDbInfo(@ARGV); } if (defined($doArchiveInfo)) { return printArchiveDbInfo($doArchiveInfo); } if (defined($doArchiveSummary)) { return printSummaryTable($doArchiveSummary,$useMirror); } ##? hash dbs, but process in order requested in ARGV ... my @dbs= @ARGV; ## whatever remains is list of databank sections? my %dbhash; my @dborder; while (@dbs) { $_= uc( shift(@dbs) ); ## keep uc() for srsdb case compat ## print STDERR "Arg: $_\n" if $debug; my $db= $_; if ($db eq 'ALL') { $doall= 1; # special case for auto-updating %dhash= %srssections; push( @dborder, @srssections); } else { my $noob= 1 if ($db =~ s/^NO[_-]//); my $obj= getDataFromSrsname($db); if ($obj) { if ($noob) { delete $dhash{$db}; # @dborder= grep( ne $db, @dborder); @dborder= map{ $_ ne $db ? $_ : () } @dborder; } else { $dhash{$db}= $obj; push( @dborder, $db); } } } } #?? do this earlier - after usage() ? if ($logout) { close(STDERR); open(STDERR, ">> $Logfile"); select(STDERR); $|= 1; close(STDOUT); open(STDOUT, "> $Logfile"); select(STDOUT); $|= 1; } print STDERR $callline; if ((!defined($doSrssection) && $doupdate) || $doSrssection) { print STDERR "SRS::section()\n" if $debug; BioMirror::SRS::section(); } print STDERR "processing db sections: @dborder\n" if $debug; while (@dborder) { $_= uc( shift(@dborder) ); my $obj= $dhash{$_}; next unless ($obj); delete $dhash{$_}; ## my $dosrs= (($obj->makeflags & $BioMirror::Data::kDoSrsIndex) != 0); ## ?? respect $dosrs flag ? if ( $doSRSinfo ) { print BioMirror::SRS::info($_); } elsif ( $doSRSfiles ) { checkSrsData($_, $obj); } elsif ( $doMirrorDb ) { my $mirrorurl= ($useMirror) ? $useMirror->ftp : ''; my $testmirror= $view; $view= 0; $error= mirrorDatabank( $_, $mirrorurl, $testmirror); $view= $testmirror; } elsif ( $doSetrelease ) { setRelease($obj); } ## is -release flag exclusive of updateData ? elsif (defined($doEditdbi)) { updateDbFiles($obj, $doEditdbi); } elsif ( $doupdate ) { updateData( $obj, $doSetrelease, $doSrsCheck); } ##? update options? ## updateData- add dataobj methods to do updateDbFiles - esp. for genbanknew & genbank } if ($doupdate && !$noarcdoc && !$error) { $error= BioMirror::SRS::checkAll() if ($doall && $didsrscheck); ##? replace individual lib checks w/ checkAll? if ($view) { print "printArchiveDbInfo(UPDATE)\n"; } else { printArchiveDbInfo('UPDATE.html'); printArchiveDbInfo('UPDATE'); printSummaryTable('UPDATE.html',$useMirror); } } return $error; } =head1 BioMirror::checkSrsData($srsdb,$dataobj) Compare available data files to those known to SRS =cut sub checkSrsData { local($srsdb,$dataobj)= @_; startSub("checkSrsData",@_); # loadPackages(); print "Section $srsdb = ".$dataobj->comment()."\n"; my @sfiles= BioMirror::SRS::getDataPathnames($_); my $spath= shift(@sfiles); @sfiles= sort @sfiles; print "SRS data \t$spath\n"; ## print "\tfiles: @sfiles\n"; my @afiles= $dataobj->getDataPathnames(); my $apath= shift(@afiles); @afiles= sort @afiles; print "Available data \t$apath\n"; ## print "\tfiles: @afiles\n"; ## print "Matching files\n"; print "Available\t SRS \t data files\n"; my $more= (@sfiles || @afiles); my $s= shift(@sfiles); my $a= shift(@afiles); while ($more) { $more= (@sfiles || @afiles); if ($a && ($a lt $s)) { print "$a\t -- \n"; $a= shift(@afiles); } elsif ($s && ($s lt $a)) { print " -- \t$s\n"; $s= shift(@sfiles); } else { print "$a\t$s\n"; $a= shift(@afiles); $s= shift(@sfiles); } } print "\n"; } =head1 BioMirror::fetchSrsIcarus($srsurl,$srsdb,$ifile,$outpath) Fetch icarus files from SRS server for given databank. $srsurl - SRS server url $srsdb - databank or 'all' $ifile - icarus file (i, it, is) or 'all' $outpath - put in this folder [ add methods to manage archive of these - diff to compare? ] =cut sub fetchSrsIcarus { # local @args= @_; # $error= BioMirror::SRS::fetchIcarusDoc(@args); local($srsurl,$srsdb,$ifile,$outpath) = @_; startSub("fetchSrsIcarus",@_); # loadPackages(); my $many= 1; my @ifile; if ($ifile && $ifile !~ /all/i) { @ifile= ($ifile); } else { $many= 1; @ifile= qw( i is it); } my @srsdb; if ($srsdb && $srsdb !~ /all/i) { @srsdb= ($srsdb); } else { @srsdb= BioMirror::SRS::getDblist($srsurl); $many= 1; } foreach $srsdb (@srsdb) { foreach $ifile (@ifile) { my $outfile; if ($many) { $outpath = '.' unless($outpath); my $fn= lc($srsdb); $outfile= "$outpath/$fn.$ifile"; } else { $outfile= $outpath; } print STDERR " fetchIcarusDoc($srsurl,$srsdb,$ifile,$outfile) \n" if ($view||$debug); $error= BioMirror::SRS::fetchIcarusDoc($srsurl,$srsdb,$ifile,$outfile) unless ($view); last if $error; } } return $error; } sub printSrsServers { print "SRS server list\n"; %srsservers= BioMirror::SRS::getSrsServers(); my ($sname, $surl); foreach $sname (sort keys %srsservers) { print "$sname \t $srsservers{$sname} \n"; } return 0; } sub printVal { ## generic - put somewhere else? local($val)= @_; if (ref($val) =~ /HASH/) { print '{ '; foreach (sort keys %$val) { my $kval= $$val{$_}; if (/\s/) { s/'/\\'/g; print "'$_' => "; } else { print "$_ => "; } printVal($kval); print ', '; } print " }\n"; } elsif (ref($val) =~ /ARRAY/ ) { print '( '; foreach (@$val) { printVal($_); print ', '; } print " )\n"; } else { $val =~ s/'/\\'/g; print "'$val'"; } # print "\n"; } =head1 BioMirror::printAllSrsDbInfo() Fetch and print SRS databank of databanks information about each public SRS databank, including documentation, available servers and their data release date. =cut sub printAllSrsDbInfo { my ($serverurl,$flags); my @getdbs= (); # loadPackages(); foreach (@_) { if ( m|http\:\/|i ) { $serverurl= $_; } elsif ( /^doc/i ) { $flags |= $BioMirror::SRS::kDocument; } elsif ( /^serv/i ) { $flags |= $BioMirror::SRS::kServers; } else { push(@getdbs,$_); } } # my %alldbs= BioMirror::SRS::allInfo( ); my %alldbs= BioMirror::SRS::allInfo( $serverurl, $flags, \@getdbs); my $mtime= POSIX::strftime("%A, %d %B %Y", localtime(time) ); print "#!/usr/local/bin/perl\n\n"; print "$title = 'List of Biology Databanks available thru SRS';\n"; print "$source= 'from $serverurl at $mtime';\n\n"; print "sub loadSrsDBs {\n"; foreach my $db (sort keys %alldbs) { print "## Databank $db ------------------\n"; my $val= $alldbs{$db}; ## printVal($val); print "\$alldbs{$db}= {\n"; my $v; my %dbh= %{$val}; if ($v= $dbh{name}) { print "name => "; printVal($v); print ",\n"; } if ($v= $dbh{servers}) { print "servers => \n"; printVal($v); print ",\n"; } if ($v= $dbh{doc}) { my @v= split(/\n/,$v); @v= @v[0..4] unless($flags & $BioMirror::SRS::kDocument); $v= join("\n",@v); # option pick max size! $v =~ s/\@/\\\@/g; print "doc => qq[\n"; print $v; print "],\n"; } print "\n};\n\n"; } print "\n} ##loadSrsDBs\n\n"; ## ? and this utility? print <$outf")||die "opening $outf"; print O "List of SRS Databanks\n"; print O "

List of Biology Databanks available thru SRS

\n"; print O "Fetched from $serverurl at $mtime

\n"; print O "\n"; foreach my $db (sort keys %alldbs) { my $sl= ''; my %ss= %{ $alldbs{$db}{servers} }; foreach my $s (keys %ss) { if ($s =~ /,\s*([^,]+)$/) { $sl .= ', ' if ($sl); $sl .= $1; } } my $doc= $alldbs{$db}{doc} ; if ($doc) { $doc =~ s/<[^>]+>//g; $doc= $1 if ($doc =~ /^([^\.:;]+)[\.:;]/); } print O "\n"; } print O "
DatabankServersAbout
$db$sl$doc
\n"; close(O); } EOF return $error; } =head1 BioMirror::perldoc() Print perl documentation of BioMirror:: modules, C or try 'perldoc modulefile'. =cut sub perldoc() { require Pod::Text; ## will need to use @INC to locate files ?? print "\nPerl documents of BioMirror modules ( @perlmods )\n\n"; foreach my $pm (@perlmods) { if (!-r $pm) { my $gotit= 0; foreach my $d (@INC) { if (-r "$d/$pm" ) { $pm= "$d/$pm"; $gotit= 1; last; } } if (!$gotit && -r "BioMirror/$pm" ) { $pm= "BioMirror/$pm"; } } print "[ $pm ]----------------------------------\n\n"; print Pod::Text::pod2text($pm) if (-r $pm); print "\n\n"; } } =head1 BioMirror::makeSrsdb($outfile) Make SRS srsdb.i configuration file for available databanks C $outfile -- optional output file (default is SRSDB:srsdb.i) =cut sub makeSrsdb { local($outfile) = @_; # loadPackages(); my @datalist= datalist(); print STDERR "BioMirror::SRS::makeSrsdb( $outfile, \@datalist)\n" if ($view||$debug); if (!$view) { $error= BioMirror::SRS::makeSrsdb( $outfile, @datalist); BioMirror::SRS::section(); } return $error; } sub updateDbFiles { local($dataobj, $outfile) = @_; startSub("updateDbFiles",@_); my $dosrs= (($dataobj->makeflags & $BioMirror::Data::kDoSrsIndex) != 0); return 0 unless($dosrs); my $srsdb= $dataobj->getSrsdb(); my @afiles= $dataobj->getDataPathnames(0); ## (1) gets all existing; (0) gets from @data pattern/list shift @afiles; ## drop 1st pathto -- need also drop non .seq ? print STDERR "BioMirror::SRS::updateDbFileList( $srsdb, @afiles, $outfile )\n" if ($view||$debug); if (!$view) { $error= BioMirror::SRS::updateDbFileList( $srsdb, \@afiles, $outfile ); BioMirror::SRS::section(); } return $error; } =head1 BioMirror::printMirrorPacks() Print package file for use with mirror.pl. With no argument, use source of data, or specify an ftp url. No path on url implies a Bio-Mirror path. C C or call from main C =cut sub printMirrorPacks { my($mirrorurl)= @_; startSub("printMirrorPacks",@_); # loadPackages(); ## need option to choose from obj->source or BioMirror/other source my @list= datalist(); foreach $obj (@list) { my $pack= $obj->getFtpMirrorPackage($mirrorurl); print "$pack\n" if ($pack); } } sub getPackageLDIF { my($dn)= @_; startSub("getPackageLDIF",@_); # loadPackages(); ## need option to choose from obj->source or BioMirror/other source my @list= datalist(); foreach $obj (@list) { my $pack= $obj->getPackageLDIF($dn); print "$pack\n" if ($pack); } } =head1 BioMirror::mirrorDatabank($dbname,$mirrorurl,$dotest,$packfile) Mirror a databank package. See BioMirror::DataBanks and C output packages. Requires mirror.pl be installed, and located by $FtpMirror variable. Install from ftp://sunsite.org.uk/packages/mirror/mirror.tar.gz $dbname -- name of databank (in BioMirror::DataBanks.pm) $mirrorurl -- ftp url for mirroring (default will mirror from databank source) $dotest -- test only (C) $packfile -- optional package file to get $dbname package or call from main as C to test view what would mirror: C =cut ## ## this and getFtpMirrorPackage should be designed to allow plug-in replacements ## to mirror.pl - e.g., rsync use, other mirroring software ## sub mirrorDatabank( $;$$$) { local($dbname,$mirrorurl,$dotest,$packfile) = @_; startSub("mirrorDatabank",@_); # loadPackages(); ## warn("Bad package '$dbname'") unless ($dbname); # require "$FtpMirror"; ##? can we require it in here, or need to callSystem() ? ## -- has no package or sub main() - need to set @ARGV for it... my $opt= ''; $opt= '-n' if ($dotest); #? set -Cconfig_file my $istemp= 0; unless ($packfile) { $packfile= "/tmp/biomirror$$.pack"; my $obj= getDataFromSrsname($dbname); my $pack= $obj->getFtpMirrorPackage($mirrorurl); unless ($pack) { Carp::carp("No mirror package for $dbname"); $error= 1; return $error; } unless (open(T,">$packfile")) { Carp::carp("Can't make $packfile"); $error= 1; return $error; } print T "$pack\n"; close(T); $istemp= 1; } my $packname= lc($dbname); #? always or get from $obj? $error= callSystem("$FtpMirror $opt -p$packname $packfile"); unlink $packfile if ($istemp); return $error; } =head1 BioMirror::printData() Print databank class info C =cut sub printData { # loadPackages(); @dkeys= BioMirror::datakeys(); print "\@BioMirror::datakeys() = qw( "; foreach (@dkeys) { print "$_\t"; } print " );\n"; print "\n# BioMirror::datahash() key/values:\n"; %dhash= BioMirror::datahash(); foreach (sort keys %dhash) { my $obj= $dhash{$_}; print $obj->toString() . "\n\n"; } } =head1 BioMirror::printSummaryTable($outfile, $mirrorObj) Write summary table of databanks Local mirror Description Home site name desc name $outfile - optional output file, or UPDATE to update $ArchiveDbSummary $mirrorObj -- local mirror server object =cut sub printSummaryTable { my($outfile,$mirrorObj)= @_; my $dohtml= 1; my $doc= getSummaryTable( $mirrorObj); if ($outfile =~ m/^UPDATE/i) { ##? my $aout= replaceVars($ArchiveDbSummary); ## $aout =~ s/\.\w+$/.html/; if (-w $aout) { $outfile = $aout; } elsif (! -e $aout) { my $adir= $aout; $adir =~ s|/[^/]+$||; $outfile = $aout if (-d $adir); } print STDERR "Update SummaryTable to $outfile\n" if ($view||$debug); $outfile= '' if ($view); } if ($outfile) { open(SAVEOUT,">&STDOUT"); open(STDOUT, ">$outfile"); } print $doc; if ($outfile) { close(STDOUT); open(STDOUT, ">&SAVEOUT"); } return 0; } sub getSummaryTable { my $mirrorObj= shift; startSub("getSummaryTable",@_); unless ($mirrorObj) { if ($thisBioMirror) { $mirrorObj= $thisBioMirror; } else { $mirrorObj= new BioMirror::IUBio_USA(); } } $mirrorurl= $mirrorObj->ftp(); # $mirname= $mirrorObj->name; # $mirweb = $mirrorObj->web; # $srsserver= $mirrorObj->srsserver; my $table =< Local mirror Description Home site TEOF foreach my $klass ( BioMirror::datakeys()) { my $obj= BioMirror::getDataclass($klass); $tabrow= $obj->getSummaryHtml($mirrorurl); $table .= $tabrow if ($tabrow); } $table .= < TEOF return $table; } =head1 BioMirror::printArchiveDbInfo($outfile,$dohtml) Print pretty list summarizing local archive of data, including total size, up-date, summary line of data. Section Size(Mb) Update Databank source blast 2159 29-Nov-1999 Blast DB from NCBI blocks/data-blocks 5 25-Nov-1999 BLOCKS from NCBI $outfile - optional output file, or UPDATE to update $ArchiveDbInfo =cut sub printArchiveDbInfo { my($outfile,$dohtml)= @_; $dohtml= 1 if ($outfile =~ m/\.html$/i); my $doc= getArchiveDbInfo($dohtml); if ($outfile =~ m/^UPDATE/i) { ##? my $aout= replaceVars($ArchiveDbInfo); $aout =~ s/\.\w+$/.html/ if ($dohtml); if (-w $aout) { $outfile = $aout; } elsif (! -e $aout) { my $adir= $aout; $adir =~ s|/[^/]+$||; $outfile = $aout if (-d $adir); } print STDERR "Update ArchiveDbInfo to $outfile\n" if ($view||$debug); $outfile= '' if ($view); } if ($outfile) { open(SAVEOUT,">&STDOUT"); open(STDOUT, ">$outfile"); } print $doc; if ($outfile) { close(STDOUT); open(STDOUT, ">&SAVEOUT"); } return 0; } sub getArchiveDbInfo { my($dohtml)= @_; startSub("getArchiveDbInfo",@_); require POSIX; my $srsserver= ''; #? assume local host my ($mirname,$mirweb); if ($thisBioMirror) { $mirname= $thisBioMirror->name; $mirweb = $thisBioMirror->web; if ($thisBioMirror->srsserver) { $srsserver= $thisBioMirror->srsserver; } else { $srsserver= $mirweb; } #? or use null } my $fmt= "%-18s %9.0f %10s %s\n"; my %mdone= (); foreach my $klass (BioMirror::datakeys()) { my $obj= BioMirror::getDataclass($klass); next if ( ($obj->sourceflags & $BioMirror::Data::kSuperSource) != 0); my $mdir= BioMirror::replaceVars( $obj->mirror_dir ); next unless (-d $mdir); next unless ($mdir =~ m/^$zpath/); ## not part of biomirror archive next if $mdone{$mdir}; #? how to list BlastDB/BlastDB Daily, etc. ? ## $mdone{$mdir}= 1; my $disku= `$Diskusage $mdir`; ($disku)= $disku =~ m/^\s*(\d+)/; $disku = int((1023+$disku) / 1024); # from kb to mb - add commas ? decimals ? my @mtimes; local(*D); if (opendir(D,$mdir)) { while (my $f= readdir(D)) { next if ($f =~ /^\./); my @st= stat( "$mdir/$f"); push (@mtimes, $st[9]); } closedir(D); } @mtimes= sort {$b <=> $a} @mtimes; my $mtime= POSIX::strftime("%d-%b-%Y", localtime($mtimes[0]) ); my $comment= ${$obj->source}{comment}; #? my $name= $obj->getName(); my $dosrs= (($obj->makeflags & $BioMirror::Data::kDoSrsIndex) != 0); my $srsdb= $obj->getSrsdb(); my $mname= $mdir; $mname =~ s|^$zpath/?||; $mname =~ s|/$||; if ($dohtml) { $mdoc= "$mname$disku$mtime"; my $weburl= ${$obj->source}{web}; if ($weburl) { $mdoc .= "$comment"; } else { $mdoc .= "$comment"; } if ($dosrs) { my $srsurl= BioMirror::SRS::libInfoUrl($srsserver,$srsdb); $mdoc .= "$srsdb"; } $mdoc .= "\n"; $mdone{$mdir}= $mdoc; } else { $fmt= "%-18s %9.0f %12s %s\n"; # my $mdoc= sprintf $fmt, $mname, $disku, $mtime, $comment; my $mdoc= sprintf "%-18s", $mname; ## fix for some longer than this my $n= 9; if (length($mdoc)>18) { $n= 18+$n - length($mdoc); } my $xfmt= " %${n}.0f %12s %s\n"; $mdoc .= sprintf $xfmt, $disku, $mtime, $comment; $mdone{$mdir}= $mdoc; } } my $totaldisku= `$Diskusage $zpath`; ($totaldisku)= $totaldisku =~ m/^\s*(\d+)/; $totaldisku = int((1023+$totaldisku) / 1024000); # from kb to GB my $mtime= POSIX::strftime("%A, %d %B %Y", localtime(time) ); my ($title, $doc); if ($mirname) { $title= "$mirname, Databank status"; } else { $title= "Bio-Mirror Databank status"; } if ($dohtml) { $doc = < $title

$title

$totaldisku Gigabytes (compressed)
$mtime

TEOF } else { $doc = " $title\n"; $doc .= " $totaldisku Gigabytes (compressed)\n"; $doc .= " $mtime\n"; $doc .= "----------------------------------------------------------------\n"; my $hfmt= $fmt; $hfmt =~ s/.0f/s/g; $doc .= sprintf $hfmt, 'Section', 'Size(Mb)', 'Updated', 'Databank source'; $doc .= sprintf $hfmt, '---------------', '--------', '--------', '----------------'; } foreach (sort keys %mdone) { $doc .= $mdone{$_}; } if ($dohtml) { $doc .= "
SectionSize (Mb)Update Databank sourceLocal SRS
\n"; } return $doc; } =head1 BioMirror::printMirrors() Print mirror class info C =cut sub printMirrors { # loadPackages(); @dkeys= BioMirror::mirrorkeys(); print "\@BioMirror::mirrorkeys() = qw( "; foreach (@dkeys) { print "$_\t"; } print " );\n"; print "\n# BioMirror::mirrorhash() key/values:\n"; %dhash= BioMirror::mirrorhash(); foreach (sort keys %dhash) { my $obj= $dhash{$_}; print $obj->toString() . "\n\n"; } } sub usage { local($details,$outfile)= @_; if ($outfile) { close(STDOUT); open(STDOUT, ">$outfile"); } getSrsSections(); my @mirnames= BioMirror::mirrorkeys(); for (my $i=0; $i<=$#mirnames; $i++) { $mirnames[$i] =~ s/^\w+:://; } print <getData(); print "$klass =\t" .$obj->getName() ." :\t" .$obj->getSrsdb() ." :\t".join(', ', @$data) ."\n"; } print "\nMirror_class =\tName :\tFTP :\tWeb\n"; foreach $klass (BioMirror::mirrorkeys()) { my $obj= BioMirror::getMirrorclass($klass); print "$klass =\t" .$obj->getName() ." :\t" .$obj->getFtp() ." :\t" .$obj->getWeb() ."\n"; } print "\nEnviron settings\n"; dumpEnv(); perldoc(); } exit(0); } sub timeit { return if (!$dotime); local($tstart)= @_; $tstart= $tat if (!$tstart); $tend= (times)[0]; printf " time=%.2f secs\n", $tend - $tstart; $tat= $tend; } sub dumpEnv { foreach (sort keys %ENV) { print "$_=$ENV{$_}\n"; } } sub startSub { local($name,@parms)= @_; $insub= $name; print STDERR "\nsub $insub(@parms)\n" if ($view||$debug); } sub testErr { local($err,$msg)= @_; ## die "$insub() error= $err, $msg\n" if ($err); Carp::croak "$insub() error= $err, $msg\n" if ($err); } sub replaceVars { local($eval)= @_; my $oval= $eval; while ($eval =~ m/(\$\w+)/) { my $val= eval($1); $eval =~ s/(\$\w+)/$val/; # print STDERR "eval($oval) => $eval\n" if ($debug); } return $eval; } ## removeVars == $eval =~ s/(\$\w+)//g; sub initEnv { # startSub("initEnv",@_); # $ENV{'JAVA_HOME'}= "/usr/java" unless ($ENV{'JAVA_HOME'}); # $ENV{'CLASSPATH'}= "$ENV{JAVA_HOME}/lib/classes.zip" unless ($ENV{'CLASSPATH'}); $ENV{'SERVER_PATH'}= "/bio/server" unless ($ENV{'SERVER_PATH'}); $SERVER_PATH= $ENV{'SERVER_PATH'}; $dpath= $ENV{'BIODATA_PATH'} if ($ENV{'BIODATA_PATH'}); $doffpath= $ENV{'BIODATAOFF_PATH'} if ($ENV{'BIODATAOFF_PATH'}); if (! -d $dpath) { print STDERR "BIODATA_PATH: need to mkdir $dpath\n"; Carp::warn("didn't mkdir $dpath: $!") unless ($doMakedirs && mkdir($dpath, 0777)); } $ENV{'BIODATA_PATH'}= $dpath; $ENV{'BIODATAOFF_PATH'}= $doffpath; ## dang - $zpath may be a remote URL $zpath= $ENV{'BIOMIRROR_PATH'} if ($ENV{'BIOMIRROR_PATH'}); if ($zpath) { my $isurl= ($zpath =~ m|://|); ##? check for file:/ ? if (!$isurl && ! -d $zpath) { print STDERR "BIOMIRROR_PATH: need to mkdir $zpath\n"; Carp::warn("didn't mkdir $zpath: $!") unless ($doMakedirs && mkdir($zpath, 0777)); } $ENV{'BIOMIRROR_PATH'}= $zpath; } ## ?? this should be in BioMirror::SRS::init() - need user pref not to require/use SRS # $SRSRoot= $ENV{'SRSROOT'} if ($ENV{'SRSROOT'}); $appdir= $ENV{'PWD'}; push(@INC,$appdir); ## before chdir, need to $appdir in @INC for requires ## chdir($workpath) || die "chdir $workpath: $!"; ## may not need, but do for safety ## dumpEnv() if ($dorun && $debug); loadPackages(); } sub callSystem { local( $cmd)= @_; my $error= 0; if ($view||$debug) { print STDERR "system( $cmd )\n"; } if (!$view) { $error= system("$cmd"); } return $error; } sub remoteArchiveToExpandedFile( $ $) { local($archiveurl, $expandedf)= @_; startSub("remoteArchiveToExpandedFile",@_); $error= 0; if ( $forceindex || isTargetOlderThanUrl($archiveurl, $expandedf)) { ## work out net transport - use mirror.pl if available ? ## use NET.pm, FTP.pm, which HTTP.pm? } return $error; } sub copyArchiveToExpandedFile( $ $ $) { local($archivef, $expandedf, $onlinef)= @_; # startSub("copyArchiveToExpandedFile",@_); $error= 0; my $zkind= 0; ZKIND: { if (-r $archivef) { if ($archivef =~ /.Z$/) { $zkind= 1; last ZKIND; } if ($archivef =~ /.gz$/) { $zkind= 2; last ZKIND; } else { $zkind= 0; last ZKIND; } } if (-r "$archivef.Z") { $zkind= 1; $archivef = "$archivef.Z"; last ZKIND; } if (-r "$archivef.gz") { $zkind= 2; $archivef = "$archivef.gz"; last ZKIND; } Carp::carp "can't read '$archivef' \n"; return $error; } my $doit= $forceindex; unless($doit) { $doit= isOldTarget($archivef, $onlinef) if (-r $onlinef); $doit= isOldTarget($archivef, $expandedf) unless(-r $onlinef && !$doit); # if ($doit && -r $expandedf); } if ( $doit ) { if ($doMakedirs) { # make path to $expandedf if need be my $epath= $expandedf; my $e= rindex($epath,"/"); if ($e>0) { $epath= substr($epath,0,$e); $error= callSystem("$Makedirs $epath") unless ( -d $epath); } } callSystem("chmod u+w $expandedf") #? unlink $expandedf if (-r $expandedf); if ($error) { } elsif ($zkind == 1) { $error= callSystem("$Zcat $archivef > $expandedf"); $error= callSystem("$CopyFileDate $archivef $expandedf") if ( !$error && -e $expandedf ); } elsif ($zkind == 2) { $error= callSystem("$Gzcat $archivef > $expandedf"); $error= callSystem("$CopyFileDate $archivef $expandedf") if ( !$error && -e $expandedf ); } else { $error= callSystem("$CopyUncompressedArchiveFile $archivef $expandedf"); } } elsif ($view && !$debug) { print STDERR " no\n"; } return $error; } sub updateData( $ $; $) { local($dataobj, $dosetrelease, $dosrscheck )= @_; startSub("updateData",@_); local $didupdate= 0; ## updateData- add dataobj methods to do updateDbFiles - esp. for genbanknew & genbank my $dosrs= $dosrscheck && (($dataobj->makeflags & $BioMirror::Data::kDoSrsIndex) != 0); if (!$noupdate) { $error= $dataobj->updateArchiveToExpanded(); if (!$error && $dosrs) { my $srsdb= $dataobj->getSrsdb(); # my $dpath= BioMirror::replaceVars( $dataobj->expanded_dir ); # my @data= @{$dataobj->getData()}; ##^!! @data can be regex - fix getData !! my @data= $dataobj->getDataPathnames(); my $dpath= shift(@data); $didsrscheck++; ## flag to run checkAll this way? ##?? ::SRS::forceindex($forceindex); $didupdate= BioMirror::SRS::check($srsdb, $dpath, @data); ## ^^ need fix for $srsdb == genbanknew but icarus files == genbank.i/.is ## ? do a symlink? store a variable in dataclass ? $dosetrelease ||= $didupdate; } } setRelease($dataobj) if ($dosrs && $dosetrelease); } sub setRelease( $) { local($dataobj)= @_; startSub("setRelease",@_); my $dosrs= (($dataobj->makeflags & $BioMirror::Data::kDoSrsIndex) != 0); if ($dosrs) { BioMirror::SRS::setRelease( $dataobj->getSrsdb(), $dataobj->getRelease()); } } sub saveOldFile( $) { local($file) = @_; if (-e $file) { my $oldf= "$file.old"; print STDERR "rename ($file, $oldf) \n" if ($debug|$view); rename ($oldf, "${oldf}er") if (-e $oldf); rename ($file, $oldf); } } sub isTargetOlderThanUrl( $ $) { local($source,$target) = @_; my $res= 0; # use http HEAD, ftp getfiletime... print STDERR "isTargetOlderThanUrl() not ready\n"; print STDERR (($res) ? "yes\n" : "no\n") if $debug; return $res; } sub getLinkOriginal($) { local($source) = @_; my $rsource= readlink($source); return $source unless ($rsource); if ($rsource =~ m/^\.\./) { my $at= rindex( $source,'/'); $rsource= substr($source,0,$at) . '/' . $rsource; } return $rsource; } sub isOldTarget( $ $) { # usage: if (isOldTarget( $sourcefile, $targetfile)) { blah; } local($source,$target) = @_; # print STDERR "isOldTarget: $target older than $source? " if $debug; my $res= 0; $target= getLinkOriginal($target) if ( -l $target ); if (! -r $target) { $res= 1; } ## check existing $target else { my $targtime= -M $target; ## -M is file age in days.hrs before now ## if data is a directory if ( $source =~ s|/([^/]*\*[^/]*)$||) { my $pat= $1; $pat =~ s|\*|.*|; ## print STDERR "dir=$source match=$pat\n" if $debug; if (opendir( DIR, $source)) { my @ffiles= grep( /$pat/, readdir(DIR)); closedir(DIR); foreach $ff (@ffiles) { if ((-M "$source/$ff") < $targtime) { $res= 1; last; } } } else { Carp::carp "Can't test age of '$source'"; } } elsif ( -l $source ) { $source= getLinkOriginal($source); $res= (-M $source) < $targtime; } elsif ( -f $source ) { $res= (-M $source) < $targtime; } else { Carp::carp "Can't test age of '$source'"; $res= 0; } } # print STDERR "isOldTarget: $target older than $source? " if $debug; # print STDERR (($res) ? "yes\n" : "no\n") if ($debug); print STDERR "isOldTarget: '$target' OlderThan '$source'\n" if ($debug && $res); return $res; } =head1 BioMirror::getDocFromUrl($surl,$striphtml) Fetch and return a document from url. Returns doc as $string. HTTP and FTP urls are supported. Requires LWP::UserAgent, HTTP::Request. =cut sub getDocFromUrl { local($surl,$striphtml) = @_; Carp::carp("Bad url '$surl'?") unless ($surl =~ m=^\w+://=); require LWP::UserAgent; my $ua = new LWP::UserAgent; $ua->agent($BioMirror::AgentID); require HTTP::Request; my $req = new HTTP::Request(GET => $surl); print STDERR "getDocFromUrl Request:\n". $req->as_string()."\n" if $debug; my $res = $ua->request($req); if ($res->is_success) { my $text= $res->content(); $text= striphtml($text) if ($striphtml); return $text; } else { my $rq= $req->as_string(); Carp::carp("getDocFromUrl error for '$rq' :\n" . $res->content()); return ''; } } sub striphtml { local($_)= @_; study; s/<[^>]*>//g; s/\ / /g; s/\<//g; s/\&/&/g; return $_; } ##striphtml # require HTML::FormatText; # my $h2t = new HTML::FormatText; # $_= $h2t->format($_); 1; # perly __END__ ## useful part - adapt for loading user configs - .pm files? if( $load_defaults ){ local( $dir, $mp ); foreach $dir ( @INC ){ local( $f ) = "$dir/$defaults_file"; if( -f $f ){ $mp = $f; last; } } } ## test sub packlist { # print "INC = @INC\n"; %packhash= %BioMirror::; ## %main:: ## %packhash= %{$packhash{'SRS'}}; foreach $symname (sort keys %packhash ) { local *sym= $packhash{$symname}; print "sym=$symname : \t"; print "\$$symname is defined :\t" if defined $sym; print "\@$symname is defined :\t" if defined @sym; print "\%$symname is defined :\t" if defined %sym; ## packages show here, end with '::' print "\&$symname is defined :\t" if defined *sym{CODE}; ## print "\*$symname is defined :\t" if defined *sym{GLOB}; print "\n"; } }