#!/usr/bin/perl -w $too_many_cels = 100; ##more than this is too much. $numfiles=@ARGV; $default_output_file = "cel2r.tab"; ##first we figure out what are the cdf file and the cel files. if($numfiles>2){ ##this means arguments were give @cels = @ARGV; $cdf = shift(@cels); $outfile = pop(@cels); } elsif($numfiles==2){ #this means arguments were given, put not enough die "You must specify at least one cdf, one cel file, and one output file.\n"; } else{ ##if no arguments are given we assume all .cel files are of interest. if 1 arg this is the outputfile ## and that there exactly one cdf file exists @cdfs = glob("*.[Cc][Dd][Ff]"); $ncdf = @cdfs; if($ncdf != 1){ die "There are $ncdf cdf files in this directory.\nIf you don't pass arguments there must be exactly one.\n"; } else{ $cdf = $cdfs[0]; } @cels = glob("*.[Cc][Ee][Ll]"); $numcel = @cels; if($numcel<1 || $numcel>$too_many_cels){ die "You specified $numcel cel files.\nYou must specify between 1 and $too_many_cels.\n"; } if($numfiles){ ##if 1, its the output $outfile = $ARGV[0]; } else{ $outfile = $default_output_file; } } ###now we now that $cdf is the cdf file and @cels contains the cel files ##to be put together $info = $cdf; $info =~ s/[Cc][Dd][Ff]/info/; open(INFO,">$info") || die "Sorry, I cound't open a Info file!\n"; open(CDF,$cdf) || die "Sorry, I coulnd't open a CDF file!\n"; $found = 0; ##first get number of columns and rows while(defined($_=) & $found< 2){ chomp; if (/Cols./){ s/Cols=//; $cols = $_; chop($cols); $found +=1; } elsif (/Rows./){ s/Rows=//; $rows = $_; chop($rows); $found +=1; } } ##result will contain all the information of the pm and mm @result = (0 .. ($cols*$rows-1)); $gene_id_counter = 0; while(){ chomp; if (/\[.+\]/) { ##if surounded by [ ] it is a new qc or new block. s/\[|\]//g; ##get rid of [ ] } if (/QC./){ ##this is a control probe chop; ##get rid of anoying ^M $gene_name = $_; $pm_or_mm = -1; while (){ ##now we go get the index for this QC chomp; if (/NumberCells./){ ##this says how many cell, so we can loop $n = $_; $n =~ s/NumberCells=//; last; ##done, get out of looking for NumAtoms; } } while (){ ##now look for CellHeader chomp; if(/CellHeader./){ for($i=0; $i<$n;$i++){ ##we found it, now we extract names $check = 0; while($check<=5){ ##make sure you find something and not a hole! @line = split("\t",); chomp(@line); $check = @line; } $x = $line[0]; $x =~ s/Cell\d+=//; $y = $line[1]; $result[$line[5]]= ##$line[5] has the index join("\t",0,$i,$pm_or_mm,$x,$y); } last; ##we are done, get out of this QC } } } elsif (/Unit\d+_Block\d+./){ ##this is a block with gene info while(){ ##look for name chomp; if(/Name=./){ $gene_id_counter = $gene_id_counter + 1; $gene_name = $_; ##next line is gene name chop($gene_name); ##get rid of anoying ^M $gene_name =~ s/Name=//; #take name= out print INFO "$gene_name\n"; last; } } while() { ##go until you fin NumAtoms chomp; if(/NumAtoms./){ $n = $_; $n =~ s/NumAtoms=//; last; ##done, get out of looking for NumAtoms; } } while (){ ##now look for CellHeader chomp; if(/CellHeader./){ for($i=0; $i<$n;$i++){ ##we found it, now we extract names $check = 0; while($check<=5){ ##make sure you find something and not a hole! @line1 = split("\t",); chomp(@line1); $check = @line1; } $x1 = $line1[0]; $x1 =~ s/Cell\d+=//; $y1 = $line1[1]; $check = 0; while($check<=5){ ##make sure you find something and not a hole! @line2 = split("\t",); chomp(@line2); $check = @line2; } $x2 = $line2[0]; $x2 =~ s/Cell\d+=//; $y2 = $line2[1]; ##this assumes they come in pairs if($y1 < $y2){ $pm_or_mm1 = 1; $pm_or_mm2 = 0; } else{ $pm_or_mm1 = 0; $pm_or_mm2 = 1; } $result[$line1[11]]= ##$line[11] has the index join("\t",$gene_id_counter,$i,$pm_or_mm1,$x1,$y1); $result[$line2[11]]= join("\t",$gene_id_counter,$i,$pm_or_mm2,$x2,$y2); } last; #and we are done } } } } close(CDF); close(INFO); ##check for spots on array with no gene $total = 0; for ($i=0; $i<($cols*$cols); $i++){ if ($result[$i] eq $i){ $x = $i % $cols; $y = ($i - $x)/$cols; $result[$i] = join("\t",-1,1,-1,$x,$y); $total += 1; } } ##NOW WE ARE READY TO START WITH THE CEL FILES $names = ""; foreach $cel (@cels){ open(CEL,$cel) || die "Sorry, I coulnd't open $cel!\n"; $chip_name = $cel; chomp($chip_name); $chip_name =~ s/\.[Cc][Ee][Ll]//; $names = join("\t",$names,$chip_name); while(){ chomp; if (/CellHeader./){ ##here we go; while(){ @line=split("\t",$_); $length = @line; if($length > 2){ $key= $line[0]+$line[1]*$cols; $result[$key] = join("\t",$result[$key],$line[2]); } else{ ##we are done last; } } } } close(CEL); } $names = "$names\n"; open(OUT,">$outfile") || die "Sorry, I coulnd't open $outfile!\n";; $temp = join("\t","id","number","pm.or.mm","x","y"); $temp = "$temp $names"; print OUT $temp; for ($i=0; $i<($cols*$rows); $i++){ print OUT "$result[$i]\n"; } close(OUT);