|
|
(6 intermediate revisions by the same user not shown) |
Line 1: |
Line 1: |
− | ==Perl and [[MySQL]]==
| |
− | This section lists a handful of assorted examples. They are meant to give you an idea of what you can do by combining Perl, MySQL, and the [[:Category:Linux Command Line Tools|CLI]].
| |
| | | |
− | ''Note: These examples were taken from my course in Comparative Microbial Genomics at CBS (in Denmark).''
| |
− |
| |
− | <pre>
| |
− | mysql -B -e "update cmp_genomics.features set note = '' where user = USER() and (note not like 'tcs%' or note is null)"
| |
− | foreach accession (AE017042 AE016879 AL111168 AL645882 AP008232 AP009048 BA000021 CP000034)
| |
− | foreach type (ecf s54 s70)
| |
− | cat source/$accession.proteins.$type.sigmas.hmmsearch | \
| |
− | perl -ne '# Turn every positive-scoring hit line into one UPDATE statement that is piped to mysql.\
| |
− | # Hit lines start with CDS_START-STOP_DIRSTRAND followed by two numeric columns (read as score and e-value).\
| |
− | next unless /^CDS_(\d+)\-(\d+)_DIR([\-\+]+)\s+([0-9\-\.e]+)\s+([0-9\-\.e]+)\s+/;\
| |
− | my ($start,$stop,$dir,$score,$evalue) = ($1,$2,$3,$4,$5);\
| |
− | next unless $score > 0;\
| |
− | # The shell loop variables are spliced in where the single quotes are briefly closed.\
| |
− | print "update cmp_genomics.features set note = \"Sigma Factor '$type'\" \
| |
− | where start=$start and stop = $stop and user = user() and accession = \"'$accession'\";\n";'\
| |
− | | mysql
| |
− | end
| |
− | end
| |
− | </pre>
| |
− | <pre>
| |
− | mysql -B -e "update cmp_genomics.features set note = '' where user = USER() and (note not like 'sigma%' or note is null)"
| |
− | foreach accession (AE017042 AE016879 AL111168 AL645882 AP008232 AP009048 BA000021 CP000034)
| |
− | foreach type (RRreciever HisKA_1 HisKA_2 HisKA_3 HWE_HK)
| |
− | cat source/$accession.$type.TCS.hmmsearch | \
| |
− | perl -ne '# Turn every positive-scoring hit line into one UPDATE statement that is piped to mysql.\
| |
− | # Hit lines start with CDS_START-STOP_DIRSTRAND followed by two numeric columns (read as score and e-value).\
| |
− | next unless /^CDS_(\d+)\-(\d+)_DIR([\-\+]+)\s+([0-9\-\.e]+)\s+([0-9\-\.e]+)\s+/;\
| |
− | my ($start,$stop,$dir,$score,$evalue) = ($1,$2,$3,$4,$5);\
| |
− | next unless $score > 0; \
| |
− | # The shell loop variables are spliced in where the single quotes are briefly closed.\
| |
− | print "update cmp_genomics.features set note = \"TCS '$type'\" \
| |
− | where start=$start and stop = $stop and user = user() and accession = \"'$accession'\";\n";'\
| |
− | | mysql -B
| |
− | end
| |
− | end
| |
− | </pre>
| |
− | <pre>
| |
− | rm -f source/all.16s.fsa
| |
− | foreach accession (AE017042 AE016879 AL111168 AL645882 AP008232 AP009048 BA000021 CP000034)
| |
− | mysql -B -N -D cmp_genomics -e "select substr(seq,start,stop-start+1) as sequence,\
| |
− | replace(substring_index(genomes.organism,' ',4),' ','_') as name,dir from genomes,features where \
| |
− | genomes.accession = features.accession and genomes.user = features.user and \
| |
− | features.user = user() and featuretype='rRNA' and features.accession = '$accession'" | \
| |
− | perl -ane '# Input columns, in query order: sequence, organism label, strand.\
| |
− | my ($seq, $label, $strand) = @F;\
| |
− | # Rows on the minus strand are reverse-complemented before printing.\
| |
− | if ($strand eq "-") { $seq = reverse $seq; $seq =~ tr/ATGC/TACG/; }\
| |
− | print ">$label\n";\
| |
− | # Write the sequence 60 characters per line.\
| |
− | my $pos = 0;\
| |
− | while ($pos < length $seq) { print substr($seq, $pos, 60), "\n"; $pos += 60; }\
| |
− | # Only the first row returned for each accession is written.\
| |
− | last;' \
| |
− | >> source/all.16s.fsa
| |
− | end
| |
− | less source/all.16s.fsa
| |
− | </pre>
| |
− | <pre>
| |
− | cat ~/Ex3/source/CP000034.codon-usage.aa.list \
| |
− | | perl -ane 'print "\t", $F[1] if $F[1] =~ /^[A-Z]+/;' \
| |
− | > aa.dat
| |
− | mysql -B -N -e 'select accession,organism from cmp_genomics.genomes where user=USER()' \
| |
− | > organism.names
| |
− | foreach accession (AE017042 AE016879 AL111168 AL645882 AP008232 AP009048 BA000021 CP000034)
| |
− | cat ~/Ex3/source/$accession.codon-usage.aa.list \
| |
− | | perl -ane '# Load the accession-to-organism table that was written to organism.names.\
| |
− | BEGIN { for my $row (`cat organism.names`) \
| |
− | { chomp $row; my ($acc, $organism) = split /\t+/, $row; $NAMES{$acc} = $organism; } } \
| |
− | # Remember the fourth column of every row whose second column starts with a capital letter.\
| |
− | if ($F[1] =~ /^[A-Z]+/) { $FREQ{$F[1]} = $F[3]; push @AA, $F[1]; } \
| |
− | # Start a new output row: organism name, then one tab-separated value per remembered entry.\
| |
− | END { print "\n$NAMES{'$accession'}"; print "\t", $FREQ{$_} for @AA; }' \
| |
− | >> aa.dat
| |
− | end
| |
− | </pre>
| |
− | <pre>
| |
− | mysql -e "delete from localization where user=USER() and type='signalp'" cmp_genomics
| |
− | foreach accession (AE017042 AE016879 AL111168 AL645882 AP008232 AP009048 BA000021 CP000034)
| |
− | perl -ne '# Emit one INSERT per line whose third and later verdict columns are all Y (the first two may be Y or N).\
| |
− | # Every score column is matched as a decimal number with a literal dot.\
| |
− | # NOTE(review): the position column after Q is still matched as a single 0 or 1, and the state column as Q - confirm against real signalp output.\
| |
− | if(/^CDS_(\d+)-(\d+)_DIR([+-])\s+0\.\d+\s+\d+\s+[YN]\s+0\.\d+\s+\d+\s+[YN]\s+[10]\.\d+\s+\d+\s+Y\s+[10]\.\d+\s+Y\s+[10]\.\d+\s+Y\s+CDS_\d+-\d+_DIR[+-]\s+Q\s+[10]\.\d+\s+[10]\s+Y\s+[10]\.\d+\s+Y/) \
| |
− | {print "insert into localization (user, protid, type) \
| |
− | select USER() as user, ft.id as protid, \"signalp\" as type \
| |
− | from features as ft \
| |
− | where ft.user=USER() and \
| |
− | ft.accession = \"'$accession'\" and \
| |
− | ft.start = $1 and ft.stop = $2 and \
| |
− | ft.dir = \"$3\";\n";}' $accession.signalp \
| |
− | | mysql -D cmp_genomics
| |
− | end
| |
− | </pre>
| |
− | <pre>
| |
− | perl -e '# Merge the tab-separated files col1..col11 into one table, one row per distinct first-column key.\
| |
− | my %cell;\
| |
− | for my $col (1..11) {\
| |
− |     my $path = "col$col";\
| |
− |     # An unreadable file is reported on stderr; its column is then zero-filled below like any other gap.\
| |
− |     if (open(my $in, "<", $path)) {\
| |
− |         while (my $line = <$in>) { chomp $line; my ($key, $value) = split /\t+/, $line; $cell{$key}{$col} = $value; }\
| |
− |         close $in;\
| |
− |     } else {\
| |
− |         warn "cannot open $path\n";\
| |
− |     }\
| |
− | }\
| |
− | # Rows are written in reverse string order of their key; the key itself is not printed.\
| |
− | # Empty cells in columns 2..11 become 0, column 1 is left as read.\
| |
− | for my $key (reverse sort keys %cell) {\
| |
− |     for my $col (1..11) {\
| |
− |         $cell{$key}{$col} = 0 if $cell{$key}{$col} eq "" and $col > 1;\
| |
− |         print $cell{$key}{$col};\
| |
− |         print "\t" if $col < 11;\
| |
− |     }\
| |
− |     print "\n";\
| |
− | }' \
| |
− | > final.dat
| |
− |
| |
− | R
| |
− | # Draw a heat map of final.dat into the PostScript file final.ps.
| |
− | postscript ('final.ps')
| |
− | # NOTE(review): heatmap.R presumably provides new_heatmap (and possibly maPalette) - confirm.
| |
− | source ( "/home/people/kiil/Exercises/files/heatmap.R" )
| |
− | # The first column of final.dat supplies the row names and the first line the column names.
| |
− | data <- as.matrix ( read.table ( 'final.dat' , row.names=1, header= TRUE,sep = "\t" ) )
| |
− | # 1024-step palette running from white (low) through light blue (mid) to blue (high).
| |
− | pal <- maPalette(low = rgb(10,10,10,maxColorValue = 10), high = rgb(0,0,10,maxColorValue = 10), mid= rgb(5,5,10,maxColorValue = 10), k = 1024)
| |
− | par(oma=c(0, 0, 0, 0.5))
| |
− | # Centre and scale every column before plotting.
| |
− | data <- scale(data)
| |
− | new_heatmap ( data , cexCol=1,cexRow=1,colors = pal)
| |
− | # Close the PostScript device so the file is flushed, then leave R without saving the workspace.
| |
− | dev.off()
| |
− | quit(save="no")
| |
− | </pre>
| |