Commit e7742327 authored by luc.moulinier's avatar luc.moulinier

debuggae PCI mode

parent 2ae56b05
......@@ -77,7 +77,16 @@ set VariablesAuDepart [info globals]
### Launch ORDALIE ######
# Main program
InitLesDefauts
trace add variable ::Defauts(DownloadPDB) write GetTrace
proc GetTrace {a b op} {
global $a
puts "a= $a b= $b"
puts "\n@@@@@@@"
puts "${a}($b)"
catch {puts [info level 0]}
catch {puts [info level -1]}
return
}
set retour [InterpreteLaLigneDeCommande $argv]
if {[ModeI]} {
LoadTkAndPackages
......
#
# ordali_StrucObj.tcl
#
proc SetupPDBObject {} {
......@@ -57,7 +60,6 @@ proc SetupPDBObject {} {
set nom [string range [self] 2 end]
set cok [my _DecortiqueUnPdbObject $Llignes $nom]
if {$cok == 0} {
return 0
}
......@@ -197,9 +199,9 @@ proc SetupPDBObject {} {
$db eval {insert into pdb values(NULL,$nom,$source,$Header)}
set pkp [$db last_insert_rowid]
foreach n $ChnIdn {
set Ct [set TypChn($n)]
puts "$n $Ct"
if {$Ct eq "DNA" || $Ct eq "Water"} {
continue
}
......@@ -364,7 +366,9 @@ proc SetupPDBObject {} {
set Obsolete 0
set lignesPDB [ExtraitLignesAtomesDuPDB $Llignes]
set Header [ExtraitHeadDuPDB $Llignes]
if {$lignesPDB == -1} {return 0}
if {$lignesPDB == -1} {
return 0
}
# Is this entry superseeded ?
set Superseeded [my _superseed $Header]
......@@ -374,7 +378,7 @@ proc SetupPDBObject {} {
return $Obsolete
}
set ChnIdn [list]
set ChnIdn {}
# Traite chaines apres chaines
set Lter [lsearch -regexp -all $lignesPDB {^TER}]
set Lter [linsert $Lter 0 "0"]
......@@ -392,13 +396,17 @@ proc SetupPDBObject {} {
set d $f
continue
}
if {[lsearch -regexp $LLignesChn {^ATOM }] != -1} {set polymer 1} {set polymer 0}
if {[lsearch -regexp $LLignesChn {^ATOM }] != -1} {
set polymer 1
} else {
set polymer 0
}
my _LectureDeChainePdbObject $LLignesChn $polymer
set d $f
}
# Traitement des chaines
set ChnIdn [lunique $ChnIdn]
......
......@@ -3383,12 +3383,14 @@ proc ComputeIdentity {} {
proc CherchePCI {w args} {
global TPCI nsq1 nsq2 pciLbl pl1Lbl pl2Lbl
global TDesPCI TPCI nsq1 nsq2 pciLbl pl1Lbl pl2Lbl
if {$nsq1 eq "" || $nsq2 eq "" || $nsq1 eq "Select" || $nsq2 eq "Select"} {return}
if {$nsq1 eq "" || $nsq2 eq "" || $nsq1 eq "Select" || $nsq2 eq "Select"} {
return
}
set pair1 "$nsq1,$nsq2"
if {! [info exists TDesPCI($pair1)]} {
if {! [info exists TPCI($pair1)]} {
CalculeLesPCI "" "" $nsq1 $nsq2
update
}
......@@ -3396,11 +3398,11 @@ proc CherchePCI {w args} {
set pci [lindex $val 0]
set pl1 [lindex $val 1]
set pl2 [lindex $val 2]
set pci [format "%5.1f" [expr {$pci*100.}]]
set pci [format "%5.1f" [expr {$pci * 100.}]]
set pciLbl "Identity : $pci %"
set pl1Lbl "length 1 : $pl1"
set pl2Lbl "lenght 2 : $pl2"
set pl1Lbl "length seq 1 : $pl1"
set pl2Lbl "lenght seq 2 : $pl2"
return
}
......@@ -8814,7 +8816,6 @@ proc InitSeqsOut {{Etat ""}} {
proc AfficheFenetreOrdali {} {
global Defauts NomFenetreOrdali
#set w [winfo toplevel $NomFenetreOrdali]
set w $NomFenetreOrdali
grid rowconfig $w {0 1 3 4 5} -weight 0
......@@ -8844,6 +8845,7 @@ proc LanceOrdali {{behave slave} {aNomFenetreOrdali ""}} {
# init compulsory variables and arrays
set Threshold [set Defauts(Threshold)]
set ListeTypesDeFeatures {}
set FrmBtnFea {}
set ListeDesFragments {}
set NomSeqSel {}
set BufferSeq {}
......@@ -9440,12 +9442,12 @@ proc CheckInfosSeqs {{Liste ""}} {
return
}
array set Res $out
puts "\nOUTPUT:\n"
foreach k [array names Res "*,currAC"] {
puts "$k $Res($k)"
}
exit
set LaccNew [list]
foreach k [array names Res "*,currAC"] {
......
......@@ -975,7 +975,7 @@ proc RunRPCA {Ld} {
set NbRow [llength $Ld]
Rpipe "rm(list = ls())"
Rpipe "set.seed($::Defs(RSeed))"
Rpipe "set.seed($::Defauts(RSeed))"
Rpipe "data <- c([join [concat {*}$Ld] ,])"
Rpipe "data <- matrix(data,nrow=$NbRow,byrow=T)"
......
This diff is collapsed.
......@@ -90,7 +90,7 @@ proc ChangeMode {{mode ""}} {
ChangeMode $modcou
}
"donnepci" {
if {$modcou ne "ordali" && $modcou ne "feature"} {
if {$modcou ni {"ordali" "feature"}} {
set pmod [string totitle $mode]
FaireLire "Please leave $pmod mode first !"
return
......@@ -379,13 +379,15 @@ proc AfficheBoutonsAnnotation {} {
proc AfficheBoutonsPCI {} {
global LNOrdali FrmBouton OrdEtcDir nsq1 nsq2 pciLbl pl1Lbl pl2Lbl LesPCI ListeTypesDeFeatures ZoneSelect ZonePCI
global LNOrdali FrmBouton OrdEtcDir nsq1 nsq2 pciLbl pl1Lbl pl2Lbl TDesPCI TPCI ListeTypesDeFeatures ZoneSelect ZonePCI
if {[TypeAli] eq "pasdali"} {return}
set LNoms "Select"
foreach n $LNOrdali {
if {$n ne ""} {lappend LNoms $n}
if {$n ne ""} {
lappend LNoms $n
}
}
set LMx [PlusLongEltDe $LNoms]
......@@ -430,16 +432,16 @@ proc AfficheBoutonsPCI {} {
grid columnconfig $wp.falcl 1 -weight 1
button $wp.bdo \
-background yellow \
-text "Compute" \
-background yellow \
-command [list ComputeIdentity]
grid $wp.bdo -row 0 -column 1 -sticky nsw -padx 20 -pady 5
set nsq1 [lindex $LNoms 0]
set nsq2 [lindex $LNoms 0]
set pciLbl "Identity :"
set pl1Lbl "length 1 :"
set pl2Lbl "length 2 :"
set pl1Lbl "length seq 1 :"
set pl2Lbl "length seq 2 :"
frame $wp.fse
label $wp.fse.ls1 \
......@@ -510,6 +512,7 @@ proc AfficheBoutonsPCI {} {
}
AfficheZonesSelectionnees $ZonePCI
StockPosition
array set TPCI [array get TDesPCI]
tkwait window $FrmBouton.fpci
......
......@@ -956,6 +956,77 @@ proc DonneListeCleValeur {options} {
}
proc ConvertAliHelp {} {
puts "\nUsage : convertali <input-file> <output-file> ?-convert <format>?\n"
puts "The output file format may be specified using the file extension or by specifying a format after the -convert keyworkd. Valid extensions and format are :"
puts " - .tfa, .fasta, .fa : fasta format"
puts " - .macsim, .macsims, .xml : macsims format"
puts " - .msf : msf format"
puts " - .ord : ordalie format"
puts ""
exit
}
proc InterpreteArgv0 {ligne} {
set Largs [split $ligne " "]
set prog [file tail $::argv0]
switch $prog {
"ordali" -
"ordalie" {
if {$Largs == {}} {
puts "\ntype 'ordalie help' to access on-line arguments\n"
LoadTclPackages
}
}
"convertali" {
if {$Largs == {}} {
ConvertAliHelp
}
if {[llength $Largs] ni {2 4}} {
puts "Error :\nWrong number of arguments !"
ConvertAliHelp
}
if {[llength $Largs] == 2} {
lassign $Largs in out
set ext [string tolower [string range [file extension $out] 1 end]]
switch $ext {
"fasta" -
"tfa" -
"fa" {set format "tfa"}
"macsims" -
"macsim" -
"xml" {set format xml}
"aln" -
"clustal" {set format aln}
"msf" {set format msf}
default {
puts "\nError :\nUndefined output format !"
ConvertAliHelp
}
}
} else {
# file-in file-out -convert format
if {[lindex $Largs 2] ne "-convert"} {
puts "\nError :\nBad keyword [lindex $Largs 2]"
ConvertAliHelp
}
set format [lindex $Largs 3]
if {$format ni {tfa aln xml msf}} {
puts "Error :\nBad format >$format< !"
ConvertAliHelp
}
}
set ligne "$in -convert $format $out"
}
}
return [split $ligne " "]
}
proc InterpreteLaLigneDeCommande {Ligne} {
global env AExecuter APutser
......@@ -969,18 +1040,16 @@ proc InterpreteLaLigneDeCommande {Ligne} {
Espionne "options : $::argv"
Espionne "---------------------------------------------------"
Espionne ""
# Pour Julie, enleve les = au cas ou ...
set Ligne [string map {= " "} $Ligne]
regsub -all { +} $Ligne " " Ligne
set Ligne [string trim $Ligne]
set Ligne [InterpreteArgv0 $Ligne]
# Pas d'arguments
if {$Ligne eq ""} {
puts "\ntype 'ordalie help' to access on-line arguments\n"
LoadTclPackages
return
}
......@@ -1160,7 +1229,6 @@ proc InterpreteLaLigneDeCommande {Ligne} {
LesDefauts DownloadPDB dont
LesDefauts Mode Batch
LesDefauts Exit 1
LesDefauts DownloadPDB none
set AExecuter "SauveLAlignement $fmt JLeSauveAs"
}
"-project" {
......@@ -4754,3 +4822,15 @@ proc LogProc {name args} {
update idletasks
}
proc plist {liste format} {
set out {}
foreach v $liste {
lappend out [format $format $v]
}
puts [join $out]
return
}
......@@ -1403,140 +1403,6 @@ proc DefineGapThreshold {nseq} {
}
proc WeightsDeLuc {Ln LPCI} {
# circular weights
#
# wi = sum_i!=j wj x d(si,sj)
# w = D x w
# (1 - D)x w = 0
#
# D = distance matrix
# w = vector of weights
#
# solved using eigen decomposition
# weights are coordinates of eigenvector of the
# highest eigenvalue
#
package require math::linearalgebra
set Ns [llength $Ln]
set b [::math::linearalgebra::mkVector $Ns 1.0]
set mt [::math::linearalgebra::mkMatrix $Ns $Ns 0.]
set mf [::math::linearalgebra::mkMatrix $Ns $Ns 0.]
# Matrix (1-D)
# D = 1 - pci so 1 - D = pci
array set T $LPCI
set Md [list]
foreach a $Ln {
set Lv [list]
foreach b $Ln {
lappend Lv [lindex [set T($a,$b)] 0]
}
lappend Md $Lv
}
# eigen decomposition
set Msg ""
catch {set R [::math::linearalgebra::eigenvectorsSVD $Md]} Msg
if {$Msg ne ""} {
set Ns [llength $Ln]
set v [expr {1.0/$Ns}]
set Lw [lrepeat $Ns $v]
FaireLire "Error while computing Eigen decomposition !\nAll weights are set to $v"
puts "Error while computing Eigen decomposition !\nAll weights are set to $v"
return $Lw
}
lassign $R V1 V2
# vector with highest eigenvalue ranks 0
set Lv [lindex $V1 0]
# puts weights so that sum wi = 1.
set b [lsort -real $Lv]
set min [lindex $b 0]
set max [lindex $b end]
set eps 0.01
set s 0.0
foreach v $Lv {
set x [expr {($v - $min + $eps)/($max - $min + $eps)}]
lappend Lx $x
set s [expr {$s + $x}]
}
set sum 0.0
set Lw [list]
foreach x $Lx {
set n [expr {$x / $s}]
lappend Lw $n
set sum [expr {$sum + $n}]
}
return $Lw
}
proc WeightDeHenikoff {ListePil} {
# Henikoff and Henikoff
#
# wi= 1/L sum 1/(Kx.Nx)
# L = alignment length
# Kx = number of amino acid types at position x
# Nx = number of amino acid at pos x that
# are the same as seq i
# inits
set NbSeq [string length [lindex $ListePil 0]]
for {set i 0} {$i < $ns} {incr i} {
set w($i) 0.0
}
# loop over all positions
foreach p $ListePil {
# number of elements
set lp [split $p ""]
set kx [llength [lsort -unique $lp]]
set i -1
foreach a $lp {
incr i
if {$a ne "."} {
set na [expr {$NbSeq - [string length [string map [list $a ""] $p]]}]
set w($i) [expr {[set w($i)] + 1./($na*$kx)}]
}
}
}
set Lw [list]
set len [expr {double([llength $Lpil])}]
for {set i 0} {$i < $NbSeq} {incr i} {
lappend Lw [expr {[set w($i)]/$len}]
}
return $Lw
}
proc WeightDeVingron {Ln LPCI} {
# Argos and Vingron
# wi = 1/(N-1) sum_i!=j d(si,sj)
# moyenne des distance de seq j aux autres
array set T $LPCI
set Ns [llength $Ln]
foreach a $Ln {
set sum 0.0
foreach b $Ln {
if {$a eq $b} {continue}
# Attention ! distance, pas PCI
set sum [expr {$sum + (1. - [lindex [set T($a,$b)] 0])}]
}
lappend Lw [expr {$sum/($Ns-1)}]
}
return $Lw
}
##########################################
#
# SCORING FUNCTIONS
......
......@@ -1799,24 +1799,21 @@ proc TraiteAABizarres {{force 0}} {
proc AssigneLePoidsDesSeqs {} {
global LePoidsDesSeqs
global IdXml
global LesPoidsDesSequences
global LNOrdali
global FichierMSF
global FichierXML
global LePoidsDesSeqs IdXml LesPoidsDesSequences FichierXML FichierMSF LNOrdali
set LePoidsDesSeqs {}
if {[info exists FichierMSF]} {
LitLesPoidsDuMSF $FichierMSF
foreach n $LNOrdali {
if {$n eq ""} {
continue
}
set p [set LesPoidsDesSequences($n)]
lappend LePoidsDesSeqs $p
}
}
if {[info exists FichierXML]} {
ExtraitLesPoids $IdXml LePoidsDesSeqs
ExtraitLesPoidsDuXML $IdXml LePoidsDesSeqs
}
return
......@@ -2914,5 +2911,173 @@ proc tbug {} {
}
proc CalculeLesPoidsDesSeqs_mean {{normalise 1}} {
# Argos and Vingron
# wi = 1/(N-1) sum_i!=j d(si,sj)
# moyenne des distance de seq j aux autres
global LPCI TDesPCI LNOrdali
set Lwgt {}
set nSeqs 0
foreach n $LNOrdali {
if {$n eq ""} {
continue
}
incr nseqs
set sum 0.0
foreach n1 $LNOrdali {
if {$n1 eq "" || $n eq $n1} {
continue
}
set sum [expr {$sum + (1. - [lindex $TDesPCI($n,$n1) 0])}]
}
lappend Lsum $sum
}
set Lwgt [lmap x $Lsum {expr {$x / ($nSeqs - 1)}}]
if {$normalise} {
set sumTot [::tcl::mathop::+ {*}$Lwgt]
set Lwgt [lmap x $Lwgt {expr {$x / $sumTot}}]
}
return $Lwgt
}
proc CalculeLesPoidsDesSeqs_eigen {} {
# Valdar et all , scoring residue conservation
# circular weights
#
# wi = sum_i!=j wj x d(si,sj)
# w = D x w
# (1 - D)x w = 0
#
# D = distance matrix
# w = vector of weights
#
# solved using eigen decomposition
# weights are coordinates of eigenvector of the
# highest eigenvalue
#
global LNOrdali TDesPCI
package require math::linearalgebra
set Ln {}
foreach n $LNOrdali {
if {$n ne ""} {
lappend Ln $n
}
}
set Nseq [llength $Ln]
# Matrix (1-D)
# D = 1 - pci so 1 - D = pci
set Md {}
foreach a $Ln {
set Lv {}
foreach b $Ln {
lappend Lv [expr {1.0 - [lindex [set TDesPCI($a,$b)] 0]}]
}
lappend Md $Lv
}
set usv [::math::linearalgebra::determineSVD $Md]
set eigenVectors [lindex $usv 2]
set singular [lindex $usv 1]
puts "singular"
plist $singular %6.3f
puts [::tcl::mathop::+ {*}$singular]
set factor [expr {1.0 / ([llength $Md] - 1)}]
set factor [expr {1.0 / ([llength $Md])}]
set factor 1.0
set eigenValues {}
set eigenValues [lmap x $singular {expr {$x*$x * $factor}}]
puts "eVal"
plist $eigenValues "%6.3f"
puts [::tcl::mathop::+ {*}$eigenValues]
set min [::tcl::mathfunc::min {*}$eigenValues]
set eps 0.001
set tmp [lmap x $eigenValues {expr {$x - $min + $eps}}]
set sum [::tcl::mathop::+ {*}$tmp]
plist [lmap x $tmp {expr {$x / $sum}}] %6.3f
puts "\neVectors"
lmap x $eigenVectors {plist $x %6.3f}
# eigen decomposition
set Msg ""
catch {set R [::math::linearalgebra::eigenvectorsSVD $Md]} Msg
puts "\nindex 0 de R"
lmap i [lindex $R 0] {plist $i %6.3f}
puts "\nindex 1"
plist [lindex $R 1] "%6.3f"
puts [::tcl::mathop::+ {*}[lindex $R 1]]
set min [::tcl::mathfunc::min {*}[lindex $R 1]]
set tmp [lmap x [lindex $R 1] {expr {$x - $min + $eps}}]
set sum [::tcl::mathop::+ {*}$tmp]
plist [lmap x $tmp {expr {$x / $sum}}] %6.3f
exit
lassign $R V1 V2
# vector with highest eigenvalue ranks 0
set Lv [lindex $V1 0]
# puts weights so that sum wi = 1.
set b [lsort -real $Lv]
set min [lindex $b 0]
set eps 0.01
set Lx [lmap x $Lv {expr {$x-$min+$eps}}]
set s [::tcl::mathop::+ {*}$Lx]
set Lw [lmap x $Lx {expr {$x/$s}}]
return $Lw
}
proc CalculeLesPoidsDesSeqs_henikoff {ListePil} {
# Henikoff and Henikoff
#
# wi= 1/L sum 1/(Kx.Nx)
# L = alignment length
# Kx = number of amino acid types at position x
# Nx = number of amino acid at pos x that
# are the same as seq i
# inits
set NbSeq [string length [lindex $ListePil 0]]
for {set i 0} {$i < $NbSeq} {incr i} {
set w($i) 0.0
}
# loop over all positions
foreach p $ListePil {
# number of elements
set lp [split $p ""]
set kx [llength [lsort -unique $lp]]
set i -1
foreach a $lp {
incr i
if {$a ne "."} {
set na [expr {$NbSeq - [string length [string map [list $a ""] $p]]}]
set w($i) [expr {[set w($i)] + 1./($na*$kx)}]
}
}
}
set Lw [list]
set len [expr {double([llength $Lpil])}]
for {set i 0} {$i < $NbSeq} {incr i} {
lappend Lw [expr {[set w($i)]/$len}]
}
return $Lw
}
......@@ -2440,7 +2440,7 @@ proc PDBQuery {args} {
if {([llength $args] % 2)} {
return {}
}
package require http
set pdbXML "<?xml version=\"1.0\" encoding=\"UTF-8\"?>
<orgPdbCompositeQuery version=\"1.0\">
"
......
......@@ -55,6 +55,7 @@ proc LesDefauts {args} {
} else {
set Defauts(UseSpeedCode) "tcl"
}
set Defauts(RSeed) 101112
if {$args ne "OldDef"} {
......
#
# ordali_thr.tcl
#
proc MyPuts {args} {
global MyLog
......
......@@ -450,16 +450,17 @@ proc ExtraitLesTaxId {id aLTax} {
proc ExtraitLesPoids {id aLWgt} {
proc ExtraitLesPoidsDuXML {id aLWgt} {
global LNOrdali
upvar $aLWgt LnonesPoidsDesSeqs
set LesPoidsDesSeqs {}
set RId [$id documentElement]
set LesPoidsDesSeqs {}
foreach n $LNOrdali {
if {$n eq ""} {
continue
}
set qds "//*\[string(seq-name)='$n'\]/descendant::*\[name()='weight'\]"
set nds [$RId selectNodes "$qds"]
if {$nds != ""} {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment