Commit 4873623f authored by luc.moulinier's avatar luc.moulinier

bugs, idmapping

parent f43d8474
......@@ -31,9 +31,13 @@ proc SetupPDBObject {} {
# Arguments
# - nom : file or text or lines or id
# - init : 1 -> load from db, 0 load fram <nom>
if {$init eq ""} {set init 0}
if {$init eq ""} {
set init 0
}
# - InDb : 1 store in db, 0 don't
if {$InDb eq ""} {set InDb 1}
if {$InDb eq ""} {
set InDb 1
}
if {$init} {
my UpdateFromDB
......@@ -48,7 +52,11 @@ proc SetupPDBObject {} {
set src "list"
} elseif {[file extension $nom] eq ""} {
#set Llignes [RecupereUnFichierSurWeb [string toupper $nom]]
set textePDB [TextePDB [string toupper $nom] ALL "" "OnTheWeb"]
package require TclCurl
set url "https://files.rcsb.org/download/XXXX.pdb"
regsub -all "XXXX" $url $nom url
curl::transfer -url $url -bodyvar textePDB
#set textePDB [TextePDB [string toupper $nom] ALL "" "OnTheWeb"]
set Llignes [split $textePDB \n]
if {$Llignes eq "ERREUR" || $Llignes eq ""} {
return 0
......
......@@ -664,6 +664,23 @@ proc MyProc {name argList body} {
}
# DoMacPack --
#   Rebuilds the "macpack" starkit under /home/moumou with tclkit + sdx:
#   wipes earlier macpack* artefacts, wraps macpack.tcl, unwraps the kit,
#   copies the macosx-x86_64 build tree into the vfs and re-wraps it against
#   the "mackit" runtime. Terminates the interpreter when finished.
proc DoMacPack {} {
    # Every sdx step goes through the same tclkit/sdx.kit pair.
    set sdx [list ./tclkit sdx.kit]

    cd /home/moumou

    # Best effort: glob raises when nothing matches, which is fine here.
    catch {
        set leftovers [glob macpack*]
        file delete -force {*}$leftovers
    }

    file copy ./ordali/src/macpack.tcl .
    foreach step {{qwrap macpack.tcl} {unwrap macpack.kit}} {
        exec {*}$sdx {*}$step
    }

    # Drop the platform build into the unwrapped vfs tree.
    cd macpack.vfs
    file copy -force ~/ordali/Build/macosx-x86_64 .
    cd ..

    exec {*}$sdx wrap macpack -runtime mackit
    exit
}
# http://kitcreator.rkeene.org/kitcreator
......
......@@ -13477,3 +13477,104 @@ proc cherie {} {
return
}
# tseb --
#   Scratch test: pushes the numbers found in mat.dst to R through Rtcl,
#   reshapes them into a 5x5 matrix (row-major) and computes its eigen
#   decomposition on the R side (left in the R variable "res").
#   NOTE(review): LesLignesDuFichier is assumed to return the lines of the
#   file as a list - confirm against its definition.
# Results:
#   Returns nothing.
proc tseb {} {
    package require Rtcl

    set Ll [LesLignesDuFichier mat.dst]
    # Flatten the lines into a single list of values.
    set vals [concat {*}$Ll]

    Rpipe "x <- c([join $vals ,])"
    Rpipe "mat <- matrix(x,byrow=T,nrow=5, ncol=5)"
    # eigen() needs the square matrix, not the flat vector x.
    Rpipe "res <- eigen(mat)"
    return
}
# MakeTransparentImage --
#   Loads the image file <img>, makes every pure white pixel (255 255 255)
#   transparent and writes the result next to the source as <rootname>.png.
# Arguments:
#   img   path of the image file to process
# Results:
#   Returns nothing. The temporary photo image is always released, even
#   when reading, processing or writing fails (the error is re-raised).
proc MakeTransparentImage {img} {
    LoadTkAndPackages

    set id  [image create photo -file $img]
    set col {255 255 255}

    set code [catch {
        set wdt [image width  $id]
        set hgt [image height $id]
        for {set x 0} {$x < $wdt} {incr x} {
            for {set y 0} {$y < $hgt} {incr y} {
                # Pixel values are lists: compare them as strings.
                if {[$id get $x $y] eq $col} {
                    $id transparency set $x $y 1
                }
            }
        }
        $id write [file rootname $img].png
    } msg opts]

    # Photo images are not garbage collected: free it explicitly.
    image delete $id

    if {$code} {
        return -options $opts $msg
    }
    return
}
# thom --
#   Ad-hoc diagnostic script for the IDMapping SQL layer. It runs a series
#   of lookups for the gene name "zufsp" (optionally restricted to taxid
#   9606), prints the intermediate results with puts, then issues one raw
#   SQL join directly on the database handle and terminates the
#   interpreter with exit.
#   NOTE(review): the return shape of IDMapping_sql is not visible here;
#   the code keeps [lindex ... 0] of it, so it is presumably a list whose
#   first element is the list of hits - confirm against its definition.
proc thom {} {
global db
# Step 1: gene name -> accessions, then accessions -> taxids.
set La [IDMapping_sql GENE_NAME ACC zufsp]
puts "La [llength $La]"
set La [lindex $La 0]
puts "La [llength $La]"
set Lh [IDMapping_sql ACC NCBI_TAXID $La]
puts "Lh [llength $Lh]"
# Report the positions whose taxid is exactly 9606.
# NOTE(review): the search runs on $Lh as returned, without the
# [lindex ... 0] applied to the other results - confirm this is intended.
set Lix [lsearch -all -exact $Lh "9606"]
foreach ix $Lix {
puts "human : ix=$ix [lindex $La $ix]"
}
# Step 2: same question asked in a single call (gene name -> taxid).
set Ld [IDMapping_sql GENE_NAME NCBI_TAXID zufsp]
puts "\nLd [llength $Ld]"
set Ld [lindex $Ld 0]
set Lix [lsearch -all -exact $Ld "9606"]
foreach ix $Lix {
puts "direct ix=$ix [lindex $Ld $ix]"
}
# Step 3: pass 9606 as an extra trailing argument to IDMapping_sql.
set La [IDMapping_sql GENE_NAME ACC zufsp 9606]
puts "La $La"
set Lf [IDMapping_sql GENE_NAME GENE_SYNONYM zufsp 9606]
puts "\nLf $Lf\n"
cd /commun/bics/IDMapping/production/SQLDBS
# Step 4: two-step variant (gene name -> acc with taxid, then acc -> synonym).
puts "\nDeux etapes"
set Lh [IDMapping_sql GENE_NAME ACC zufsp 9606]
set Lh [lindex $Lh 0]
puts "[llength $Lh] $Lh"
set Lh [IDMapping_sql ACC GENE_SYNONYM $Lh]
puts "$Lh"
# The variables below are only referenced by the commented-out queries.
set taxid 9606
set Lid zufsp
set Lacc zufsp
set BankSource GENE_NAME
set BankTarget GENE_SYNONYM
# Step 5: open GENE_NAME, attach the two other banks and run the join by hand.
set db [DbIDMappingSQL GENE_NAME open]
AttachIDMappingBank GENE_SYNONYM
AttachIDMappingBank NCBI_TAXID
#set cmd "WITH db1(id) AS (VALUES ('[join $Lid '),(']')) select db1.id, db2.id from $BankSource as db1, $BankTarget as db2, NCBI_TAXID as db3 where db1.id in ('[join $Lacc ',']') and db3.id = '$taxid' and db1.acc = db2.acc and db2.acc = db3.acc"
#set cmd "WITH tmp(acc) AS (VALUES ('[join $Lacc '),(']')) SELECT tax.acc,tax.id FROM tmp AS dbin, NCBI_TAXID AS tax WHERE tax.acc = dbin.acc and tax.id = '$taxid'"
set cmd "select db1.id, db2.id from GENE_NAME as db1, GENE_SYNONYM as db2, NCBI_TAXID as db3 where db1.id in ('ZUFSP') and db1.acc = db2.acc and db2.acc = db3.acc and db3.id = '9606'"
#set cmd "select db1.id, db2.id from GENE_NAME as db1, GENE_SYNONYM as db2, NCBI_TAXID as db3 where db1.id='ZUFSP' and db1.acc = db2.acc and db2.acc = db3.acc and db3.id = '9606'"
puts "cmd= $cmd"
set Lres [$db eval $cmd]
puts "Lres $Lres"
# The database handle is not closed explicitly; the interpreter exits here.
exit
}
......@@ -3125,29 +3125,42 @@ proc Reigen {} {
foreach a $Ln {
set tmp {}
foreach b $Ln {
lappend tmp [lindex $TDesPCI($a,$b) 0]
lappend tmp [expr {1.0 - [lindex $TDesPCI($a,$b) 0]}]
}
lappend m $tmp
}
lmap x $m {plist $x %6.4f}
puts ""
package require Rtcl
Rpipe "x <- c([join [concat {*}$m] ,])"
Rpipe "x <- matrix(x, byrow = T, ncol=5, nrow=5)"
Rpipe "res <- eigen(x, symmetric=TRUE)"
#Rpipe "res <- eigen(x, symmetric=TRUE)"
Rpipe "res <- eigen(x)"
set val [::rtcl::getvalue "res\$values"]
set vec [lrange [::rtcl::getvalue "res\$vectors"] 0 4]
puts "val :"
plist $val %6.3f
set sum [::tcl::mathop::+ {*}$val]
puts "sum= $sum"
puts "sum = $sum"
puts "vec :"
plist $vec %6.3f
set k 0
set Lvec [::rtcl::getvalue "res\$vectors"]
while {$k < 25} {
set vec [lrange $Lvec $k $k+4]
incr k 5
plist $vec %6.3f
}
puts ""
set vec [lrange $Lvec 0 4]
set sum [+ {*}$vec]
set new [lmap x $vec {expr {$x/$sum}}]
plist $new %6.3f
return $vec
}
......
#
##
# ordali_service.tcl
#
# jtd --
#   Direct lookup inside a single bank: no join, no taxid filter, no
#   dashed-accession handling.
# Arguments:
#   Lacc        list of values searched in column <source>
#   bank        table (bank) that is queried
#   BankSource  unused here, kept for a uniform helper signature
#   BankTarget  unused here, kept for a uniform helper signature
#   source      name of the column matched against Lacc
#   target      name of the column returned alongside <source>
# Results:
#   The flat list produced by "$::db eval": source1 target1 source2 ...
proc jtd {Lacc bank BankSource BankTarget source target} {
    set cmd "select $source,$target from $bank where $source in ('[join $Lacc ',']')"
    # Return the rows, not the SQL text.
    return [$::db eval $cmd]
}
# jtD --
#   Lookup inside a single bank when one side uses "dashed" accessions
#   (converted through ACC2ACC); no join, no taxid filter.
# Arguments:
#   Lacc        list of input identifiers
#   bank        table (bank) that is queried
#   BankSource  source bank name
#   BankTarget  target bank name; when equal to <bank> the query goes from
#               accession to id, otherwise from id to accession
#   source      unused here, kept for a uniform helper signature
#   target      unused here, kept for a uniform helper signature
# Results:
#   Flat list of pairs "input output" with dashed accessions mapped back
#   to plain accessions.
proc jtD {Lacc bank BankSource BankTarget source target} {
    AttachIDMappingBank ACC2ACC

    set Lres {}
    if {$bank eq $BankTarget} {
        # i.e. from ACC to REFSEQ
        # Two distinct arrays, so the acc->dash and dash->acc maps filled by
        # ConvertAcc2DashAcc cannot overwrite each other.
        set Lnew [ConvertAcc2DashAcc $Lacc a2d d2a]
        set cmd "select acc,id from $bank where acc in ('[join $Lnew ',']')"
        set Lout [$::db eval $cmd]
        # convert back to 'normal' acc: drop everything from the last dash on
        foreach {in out} $Lout {
            if {[set i [string last "-" $in]] > 0} {
                set in [string range $in 0 $i-1]
            }
            lappend Lres $in $out
        }
    } else {
        # i.e. from SEQRES to ACC
        set cmd "select id,acc from $bank where id in ('[join $Lacc ',']')"
        set Lout [$::db eval $cmd]
        set Lad {}
        foreach {id accD} $Lout {
            lappend Lad $accD
        }
        set Lad [ConvertDashAcc2Acc $Lad]
        # Walk the rows and the converted accessions side by side.
        foreach {in out} $Lout acc $Lad {
            lappend Lres $in $acc
        }
    }
    return $Lres
}
# jTd --
#   Lookup inside a single bank, restricted to one taxid through the
#   NCBI_TAXID bank; no dashed-accession handling.
# Arguments:
#   Lacc        list of values searched in column <source>
#   bank        table (bank) that is queried
#   BankSource  unused here, kept for a uniform helper signature
#   BankTarget  unused here, kept for a uniform helper signature
#   source      name of the column matched against Lacc
#   target      name of the column returned alongside <source>
#   taxid       NCBI taxonomy id the accessions must belong to
# Results:
#   The flat list produced by "$::db eval": source1 target1 source2 ...
proc jTd {Lacc bank BankSource BankTarget source target taxid} {
    AttachIDMappingBank NCBI_TAXID
    set cmd "select db1.$source,db1.$target from $bank as db1, NCBI_TAXID as db2 where db1.$source in ('[join $Lacc ',']') and db2.id='$taxid' and db1.acc = db2.acc"
    # Return the rows, not the SQL text.
    return [$::db eval $cmd]
}
# jTD --
#   Lookup inside a single bank with both a taxid restriction and
#   dashed-accession conversion (through ACC2ACC).
# Arguments:
#   Lacc        list of input identifiers
#   bank        table (bank) that is queried
#   BankSource  source bank name
#   BankTarget  target bank name; when equal to <bank> the query goes from
#               accession to id, otherwise from id to accession
#   source      forwarded to jtD
#   target      forwarded to jtD
#   taxid       NCBI taxonomy id the accessions must belong to
# Results:
#   Flat list of pairs "input output", limited to accessions whose
#   NCBI_TAXID entry equals <taxid>.
proc jTD {Lacc bank BankSource BankTarget source target taxid} {
    AttachIDMappingBank NCBI_TAXID
    AttachIDMappingBank ACC2ACC

    set Lres {}
    if {$bank eq $BankTarget} {
        # i.e. from ACC to REFSEQ
        # ConvertAcc2DashAcc needs the names of its two mapping arrays.
        set Lnew [ConvertAcc2DashAcc $Lacc a2d d2a]
        # jtD attaches ACC2ACC itself, so release it first.
        # NOTE(review): presumably attaching twice would fail - confirm.
        $::db eval {detach database ACC2ACC}
        set Lout [jtD $Lnew $bank $BankSource $BankTarget $source $target]

        # Remember which plain accessions carry the requested taxid.
        set Ltax [$::db eval "select acc,id from NCBI_TAXID where acc in ('[join $Lacc ',']')"]
        array set tax {}
        foreach {ac id} $Ltax {
            if {$id == $taxid} {
                set tax($ac) $id
            }
        }
        foreach {in out} $Lout {
            # Strip a dash suffix, if any, before the taxid check.
            if {[set i [string last "-" $in]] > 0} {
                set in [string range $in 0 $i-1]
            }
            if {[info exists tax($in)]} {
                lappend Lres $in $out
            }
        }
    } else {
        # i.e. from RESSEQ to ACC
        #
        # 1, grab raw results from the bank
        set cmd "select id,acc from $bank where id in ('[join $Lacc ',']')"
        set Lout [$::db eval $cmd]
        # 2, go from dashed accessions to plain accessions
        set Ldash {}
        foreach {id acc} $Lout {
            lappend Ldash $acc
        }
        set Lasd [ConvertDashAcc2Acc $Ldash]
        # 3, keep only the accessions with the requested taxid
        set Ltax [$::db eval "select acc,id from NCBI_TAXID where acc in ('[join $Lasd ',']')"]
        array set tax {}
        foreach {acc tx} $Ltax {
            if {$tx == $taxid} {
                set tax($acc) $tx
            }
        }
        foreach {in out} $Lout acc $Lasd {
            if {[info exists tax($acc)]} {
                lappend Lres $in $acc
            }
        }
    }
    return $Lres
}
......@@ -41,8 +117,9 @@ proc Jtd {Lacc BankSource BankTarget source target} {
AttachIDMappingBank $BankTarget
set cmd "select db1.id, db2.id from $BankSource as db1, $BankTarget as db2 where db1.id in ('[join $Lacc ',']') and db1.acc = db2.acc"
set Lout [$::db eval $cmd]
return $cmd
return $Lout
}
......@@ -50,19 +127,66 @@ proc JtD {Lacc BankSource BankTarget source target} {
AttachIDMappingBank $BankTarget
AttachIDMappingBank ACC2ACC
set cmd "select db1.id,db3.id from $BankSource as db1, ACC2ACC as db2, $BankTarget as db3 where db1.id in ('[join $Lacc ',']') and db1.acc = db2.acc and db2.id = db3.acc"
set Lres {}
set Lout [$::db eval "select id,acc from $BankSource where id in ('[join $Lacc ',']')"]
if {$BankSource in [ListOfDBsWithDashAccess]} {
# BankSource outputs dashed-acc
set Lasd {}
foreach {id accD} $Lout {
lappend Lasd $accD
}
# create list access sans dash (Lasd)
set Lasd [ConvertDashAcc2Acc $Lasd]
# create conversion array
array set t {}
foreach {id accDash} $Lout acc $Lasd {
lappend t($acc) $id
}
# request target db
set Lout2 [$::db eval "select acc,id from $BankTarget where acc in ('[join $Lasd ',']')"]
foreach {acc id} $Lout2 {
if {[info exists t($acc)]} {
foreach v $t($acc) {
lappend Lres $v $id
}
}
}
} else {
# BankTarget takes Dashed-acc as input
# i.e. from GENE_NAME to REFSEQ
set Lasd {}
foreach {id acc} $Lout {
lappend Lasd $acc
}
set Lasd [ConvertAcc2DashAcc $Lasd tmp tmp]
foreach {id accD} $Lout acc $Lasd {
lappend t($acc) $id
}
set Lout2 [$::db eval "select acc,id from $BankTarget where acc in ('[join $Lasd ',']')"]
foreach {acc id} $Lout2 {
if {[info exists t($acc)]} {
foreach v $t($acc) {
lappend Lres $v $id
}
}
}
}
return $cmd
return $Lres
}
# JTd --
#   Joins two banks on their accession column and keeps only the rows whose
#   accession belongs to <taxid> (NCBI_TAXID bank); no dashed-accession
#   handling.
# Arguments:
#   Lacc        list of ids searched in BankSource
#   BankSource  bank the input ids come from
#   BankTarget  bank the output ids are read from
#   source      unused here, kept for a uniform helper signature
#   target      unused here, kept for a uniform helper signature
#   taxid       NCBI taxonomy id the accessions must belong to
# Results:
#   The flat list produced by "$::db eval": srcId1 tgtId1 srcId2 ...
proc JTd {Lacc BankSource BankTarget source target taxid} {
    AttachIDMappingBank $BankTarget
    AttachIDMappingBank NCBI_TAXID

    set cmd "select db1.id, db2.id from $BankSource as db1, $BankTarget as db2, NCBI_TAXID as db3 where db1.id in ('[join $Lacc ',']') and db1.acc = db2.acc and db2.acc = db3.acc and db3.id = '$taxid'"
    # Return the rows, not the SQL text.
    return [$::db eval $cmd]
}
......@@ -70,10 +194,87 @@ proc JTD {Lacc BankSource BankTarget source taxid} {
AttachIDMappingBank $BankTarget
AttachIDMappingBank NCBI_TAXID
AttachIDMappingBank ACC2ACC
set Lout [$::db eval "select id,acc from $BankSource where id in ('[join $Lacc ',']')"]
set Lres {}
if {$BankSource in [ListOfDBsWithDashAccess]} {
# i.e. from REFSEQ to GENE_NAME
#
# REFSEQ outputs accDash, convert them
set Lasd [lmap {id accD} $Lout {set accD}]
set Lasd [ConvertDashAcc2Acc $Lasd]
#
# from these acc, take the ones with good taxid
set cmd "WITH tmp(acc) AS (VALUES ('[join $Lasd '),(']')) SELECT tax.acc,tax.id FROM tmp AS dbin, NCBI_TAXID AS tax WHERE tax.acc = dbin.acc and tax.id = '$taxid'"
set Ltax [$::db eval $cmd]
set Lacc {}
foreach {acc tx} $Ltax {
if {$tx == $taxid} {
set tax($acc) $tx
lappend Lacc $acc
}
}
# the Lasd and Lout lists have the same length
foreach {id accD} $Lout acc $Lasd {
if {[info exists tax($acc)]} {
lappend t($acc) $id
}
}
set cmd "WITH tmp(acc) AS (VALUES ('[join $Lacc '),(']')) SELECT dbt.acc,dbt.id FROM tmp AS dbin, $BankTarget AS dbt WHERE dbt.acc = dbin.acc"
set Lout2 [$::db eval $cmd]
# create results list
parray t
foreach {acc id} $Lout2 {
puts "acc $acc id $id"
if {[info exists t($acc)]} {
lappend Lres [lindex $t($acc) 0] $id
}
}
puts "Lres $Lres"
} else {
# BankTaget takes DashAcc as input
# i.e. from GENE_NAME to REFSEQ
#
set Lacc1 [lmap {id acc} $Lout {set acc}]
# select acc with good taxid
set cmd "WITH tmp(acc) AS (VALUES ('[join $Lacc1 '),(']')) SELECT tax.acc,tax.id FROM tmp AS dbin, NCBI_TAXID AS tax WHERE tax.acc = dbin.acc AND tax.id = '$taxid'"
set Ltax [$::db eval $cmd]
set LaccTax {}
foreach {acc id} $Ltax {
set tax($acc) $id
lappend LaccTax $acc
}
foreach {id acc} $Lout {
if {[info exists tax($acc)]} {
lappend t($acc) $id
}
}
# Convert acc to DashAcc
set LaccD [ConvertAcc2DashAcc $LaccTax Ta2d Td2a]
set cmd "select db1.id, db2.id from $BankSource as db1, $BankTarget as db2, NCBI_TAXID as db3, ACC2ACC as db4 where db1.id in ('[join $Lacc ',']') and db3.id = '$taxid' and db1.acc = db2.acc and db1.acc = db3.acc and db1.acc = db4.id"
# Now query the target database with Dash-acc
set cmd "WITH tmp(acc) AS (VALUES ('[join $LaccD '),(']')) SELECT dbt.acc,dbt.id FROM tmp AS dbin, $BankTarget AS dbt WHERE dbt.acc = dbin.acc"
set Lout2 [$::db eval $cmd]
# create results list with pairs :
# id bk1 - id bk2
foreach {accD id} $Lout2 {
if {[info exists Td2a($accD)]} {
set acc $Td2a($accD)
} else {
set acc $accD
}
foreach v $t($acc) {
lappend Lres $v $id
}
}
}
return $cmd
return $Lres
}
......@@ -87,7 +288,6 @@ proc AttachIDMappingBank {bank} {
proc OldIDMappingRequest {} {
if {0} {
puts "bank = $bank"
set db [DbIDMappingSQL $bank open]
if {! $jointure} {
if {$taxid eq ""} {
......@@ -96,7 +296,6 @@ proc OldIDMappingRequest {} {
} else {
set fdb [file join [idmHome] SQLDBS ACC2ACC.sql]
$db eval {attach database $fdb as ACC2ACC}
#lulu
set cmd "select db1.$source,db2.acc from $bank as db1, ACC2ACC as db2 where db1.$source in ('[join $Lacc ',']') and db1.acc = db2.id"
}
} else {
......@@ -129,7 +328,6 @@ proc OldIDMappingRequest {} {
} else {
# taxid = 1
if {! $checkDashAcc} {
# lulu
set fdb [file join [idmHome] SQLDBS NCBI_TAXID.sql]
$db eval {attach database $fdb as NCBI_TAXID}
set cmd "select db1.id, db2.id from $from as db1, $to as db2, NCBI_TAXID as db3 where db1.id in ('[join $Lacc ',']') and db3.id = '$taxid' and db1.acc = db2.acc and db1.acc = db3.acc"
......@@ -139,7 +337,6 @@ proc OldIDMappingRequest {} {
} ; # fin taxid
}
puts "\n$cmd"
set Lout [$db eval $cmd]
$db close
}
......@@ -148,44 +345,83 @@ proc OldIDMappingRequest {} {
}
# ConvertDashAcc2Acc --
#   Translates accessions through the ACC2ACC bank (column id -> column acc).
# Arguments:
#   Lacc   list of accessions looked up in ACC2ACC.id
# Results:
#   A new list in the order of Lacc: every entry found in ACC2ACC is
#   replaced by all of its acc values, unknown entries are kept unchanged.
proc ConvertDashAcc2Acc {Lacc} {
    set sql "select id,acc from ACC2ACC where id in ('[join $Lacc ',']')"

    # id -> list of acc, one id may map to several rows.
    set plainOf [dict create]
    foreach {dashed plain} [$::db eval $sql] {
        dict lappend plainOf $dashed $plain
    }

    set Lres [list]
    foreach entry $Lacc {
        if {![dict exists $plainOf $entry]} {
            lappend Lres $entry
            continue
        }
        lappend Lres {*}[dict get $plainOf $entry]
    }
    return $Lres
}
# ConvertAcc2DashAcc --
#   Translates accessions through the ACC2ACC bank (column acc -> column id)
#   and records the mapping in both directions in two caller arrays.
# Arguments:
#   Lacc    list of accessions looked up in ACC2ACC.acc
#   aTa2d   name of the caller's array receiving acc -> list of id
#   aTd2a   name of the caller's array receiving id  -> list of acc
# Results:
#   A new list in the order of Lacc: every entry found in ACC2ACC is
#   replaced by all of its ids, unknown entries are kept unchanged.
proc ConvertAcc2DashAcc {Lacc aTa2d aTd2a} {
    upvar $aTa2d dashOf $aTd2a plainOf

    # One VALUES row per input accession.
    set rows "('[join $Lacc '),(']')"
    set sql "WITH tmp(acc) AS (VALUES $rows) SELECT a2a.acc,a2a.id FROM tmp AS dbin, ACC2ACC AS a2a WHERE a2a.acc = dbin.acc"

    foreach {plain dashed} [$::db eval $sql] {
        lappend dashOf($plain) $dashed
        lappend plainOf($dashed) $plain
    }

    # now re-create the access list
    set Lnew [list]
    foreach entry $Lacc {
        if {![info exists dashOf($entry)]} {
            lappend Lnew $entry
            continue
        }
        foreach dashed $dashOf($entry) {
            lappend Lnew $dashed
        }
    }
    return $Lnew
}
# CreateIDMappingSQLRequest --
#   Opens the SQL database of <bank>, picks the query helper matching the
#   request and returns its result. The helper is chosen from three flags:
#     Join  neither side is a plain ACC/ID bank, two banks must be joined
#     Taxo  a taxid restriction was given
#     Dash  exactly one of the two banks uses "dashed" accessions
#   The helper names encode the flags (upper case = flag set): jtd, jtD,
#   jTd, jTD, Jtd, JtD, JTd, JTD.
# Arguments:
#   Lacc        list of input identifiers
#   bank        bank whose database is opened
#   BankSource  bank the input identifiers belong to
#   BankTarget  bank the output identifiers are read from
#   source      source column name (used by the single-bank helpers)
#   target      target column name (used by the single-bank helpers)
#   taxid       optional NCBI taxonomy id, "" for no restriction
# Results:
#   The list returned by the selected helper.
# Side effects:
#   Sets the global variable db to the opened handle; the handle is closed
#   before returning, also when the helper raises an error.
proc CreateIDMappingSQLRequest {Lacc bank BankSource BankTarget source target {taxid ""}} {
    global db

    set Ldash [ListOfDBsWithDashAccess]
    set Join [expr {$BankSource ni {ACC ID ACC+ID} && $BankTarget ni {ACC ID}}]
    set Taxo [expr {$taxid ne ""}]
    # True when exactly one side is a dashed-accession bank.
    set Dash [expr {($BankSource in $Ldash) != ($BankTarget in $Ldash)}]

    set db [DbIDMappingSQL $bank open]
    try {
        # Key = Join, Taxo, Dash; each flag is 0 or 1.
        switch -- "$Join$Taxo$Dash" {
            000 { set Lout [jtd $Lacc $bank $bank $BankTarget $source $target] }
            001 { set Lout [jtD $Lacc $bank $BankSource $BankTarget $source $target] }
            010 { set Lout [jTd $Lacc $bank $BankSource $BankTarget $source $target $taxid] }
            011 { set Lout [jTD $Lacc $bank $BankSource $BankTarget $source $target $taxid] }
            100 { set Lout [Jtd $Lacc $BankSource $BankTarget $source $target] }
            101 { set Lout [JtD $Lacc $BankSource $BankTarget $source $target] }
            110 { set Lout [JTd $Lacc $BankSource $BankTarget $source $target $taxid] }
            111 { set Lout [JTD $Lacc $BankSource $BankTarget $source $taxid] }
        }
    } finally {
        # Never leave the database handle open, even on error.
        $db close
    }
    return $Lout
}
......@@ -221,7 +457,7 @@ proc DASServers {} {
proc UniProtProteome {{pref 0}} {
# Taxonomy identifier of top node for query, e.g. 2 for Bacteria, 2157 for Archea, etc.
# (see http://www.uniprot.org/taxonomy)
# (se http://www.uniprot.org/taxonomy)
set top_node 131567
# Toggle this to 1 if you want reference instead of complete proteomes.
......@@ -915,9 +1151,12 @@ proc ListOfDBsWithDashAccess {} {
}
proc ListOfAllIDMappingBanks {} {
proc ListOfAllIDMappingBanks {{dir production}} {
if {! [info exists ::ListAllIDMappingBanks]} {
set ::ListAllIDMappingBanks [lsort [LesLignesDuFichier [file join [IDMappingDir] id_dbs.txt]]]
#set Ltmp [lsort [LesLignesDuFichier [file join [IDMappingDir] id_dbs.txt]]]
set Ltmp [lsort [glob [file join [IDMappingDir] .. $dir SQLDBS *.sql]]]
set ::ListAllIDMappingBanks [lmap bk $Ltmp {file tail [file rootname $bk]}]