# Ad-hoc checks of the GENE_NAME -> GENE_SYNONYM mapping for the identifier
# "zufsp", restricted to taxid 9606.
#
# Direct, one-call lookup; the result list is printed as-is.
set Lf [IDMapping_sql GENE_NAME GENE_SYNONYM zufsp 9606]
puts "\nLf $Lf\n"
# NOTE(review): presumably the directory that holds the per-bank SQL database
# files -- confirm that the calls below rely on the working directory.
cd /commun/bics/IDMapping/production/SQLDBS
# "Deux etapes" = "two steps": the same lookup done as GENE_NAME -> ACC,
# then ACC -> GENE_SYNONYM.
puts "\nDeux etapes"
set Lh [IDMapping_sql GENE_NAME ACC zufsp 9606]
# Keep only the first element of the first-step result.
set Lh [lindex $Lh 0]
puts "[llength $Lh] $Lh"
# Second step; note that no taxid argument is passed to this call.
set Lh [IDMapping_sql ACC GENE_SYNONYM $Lh]
puts "$Lh"
# Variables referenced by the commented-out query templates further down
# ($Lid, $Lacc, $BankSource, $BankTarget, $taxid).
set taxid 9606
set Lid zufsp
set Lacc zufsp
set BankSource GENE_NAME
set BankTarget GENE_SYNONYM
# Open the GENE_NAME bank and attach the two other banks used in the join.
set db [DbIDMappingSQL GENE_NAME open]
AttachIDMappingBank GENE_SYNONYM
AttachIDMappingBank NCBI_TAXID
# Alternative formulations of the query, kept commented out for reference:
# a VALUES-based CTE variant and a plain three-table join built from the
# variables above.
#set cmd "WITH db1(id) AS (VALUES ('[join $Lid '),(']')) select db1.id, db2.id from $BankSource as db1, $BankTarget as db2, NCBI_TAXID as db3 where db1.id in ('[join $Lacc ',']') and db3.id = '$taxid' and db1.acc = db2.acc and db2.acc = db3.acc"
#set cmd "WITH tmp(acc) AS (VALUES ('[join $Lacc '),(']')) SELECT tax.acc,tax.id FROM tmp AS dbin, NCBI_TAXID AS tax WHERE tax.acc = dbin.acc and tax.id = '$taxid'"
# Active query: fully hard-coded three-table join on acc (upper-case 'ZUFSP',
# taxid '9606'). It is only stored in cmd here; nothing in this section
# evaluates it.
set cmd "select db1.id, db2.id from GENE_NAME as db1, GENE_SYNONYM as db2, NCBI_TAXID as db3 where db1.id in ('ZUFSP') and db1.acc = db2.acc and db2.acc = db3.acc and db3.id = '9606'"
# Same query using "=" instead of "in (...)" for the identifier test.
#set cmd "select db1.id, db2.id from GENE_NAME as db1, GENE_SYNONYM as db2, NCBI_TAXID as db3 where db1.id='ZUFSP' and db1.acc = db2.acc and db2.acc = db3.acc and db3.id = '9606'"
proc jTd {Lacc bank BankSource BankTarget source target taxid} {
AttachIDMappingBank NCBI_TAXID
set cmd "select db1.$source,db1.$target from $bank as db1, NCBI_TAXID as db2 where db1.$source in ('[join $Lacc ',']') and db2.id='$taxid' and db1.acc = db2.acc"
proc jTD {Lacc bank BankSource BankTarget source target taxid} {
AttachIDMappingBank NCBI_TAXID
AttachIDMappingBank ACC2ACC
set cmd "select db1.$source,db3.acc from $bank as db1, NCBI_TAXID as db2, ACC2ACC as db3 where db1.$source in ('[join $Lacc ',']') and db2.id='$taxid' and db1.acc = db2.acc and db1.acc = db3.acc"
return $cmd
set Lres {}
if {$bank eq $BankTarget} {
# i.e. from ACC to REFSEQ
set Lnew [ConvertAcc2DashAcc $Lacc]
$::db eval {detach database ACC2ACC}
set Lout [jtD $Lnew $bank $BankSource $BankTarget $source $target]
set Ltax [$::db eval "select acc,id from NCBI_TAXID where acc in ('[join $Lacc ',']')"]
array set tax {}
foreach {ac id} $Ltax {
if {$id == $taxid} {
set tax($ac) $id
}
}
foreach {in out} $Lout {
if {[set i [string last "-" $in]] > 0} {
set in [string range $in 0 $i-1]
}
if {[info exis tax($in)]} {
lappend Lres $in $out
}
}
} else {
# i.e. from REFSEQ to ACC
#
# first, grab raw results from bank
set cmd "select id,acc from $bank where id in ('[join $Lacc ',']')"
set Lout [$::db eval $cmd]
# 2, go from dashed acc to plain acc
set Ldash {}
foreach {id acc} $Lout {
lappend Ldash $acc
}
set Lasd [ConvertDashAcc2Acc $Ldash]
# 3, take only good taxid
set Ltax [$::db eval "select acc,id from NCBI_TAXID where acc in ('[join $Lasd ',']')"]
set cmd "select db1.id,db3.id from $BankSource as db1, ACC2ACC as db2, $BankTarget as db3 where db1.id in ('[join $Lacc ',']') and db1.acc = db2.acc and db2.id = db3.acc"
set Lres {}
set Lout [$::db eval "select id,acc from $BankSource where id in ('[join $Lacc ',']')"]
if {$BankSource in [ListOfDBsWithDashAccess]} {
# BankSource outputs dashed-acc
set Lasd {}
foreach {id accD} $Lout {
lappend Lasd $accD
}
# create the list of accessions without dash (Lasd)
set Lasd [ConvertDashAcc2Acc $Lasd]
# create conversion array
array set t {}
foreach {id accDash} $Lout acc $Lasd {
lappend t($acc) $id
}
# request target db
set Lout2 [$::db eval "select acc,id from $BankTarget where acc in ('[join $Lasd ',']')"]
foreach {acc id} $Lout2 {
if {[info exists t($acc)]} {
foreach v $t($acc) {
lappend Lres $v $id
}
}
}
} else {
# BankTarget takes Dashed-acc as input
# i.e. from GENE_NAME to REFSEQ
set Lasd {}
foreach {id acc} $Lout {
lappend Lasd $acc
}
set Lasd [ConvertAcc2DashAcc $Lasd tmp tmp]
foreach {id accD} $Lout acc $Lasd {
lappend t($acc) $id
}
set Lout2 [$::db eval "select acc,id from $BankTarget where acc in ('[join $Lasd ',']')"]
set cmd "select db1.id, db2.id from $BankSource as db1, $BankTarget as db2, NCBI_TAXID as db3 where db1.id in ('[join $Lacc ',']') and db1.acc = db2.acc and db2.acc = db3.acc and db3.id = '$taxid'"
set Lout [$::db eval $cmd]
set cmd "select db1.id, db2.id from $BankSource as db1, $BankTarget as db2, NCBI_TAXID as db3 where db1.id in ('[join $Lacc ',']') and db3.id = '$taxid' and db1.acc = db2.acc and db1.acc = db3.acc"
set Lout [$::db eval "select id,acc from $BankSource where id in ('[join $Lacc ',']')"]
set Lres {}
if {$BankSource in [ListOfDBsWithDashAccess]} {
# i.e. from REFSEQ to GENE_NAME
#
# REFSEQ outputs accDash, convert them
set Lasd [lmap {id accD} $Lout {set accD}]
set Lasd [ConvertDashAcc2Acc $Lasd]
#
# from these acc, take the ones with good taxid
set cmd "WITH tmp(acc) AS (VALUES ('[join $Lasd '),(']')) SELECT tax.acc,tax.id FROM tmp AS dbin, NCBI_TAXID AS tax WHERE tax.acc = dbin.acc and tax.id = '$taxid'"
set Ltax [$::db eval $cmd]
set Lacc {}
foreach {acc tx} $Ltax {
if {$tx == $taxid} {
set tax($acc) $tx
lappend Lacc $acc
}
}
# the Lasd and Lout lists have the same length
foreach {id accD} $Lout acc $Lasd {
if {[info exists tax($acc)]} {
lappend t($acc) $id
}
}
set cmd "WITH tmp(acc) AS (VALUES ('[join $Lacc '),(']')) SELECT dbt.acc,dbt.id FROM tmp AS dbin, $BankTarget AS dbt WHERE dbt.acc = dbin.acc"
set Lout2 [$::db eval $cmd]
# create results list
parray t
foreach {acc id} $Lout2 {
puts "acc $acc id $id"
if {[info exists t($acc)]} {
lappend Lres [lindex $t($acc) 0] $id
}
}
puts "Lres $Lres"
} else {
# BankTarget takes DashAcc as input
# i.e. from GENE_NAME to REFSEQ
#
set Lacc1 [lmap {id acc} $Lout {set acc}]
# select acc with good taxid
set cmd "WITH tmp(acc) AS (VALUES ('[join $Lacc1 '),(']')) SELECT tax.acc,tax.id FROM tmp AS dbin, NCBI_TAXID AS tax WHERE tax.acc = dbin.acc AND tax.id = '$taxid'"
set Ltax [$::db eval $cmd]
set LaccTax {}
foreach {acc id} $Ltax {
set tax($acc) $id
lappend LaccTax $acc
}
foreach {id acc} $Lout {
if {[info exists tax($acc)]} {
lappend t($acc) $id
}
}
# Convert acc to DashAcc
set LaccD [ConvertAcc2DashAcc $LaccTax Ta2d Td2a]
set cmd "select db1.id, db2.id from $BankSource as db1, $BankTarget as db2, NCBI_TAXID as db3, ACC2ACC as db4 where db1.id in ('[join $Lacc ',']') and db3.id = '$taxid' and db1.acc = db2.acc and db1.acc = db3.acc and db1.acc = db4.id"
# Now query the target database with Dash-acc
set cmd "WITH tmp(acc) AS (VALUES ('[join $LaccD '),(']')) SELECT dbt.acc,dbt.id FROM tmp AS dbin, $BankTarget AS dbt WHERE dbt.acc = dbin.acc"
set cmd "select db1.$source,db2.acc from $bank as db1, ACC2ACC as db2 where db1.$source in ('[join $Lacc ',']') and db1.acc = db2.id"
}
} else {
...
...
@@ -129,7 +328,6 @@ proc OldIDMappingRequest {} {
} else {
# taxid = 1
if {! $checkDashAcc} {
# lulu
set fdb [file join [idmHome] SQLDBS NCBI_TAXID.sql]
$db eval {attach database $fdb as NCBI_TAXID}
set cmd "select db1.id, db2.id from $from as db1, $to as db2, NCBI_TAXID as db3 where db1.id in ('[join $Lacc ',']') and db3.id = '$taxid' and db1.acc = db2.acc and db1.acc = db3.acc"
set Join [expr {$BankSource ni {ACC ID ACC+ID} && $BankTarget ni {ACC ID}}]
set Taxo [expr {$taxid != ""}]
set Dash [expr {$BankSource in [ListOfDBsWithDashAccess] && $BankTarget ni [ListOfDBsWithDashAccess]}]
set Dash [expr {($BankSource in [ListOfDBsWithDashAccess] && $BankTarget ni [ListOfDBsWithDashAccess]) ^ ($BankSource ni [ListOfDBsWithDashAccess] && $BankTarget in [ListOfDBsWithDashAccess])}]
set db [DbIDMappingSQL $bank open]
if { ! $Join && ! $Taxo && ! $Dash } {
set cmd [jtd $Lacc $bank $BankTarget $source $target]
set Lout [jtd $Lacc $bank $bank $BankTarget $source $target]
}
if { ! $Join && ! $Taxo && $Dash } {
set cmd [jtD $Lacc $BankSource $BankTarget $source $target]
set Lout [jtD $Lacc $bank $BankSource $BankTarget $source $target]
}
if { ! $Join && $Taxo && ! $Dash } {
set cmd [jTd $Lacc $BankSource $BankTarget $source $Tax]
set Lout [jTd $Lacc $bank $BankSource $BankTarget $source $target $taxid]
}
if { ! $Join && $Taxo && $Dash } {
set cmd [jTD $Lacc $BankSource $BankTarget $source $Tax]
set Lout [jTD $Lacc $bank $BankSource $BankTarget $source $target $taxid]
}
if { $Join && ! $Taxo && ! $Dash } {
set cmd[Jtd $Lacc $BankSource $BankTarget $source $target]
set Lout [Jtd $Lacc $BankSource $BankTarget $source $target]
}
if { $Join && ! $Taxo && $Dash } {
set cmd[JtD $Lacc $BankSource $BankTarget $source $target]
set Lout [JtD $Lacc $BankSource $BankTarget $source $target]
}
if { $Join && $Taxo && ! $Dash } {
set cmd[JTd $Lacc $BankSource $BankTarget $source $target $taxid]
set Lout [JTd $Lacc $BankSource $BankTarget $source $target $taxid]
}
if { $Join && $Taxo && $Dash } {
set cmd [JTD $Lacc $BankSource $BankTarget $source $Tax]
set Lout [JTD $Lacc $BankSource $BankTarget $source $taxid]
}
puts "\n$cmd\n"
set Lout [$db eval $cmd]
$db close
return $Lout
}
...
...
@@ -221,7 +457,7 @@ proc DASServers {} {
proc UniProtProteome {{pref 0}} {
# Taxonomy identifier of top node for query, e.g. 2 for Bacteria, 2157 for Archea, etc.
# (see http://www.uniprot.org/taxonomy)
# (see http://www.uniprot.org/taxonomy)
set top_node 131567
# Toggle this to 1 if you want reference instead of complete proteomes.