Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
C
clowdflows
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Alain Shakour
clowdflows
Commits
bd922c6a
Commit
bd922c6a
authored
Jul 16, 2013
by
vpodpecan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
updated bio3graph library and package data
parent
3cbea1e5
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
1222 additions
and
485 deletions
+1222
-485
workflows/bio3graph/db/package_data.json
workflows/bio3graph/db/package_data.json
+1061
-482
workflows/bio3graph/library.py
workflows/bio3graph/library.py
+155
-1
workflows/bio3graph/triplet_extractor/tripletExtraction.py
workflows/bio3graph/triplet_extractor/tripletExtraction.py
+6
-2
No files found.
workflows/bio3graph/db/package_data.json
View file @
bd922c6a
This diff is collapsed.
Click to expand it.
workflows/bio3graph/library.py
View file @
bd922c6a
...
...
@@ -5,7 +5,7 @@ Bio3graph triplet extractor.
"""
def
bio3graph_create_document
(
input_dict
):
def
bio3graph_create_document
_from_file
(
input_dict
):
from
triplet_extractor
import
data_structures
as
ds
fn
=
input_dict
[
'docfile'
]
doc
=
ds
.
Document
()
...
...
@@ -13,6 +13,16 @@ def bio3graph_create_document(input_dict):
return
{
'document'
:
doc
}
def bio3graph_create_document_from_string(input_dict):
    """Create a Document from text supplied under the 'docstr' key.

    The text is run through ``unidecode`` and then loaded into a fresh
    ``data_structures.Document`` via ``loadString``.

    Returns a dict holding the document under the 'document' key.
    """
    from triplet_extractor import data_structures as ds
    from unidecode import unidecode

    raw_text = input_dict['docstr']
    document = ds.Document()
    transliterated = unidecode(raw_text)
    document.loadString(transliterated)
    return {'document': document}
def
bio3graph_split_sentences
(
input_dict
):
from
triplet_extractor
import
data_structures
as
ds
doc
=
input_dict
[
'document'
]
...
...
@@ -62,6 +72,29 @@ def bio3graph_build_default_vocabulary(input_dict):
return
{
'vocabulary'
:
voc
}
def bio3graph_build_default_vocabulary_custom_compounds(input_dict):
    """Build a Vocabulary from user-supplied compounds and default predicates.

    The compounds text found under input_dict['compounds'] is wrapped in a
    StringIO object and passed to ``Vocabulary.loadCompounds_file``; the
    predicate lists are loaded from the ``triplet_extractor/vocabulary``
    directory located next to this module.

    Returns a dict holding the vocabulary under the 'vocabulary' key.
    """
    from triplet_extractor import tripletExtraction as te
    from os.path import normpath, join, dirname
    from StringIO import StringIO

    comp = input_dict['compounds']
    dname = normpath(dirname(__file__))

    def vocabulary_file(name):
        # Path of a predicate list shipped with the package.
        return join(dname, 'triplet_extractor/vocabulary/' + name)

    voc = te.Vocabulary()
    # Construct the buffer from the string so that its read position is 0.
    # Writing into an empty StringIO leaves the position at the end of the
    # data, and a consumer that iterates the object directly reads nothing.
    s = StringIO(comp)
    voc.loadCompounds_file(s)
    voc.loadPredicates_files(
        activationFname=vocabulary_file('activation.lst'),
        activations_rotate=vocabulary_file('activation_rotate.lst'),
        inhibitionFname=vocabulary_file('inhibition.lst'),
        bindingFname=vocabulary_file('binding.lst'),
        activationFname_passive=vocabulary_file('activation_pas.lst'),
        inhibitionFname_passive=vocabulary_file('inhibition_pas.lst'),
        bindingFname_passive=vocabulary_file('binding_pas.lst'))
    return {'vocabulary': voc}
def
bio3graph_extract_triplets
(
input_dict
):
from
triplet_extractor
import
tripletExtraction
as
te
voc
=
input_dict
[
'vocabulary'
]
...
...
@@ -152,3 +185,124 @@ def bio3graph_reset_colours(input_dict):
nwx
=
copy
.
deepcopy
(
input_dict
[
'network'
])
gop
.
reset_edge_colors
(
nwx
)
return
{
'network'
:
nwx
}
def bio3graph_search_pubmed(input_dict):
    """Run a PubMed query and return the resulting ids.

    Reads the query from input_dict['query'] and the hit limit from
    input_dict['maxHits']; a false-valued limit is passed on as 0.

    Returns a dict holding the result of ``NCBI_Extractor.query`` under the
    'pmids' key.

    Raises ValueError when the query is empty or contains only whitespace.
    """
    q = input_dict['query']
    # Validate before importing the extractor so that a bad query is reported
    # as such; whitespace-only text is as useless as an empty query.
    if not q or (hasattr(q, 'strip') and not q.strip()):
        raise ValueError('Empty PubMed query!')

    nhits = input_dict['maxHits']
    maxHits = int(nhits) if nhits else 0

    from NCBI import NCBI_Extractor
    ex = NCBI_Extractor()
    ids = ex.query(q, maxHits=maxHits)
    return {'pmids': ids}
def bio3graph_filter_open_access(input_dict):
    """Keep only the ids that occur in the bundled open-access dictionary.

    The dictionary is unpickled from ``data/OA_dict.pickle`` next to this
    module. The ids are read from input_dict['ids'] and their order is
    preserved.

    Returns a dict holding the list of retained ids under the 'oa_ids' key.
    """
    try:
        import cPickle as pickle  # Python 2
    except ImportError:
        import pickle
    from os.path import normpath, join, dirname

    oa_path = normpath(join(dirname(__file__), 'data/OA_dict.pickle'))
    # NOTE: unpickling executes arbitrary code; this file must remain a
    # trusted, package-provided resource.
    with open(oa_path, 'rb') as handle:  # close the handle deterministically
        oa = pickle.load(handle)

    ids = input_dict['ids']
    result = [x for x in ids if x in oa]
    return {'oa_ids': result}
def bio3graph_get_xmls(input_dict):
    """Fetch the XML of every document id found under input_dict['id_list'].

    A single id given as a string is treated as one id rather than being
    split into characters; any other non-list iterable is converted to a
    list.

    Returns a dict holding the list of ``NCBI_Extractor.getXML`` results,
    in input order, under the 'xmls' key.
    """
    ids = input_dict['id_list']
    try:
        string_types = basestring  # Python 2: covers str and unicode
    except NameError:
        string_types = str
    if isinstance(ids, string_types):
        # list('12345') would yield single characters, not one id.
        ids = [ids] if ids else []
    elif not isinstance(ids, list):
        ids = list(ids)

    if not ids:
        # Nothing to fetch; no extractor is needed.
        return {'xmls': []}

    from NCBI import NCBI_Extractor
    a = NCBI_Extractor()
    result = [a.getXML(did) for did in ids]
    return {'xmls': result}
def bio3graph_get_fulltexts(input_dict):
    """Fetch the full text of every id found under input_dict['id_list'].

    A single id given as a string is treated as one id rather than being
    split into characters; any other non-list iterable is converted to a
    list. For each id the title, abstract and body of the object returned
    by ``NCBI_Extractor.getFulltext`` are joined, one per line.

    Returns a dict holding the list of texts, in input order, under the
    'fulltexts' key.
    """
    ids = input_dict['id_list']
    try:
        string_types = basestring  # Python 2: covers str and unicode
    except NameError:
        string_types = str
    if isinstance(ids, string_types):
        # list('12345') would yield single characters, not one id.
        ids = [ids] if ids else []
    elif not isinstance(ids, list):
        ids = list(ids)

    if not ids:
        # Nothing to fetch; no extractor is needed.
        return {'fulltexts': []}

    from NCBI import NCBI_Extractor
    a = NCBI_Extractor()
    result = []
    for did in ids:
        doc = a.getFulltext(did)
        ft = '%s\n%s\n%s\n' % (doc.title, doc.abstract, doc.body)
        result.append(ft)
    return {'fulltexts': result}
def bio3graph_map_entrez_to_ncbi_symbol(input_dict):
    """Translate Entrez gene ids into symbols using the bundled mapping.

    The mapping is unpickled from ``data/entrez2symbol.pickle`` next to this
    module and looked up by integer id. Each entry of input_dict['genes']
    may carry an 'EntrezGene:' prefix, which is removed before the id is
    converted to an integer. Ids without a (non-empty) symbol are skipped.

    Returns a dict holding the list of symbols under the 'gene_symbols' key.

    Raises ValueError when an id is not an integer after prefix removal.
    """
    try:
        import cPickle as pickle  # Python 2
    except ImportError:
        import pickle
    from os.path import normpath, join, dirname

    mapping_path = normpath(join(dirname(__file__), 'data/entrez2symbol.pickle'))
    # NOTE: unpickling executes arbitrary code; this file must remain a
    # trusted, package-provided resource.
    with open(mapping_path, 'rb') as handle:  # close the handle deterministically
        e2symb = pickle.load(handle)

    glist = input_dict['genes']
    result = []
    for g in glist:
        # str() lets ids that are already integers pass through unchanged.
        g = int(str(g).replace('EntrezGene:', ''))
        symb = e2symb.get(g)
        if symb:
            result.append(symb)
    return {'gene_symbols': result}
def bio3graph_get_gene_synonyms_from_GPSDB(input_dict):
    """Look up synonyms for the gene symbols in input_dict['gene_symbols'].

    Delegates to ``Synonym_extractor.get_geneset_synonyms`` and returns its
    result in a dict under the 'gene_synonyms' key.
    """
    from GPSDB_synonyms import Synonym_extractor

    symbols = input_dict['gene_symbols']
    extractor = Synonym_extractor()
    return {'gene_synonyms': extractor.get_geneset_synonyms(symbols)}
def bio3graph_construct_compounds_from_gene_synonyms(input_dict):
    """Serialize a gene -> synonyms mapping as CSV compound definitions.

    For every key of input_dict['gene_synonyms'] one CSV row is written,
    consisting of the gene followed by its synonyms. Rows follow the
    iteration order of the mapping.

    Returns a dict holding the CSV text under the 'compounds_csv' key.
    """
    import csv
    try:
        from StringIO import StringIO  # Python 2 byte-string buffer
    except ImportError:
        from io import StringIO

    syns = input_dict['gene_synonyms']
    s = StringIO()
    w = csv.writer(s)
    for g in syns:
        # list() accepts tuples/sets of synonyms as well as lists.
        w.writerow([g] + list(syns[g]))
    # No flush is needed for an in-memory buffer.
    return {'compounds_csv': s.getvalue()}
workflows/bio3graph/triplet_extractor/tripletExtraction.py
View file @
bd922c6a
...
...
@@ -49,8 +49,11 @@ def readEntitiesLnDoc_csv(fname):
Each line is either empty, or contains synonym(s) for some entity.
Returns a dictionary where keys are base names and values are synonyms.
'''
if
isinstance
(
fname
,
StringIO
.
StringIO
):
reader
=
csv
.
reader
(
fname
,
skipinitialspace
=
True
)
else
:
reader
=
csv
.
reader
(
open
(
fname
),
skipinitialspace
=
True
)
reader
=
csv
.
reader
(
open
(
fname
),
skipinitialspace
=
True
)
entities
=
{}
for
row
in
reader
:
if
len
(
row
)
==
0
:
...
...
@@ -590,7 +593,8 @@ class Vocabulary(object):
def
loadCompounds_stringIO
(
self
,
compString
):
compounds
=
readEntitiesLnDoc_stringIO
(
compString
)
# compounds = readEntitiesLnDoc_stringIO(com
compounds
=
readEntitiesLnDoc_csv
(
compString
)
self
.
_buildCompoundsStructures
(
compounds
)
#end
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment