Commit bd922c6a authored by vpodpecan
Browse files

updated bio3graph library and package data

parent 3cbea1e5
This diff is collapsed.
...@@ -5,7 +5,7 @@ Bio3graph triplet extractor. ...@@ -5,7 +5,7 @@ Bio3graph triplet extractor.
""" """
def bio3graph_create_document(input_dict): def bio3graph_create_document_from_file(input_dict):
from triplet_extractor import data_structures as ds from triplet_extractor import data_structures as ds
fn = input_dict['docfile'] fn = input_dict['docfile']
doc = ds.Document() doc = ds.Document()
...@@ -13,6 +13,16 @@ def bio3graph_create_document(input_dict): ...@@ -13,6 +13,16 @@ def bio3graph_create_document(input_dict):
return {'document': doc} return {'document': doc}
def bio3graph_create_document_from_string(input_dict):
    """Create a bio3graph document from text held in memory.

    The text is read from ``input_dict['docstr']``, passed through
    ``unidecode`` and loaded into a newly created ``ds.Document`` via
    ``loadString``.

    Returns a dict holding the document under the key ``'document'``.
    """
    from triplet_extractor import data_structures as ds
    from unidecode import unidecode

    raw_text = input_dict['docstr']
    document = ds.Document()
    # NOTE(review): unidecode presumably transliterates non-ASCII characters
    # to plain ASCII before the text is loaded -- confirm against its docs.
    ascii_text = unidecode(raw_text)
    document.loadString(ascii_text)
    return {'document': document}
def bio3graph_split_sentences(input_dict): def bio3graph_split_sentences(input_dict):
from triplet_extractor import data_structures as ds from triplet_extractor import data_structures as ds
doc = input_dict['document'] doc = input_dict['document']
...@@ -62,6 +72,29 @@ def bio3graph_build_default_vocabulary(input_dict): ...@@ -62,6 +72,29 @@ def bio3graph_build_default_vocabulary(input_dict):
return {'vocabulary': voc} return {'vocabulary': voc}
def bio3graph_build_default_vocabulary_custom_compounds(input_dict):
    """Build a vocabulary from caller-supplied compounds and bundled predicates.

    ``input_dict['compounds']`` holds the compounds text.  It is wrapped in an
    in-memory file object and handed to ``Vocabulary.loadCompounds_file``.
    The predicate lists are loaded from the ``triplet_extractor/vocabulary``
    directory located next to this module.

    Returns a dict holding the vocabulary under the key ``'vocabulary'``.
    """
    from triplet_extractor import tripletExtraction as te
    from os.path import normpath, join, dirname
    from StringIO import StringIO

    comp = input_dict['compounds']
    dname = normpath(dirname(__file__))

    def vocabulary_path(fname):
        # Same path strings as spelling out the full literal for every file.
        return join(dname, 'triplet_extractor/vocabulary/' + fname)

    voc = te.Vocabulary()
    # Initialise the buffer with the text rather than write()-ing into an
    # empty one: after write() the position sits at the end of the buffer, so
    # a consumer reading from the current position would see no data at all.
    # NOTE(review): kept as StringIO.StringIO on purpose -- presumably the
    # loader ends up in readEntitiesLnDoc_csv, which type-checks for exactly
    # that class; confirm before switching to io.StringIO/cStringIO.
    voc.loadCompounds_file(StringIO(comp))
    voc.loadPredicates_files(
        activationFname=vocabulary_path('activation.lst'),
        activations_rotate=vocabulary_path('activation_rotate.lst'),
        inhibitionFname=vocabulary_path('inhibition.lst'),
        bindingFname=vocabulary_path('binding.lst'),
        activationFname_passive=vocabulary_path('activation_pas.lst'),
        inhibitionFname_passive=vocabulary_path('inhibition_pas.lst'),
        bindingFname_passive=vocabulary_path('binding_pas.lst'))
    return {'vocabulary': voc}
def bio3graph_extract_triplets(input_dict): def bio3graph_extract_triplets(input_dict):
from triplet_extractor import tripletExtraction as te from triplet_extractor import tripletExtraction as te
voc = input_dict['vocabulary'] voc = input_dict['vocabulary']
...@@ -152,3 +185,124 @@ def bio3graph_reset_colours(input_dict): ...@@ -152,3 +185,124 @@ def bio3graph_reset_colours(input_dict):
nwx = copy.deepcopy(input_dict['network']) nwx = copy.deepcopy(input_dict['network'])
gop.reset_edge_colors(nwx) gop.reset_edge_colors(nwx)
return {'network': nwx} return {'network': nwx}
def bio3graph_search_pubmed(input_dict):
    """Run a PubMed query and return the ids it yields.

    ``input_dict['query']`` is the query and must be non-empty.
    ``input_dict['maxHits']`` is converted with ``int()`` when truthy;
    otherwise 0 is passed to the extractor (what 0 means there is not
    visible from this function).

    Returns a dict holding the extractor's result under the key ``'pmids'``.

    Raises ValueError if the query is empty.
    """
    from NCBI import NCBI_Extractor

    query = input_dict['query']
    if not query:
        raise ValueError('Empty PubMed query!')

    requested_hits = input_dict['maxHits']
    if requested_hits:
        hit_limit = int(requested_hits)
    else:
        hit_limit = 0

    extractor = NCBI_Extractor()
    return {'pmids': extractor.query(query, maxHits=hit_limit)}
def bio3graph_filter_open_access(input_dict):
    """Keep only the ids found in the bundled open-access table.

    The table is unpickled from ``data/OA_dict.pickle``, resolved relative to
    this module's directory.  ``input_dict['ids']`` is the iterable of ids to
    check; membership is tested with ``in`` against the unpickled object.

    Returns a dict holding the surviving ids, in their original order and
    always as a list, under the key ``'oa_ids'``.
    """
    try:
        import cPickle as pickle  # Python 2
    except ImportError:
        import pickle  # Python 3 has no cPickle module
    from os.path import normpath, join, dirname

    oa_path = normpath(join(dirname(__file__), 'data/OA_dict.pickle'))
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(oa_path, 'rb') as fh:
        oa = pickle.load(fh)

    ids = input_dict['ids']
    return {'oa_ids': [doc_id for doc_id in ids if doc_id in oa]}
def bio3graph_get_xmls(input_dict):
    """Fetch the XML of every document id in ``input_dict['id_list']``.

    A non-list ``id_list`` is first converted with ``list()``.  Each id is
    passed to ``NCBI_Extractor.getXML``, one call per id, in order.

    Returns a dict holding the list of results under the key ``'xmls'``.
    """
    from NCBI import NCBI_Extractor

    requested = input_dict['id_list']
    doc_ids = requested if isinstance(requested, list) else list(requested)

    extractor = NCBI_Extractor()
    return {'xmls': [extractor.getXML(doc_id) for doc_id in doc_ids]}
def bio3graph_get_fulltexts(input_dict):
    """Fetch the full text of every document id in ``input_dict['id_list']``.

    A non-list ``id_list`` is first converted with ``list()``.  For each id
    the object returned by ``NCBI_Extractor.getFulltext`` is flattened into
    one string: title, abstract and body, each followed by a newline.

    Returns a dict holding the list of strings under the key ``'fulltexts'``.
    """
    from NCBI import NCBI_Extractor

    requested = input_dict['id_list']
    doc_ids = requested if isinstance(requested, list) else list(requested)

    template = '%s\n%s\n%s\n'
    fulltexts = []
    extractor = NCBI_Extractor()
    for doc_id in doc_ids:
        article = extractor.getFulltext(doc_id)
        fulltexts.append(template % (article.title, article.abstract, article.body))
    return {'fulltexts': fulltexts}
def bio3graph_map_entrez_to_ncbi_symbol(input_dict):
    """Translate Entrez gene ids into symbols using the bundled lookup table.

    The table is unpickled from ``data/entrez2symbol.pickle``, resolved
    relative to this module's directory, and queried with integer keys.
    ``input_dict['genes']`` is an iterable of id strings; an optional
    ``'EntrezGene:'`` prefix is stripped before conversion with ``int()``.
    Ids with no (or an empty) entry in the table are dropped.

    Returns a dict holding the list of symbols under the key
    ``'gene_symbols'``.

    Raises ValueError (from ``int()``) if an id is not numeric after the
    prefix has been removed.
    """
    try:
        import cPickle as pickle  # Python 2
    except ImportError:
        import pickle  # Python 3 has no cPickle module
    from os.path import normpath, join, dirname

    table_path = normpath(join(dirname(__file__), 'data/entrez2symbol.pickle'))
    # Use a context manager so the file handle is closed deterministically
    # instead of being left for the garbage collector.
    with open(table_path, 'rb') as fh:
        e2symb = pickle.load(fh)

    result = []
    for gene in input_dict['genes']:
        entrez_id = int(gene.replace('EntrezGene:', ''))
        symb = e2symb.get(entrez_id)
        if symb:
            result.append(symb)
    return {'gene_symbols': result}
def bio3graph_get_gene_synonyms_from_GPSDB(input_dict):
    """Look up synonyms for the gene symbols in ``input_dict['gene_symbols']``.

    The lookup is delegated to ``Synonym_extractor.get_geneset_synonyms``.

    Returns a dict holding that call's result under the key
    ``'gene_synonyms'``.
    """
    from GPSDB_synonyms import Synonym_extractor

    symbols = input_dict['gene_symbols']
    synonyms = Synonym_extractor().get_geneset_synonyms(symbols)
    return {'gene_synonyms': synonyms}
def bio3graph_construct_compounds_from_gene_synonyms(input_dict):
    """Serialise a gene -> synonyms mapping as CSV text.

    ``input_dict['gene_synonyms']`` maps each gene name to a list of its
    synonyms.  Every gene becomes one CSV row: the gene name followed by its
    synonyms, written with the ``csv`` module's default dialect.

    Returns a dict holding the CSV text under the key ``'compounds_csv'``.
    """
    import csv
    try:
        # Python 2: csv.writer needs the byte-string based StringIO.
        from StringIO import StringIO
    except ImportError:
        # Python 3: the StringIO module is gone; io.StringIO is the text buffer.
        from io import StringIO

    syns = input_dict['gene_synonyms']
    buf = StringIO()
    writer = csv.writer(buf)
    for gene in syns:
        writer.writerow([gene] + syns[gene])
    # No flush() needed: an in-memory buffer has nothing to flush to.
    return {'compounds_csv': buf.getvalue()}
...@@ -49,8 +49,11 @@ def readEntitiesLnDoc_csv(fname): ...@@ -49,8 +49,11 @@ def readEntitiesLnDoc_csv(fname):
Each line is either empty, or contains synonym(s) for some entity. Each line is either empty, or contains synonym(s) for some entity.
Returns a dictionary where keys are base names and values are synonyms. Returns a dictionary where keys are base names and values are synonyms.
''' '''
if isinstance(fname, StringIO.StringIO):
reader = csv.reader(fname, skipinitialspace=True)
else:
reader = csv.reader(open(fname), skipinitialspace=True)
reader = csv.reader(open(fname), skipinitialspace=True)
entities = {} entities = {}
for row in reader: for row in reader:
if len(row) == 0: if len(row) == 0:
...@@ -590,7 +593,8 @@ class Vocabulary(object): ...@@ -590,7 +593,8 @@ class Vocabulary(object):
def loadCompounds_stringIO(self, compString): def loadCompounds_stringIO(self, compString):
compounds = readEntitiesLnDoc_stringIO(compString) # compounds = readEntitiesLnDoc_stringIO(com
compounds = readEntitiesLnDoc_csv(compString)
self._buildCompoundsStructures(compounds) self._buildCompoundsStructures(compounds)
#end #end
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment