Commit 77ba410a authored by Anze Vavpetic

added widgets for calling totrtale and term extraction; reorganized the NLP categories

parent 838db641
This diff is collapsed.
......@@ -4,6 +4,7 @@ import base64
from services.webservice import WebService
from workflows.security import safeOpen
def merge_sentences(input_dict):
"""
Merges the input sentences in XML according to the specified method.
......@@ -27,7 +28,8 @@ def merge_sentences(input_dict):
elif method == 'intersection_two':
for ids_alt in ids_list:
merged_sen = merged_sen | (ids_alt & ids)
return {'merged_sentences' : nlp.sentences_to_xml([id_to_sent[sid] for sid in merged_sen])}
return {'merged_sentences': nlp.sentences_to_xml([id_to_sent[sid] for sid in merged_sen])}
def load_corpus(input_dict):
'''
......@@ -35,7 +37,66 @@ def load_corpus(input_dict):
'''
f = safeOpen(input_dict['file'])
fname = os.path.basename(input_dict['file'])
wsdl = input_dict.get('wsdl', 'http://vihar.ijs.si:8095/totale?wsdl')
data = base64.b64encode(f.read())
ws = WebService('http://vihar.ijs.si:8095/totale?wsdl', 60000)
ws = WebService(wsdl, 60000)
response = ws.client.parseFile(fileName=fname, inFile=data)
return {'corpus' : response['parsedFile']}
return {'corpus': response['parsedFile']}
def nlp_totrtale(input_dict):
    '''
    Calls the totrtale web service.

    Values read from input_dict:
        corpus      -- sent to the service as ``inFile``
        lang        -- sent to the service as ``language``
        wsdl        -- optional WSDL address; falls back to the default
                       endpoint below when the key is absent
        xml, postprocess, bohoricica, antique
                    -- flags; each one is switched on only when its value
                       is exactly the string 'true'

    Returns a dict whose 'annotations' entry is the ``annotatedFile``
    field of the service response. A non-empty ``error`` field in the
    response is logged as a warning and does not stop the result from
    being returned.
    '''
    # Imported locally so the block does not depend on the module header.
    import logging
    logger = logging.getLogger(__name__)

    corpus = input_dict['corpus']
    lang = input_dict['lang']
    wsdl = input_dict.get('wsdl', 'http://vihar.ijs.si:8095/totale?wsdl')
    # The flags are compared against the literal string 'true'.
    xml = input_dict['xml'] == 'true'
    postprocess = input_dict['postprocess'] == 'true'
    bohoricica = input_dict['bohoricica'] == 'true'
    antique = input_dict['antique'] == 'true'

    # Previously an unconditional print; kept as debug-level diagnostics.
    logger.debug(
        'totrtale call: lang=%s wsdl=%s xml=%s postprocess=%s '
        'bohoricica=%s antique=%s',
        lang, wsdl, xml, postprocess, bohoricica, antique)

    ws = WebService(wsdl, 60000)
    response = ws.client.runTotale(inFile=corpus, language=lang,
                                   postProcessing=postprocess,
                                   bohoricica=bohoricica,
                                   antiqueSlovenian=antique,
                                   outputAsXML=xml)
    errors = response['error']
    if errors:
        # Report service-side problems as a warning instead of printing them.
        logger.warning('totrtale reported errors: %s', errors)
    return {'annotations': response['annotatedFile']}
def nlp_term_extraction(input_dict):
    '''
    Term extraction from totrtale annotations.

    Reads 'annotations' and 'lang' from input_dict, plus an optional
    'wsdl' address (the default endpoint below is used when it is absent),
    and calls the service's TermExtraction operation with threshold 0.

    Returns a dict whose 'candidates' entry is the ``candidates`` field of
    the service response.
    '''
    # Read the required inputs before the service object is created.
    annotated_text = input_dict['annotations']
    language = input_dict['lang']
    endpoint = input_dict.get('wsdl', 'http://vihar.ijs.si:8095/totale?wsdl')

    client = WebService(endpoint, 60000).client
    reply = client.TermExtraction(corpus=annotated_text,
                                  lang=language,
                                  threshold=0)
    found = reply['candidates']
    return {'candidates': found}
def nlp_def_extraction_terms(input_dict):
    '''
    Definition extraction using terms.

    Placeholder: the input is ignored and None is returned.
    '''
    return None
def nlp_def_extraction_patterns(input_dict):
    '''
    Definition extraction using pre-defined patterns.

    Placeholder: the input is ignored and None is returned.
    '''
    return None
def nlp_def_extraction_wnet(input_dict):
    '''
    Definition extraction using WordNet.

    Placeholder: the input is ignored and None is returned.
    '''
    return None
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment