Commit 8f470499 authored by romanorac

bug fixes

parents f65999d7 c358ad19
from unicodedata import category
from django.core.management.base import BaseCommand, CommandError
from workflows.models import Category, AbstractWidget, AbstractInput, AbstractOutput, AbstractOption
from django.core import serializers
from optparse import make_option
import uuid
import os
import sys
from django.conf import settings
import json
def add_category(category, categories):
    """Record the primary key of ``category`` and of all its ancestors.

    ``categories`` is a set that is updated in place.  ``category`` may be
    ``None`` (a widget without a category), in which case nothing is added;
    previously that case raised ``AttributeError``.
    """
    node = category
    # Walk up the parent chain instead of recursing; stop at the root.
    while node is not None:
        categories.add(node.pk)
        node = node.parent if node.parent else None
def ensure_dir(directory):
    """Create ``directory`` (including missing parents) if it is absent.

    The creation is attempted first and a failure is only ignored when the
    path exists afterwards, so a concurrent creation of the same directory
    between the check and ``os.makedirs`` no longer raises.
    """
    try:
        os.makedirs(directory)
    except OSError:
        # Already present (possibly created by someone else meanwhile):
        # nothing to do.  Any other failure is a real error.
        if not os.path.exists(directory):
            raise
def choice(choices, question="Your choice: "):
    """Ask the user to pick one element of ``choices`` and return it.

    A numbered menu followed by ``question`` is shown until a valid index is
    entered.  Invalid answers (non-numeric, out of range, negative) print
    ``Error: Wrong choice.`` to stderr and the menu is shown again.

    Only ``ValueError`` and ``IndexError`` are treated as a wrong answer, so
    Ctrl-C and end-of-input now propagate instead of looping forever.
    """
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    menu = "".join(
        "[" + str(number) + "] " + str(item) + "\n"
        for number, item in enumerate(choices)
    )
    while True:
        answer = read_line(menu + question)
        try:
            index = int(answer)
            if index < 0:
                # Negative numbers are not listed in the menu.
                raise IndexError(index)
            return choices[index]
        except (ValueError, IndexError):
            sys.stderr.write("Error: Wrong choice.\n")
def serialize_widget(aw):
    """Return a list of JSON-ready records describing the widget ``aw``.

    The list holds the widget record followed by its inputs, outputs and
    input options.  Primary keys (and the widget's ``user`` field) are
    dropped and references between records are replaced by ``uid`` values.
    """
    def _records(objects):
        # Round-trip through Django's JSON serializer to get plain dicts.
        return json.loads(serializers.serialize("json", objects))

    widget_record = _records([aw, ])[0]
    widget_record.pop('pk', None)
    widget_record['fields'].pop('user', None)
    if widget_record['fields']['category'] is not None:
        widget_record['fields']['category'] = aw.category.uid

    input_records = _records(aw.inputs.all())
    for record in input_records:
        record.pop('pk', None)
        record['fields']['widget'] = aw.uid

    output_records = _records(aw.outputs.all())
    for record in output_records:
        record.pop('pk', None)
        record['fields']['widget'] = aw.uid

    option_records = _records(AbstractOption.objects.filter(abstract_input__widget=aw))
    for record in option_records:
        record.pop('pk', None)
        owner_id = record['fields']['abstract_input']
        record['fields']['abstract_input'] = AbstractInput.objects.get(id=owner_id).uid

    return [widget_record, ] + input_records + output_records + option_records
def serialize_category(c):
    """Return a JSON-ready record describing the category ``c``.

    The primary key and the ``workflow`` and ``user`` fields are removed, and
    a parent reference is replaced by the parent's ``uid``.
    """
    record = json.loads(serializers.serialize("json", [c, ]))[0]
    record.pop('pk', None)

    fields = record['fields']
    parent_id = fields['parent']
    if parent_id is not None:
        fields['parent'] = Category.objects.get(id=parent_id).uid

    for unwanted in ('workflow', 'user'):
        fields.pop(unwanted, None)
    return record
class Command(BaseCommand):
    """Export the widgets and categories of one package as JSON files.

    Every abstract widget of the package is written to
    ``package_data/widgets/<uid>.json`` and every category used by those
    widgets (with its ancestors) to ``package_data/categories/<uid>.json``.
    A file is only rewritten when its stored content differs from the
    freshly serialized data.
    """
    args = 'package_name'
    help = 'Exports the package "package_name".'

    def handle(self, *args, **options):
        """Run the export for the package named by the first argument.

        Raises ``CommandError`` when the argument is missing or the package
        is not listed in ``settings.INSTALLED_APPS``.
        """
        if len(args) < 1:
            raise CommandError('Argument "package_name" is required.')
        package_name = args[0]
        if 'workflows.'+package_name not in settings.INSTALLED_APPS:
            raise CommandError("Package not found in INSTALLED_APPS.")

        # here we check the integrity of the package
        aws = AbstractWidget.objects.filter(package=package_name)
        for aw in aws:
            if aw.uid:
                for bw in aws:
                    if bw.uid == aw.uid and bw.id != aw.id:
                        self.stdout.write("Found two widgets with the same UID. Please select a widget to assign new UID to.\n")
                        selected_widget = choice([aw, bw], "Select a widget: ")
                        selected_widget.set_uid(commit=True)

        # first we check if package_data directory exists and make it if it doesn't
        package_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../../'+package_name+"/package_data/")
        ensure_dir(package_directory)
        widgets_directory = os.path.join(package_directory, "widgets")
        ensure_dir(widgets_directory)
        categories_directory = os.path.join(package_directory, "categories")
        ensure_dir(categories_directory)
        self.stdout.write(" > Ensuring package directory for "+package_name+".\n")

        categories = set()
        self.stdout.write(" > Exporting widgets\n")
        global_change = False
        for aw in aws:
            aw.update_uid()
            # A widget may have no category; only real ones are collected.
            if aw.category is not None:
                add_category(aw.category, categories)
            # "%s" formatting also copes with a uid that is still a UUID
            # object rather than a string.
            widget_path = os.path.join(widgets_directory, "%s.json" % aw.uid)
            if self._export_json(widget_path, serialize_widget(aw), "widget "+str(aw)):
                global_change = True
        if not global_change:
            self.stdout.write(" No changes in the widgets detected!\n")

        self.stdout.write(" > Exporting categories\n")
        global_change = False
        for category in categories:
            c = Category.objects.get(id=category)
            c.update_uid()
            category_path = os.path.join(categories_directory, "%s.json" % c.uid)
            if self._export_json(category_path, serialize_category(c), "category "+str(c)):
                global_change = True
        if not global_change:
            self.stdout.write(" No changes in the categories detected!\n")
        self.stdout.write('Thanks for using the new export command. You rock.\n')

    def _export_json(self, path, payload, description):
        """Write ``payload`` as JSON to ``path`` unless it is already stored there.

        Returns True when the file was (re)written, False when unchanged.
        """
        try:
            with open(path, 'r') as stored_file:
                stored = json.loads(stored_file.read())
        except (IOError, OSError, ValueError):
            # Missing, unreadable or corrupt file: treat it as a new export.
            created = True
        else:
            if stored == payload:
                return False
            created = False

        verb = "Exporting" if created else "Updating"
        self.stdout.write(" + "+verb+" "+description+"\n")
        with open(path, 'w') as target_file:
            target_file.write(json.dumps(payload, indent=2))
        return True
from unicodedata import category
from django.core.management.base import BaseCommand, CommandError
from workflows.models import Category, AbstractWidget, AbstractInput, AbstractOutput, AbstractOption
from django.core import serializers
from optparse import make_option
import uuid
import os
import sys
from django.conf import settings
import json
from .export import serialize_category, serialize_widget
def parsewidgetdata(widget_data):
    """Split the records of one widget file by model.

    Returns ``(widget, inputs, outputs, options)`` where ``widget`` is the
    last ``workflows.abstractwidget`` record seen (or ``None``) and the other
    three are lists in file order.  Raises ``CommandError`` for a record of
    any other model.
    """
    widget = None
    grouped = {
        'workflows.abstractinput': [],
        'workflows.abstractoutput': [],
        'workflows.abstractoption': [],
    }
    for record in widget_data:
        model = record['model']
        if model == 'workflows.abstractwidget':
            widget = record
            continue
        if model not in grouped:
            raise CommandError("Wrong data in widget files!")
        grouped[model].append(record)
    return (
        widget,
        grouped['workflows.abstractinput'],
        grouped['workflows.abstractoutput'],
        grouped['workflows.abstractoption'],
    )
class Command(BaseCommand):
    """Import the widgets and categories of one package from JSON files.

    Reads the files under ``package_data/categories`` and
    ``package_data/widgets`` of the package and creates or updates the
    matching database objects, looked up by ``uid``.
    """
    args = 'package_name'
    help = 'Imports the package "package_name".'

    def handle(self, *args, **options):
        """Run the import for the package named by the first argument.

        Raises ``CommandError`` when the argument is missing, the package is
        not installed, or its exported data directories cannot be found.
        """
        if len(args) < 1:
            raise CommandError('Argument "package_name" is required.')
        package_name = args[0]
        if 'workflows.'+package_name not in settings.INSTALLED_APPS:
            raise CommandError("Package not found in INSTALLED_APPS.")

        package_directory = os.path.join(os.path.dirname(os.path.realpath(__file__)),'../../'+package_name+"/package_data/")
        widgets_directory = os.path.join(package_directory, "widgets")
        categories_directory = os.path.join(package_directory, "categories")
        if not (os.path.exists(package_directory)
                and os.path.exists(widgets_directory)
                and os.path.exists(categories_directory)):
            raise CommandError("Cannot find package data. Are you sure this package has been exported already?")

        widget_files = os.listdir(widgets_directory)
        category_files = os.listdir(categories_directory)

        self.stdout.write(' > Importing categories\n')
        global_change = False
        for category_file in category_files:
            c_data = self._read_json(os.path.join(categories_directory, category_file))
            if self._import_category(c_data):
                global_change = True
        if not global_change:
            self.stdout.write(" No changes detected in the categories.\n")

        global_change = False
        self.stdout.write(' > Importing widgets\n')
        for widget_file in widget_files:
            w_data = self._read_json(os.path.join(widgets_directory, widget_file))
            if self._import_widget(w_data, package_name):
                global_change = True
        if not global_change:
            self.stdout.write(" No changes detected in the widgets.\n")
        self.stdout.write('Thanks for using the new import command. You rock.\n')

    def _read_json(self, path):
        """Return the parsed JSON content of ``path``; the file is always closed."""
        with open(path, 'r') as source:
            return json.loads(source.read())

    def _import_category(self, c_data):
        """Create or update one category; return True when it was changed."""
        uid = c_data['fields']['uid']
        created = False
        try:
            c = Category.objects.get(uid=uid)
        except Category.DoesNotExist:
            created = True
            c = Category(uid=uid)

        if serialize_category(c) == c_data:
            return False

        if created:
            self.stdout.write(' + Creating category '+str(c_data['fields']['name'])+'\n')
        else:
            self.stdout.write(' + Updating category '+str(c_data['fields']['name'])+'\n')

        for field, value in c_data['fields'].items():
            if field != 'parent':
                setattr(c, field, value)
                continue
            parent = None
            if value is not None:
                try:
                    parent = Category.objects.get(uid=value)
                except Category.DoesNotExist:
                    # Placeholder parent; its real data is filled in when
                    # its own file is imported.
                    parent = Category(uid=value, name="Temporary category name")
                    parent.save()
            c.parent = parent
        c.save()
        return True

    def _import_widget(self, w_data, package_name):
        """Create or update one widget with its inputs, outputs and options.

        Returns True when the stored widget differed from ``w_data``.
        """
        # Named widget_options so the command's **options is not shadowed.
        widget, inputs, outputs, widget_options = parsewidgetdata(w_data)
        if widget is None:
            # A file without a widget record used to fail with a TypeError.
            raise CommandError("Wrong data in widget files!")

        uid = widget['fields']['uid']
        created = False
        try:
            aw = AbstractWidget.objects.get(uid=uid, package=package_name)
        except AbstractWidget.DoesNotExist:
            aw = AbstractWidget(uid=uid, package=package_name)
            created = True

        if w_data == serialize_widget(aw):
            return False

        if created:
            self.stdout.write(' + Creating widget '+str(widget['fields']['name'])+'\n')
        else:
            self.stdout.write(' + Updating widget '+str(widget['fields']['name'])+'\n')

        for field, value in widget['fields'].items():
            if field != 'category':
                setattr(aw, field, value)
            elif value is None:
                # Widgets exported without a category carry None here.
                aw.category = None
            else:
                aw.category = Category.objects.get(uid=value)
        aw.save()

        self._import_children(AbstractInput, inputs, aw)
        self._import_children(AbstractOutput, outputs, aw)

        for option in widget_options:
            try:
                o = AbstractOption.objects.get(uid=option['fields']['uid'])
            except AbstractOption.DoesNotExist:
                o = AbstractOption(uid=option['fields']['uid'])
            for field, value in option['fields'].items():
                if field != 'abstract_input':
                    setattr(o, field, value)
                else:
                    o.abstract_input = AbstractInput.objects.get(uid=value)
            o.save()
        return True

    def _import_children(self, model, records, aw):
        """Create or update the inputs or outputs in ``records`` and attach them to ``aw``."""
        for record in records:
            try:
                child = model.objects.get(uid=record['fields']['uid'])
            except model.DoesNotExist:
                child = model(uid=record['fields']['uid'])
            for field, value in record['fields'].items():
                if field != 'widget':
                    setattr(child, field, value)
            child.widget = aw
            child.save()
......@@ -38,6 +38,14 @@ class Category(models.Model):
uid = models.CharField(max_length=250,blank=True,default='')
def update_uid(self):
    """Give this category and all its ancestors a ``uid`` if they lack one.

    The uid is stored as a string: assigning the ``uuid.UUID`` object itself
    left a non-string value on the instance, which breaks code that
    concatenates the uid or serializes it to JSON right after this call.
    """
    import uuid
    if self.uid == '' or self.uid is None:
        self.uid = str(uuid.uuid4())
        self.save()
    if self.parent:
        self.parent.update_uid()
class Meta:
    # Model options: sort by ``order`` then ``name``; plural label for display.
    ordering = ('order', 'name')
    verbose_name_plural = "categories"
......@@ -429,6 +437,22 @@ class AbstractWidget(models.Model):
if commit:
o.save()
def update_uid(self):
    """Assign missing ``uid`` values to this widget and its related objects.

    Covers the widget itself, its inputs with an empty uid, the options of
    those inputs with an empty uid, its outputs with an empty uid, and
    finally its category (when it has one).

    Uids are stored as strings rather than ``uuid.UUID`` objects so they can
    be concatenated and JSON-serialized straight away.
    """
    import uuid
    if self.uid == '' or self.uid is None:
        self.uid = str(uuid.uuid4())
        self.save()
    for i in self.inputs.filter(uid=''):
        i.uid = str(uuid.uuid4())
        i.save()
        for option in i.options.filter(uid=''):
            option.uid = str(uuid.uuid4())
            option.save()
    for o in self.outputs.filter(uid=''):
        o.uid = str(uuid.uuid4())
        o.save()
    # Widgets without a category used to raise AttributeError here.
    if self.category is not None:
        self.category.update_uid()
def __unicode__(self):
    """Return the widget's name as its unicode representation."""
    label = self.name
    return unicode(label)
......
This diff is collapsed.
......@@ -3,12 +3,7 @@ import os.path
import base64
from services.webservice import WebService
from workflows.security import safeOpen
import requests
import json
import re
import itertools
webservices_url = "http://vihar.ijs.si:8104"
def merge_sentences(input_dict):
"""
......@@ -52,70 +47,6 @@ def load_corpus(input_dict):
response = ws.client.parseFile(fileName=fname, inFile=data)
return {'corpus': response['parsedFile']}
def load_corpus2(input_dict):
    '''
    Parses an input file (or the given text) through the parseFile web
    service and returns the parsed corpus.

    When ``input_dict["text"]`` is empty the file named by
    ``input_dict["file"]`` is read; otherwise the stripped text is sent under
    the name ``input_string.txt``.  The content is sent base64-encoded.

    Returns ``{'corpus': ...}``; raises ``Exception`` with the service's
    error message when the service reports one.
    '''
    text = input_dict[u"text"]
    if text == "":
        f = safeOpen(input_dict['file'])
        try:
            raw = f.read()
        finally:
            # NOTE(review): assumes safeOpen returns a file-like object with
            # close(); previously the handle was never released.
            f.close()
        fname = os.path.basename(input_dict['file'])
    else:
        fname = "input_string.txt"
        raw = text.strip()

    # b64encode needs bytes; encoding explicitly avoids the implicit ASCII
    # conversion that fails on non-ASCII text.
    if not isinstance(raw, bytes):
        raw = raw.encode("utf-8")
    data = base64.b64encode(raw)

    # define web service
    webservice_url = webservices_url + "/parseFile"
    params = {"filename": fname, "text": data}  # set params

    # call web service
    resp = requests.post(webservice_url, params=params)
    content = json.loads(resp.content)[u'parseFileResponse'][u'parseFileResult']

    if content[u"error"] != "":
        raise Exception(content[u"error"])
    return {'corpus': content[u"resp"]}
def load_tagged_corpus(input_dict):
    """
    Loads a TEI file (the output of totrtale) and returns its content.

    Reads the file named by ``input_dict['file']`` and returns
    ``{'annotations': <file content>}``.
    """
    f = safeOpen(input_dict['file'])
    try:
        data = f.read()
    finally:
        # NOTE(review): assumes safeOpen returns a file-like object with
        # close(); previously the handle was never released.
        f.close()
    return {'annotations': data}
def nlp_totrtale2(input_dict):
    '''
    Calls the totrtale web service and returns its annotations.

    Reads ``corpus``, ``lang``, ``xml``, ``postprocess``, ``bohoricica`` and
    ``antique`` from ``input_dict`` and posts them to ``/runToTrTaLe``.

    Returns ``{'annotations': ...}``.  When the answer carries no ``resp``
    entry (for example a plain error message), ``Exception`` is raised with
    that answer instead of failing on the missing key.
    '''
    import time

    # define web service
    webservice_url = webservices_url + "/runToTrTaLe"
    params = {
        "text": input_dict['corpus'],
        "language": input_dict['lang'],
        "postProcessing": input_dict['postprocess'],
        "bohoricica": input_dict['bohoricica'],
        "antique": input_dict['antique'],
        "outputAsXML": input_dict['xml'],
    }

    start = time.time()
    response = requests.post(webservice_url, params=params)
    content = json.loads(response.content)
    if u'runToTrTaLeResponse' in content:
        content = content[u'runToTrTaLeResponse'][u'runToTrTaLeResult']
    else:
        content = content[u"error"]
    end = time.time()
    # Single-argument form prints the same text under Python 2 and 3.
    print("ToTrTale execution time was  %s" % (end - start))

    if not isinstance(content, dict) or u'resp' not in content:
        # Surface the service's own error rather than a TypeError/KeyError.
        raise Exception(content)
    return {'annotations': content[u'resp']}
def nlp_totrtale(input_dict):
'''
......@@ -149,10 +80,6 @@ def nlp_term_extraction(input_dict):
annotations = input_dict['annotations']
lang = input_dict['lang']
wsdl = input_dict.get('wsdl', 'http://vihar.ijs.si:8095/totale?wsdl')
if '<TEI xmlns="http://www.tei-c.org/ns/1.0">' in annotations:
annotations = XMLtoTEI(annotations)
ws = WebService(wsdl, 60000)
response = ws.client.TermExtraction(corpus=annotations, lang=lang,
threshold=0)
......@@ -166,10 +93,6 @@ def nlp_def_extraction_patterns(input_dict):
annotations = input_dict['annotations']
lang = input_dict['lang']
wsdl = input_dict.get('wsdl', 'http://vihar.ijs.si:8099')
if '<TEI xmlns="http://www.tei-c.org/ns/1.0">' in annotations:
annotations = XMLtoTEI(annotations)
ws = WebService(wsdl, 60000)
pattern = input_dict['pattern']
response = ws.client.GlossaryExtractionByPatterns(corpus=annotations,
......@@ -192,10 +115,6 @@ def nlp_def_extraction_terms(input_dict):
multiword_term = input_dict['multiword_term']
num_multiterms = input_dict['num_multiterms']
term_beginning = input_dict['term_beginning']
if '<TEI xmlns="http://www.tei-c.org/ns/1.0">' in annotations:
annotations = XMLtoTEI(annotations)
ws = WebService(wsdl, 60000)
response = ws.client.GlossaryExtractionByTerms(corpus=annotations,
candidates=term_candidates, lang=lang, nominatives=nominatives,
......@@ -212,36 +131,6 @@ def nlp_def_extraction_wnet(input_dict):
annotations = input_dict['annotations']
lang = input_dict['lang']
wsdl = input_dict.get('wsdl', 'http://vihar.ijs.si:8099')
if '<TEI xmlns="http://www.tei-c.org/ns/1.0">' in annotations:
annotations = XMLtoTEI(annotations)
ws = WebService(wsdl, 60000)
response = ws.client.GlossaryExtractionByWnet(corpus=annotations, lang=lang)
return {'sentences': response['candidates']}
def XMLtoTEI(text):
    """Convert TEI XML, read line by line, into tab-separated token lines.

    Per input line, the first matching rule applies:

    * ``<w lemma=".." ana="..">value</w>`` -> ``value\\tTOK\\tlemma\\tana\\t\\n``
      (only the first ``<w>`` element of the line is used)
    * ``</s>``    -> ``\\t\\t<S/>\\t\\n``
    * ``<pc>v</pc>`` -> ``v\\t\\tPUN_TERM\\t\\n`` for ``.``, else ``v\\t\\tPUN\\t\\n``
    * ``<title>t</title>`` -> ``<TEXT title=t>\\t\\n``
    * ``</body>`` -> ``</TEXT>\\t\\n``

    Other lines, and lines that contain a marker but do not match its full
    pattern, are skipped (they used to raise ``IndexError``).  Byte input is
    decoded as UTF-8 once up front, so the result never mixes byte and text
    strings.
    """
    if isinstance(text, bytes):
        text = text.decode("utf8")

    word_re = re.compile(r'<w lemma="(?P<lemma>.*?)" ana="(?P<ana>.*?)">(?P<value>.*?)</w>')
    title_re = re.compile(r"<title>(.*?)</title>")
    punct_re = re.compile(r"<pc>(.*?)</pc>")

    new_text = []
    for line in text.splitlines():
        if "<w" in line:
            found = word_re.search(line)
            if found is None:
                continue
            new_text.append(
                found.group("value") + "\tTOK\t"
                + found.group("lemma") + "\t"
                + found.group("ana") + "\t\n"
            )
        elif "</s>" in line:
            new_text.append("\t\t<S/>\t\n")
        elif "<pc>" in line:
            found = punct_re.search(line)
            if found is None:
                continue
            value = found.group(1)
            tag = "PUN_TERM" if value == "." else "PUN"
            new_text.append(value + "\t\t" + tag + "\t\n")
        elif "<title>" in line:
            found = title_re.search(line)
            if found is None:
                continue
            new_text.append("<TEXT title=" + found.group(1) + ">\t\n")
        elif "</body>" in line:
            new_text.append("</TEXT>\t\n")
    return "".join(new_text)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment