Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
C
clowdflows
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Service Desk
Milestones
Merge Requests
0
Merge Requests
0
Operations
Operations
Incidents
Analytics
Analytics
Repository
Value Stream
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Commits
Issue Boards
Open sidebar
Alain Shakour
clowdflows
Commits
8f470499
Commit
8f470499
authored
Feb 10, 2015
by
romanorac
Browse files
Options
Browse Files
Download
Plain Diff
bug fixes
parents
f65999d7
c358ad19
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
1491 additions
and
1261 deletions
+1491
-1261
workflows/management/commands/export.py
workflows/management/commands/export.py
+181
-0
workflows/management/commands/import.py
workflows/management/commands/import.py
+159
-0
workflows/models.py
workflows/models.py
+24
-0
workflows/nlp/db/package_data.json
workflows/nlp/db/package_data.json
+1127
-1150
workflows/nlp/library.py
workflows/nlp/library.py
+0
-111
No files found.
workflows/management/commands/export.py
0 → 100755
View file @
8f470499
from
unicodedata
import
category
from
django.core.management.base
import
BaseCommand
,
CommandError
from
workflows.models
import
Category
,
AbstractWidget
,
AbstractInput
,
AbstractOutput
,
AbstractOption
from
django.core
import
serializers
from
optparse
import
make_option
import
uuid
import
os
import
sys
from
django.conf
import
settings
import
json
def add_category(category, categories):
    """Add the primary key of ``category`` and of all its ancestors to ``categories``.

    Args:
        category: object exposing ``pk`` and ``parent`` attributes; ``parent``
            is followed until it is falsy.
        categories: mutable set that receives the collected primary keys.

    The parent chain is walked iteratively and every node is visited at most
    once, so a (corrupt) cyclic parent chain terminates instead of recursing
    until the interpreter's recursion limit is hit.
    """
    visited = set()
    node = category
    while True:
        categories.add(node.pk)
        visited.add(node.pk)
        node = node.parent
        # Stop at the root, or when the chain loops back onto itself.
        if not node or node.pk in visited:
            break
def ensure_dir(directory):
    """Create ``directory`` (including missing parents) unless it already exists.

    The directory is created directly and an "already exists" error is
    ignored, instead of testing for existence first; this avoids failing when
    another process creates the directory between the check and the creation.

    Raises:
        OSError: for any failure other than the path already existing.
    """
    import errno

    try:
        os.makedirs(directory)
    except OSError as exc:
        if exc.errno != errno.EEXIST:
            raise
def choice(choices, question="Your choice: "):
    """Interactively ask the user to pick one element of ``choices``.

    A numbered menu followed by ``question`` is shown; the prompt is repeated
    until the user enters a valid index.

    Args:
        choices: indexable sequence of options; each is shown via ``str()``.
        question: prompt text printed after the menu.

    Returns:
        The selected element of ``choices``.

    Only indices in ``0 .. len(choices) - 1`` are accepted. Negative numbers
    are rejected rather than being interpreted as indexing from the end.
    """
    try:
        read_line = raw_input  # Python 2
    except NameError:
        read_line = input  # Python 3

    menu = "".join(
        "[" + str(index) + "] " + str(option) + "\n"
        for index, option in enumerate(choices)
    )
    while True:
        answer = read_line(menu + question)
        try:
            index = int(answer)
        except ValueError:
            index = -1
        if 0 <= index < len(choices):
            return choices[index]
        sys.stderr.write("Error: Wrong choice.\n")
def _uid_as_text(value):
    """Return ``value`` as a string if it is a ``uuid.UUID``, else unchanged."""
    if isinstance(value, uuid.UUID):
        return str(value)
    return value


def serialize_widget(aw):
    """Serialize an abstract widget and its inputs, outputs and options.

    Every record is produced by Django's JSON serializer and then made
    database-independent: the ``pk`` is dropped and foreign keys are replaced
    by the ``uid`` of the object they point to.

    Args:
        aw: the ``AbstractWidget`` instance to serialize.

    Returns:
        A list of dicts: the widget record first, then its input records, its
        output records and the option records of its inputs.
    """
    # A uid freshly assigned from uuid.uuid4() may still be a UUID object on
    # the instance; convert it so the payload stays JSON-serializable.
    widget_uid = _uid_as_text(aw.uid)

    data = json.loads(serializers.serialize("json", [aw]))[0]
    data.pop('pk', None)
    data['fields'].pop('user', None)
    if data['fields']['category'] is not None:
        data['fields']['category'] = _uid_as_text(aw.category.uid)

    input_data = json.loads(serializers.serialize("json", aw.inputs.all()))
    output_data = json.loads(serializers.serialize("json", aw.outputs.all()))
    for record in input_data + output_data:
        record.pop('pk', None)
        record['fields']['widget'] = widget_uid

    options_data = json.loads(serializers.serialize(
        "json", AbstractOption.objects.filter(abstract_input__widget=aw)))
    for record in options_data:
        record.pop('pk', None)
        owner = AbstractInput.objects.get(id=record['fields']['abstract_input'])
        record['fields']['abstract_input'] = owner.uid

    return [data] + input_data + output_data + options_data
def serialize_category(c):
    """Serialize a category into a database-independent dict.

    The record comes from Django's JSON serializer; its ``pk`` and the
    ``workflow`` and ``user`` fields are removed, and a non-null ``parent``
    is replaced by the parent category's ``uid``.

    Args:
        c: the ``Category`` instance to serialize.

    Returns:
        The serialized record as a dict.
    """
    data = json.loads(serializers.serialize("json", [c]))[0]
    data.pop('pk', None)

    fields = data['fields']
    if fields['parent'] is not None:
        fields['parent'] = Category.objects.get(id=fields['parent']).uid
    fields.pop('workflow', None)
    fields.pop('user', None)
    return data
class Command(BaseCommand):
    """Management command that exports a package's widgets and categories.

    One JSON file per widget and per category is written below
    ``<package>/package_data/``; a file is only rewritten when its content
    differs from the freshly serialized data.
    """

    # Positional argument description and help text shown by Django.
    args = 'package_name'
    help = 'Exports the package "package_name".'

    def _export_json(self, path, payload, kind, label):
        """Write ``payload`` as JSON to ``path`` if it differs from the file.

        Args:
            path: destination file path.
            payload: JSON-serializable data to store.
            kind: noun used in the progress message ("widget" or "category").
            label: display name of the exported object.

        Returns:
            True if the file was written, False if it was already up to date.
        """
        created = True
        try:
            with open(path, 'r') as stream:
                on_disk = json.loads(stream.read())
        except (IOError, ValueError):
            # A missing or unparsable file is treated as not yet exported.
            pass
        else:
            created = False
            if on_disk == payload:
                return False

        verb = "Exporting" if created else "Updating"
        self.stdout.write(" + " + verb + " " + kind + " " + label + "\n")
        serialized = json.dumps(payload, indent=2)
        with open(path, 'w') as stream:
            stream.write(serialized)
        return True

    def handle(self, *args, **options):
        """Export the package named by the first positional argument.

        Raises:
            CommandError: if the argument is missing or the package is not
                listed in ``settings.INSTALLED_APPS``.
        """
        if len(args) < 1:
            raise CommandError('Argument "package_name" is required.')

        package_name = args[0]

        if 'workflows.' + package_name not in settings.INSTALLED_APPS:
            raise CommandError("Package not found in INSTALLED_APPS.")

        # Integrity check: let the user resolve widgets that share a UID.
        aws = AbstractWidget.objects.filter(package=package_name)
        for aw in aws:
            if aw.uid:
                for bw in aws:
                    if bw.uid == aw.uid and bw.id != aw.id:
                        self.stdout.write("Found two widgets with the same UID. Please select a widget to assign new UID to.\n")
                        selected_widget = choice([aw, bw], "Select a widget: ")
                        selected_widget.set_uid(commit=True)

        # Make sure the target directories exist.
        package_directory = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            '../../' + package_name + "/package_data/")
        ensure_dir(package_directory)
        widgets_directory = os.path.join(package_directory, "widgets")
        ensure_dir(widgets_directory)
        categories_directory = os.path.join(package_directory, "categories")
        ensure_dir(categories_directory)

        self.stdout.write(" > Ensuring package directory for " + package_name + ".\n")

        categories = set()

        self.stdout.write(" > Exporting widgets\n")

        global_change = False
        for aw in aws:
            aw.update_uid()
            add_category(aw.category, categories)
            # "%s" formatting also copes with a uid that is still a UUID
            # object rather than text.
            path = os.path.join(widgets_directory, "%s.json" % (aw.uid,))
            if self._export_json(path, serialize_widget(aw), "widget", str(aw)):
                global_change = True

        if not global_change:
            self.stdout.write(" No changes in the widgets detected!\n")

        self.stdout.write(" > Exporting categories\n")

        global_change = False
        for category_id in categories:
            c = Category.objects.get(id=category_id)
            c.update_uid()
            path = os.path.join(categories_directory, "%s.json" % (c.uid,))
            if self._export_json(path, serialize_category(c), "category", str(c)):
                global_change = True

        if not global_change:
            self.stdout.write(" No changes in the categories detected!\n")

        self.stdout.write('Thanks for using the new export command. You rock.\n')
workflows/management/commands/import.py
0 → 100755
View file @
8f470499
from
unicodedata
import
category
from
django.core.management.base
import
BaseCommand
,
CommandError
from
workflows.models
import
Category
,
AbstractWidget
,
AbstractInput
,
AbstractOutput
,
AbstractOption
from
django.core
import
serializers
from
optparse
import
make_option
import
uuid
import
os
import
sys
from
django.conf
import
settings
import
json
from
.export
import
serialize_category
,
serialize_widget
def parsewidgetdata(widget_data):
    """Split the records of one widget file by the model they describe.

    Args:
        widget_data: iterable of dicts, each with a ``'model'`` key.

    Returns:
        A tuple ``(widget, inputs, outputs, options)``: the last widget record
        seen (or ``None``) and the lists of input, output and option records
        in their original order.

    Raises:
        CommandError: if a record names an unexpected model.
    """
    widget = None
    inputs, outputs, options = [], [], []
    routes = (
        ('workflows.abstractinput', inputs),
        ('workflows.abstractoutput', outputs),
        ('workflows.abstractoption', options),
    )
    for record in widget_data:
        model_name = record['model']
        if model_name == 'workflows.abstractwidget':
            widget = record
            continue
        for name, bucket in routes:
            if model_name == name:
                bucket.append(record)
                break
        else:
            raise CommandError("Wrong data in widget files!")
    return widget, inputs, outputs, options
class Command(BaseCommand):
    """Management command that imports a previously exported package.

    Categories and widgets are read from the JSON files below
    ``<package>/package_data/`` and matched to database rows by ``uid``;
    rows are created when missing and updated when the stored data differs.
    """

    # Positional argument description and help text shown by Django.
    args = 'package_name'
    help = 'Imports the package "package_name".'

    @staticmethod
    def _json_files(directory):
        """Return the sorted ``*.json`` file names found in ``directory``."""
        return sorted(
            name for name in os.listdir(directory) if name.endswith('.json'))

    @staticmethod
    def _read_json(path):
        """Parse and return the JSON document stored at ``path``."""
        with open(path, 'r') as stream:
            return json.loads(stream.read())

    @staticmethod
    def _get_or_new(model, uid, **extra):
        """Return ``(instance, created)`` for ``uid``; new instances are unsaved."""
        try:
            return model.objects.get(uid=uid, **extra), False
        except model.DoesNotExist:
            return model(uid=uid, **extra), True

    @staticmethod
    def _copy_fields(instance, fields, skip):
        """Set every entry of ``fields`` except ``skip`` as an attribute."""
        for name in fields:
            if name != skip:
                setattr(instance, name, fields[name])

    def _import_category(self, c_data):
        """Create or update one category; return True if anything changed."""
        fields = c_data['fields']
        c, created = self._get_or_new(Category, fields['uid'])
        if serialize_category(c) == c_data:
            return False

        verb = 'Creating' if created else 'Updating'
        # %-formatting instead of str(): names may contain non-ASCII text.
        self.stdout.write(' + %s category %s\n' % (verb, fields['name']))

        self._copy_fields(c, fields, 'parent')
        if 'parent' in fields:
            parent = None
            if fields['parent'] is not None:
                parent, missing = self._get_or_new(Category, fields['parent'])
                if missing:
                    # Placeholder row so the relation can be stored; it is
                    # given this fixed name and saved immediately.
                    parent.name = "Temporary category name"
                    parent.save()
            c.parent = parent
        c.save()
        return True

    def _import_widget(self, w_data, package_name):
        """Create or update one widget with its inputs, outputs and options.

        Returns True if anything changed.
        """
        widget, inputs, outputs, options = parsewidgetdata(w_data)
        if widget is None:
            raise CommandError("Wrong data in widget files!")

        fields = widget['fields']
        aw, created = self._get_or_new(
            AbstractWidget, fields['uid'], package=package_name)
        if w_data == serialize_widget(aw):
            return False

        verb = 'Creating' if created else 'Updating'
        self.stdout.write(' + %s widget %s\n' % (verb, fields['name']))

        self._copy_fields(aw, fields, 'category')
        if 'category' in fields:
            category_uid = fields['category']
            if category_uid is None:
                aw.category = None
            else:
                aw.category = Category.objects.get(uid=category_uid)
        aw.save()

        # Inputs and outputs are both attached to the widget just saved.
        for model, records in ((AbstractInput, inputs), (AbstractOutput, outputs)):
            for record in records:
                child, _ = self._get_or_new(model, record['fields']['uid'])
                self._copy_fields(child, record['fields'], 'widget')
                child.widget = aw
                child.save()

        for record in options:
            option_fields = record['fields']
            o, _ = self._get_or_new(AbstractOption, option_fields['uid'])
            self._copy_fields(o, option_fields, 'abstract_input')
            if 'abstract_input' in option_fields:
                o.abstract_input = AbstractInput.objects.get(
                    uid=option_fields['abstract_input'])
            o.save()
        return True

    def handle(self, *args, **options):
        """Import the package named by the first positional argument.

        Raises:
            CommandError: if the argument is missing, the package is not in
                ``settings.INSTALLED_APPS``, the exported data directories do
                not exist, or a widget file contains unexpected records.
        """
        if len(args) < 1:
            raise CommandError('Argument "package_name" is required.')

        package_name = args[0]

        if 'workflows.' + package_name not in settings.INSTALLED_APPS:
            raise CommandError("Package not found in INSTALLED_APPS.")

        package_directory = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            '../../' + package_name + "/package_data/")
        widgets_directory = os.path.join(package_directory, "widgets")
        categories_directory = os.path.join(package_directory, "categories")

        required = (package_directory, widgets_directory, categories_directory)
        if not all(os.path.exists(path) for path in required):
            raise CommandError("Cannot find package data. Are you sure this package has been exported already?")

        widget_files = self._json_files(widgets_directory)
        category_files = self._json_files(categories_directory)

        self.stdout.write(' > Importing categories\n')
        global_change = False
        for category_file in category_files:
            c_data = self._read_json(
                os.path.join(categories_directory, category_file))
            if self._import_category(c_data):
                global_change = True
        if not global_change:
            self.stdout.write(" No changes detected in the categories.\n")

        self.stdout.write(' > Importing widgets\n')
        global_change = False
        for widget_file in widget_files:
            w_data = self._read_json(
                os.path.join(widgets_directory, widget_file))
            if self._import_widget(w_data, package_name):
                global_change = True
        if not global_change:
            self.stdout.write(" No changes detected in the widgets.\n")

        self.stdout.write('Thanks for using the new import command. You rock.\n')
workflows/models.py
View file @
8f470499
...
@@ -38,6 +38,14 @@ class Category(models.Model):
...
@@ -38,6 +38,14 @@ class Category(models.Model):
uid
=
models
.
CharField
(
max_length
=
250
,
blank
=
True
,
default
=
''
)
uid
=
models
.
CharField
(
max_length
=
250
,
blank
=
True
,
default
=
''
)
def update_uid(self):
    """Assign a random UUID string to this category if it has no ``uid``.

    The instance is saved when a new uid is assigned. The same update is then
    requested from the parent category, if there is one.
    """
    import uuid

    if self.uid == '' or self.uid is None:
        # Store text rather than a UUID object, so the attribute can be
        # concatenated and JSON-serialized like a uid loaded from the database.
        self.uid = str(uuid.uuid4())
        self.save()
    if self.parent:
        self.parent.update_uid()
class
Meta
:
class
Meta
:
verbose_name_plural
=
"categories"
verbose_name_plural
=
"categories"
ordering
=
(
'order'
,
'name'
,)
ordering
=
(
'order'
,
'name'
,)
...
@@ -429,6 +437,22 @@ class AbstractWidget(models.Model):
...
@@ -429,6 +437,22 @@ class AbstractWidget(models.Model):
if
commit
:
if
commit
:
o
.
save
()
o
.
save
()
def update_uid(self):
    """Assign random UUID strings to this widget and its parts where missing.

    Covers the widget itself, its inputs, the options of its inputs and its
    outputs; every object that receives a uid is saved. Finally the widget's
    category (if any) is asked to update its own uid.
    """
    import uuid

    if self.uid == '' or self.uid is None:
        # Store text rather than a UUID object.
        self.uid = str(uuid.uuid4())
        self.save()
    # Visit every input, not only those lacking a uid, so that options
    # without a uid are always reached.
    for abstract_input in self.inputs.all():
        if not abstract_input.uid:
            abstract_input.uid = str(uuid.uuid4())
            abstract_input.save()
        for option in abstract_input.options.filter(uid=''):
            option.uid = str(uuid.uuid4())
            option.save()
    for output in self.outputs.filter(uid=''):
        output.uid = str(uuid.uuid4())
        output.save()
    if self.category is not None:
        self.category.update_uid()
def
__unicode__
(
self
):
def
__unicode__
(
self
):
return
unicode
(
self
.
name
)
return
unicode
(
self
.
name
)
...
...
workflows/nlp/db/package_data.json
View file @
8f470499
This diff is collapsed.
Click to expand it.
workflows/nlp/library.py
View file @
8f470499
...
@@ -3,12 +3,7 @@ import os.path
...
@@ -3,12 +3,7 @@ import os.path
import
base64
import
base64
from
services.webservice
import
WebService
from
services.webservice
import
WebService
from
workflows.security
import
safeOpen
from
workflows.security
import
safeOpen
import
requests
import
json
import
re
import
itertools
webservices_url
=
"http://vihar.ijs.si:8104"
def
merge_sentences
(
input_dict
):
def
merge_sentences
(
input_dict
):
"""
"""
...
@@ -52,70 +47,6 @@ def load_corpus(input_dict):
...
@@ -52,70 +47,6 @@ def load_corpus(input_dict):
response
=
ws
.
client
.
parseFile
(
fileName
=
fname
,
inFile
=
data
)
response
=
ws
.
client
.
parseFile
(
fileName
=
fname
,
inFile
=
data
)
return
{
'corpus'
:
response
[
'parsedFile'
]}
return
{
'corpus'
:
response
[
'parsedFile'
]}
def load_corpus2(input_dict):
    '''
    Parses an input file and encodes it in base 64.

    If ``input_dict["text"]`` is empty, the file named by
    ``input_dict["file"]`` is read; otherwise the stripped text is used. The
    base64 payload is posted to the ``/parseFile`` web service.

    Returns a dict with the parsed corpus under ``'corpus'``.
    Raises ``Exception`` carrying the service's error message when the
    response reports an error.
    '''
    if input_dict[u"text"] == "":
        f = safeOpen(input_dict['file'])
        # assumes the object returned by safeOpen is file-like and closable
        try:
            raw = f.read()
        finally:
            f.close()
        fname = os.path.basename(input_dict['file'])
        data = base64.b64encode(raw)
    else:
        fname = "input_string.txt"
        data = base64.b64encode(input_dict[u"text"].strip())

    # define web service
    webservice_url = webservices_url + "/parseFile"
    params = {"filename": fname, "text": data}

    # call web service
    resp = requests.post(webservice_url, params=params)
    content = json.loads(resp.content)[u'parseFileResponse'][u'parseFileResult']

    if content[u"error"] != "":
        raise Exception(content[u"error"])
    return {'corpus': content[u"resp"]}
def load_tagged_corpus(input_dict):
    """
    Loads TEI file, which is output of totrtale

    Reads the whole file named by ``input_dict['file']`` and returns its
    content under the ``'annotations'`` key.
    """
    f = safeOpen(input_dict['file'])
    # assumes the object returned by safeOpen is file-like and closable
    try:
        data = f.read()
    finally:
        f.close()
    return {'annotations': data}
def nlp_totrtale2(input_dict):
    '''
    Calls the totrtale web service.

    Reads ``corpus``, ``lang``, ``xml``, ``postprocess``, ``bohoricica`` and
    ``antique`` from ``input_dict``, posts them to ``/runToTrTaLe`` and prints
    the elapsed time.

    Returns a dict with the service result under ``'annotations'``.
    Raises ``Exception`` carrying the service's error payload when the
    response has no ``runToTrTaLeResponse`` entry.
    '''
    import time

    # define web service
    webservice_url = webservices_url + "/runToTrTaLe"
    params = {
        "text": input_dict['corpus'],
        "language": input_dict['lang'],
        "postProcessing": input_dict['postprocess'],
        "bohoricica": input_dict['bohoricica'],
        "antique": input_dict['antique'],
        "outputAsXML": input_dict['xml'],
    }

    start = time.time()
    response = requests.post(webservice_url, params=params)
    content = json.loads(response.content)
    end = time.time()

    if u'runToTrTaLeResponse' not in content:
        # Report the failure itself instead of indexing the error payload
        # with 'resp' below.
        raise Exception(content[u"error"])
    result = content[u'runToTrTaLeResponse'][u'runToTrTaLeResult']

    # Single-argument form prints the same text under Python 2 and 3.
    print("ToTrTale execution time was  %s" % (end - start))
    return {'annotations': result[u'resp']}
def
nlp_totrtale
(
input_dict
):
def
nlp_totrtale
(
input_dict
):
'''
'''
...
@@ -149,10 +80,6 @@ def nlp_term_extraction(input_dict):
...
@@ -149,10 +80,6 @@ def nlp_term_extraction(input_dict):
annotations
=
input_dict
[
'annotations'
]
annotations
=
input_dict
[
'annotations'
]
lang
=
input_dict
[
'lang'
]
lang
=
input_dict
[
'lang'
]
wsdl
=
input_dict
.
get
(
'wsdl'
,
'http://vihar.ijs.si:8095/totale?wsdl'
)
wsdl
=
input_dict
.
get
(
'wsdl'
,
'http://vihar.ijs.si:8095/totale?wsdl'
)
if
'<TEI xmlns="http://www.tei-c.org/ns/1.0">'
in
annotations
:
annotations
=
XMLtoTEI
(
annotations
)
ws
=
WebService
(
wsdl
,
60000
)
ws
=
WebService
(
wsdl
,
60000
)
response
=
ws
.
client
.
TermExtraction
(
corpus
=
annotations
,
lang
=
lang
,
response
=
ws
.
client
.
TermExtraction
(
corpus
=
annotations
,
lang
=
lang
,
threshold
=
0
)
threshold
=
0
)
...
@@ -166,10 +93,6 @@ def nlp_def_extraction_patterns(input_dict):
...
@@ -166,10 +93,6 @@ def nlp_def_extraction_patterns(input_dict):
annotations
=
input_dict
[
'annotations'
]
annotations
=
input_dict
[
'annotations'
]
lang
=
input_dict
[
'lang'
]
lang
=
input_dict
[
'lang'
]
wsdl
=
input_dict
.
get
(
'wsdl'
,
'http://vihar.ijs.si:8099'
)
wsdl
=
input_dict
.
get
(
'wsdl'
,
'http://vihar.ijs.si:8099'
)
if
'<TEI xmlns="http://www.tei-c.org/ns/1.0">'
in
annotations
:
annotations
=
XMLtoTEI
(
annotations
)
ws
=
WebService
(
wsdl
,
60000
)
ws
=
WebService
(
wsdl
,
60000
)
pattern
=
input_dict
[
'pattern'
]
pattern
=
input_dict
[
'pattern'
]
response
=
ws
.
client
.
GlossaryExtractionByPatterns
(
corpus
=
annotations
,
response
=
ws
.
client
.
GlossaryExtractionByPatterns
(
corpus
=
annotations
,
...
@@ -192,10 +115,6 @@ def nlp_def_extraction_terms(input_dict):
...
@@ -192,10 +115,6 @@ def nlp_def_extraction_terms(input_dict):
multiword_term
=
input_dict
[
'multiword_term'
]
multiword_term
=
input_dict
[
'multiword_term'
]
num_multiterms
=
input_dict
[
'num_multiterms'
]
num_multiterms
=
input_dict
[
'num_multiterms'
]
term_beginning
=
input_dict
[
'term_beginning'
]
term_beginning
=
input_dict
[
'term_beginning'
]
if
'<TEI xmlns="http://www.tei-c.org/ns/1.0">'
in
annotations
:
annotations
=
XMLtoTEI
(
annotations
)
ws
=
WebService
(
wsdl
,
60000
)
ws
=
WebService
(
wsdl
,
60000
)
response
=
ws
.
client
.
GlossaryExtractionByTerms
(
corpus
=
annotations
,
response
=
ws
.
client
.
GlossaryExtractionByTerms
(
corpus
=
annotations
,
candidates
=
term_candidates
,
lang
=
lang
,
nominatives
=
nominatives
,
candidates
=
term_candidates
,
lang
=
lang
,
nominatives
=
nominatives
,
...
@@ -212,36 +131,6 @@ def nlp_def_extraction_wnet(input_dict):
...
@@ -212,36 +131,6 @@ def nlp_def_extraction_wnet(input_dict):
annotations
=
input_dict
[
'annotations'
]
annotations
=
input_dict
[
'annotations'
]
lang
=
input_dict
[
'lang'
]
lang
=
input_dict
[
'lang'
]
wsdl
=
input_dict
.
get
(
'wsdl'
,
'http://vihar.ijs.si:8099'
)
wsdl
=
input_dict
.
get
(
'wsdl'
,
'http://vihar.ijs.si:8099'
)
if
'<TEI xmlns="http://www.tei-c.org/ns/1.0">'
in
annotations
:
annotations
=
XMLtoTEI
(
annotations
)
ws
=
WebService
(
wsdl
,
60000
)
ws
=
WebService
(
wsdl
,
60000
)
response
=
ws
.
client
.
GlossaryExtractionByWnet
(
corpus
=
annotations
,
lang
=
lang
)
response
=
ws
.
client
.
GlossaryExtractionByWnet
(
corpus
=
annotations
,
lang
=
lang
)
return
{
'sentences'
:
response
[
'candidates'
]}
return
{
'sentences'
:
response
[
'candidates'
]}
def XMLtoTEI(text):
    """Convert TEI XML annotations to the tab-separated token format.

    The input is processed line by line; for each line the first matching
    rule applies:

    * ``<w lemma=".." ana="..">value</w>`` -> ``value\\tTOK\\tlemma\\tana\\t\\n``
      (only the first ``<w>`` element of a line is used)
    * ``</s>``                -> ``\\t\\t<S/>\\t\\n``
    * ``<pc>value</pc>``      -> ``value\\t\\tPUN_TERM\\t\\n`` for ``.``,
      otherwise ``value\\t\\tPUN\\t\\n``
    * ``<title>value</title>`` -> ``<TEXT title=value>\\t\\n``
    * ``</body>``             -> ``</TEXT>\\t\\n``

    Lines that contain a marker but not the complete element are skipped
    instead of raising ``IndexError``.

    Args:
        text: the XML document as a string.

    Returns:
        The converted text as one string.
    """
    mask1 = ["\tTOK\t", "\t", "\t\n"]
    word_re = re.compile(
        r'<w lemma="(?P<lemma>.*?)"\s+ana="(?P<ana>.*?)">(?P<value>.*?)</w>')
    title_re = re.compile(r"<title>(.*?)</title>")
    punct_re = re.compile(r"<pc>(.*?)</pc>")

    newText = []
    for line in text.splitlines():
        if "<w" in line:
            found = word_re.search(line)
            if found is None:
                continue
            parts = found.group("value", "lemma", "ana")
            row = ''.join(itertools.chain.from_iterable(zip(parts, mask1)))
            # Byte strings are decoded; text is already in its final form.
            if isinstance(row, bytes):
                row = row.decode("utf8")
            newText.append(row)
        elif "</s>" in line:
            newText.append("\t\t<S/>\t\n")
        elif "<pc>" in line:
            found = punct_re.search(line)
            if found is None:
                continue
            value = found.group(1)
            tag = "PUN_TERM" if value == "." else "PUN"
            newText.append(value + "\t\t" + tag + "\t\n")
        elif "<title>" in line:
            found = title_re.search(line)
            if found is None:
                continue
            newText.append("<TEXT title=" + found.group(1) + ">\t\n")
        elif "</body>" in line:
            newText.append("</TEXT>\t\n")
    return "".join(newText)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment