Commit ae3ebb71 authored by matjaz's avatar matjaz
Browse files

Major upgrade of lemmagen - first fully functional & approx. stable version of lemmagen package.

parent 059260f8
This diff is collapsed.
......@@ -5,6 +5,62 @@
from import_dotnet import *
from serialization_utils import *
def lemmagen_load_example_list_from_string(inputDict):
    """Widget wrapper: parse a delimited examples string into an example list.

    inputDict keys: 'tabDelim' (examples text, presumably tab-delimited —
    confirm against caller) and 'format' (column format spec).
    Returns {'exampleList': Python-wrapped example list}.
    """
    examples_text = ToString(inputDict['tabDelim'])
    fmt = ToString(inputDict['format'])
    example_list = LemmaSharpIntf.LoadExampleListFromString(examples_text, fmt)
    return {'exampleList': ToPyObj(example_list)}
def lemmagen_load_example_list_from_table(inputDict):
    """Widget wrapper: build an example list from a table object.

    inputDict keys: 'table' (table object) and 'format' (column format spec).
    Returns {'exampleList': Python-wrapped example list}.
    """
    table = ToNetObj(inputDict['table'])
    fmt = ToString(inputDict['format'])
    example_list = LemmaSharpIntf.LoadExampleListFromTable(table, fmt)
    return {'exampleList': ToPyObj(example_list)}
def lemmagen_example_list_to_table(inputDict):
    """Widget wrapper: convert an example list back into a table object.

    inputDict keys: 'exampleList'.  Returns {'table': Python-wrapped table}.
    """
    example_list = ToNetObj(inputDict['exampleList'])
    table = LemmaSharpIntf.ExampleListToTable(example_list)
    return {'table': ToPyObj(table)}
def lemmagen_group_examples(inputDict):
    """Widget wrapper: group examples in a list.

    inputDict keys: 'exampleList', 'ignoreFrequencies' (bool) and
    'msdConsider' (MsdConsideration enum value; defaults to Distinct).
    Returns {'exampleList': Python-wrapped grouped example list}.
    """
    example_list = ToNetObj(inputDict['exampleList'])
    ignore_frequencies = ToBool(inputDict['ignoreFrequencies'])
    msd_consider = ToEnum(
        LemmaSharp.LemmatizerSettings.MsdConsideration,
        inputDict['msdConsider'],
        LemmaSharp.LemmatizerSettings.MsdConsideration.Distinct)
    grouped = LemmaSharpIntf.GroupExamples(example_list, ignore_frequencies, msd_consider)
    return {'exampleList': ToPyObj(grouped)}
def lemmagen_construct_lemmatizer_settings(inputDict):
    """Widget wrapper: build a LemmatizerSettings object from widget inputs.

    inputDict keys: 'useFromInRules', 'buildFrontLemmatizer',
    'storeAllFullKnownWords' (bools), 'maxRulesPerNode' (int) and
    'msdConsider' (MsdConsideration enum value; defaults to Distinct).
    Returns {'lemmatizerSettings': Python-wrapped settings object}.
    """
    use_from_in_rules = ToBool(inputDict['useFromInRules'])
    msd_consider = ToEnum(
        LemmaSharp.LemmatizerSettings.MsdConsideration,
        inputDict['msdConsider'],
        LemmaSharp.LemmatizerSettings.MsdConsideration.Distinct)
    max_rules_per_node = ToInt(inputDict['maxRulesPerNode'])
    build_front_lemmatizer = ToBool(inputDict['buildFrontLemmatizer'])
    store_all_full_known_words = ToBool(inputDict['storeAllFullKnownWords'])
    settings = LemmaSharpIntf.ConstructLemmatizerSettings(
        use_from_in_rules, msd_consider, max_rules_per_node,
        build_front_lemmatizer, store_all_full_known_words)
    return {'lemmatizerSettings': ToPyObj(settings)}
def lemmagen_extract_lemmatizer_settings(inputDict):
    """Widget wrapper: pull the settings object out of an existing lemmatizer.

    inputDict keys: 'lmtz' (lemmatizer object).
    Returns {'lemmatizerSettings': Python-wrapped settings object}.
    """
    lemmatizer = ToNetObj(inputDict['lmtz'])
    settings = LemmaSharpIntf.ExtractLemmatizerSettings(lemmatizer)
    return {'lemmatizerSettings': ToPyObj(settings)}
def lemmagen_construct_prebuild_lemmatizer(inputDict):
_language = ToEnum(LemmaSharp.LanguagePrebuilt, inputDict['language'], LemmaSharp.LanguagePrebuilt.English)
execResult = LemmaSharpIntf.ConstructPrebuildLemmatizer(_language)
......@@ -14,27 +70,142 @@ def lemmagen_construct_prebuild_lemmatizer(inputDict):
return outputDict
def lemmagen_construct_lemmatizer(inputDict):
    """Widget wrapper: train a lemmatizer from settings and an example list.

    inputDict keys: 'lemmatizerSettings' (settings object, see
    lemmagen_construct_lemmatizer_settings) and 'exampleList' (training
    examples).  Returns {'lemmatizer': Python-wrapped trained lemmatizer}.

    NOTE(review): the original body contained leftover lines from a merged
    diff — the pre-upgrade variant read the individual setting flags and
    made a first ConstructLemmatizer(...) call whose result was immediately
    overwritten; that call also referenced the invalid Python name
    `LemmaSharp.LemmatizerSettings+MsdConsideration` (C# nested-type
    syntax).  Only the post-upgrade 2-argument form is kept here.
    """
    _lemmatizerSettings = ToNetObj(inputDict['lemmatizerSettings'])
    _exampleList = ToNetObj(inputDict['exampleList'])
    execResult = LemmaSharpIntf.ConstructLemmatizer(_lemmatizerSettings, _exampleList)
    execResultPy = ToPyObj(execResult)
    outputDict = {}
    outputDict['lemmatizer'] = execResultPy
    return outputDict
def lemmagen_display_lemmatization_rules(inputDict):
    """Widget wrapper: render the lemmatizer's rule tree for display.

    inputDict keys: 'lmtz' (lemmatizer object).
    Returns {'lmtzTree': Python-wrapped rule-tree representation}.
    """
    lemmatizer = ToNetObj(inputDict['lmtz'])
    rule_tree = LemmaSharpIntf.DisplayLemmatizationRules(lemmatizer)
    return {'lmtzTree': ToPyObj(rule_tree)}
def lemmagen_lemmatize_words(inputDict):
    """Widget wrapper: lemmatize a list of words with a trained lemmatizer.

    inputDict keys: 'lemmatizer', 'words' (word list), 'leaveWord' and
    'ignoreCase' (bools), 'msd' (MSD tag string).
    Returns {'lemmas': Python-wrapped lemma list}.

    NOTE(review): the original body kept a leftover pre-upgrade 3-argument
    LemmatizeWords(...) call whose result was immediately overwritten by
    the 5-argument call; the dead call is removed here.
    """
    _lemmatizer = ToNetObj(inputDict['lemmatizer'])
    _words = ToNetObj(inputDict['words'])
    _leaveWord = ToBool(inputDict['leaveWord'])
    _ignoreCase = ToBool(inputDict['ignoreCase'])
    _msd = ToString(inputDict['msd'])
    execResult = LemmaSharpIntf.LemmatizeWords(_lemmatizer, _words, _leaveWord, _ignoreCase, _msd)
    execResultPy = ToPyObj(execResult)
    outputDict = {}
    outputDict['lemmas'] = execResultPy
    return outputDict
def lemmagen_lemmatize_explain_words(inputDict):
    """Widget wrapper: lemmatize words and return per-word explanations.

    inputDict keys: 'lemmatizer', 'words' (word list), 'ignoreCase' (bool)
    and 'msd' (MSD tag string).
    Returns {'explanations': Python-wrapped explanation list}.
    """
    lemmatizer = ToNetObj(inputDict['lemmatizer'])
    words = ToNetObj(inputDict['words'])
    ignore_case = ToBool(inputDict['ignoreCase'])
    msd = ToString(inputDict['msd'])
    explanations = LemmaSharpIntf.LemmatizeExplainWords(lemmatizer, words, ignore_case, msd)
    return {'explanations': ToPyObj(explanations)}
def lemmagen_delimited_file2_table(inputDict):
    """Widget wrapper: parse a delimited file into a table object.

    inputDict keys: 'file' (path or file reference — confirm against
    caller), 'delimiter', 'headerLine' (strings), 'firstLineIsHeader' and
    'skipEmptyLines' (bools).  Returns {'table': Python-wrapped table}.
    """
    source_file = ToString(inputDict['file'])
    delimiter = ToString(inputDict['delimiter'])
    first_line_is_header = ToBool(inputDict['firstLineIsHeader'])
    header_line = ToString(inputDict['headerLine'])
    skip_empty_lines = ToBool(inputDict['skipEmptyLines'])
    table = LemmaSharpIntf.DelimitedFile2Table(
        source_file, delimiter, first_line_is_header, header_line, skip_empty_lines)
    return {'table': ToPyObj(table)}
def lemmagen_delimited_string2_table(inputDict):
    """Widget wrapper: parse a delimited text string into a table object.

    inputDict keys: 'examplesText', 'delimiter', 'headerLine' (strings),
    'firstLineIsHeader' and 'skipEmptyLines' (bools).
    Returns {'table': Python-wrapped table}.
    """
    examples_text = ToString(inputDict['examplesText'])
    delimiter = ToString(inputDict['delimiter'])
    first_line_is_header = ToBool(inputDict['firstLineIsHeader'])
    header_line = ToString(inputDict['headerLine'])
    skip_empty_lines = ToBool(inputDict['skipEmptyLines'])
    table = LemmaSharpIntf.DelimitedString2Table(
        examples_text, delimiter, first_line_is_header, header_line, skip_empty_lines)
    return {'table': ToPyObj(table)}
def lemmagen_filter_table_rows(inputDict):
    """Widget wrapper: split table rows into selected / filtered-out sets.

    inputDict keys: 'table', 'indexList' (row indexes to select) and
    'discardFilteredOut' (bool).
    Returns {'tableSelected': ..., 'tableFiltered': ...}.
    """
    table = ToNetObj(inputDict['table'])
    index_list = ToNetObj(inputDict['indexList'])
    discard_filtered_out = ToBool(inputDict['discardFilteredOut'])
    result = ToPyObj(LemmaSharpIntf.FilterTableRows(table, index_list, discard_filtered_out))
    return {
        'tableSelected': result['tableSelected'],
        'tableFiltered': result['tableFiltered'],
    }
def lemmagen_extract_column_as_list(inputDict):
    """Widget wrapper: pull one column out of a table.

    inputDict keys: 'table' and 'columnIndex' (int).
    Returns {'columnValues': ..., 'columnName': ...}.
    """
    table = ToNetObj(inputDict['table'])
    column_index = ToInt(inputDict['columnIndex'])
    result = ToPyObj(LemmaSharpIntf.ExtractColumnAsList(table, column_index))
    return {
        'columnValues': result['columnValues'],
        'columnName': result['columnName'],
    }
def lemmagen_insert_list_as_column(inputDict):
    """Widget wrapper: insert a value list into a table as a named column.

    inputDict keys: 'table', 'columnIndex' (int), 'columnValues' (list) and
    'columnName' (string).  Returns {'table': Python-wrapped table}.
    """
    table = ToNetObj(inputDict['table'])
    column_index = ToInt(inputDict['columnIndex'])
    column_values = ToNetObj(inputDict['columnValues'])
    column_name = ToString(inputDict['columnName'])
    new_table = LemmaSharpIntf.InsertListAsColumn(table, column_index, column_values, column_name)
    return {'table': ToPyObj(new_table)}
def lemmagen_table2_string_delimited(inputDict):
    """Widget wrapper: serialize a table into delimited text.

    inputDict keys: 'table', 'delimiter' (string) and 'outputHeader' (bool).
    Returns {'string': Python-wrapped delimited text}.
    """
    table = ToNetObj(inputDict['table'])
    delimiter = ToString(inputDict['delimiter'])
    output_header = ToBool(inputDict['outputHeader'])
    text = LemmaSharpIntf.Table2StringDelimited(table, delimiter, output_header)
    return {'string': ToPyObj(text)}
def lemmagen_random_cross_validation_sets(inputDict):
    """Widget wrapper: assign examples to cross-validation folds.

    inputDict keys: 'numOfSets', 'numOfExamples', 'randomSeed' (ints),
    'random', 'useSeed' (bools) and 'examples'.
    Returns {'exampleSetId': Python-wrapped per-example fold assignment}.
    """
    num_of_sets = ToInt(inputDict['numOfSets'])
    num_of_examples = ToInt(inputDict['numOfExamples'])
    randomize = ToBool(inputDict['random'])
    use_seed = ToBool(inputDict['useSeed'])
    random_seed = ToInt(inputDict['randomSeed'])
    examples = ToNetObj(inputDict['examples'])
    set_ids = LemmaSharpIntf.RandomCrossValidationSets(
        num_of_sets, num_of_examples, randomize, use_seed, random_seed, examples)
    return {'exampleSetId': ToPyObj(set_ids)}
def lemmagen_get_set_indexes(inputDict):
    """Widget wrapper: list the element indexes belonging to one fold.

    inputDict keys: 'selectedSetId' (int), 'setList' (per-element fold ids)
    and 'opposite' (bool; presumably inverts the selection — confirm
    against LemmaSharpIntf).  Returns {'setIndexes': Python-wrapped list}.
    """
    selected_set_id = ToInt(inputDict['selectedSetId'])
    set_list = ToNetObj(inputDict['setList'])
    opposite = ToBool(inputDict['opposite'])
    indexes = LemmaSharpIntf.GetSetIndexes(selected_set_id, set_list, opposite)
    return {'setIndexes': ToPyObj(indexes)}
def lemmagen_filter_list_elements(inputDict):
    """Widget wrapper: split list elements into selected / filtered-out sets.

    inputDict keys: 'list', 'indexList' (element indexes to select) and
    'discardFilteredOut' (bool).
    Returns {'listSelected': ..., 'listFiltered': ...}.
    """
    source_list = ToNetObj(inputDict['list'])
    index_list = ToNetObj(inputDict['indexList'])
    discard_filtered_out = ToBool(inputDict['discardFilteredOut'])
    result = ToPyObj(LemmaSharpIntf.FilterListElements(source_list, index_list, discard_filtered_out))
    return {
        'listSelected': result['listSelected'],
        'listFiltered': result['listFiltered'],
    }
......@@ -3,30 +3,6 @@ import time
import logging
logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
#------------------------------------------------------------------------------
# Generic interfaces for pickling .net or standard objects
#------------------------------------------------------------------------------
#check if .net object and wrap it accordingly
def GetBaseOrLSO(obj):
    """Wrap a .NET object for pickling, or return it unchanged.

    If obj looks like a .NET object (exposes GetType) and is assignable to
    System.Object, try to wrap it as Latino.ISerializable inside an LSO;
    on any failure fall back to returning obj as-is (best-effort wrap).
    """
    # Guard clauses replace the original nested if/else pyramid.
    if not hasattr(obj, "GetType"):
        return obj
    if not System.Object().GetType().IsAssignableFrom(obj.GetType()):
        return obj
    try:
        return LSO(Latino.ISerializable(obj))
    except Exception:
        # Narrowed from a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit; the wrap remains best-effort.
        return obj
#------------------------------------------------------------------------------
# Generic interfaces for pickling .net objects
#------------------------------------------------------------------------------
......@@ -58,21 +34,21 @@ class SerializableObject:
class LatinoSerializableObject(SerializableObject):
    """Pickle support for wrapped .NET objects via the LemmaSharpPy serializer.

    NOTE(review): the original body contained merged diff lines — each
    logging.info call was present both active and commented out, and the
    time.clock() timing existed only to feed those log lines.  The dead
    timing is removed here (time.clock is also gone in Python >= 3.8);
    re-add timing around Save/Load if profiling is needed.
    """

    def __getstate__(self):
        """Serialize the wrapped .NET object to a byte blob for pickling."""
        byteData = LemmaSharpPy.Save(self.netObj)
        return {'byteData': byteData}

    def __setstate__(self, state):
        """Restore the wrapped .NET object from the pickled byte blob."""
        self.netObj = LemmaSharpPy.Load(state['byteData'])
        # copyAttributes is inherited from SerializableObject; it mirrors
        # the .NET object's attributes onto this wrapper.
        self.copyAttributes()

    def __repr__(self):
        return "<LSO: " + self.netObj.__str__() + ">"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment