Alain Shakour / clowdflows · Commits

Commit 3cbea1e5, authored Jul 16, 2013 by vpodpecan

new functions and data for bio3graph

parent b7de1379
Showing 7 changed files with 555 additions and 0 deletions (+555 −0):

workflows/bio3graph/GPSDB_synonyms.py                 +160  −0
workflows/bio3graph/NCBI.py                           +221  −0
workflows/bio3graph/data/OA_dict.pickle                 +0  −0
workflows/bio3graph/data/entrez2symbol.pickle           +0  −0
workflows/bio3graph/data/gene_synonyms_cache            +0  −0
workflows/bio3graph/data/get_gene_synonyms_NCBI.py    +152  −0
workflows/bio3graph/data/make_OA_id_dict.py            +22  −0
workflows/bio3graph/GPSDB_synonyms.py · new file (0 → 100644) · view file @ 3cbea1e5

import urllib
import urllib2
import csv
import cStringIO
import os
import cPickle
import time
from os.path import join, normpath, dirname

# http://gpsdb.expasy.org/cgi-bin/gpsdb/show?name=interleukin%208&model=Homo%20sapiens&type=gene&format=txt
# gpsdb.expasy.org/cgi-bin/gpsdb/show?name=indy&format=txt
# http://gpsdb.expasy.org/cgi-bin/gpsdb/show?model=Homo+sapiens&type=gene&name=interleukin+8&format=txt

GPSDB_URL = 'http://gpsdb.expasy.org/cgi-bin/gpsdb/show?'
NAME = 'name'
MODEL = 'model'
TYPE = 'type'
FORMAT = 'format'

MODEL_HUMAN = 'Homo sapiens'
TYPE_GENE = 'gene'
TXT_FORMAT = 'txt'

#entrez2symbol = cPickle.load(open('mappings/e2symb'))
#entrez2synonyms = cPickle.load(open('mappings/e2syns'))


class Synonym_extractor(object):
    qwait = 0.5
    cache_fname = 'data/gene_synonyms_cache'

    def __init__(self):
        self.last_qdate = 0
        try:
            self._cache = cPickle.load(open(normpath(join(dirname(__file__), self.cache_fname))))
        except Exception:
            self._cache = {}
    #end

    def _flush_cache(self):
        #fp = open(self.cache_fname, 'wb')
        fp = open(normpath(join(dirname(__file__), self.cache_fname)), 'wb')
        cPickle.dump(self._cache, fp)
        fp.close()
    #end

    def __del__(self):
        self._flush_cache()

    def wait(self):
        # 2 requests per second (for now)
        td = time.time() - self.last_qdate
        if td < self.qwait:
            print 'sleeping for %.2f seconds' % (self.qwait - td)
            time.sleep(self.qwait - td)
        self.last_qdate = time.time()   # record the request time so the rate limit takes effect
    #end

    def make_query_url(self, geneSymbol):
        params = {NAME: geneSymbol, MODEL: MODEL_HUMAN, TYPE: TYPE_GENE, FORMAT: TXT_FORMAT}
        qstring = GPSDB_URL + urllib.urlencode(params)
        return qstring
    #end

    def get_gene_synonyms(self, geneSymbol):
        if geneSymbol in self._cache:
            return self._cache[geneSymbol]

        self.wait()
        qstring = self.make_query_url(geneSymbol)
        try:
            fs = cStringIO.StringIO(urllib2.urlopen(urllib2.Request(qstring)).read())
            #print fs.getvalue()
            table = csv.reader(fs, delimiter='\t')
        except Exception, e:
            print e.message
            return []

        synonyms = [row[0] for row in table]
        synonyms = [x.lower() for x in synonyms]
        synonyms = list(set(synonyms))
        self._cache.update({geneSymbol: synonyms})
        return synonyms
    #end

    def get_geneset_synonyms(self, genes):
        assert(isinstance(genes, list))
        result = {}
        for gene in genes:
            result[gene] = self.get_gene_synonyms(gene)
        return result
    #end
#end class


#res['A']['gsea', 'fisher', 'all', 'page'][1, 2, 3,...]['topGenes', 'terms', 'allGenes', 'scores']
# ['z_score', 'page_p', 'gsea_p', 'enrichment_score', 'unadjusted_p', 'fisher_p', 'aggregate_p']

# def get_synonyms_for_results(resultsFile):
#     assert(os.path.isfile(resultsFile))
#
#     res = cPickle.load(open(resultsFile))
#     geneSets = {}
#     for ruleNum in res['A']['all'].keys():
#         rule = res['A']['all'][ruleNum]
#         geneSets[ruleNum] = rule['topGenes']
#
#     synonyms = {}
#     se = Synonym_extractor()
#     for ruleNum in geneSets.keys():
#         synonyms[ruleNum] = se.get_geneset_synonyms(geneSets[ruleNum])
#
#     fp = open('result_genes2syns.pickle', 'wb')
#     cPickle.dump(synonyms, fp, cPickle.HIGHEST_PROTOCOL)
#     fp.close()
#     print 'written rule %d, genes: %d' % (ruleNum, len(synonyms[ruleNum]))
#     return synonyms
# #end
#
#
# def synMapping():
#     res_syns = cPickle.load(open('data/result_genes2syns.pickle'))
#     syn_map = {}
#     for ruleNum in res_syns.keys():
#         for egid in res_syns[ruleNum].keys():
#             if egid not in syn_map:
#                 syn_map[egid] = res_syns[ruleNum][egid]
#     return syn_map
#
# #a = synMapping()
# #fp = open('data/gene2syn', 'wb')
# #cPickle.dump(a, fp, cPickle.HIGHEST_PROTOCOL)
# #fp.close()

#e2syns = cPickle.load(open('mappings/e2syns'))
#result_syns = cPickle.load(open('data/result_genes2syns.pickle'))
#for ruleNum in result_syns.keys():
    #for egid in result_syns[ruleNum].keys():
        #gpsdb = result_syns[ruleNum][egid]
        #ncbi = list(set(e2syns[egid]))
        #joined = list(set(gpsdb).union(set(ncbi)))
        #diff = set(ncbi).difference(gpsdb)
        ##print ncbi
        #if diff:
            #print diff
        #print 'gpsdb: %d, ncbi: %d, total: %d' % (len(gpsdb), len(ncbi), len(joined))

############
# a = Synonym_extractor()
# s = a.get_geneset_synonyms(['lgals3', 'sfpq', 'ddx39b', 'srsf11', 'cir1', 'luc7l3', 'prpf39', 'hnrnph1', 'lgals3', 'sfpq', 'ddx39b'])

##a = get_synonyms_for_results('data/ALL-FINAL.pickle')
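For orientation, here is a minimal usage sketch of the class above. The gene symbols are illustrative examples, and the sketch assumes the module is importable as GPSDB_synonyms and that the GPSDB service is reachable:

from GPSDB_synonyms import Synonym_extractor

se = Synonym_extractor()
# one symbol: returns a list of lower-cased, de-duplicated synonyms
print se.get_gene_synonyms('lgals3')
# several symbols: returns a dict mapping each symbol to its synonym list
print se.get_geneset_synonyms(['lgals3', 'sfpq'])
# results are cached in data/gene_synonyms_cache; the cache is flushed
# to disk when the extractor object is deleted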
workflows/bio3graph/NCBI.py · new file (0 → 100644) · view file @ 3cbea1e5

import urllib2
import urllib
import os
from xml.etree import cElementTree as tree
import time
import pickle
import re
import xml.dom.minidom as dom
import cPickle
import csv
import codecs
import unidecode


class Document(object):
    def __init__(self):
        self.docid = None
        self.year = None
        self.title = None
        self.abstract = None
        self.body = None
        #self.text = None
        self.xml = None
    #end

    def write_content_text(self, outdir, utf=True):
        assert(os.path.isdir(outdir))
        if utf:
            fp = codecs.open(os.path.join(outdir, self.docid + '.txt'), 'w', encoding='utf-8')
            fp.write(self.title + '\n' + self.abstract + '\n' + self.body)
        else:
            fp = open(os.path.join(outdir, self.docid + '.txt'), 'w')
            fp.write(unidecode.unidecode(self.title) + '\n' +
                     unidecode.unidecode(self.abstract) + '\n' +
                     unidecode.unidecode(self.body))
        fp.close()
    #end


class NCBI_Extractor(object):
    #qwait = 0.33
    qwait = 1.0
    maxdoc = 1000
    searchURL = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi'
    fetchURL = 'http://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi'
    pmcURL = 'http://www.ncbi.nlm.nih.gov/pmc/articles/PMC3315246/'

    def __init__(self):
        self.last_qdate = 0
    #end

    def dispatchRequest(self, q):
        # obey NCBI limitations (3 requests per second)
        td = time.time() - self.last_qdate
        if td < self.qwait:
            print 'sleeping for %.2f seconds' % (self.qwait - td)
            time.sleep(self.qwait - td)
        self.last_qdate = time.time()
        return urllib2.urlopen(urllib2.Request(q)).read()
    #end

    def getIDs(self, queryURL, maxHits=0):
        ids = []
        cnt = 1

        # first batch of results
        result = self.dispatchRequest(queryURL)
        t = tree.fromstring(result)
        ids.extend([x.text for x in t.find('IdList').findall('Id')])
        hits = int(t.find('Count').text)
        print 'Total hits: ', hits
        print 'batch: %d, got: %d' % (cnt, len(ids))

        # if we have enough already
        if maxHits > 0 and (len(ids) > maxHits or maxHits > hits):
            return ids[:maxHits]

        # if there are more, get them also with retstart option
        while len(ids) < hits:
            nq = queryURL + '&retstart=%d&retmax=%d' % (len(ids), self.maxdoc)
            result = self.dispatchRequest(nq)
            t = tree.fromstring(result)
            ids.extend([x.text for x in t.find('IdList').findall('Id')])
            cnt += 1
            print 'batch: %d, total: %d' % (cnt, len(ids))
            if maxHits and len(ids) >= maxHits:
                break
        #end

        if maxHits:
            return ids[:maxHits]
        else:
            return ids
    #end

    def query(self, queryText, db='pmc', maxHits=0):
        if not queryText:
            raise ValueError('Empty query!')
        query = [('db', db), ('term', queryText)]
        query.append(('retmax', self.maxdoc))
        query = '%s?%s' % (self.searchURL, urllib.urlencode(query))
        ids = self.getIDs(query, maxHits=maxHits)
        return ids
    #end

    def getDocument(self, did, db='pmc'):
        xml = self.getXML(did, db)
        root = dom.parseString(xml)
        doc = self.extractArticleText(root, did)
        doc.docid = did
        doc.xml = xml
        return doc
    #end

    def getXML(self, did, db='pmc'):
        query = [('db', db), ('id', did)]
        url = '%s?%s' % (self.fetchURL, urllib.urlencode(query))
        xml = self.dispatchRequest(url)
        return xml
    #end

    def getFulltext(self, did):
        xml = self.getXML(did)
        root = dom.parseString(xml)
        doc = self.extractArticleText(root, did)
        return doc

    def getDocumentFromXMLfile(self, fname, did=None):
        #xml = codecs.open(fname, encoding='utf-8').read()
        if not did:
            did = os.path.splitext(os.path.split(fname)[1])[0]
        xml = open(fname).read()
        root = dom.parseString(xml)
        doc = self.extractArticleText(root, did)
        doc.docid = did
        doc.xml = xml
        return doc
    #end

    def extractArticleText(self, root, did):
        try:
            titleNode = root.getElementsByTagName('article-title')[0]
        except Exception:
            title = ''
            print 'Warning: no title found, document %s' % str(did)
        else:
            title = self.list2text(self.recursiveCollect(titleNode, []))

        try:
            abstractNode = root.getElementsByTagName('abstract')[0]
        except Exception:
            abstract = ''
            print 'Warning: no abstract found, document %s' % str(did)
        else:
            abstract = self.list2text(self.recursiveCollect(abstractNode, []))
            abstract = re.sub('(\[)[ ,-:;]*(\])', '', abstract)   # remove what remains of citations

        try:
            bodyNode = root.getElementsByTagName('body')[0]
        except Exception:
            body = ''
            print 'Warning: no body found, document %s' % str(did)
        else:
            body = self.list2text(self.recursiveCollect(bodyNode, []))
            body = re.sub('(\[)[ ,-:;]*(\])', '', body)

        ytags = root.getElementsByTagName('pub-date')
        years = []
        for x in ytags:
            y = x.getElementsByTagName('year')
            if y:
                years.append(int(y[0].childNodes[0].data))
        year = min(years)

        new = Document()
        new.year = year
        new.title = title
        new.abstract = abstract
        new.body = body
        #new.text = abstract + ' ' + body
        return new
    #end

    def recursiveCollect(self, node, result,
                         skipTags=['title', 'xref', 'table', 'graphic', 'ext-link',
                                   'media', 'inline-formula', 'disp-formula']):
        for child in node.childNodes:
            if child.nodeType == dom.Node.ELEMENT_NODE:
                if child.tagName not in skipTags:
                    self.recursiveCollect(child, result)
            elif child.nodeType == dom.Node.TEXT_NODE:
                result.append(child.data)
        #endfor
        return result
    #end

    def list2text(self, lst):
        result = ''
        for x in lst:
            result += x.strip() + ' '
        return result.strip()
    #end
#end


########################################
### ALL LEUKEMIA SEARCH
#a = NCBI_Extractor()
##d = a.getDocument(2792210)
#ids = a.query('("t-lymphocytes"[MeSH Terms] OR "t-lymphocytes"[All Fields] OR "t cell"[All Fields] OR "t-cell"[All Fields]) OR ("leukaemia"[All Fields] OR "leukemia"[MeSH Terms] OR "leukemia"[All Fields])',
#              maxHits=1001)
#ids = a.query('leukemia', maxHits=10)
#fp = open('ALL-ids.pickle', 'wb')
#cPickle.dump(ids, fp, cPickle.HIGHEST_PROTOCOL)
#fp.close()
########################################

(no newline at end of file)
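A sketch of how the extractor is meant to be driven, modelled on the commented-out leukemia search above. The query string and document handling are illustrative, and the NCBI E-utilities endpoints must be reachable:

from NCBI import NCBI_Extractor

a = NCBI_Extractor()
# esearch against PubMed Central, collecting at most 10 article ids
ids = a.query('leukemia', db='pmc', maxHits=10)
# efetch one article and pull title/abstract/body out of the XML
doc = a.getDocument(ids[0])
print doc.year, doc.title
# write title + abstract + body as UTF-8 text to out/<docid>.txt
# (the 'out' directory must already exist)
doc.write_content_text('out')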
workflows/bio3graph/data/OA_dict.pickle · new file (0 → 100644) · view file @ 3cbea1e5

File added.

workflows/bio3graph/data/entrez2symbol.pickle · new file (0 → 100644) · view file @ 3cbea1e5

File added.

workflows/bio3graph/data/gene_synonyms_cache · new file (0 → 100644) · view file @ 3cbea1e5
workflows/bio3graph/data/get_gene_synonyms_NCBI.py · new file (0 → 100644) · view file @ 3cbea1e5

import cPickle
import pickle
import cStringIO
import StringIO
import gzip
import urllib

# NCBI data ftp.
NCBI_URL = 'http://mirrors.vbi.vt.edu/mirrors/ftp.ncbi.nih.gov/gene/DATA/'
NCBI_ORGANISM_GENE_DATA = 'GENE_INFO/Mammalia/'

# for gene_info file
FORMAT_LINE_START = '#Format:'
ENTREZ_ID = 'GeneID'
GENE_SYMBOL = 'Symbol'
SYNONYMS = 'Synonyms'
NEWENTRY = 'NEWENTRY'
ENTREZ_ID_COLNO = 1
GENE_SYMBOL_COLNO = 2
SYNONYMS_COLNO = 4

# for gene2accession file
tax_id = 0
GeneID = 1
status = 2
RNA_nucleotide_accession = 3
RNA_nucleotide_gi = 4
protein_accession = 5
protein_gi = 6
genomic_nucleotide_accession = 7

EMPTY = '-'


class Mappings(object):
    def __init__(self, entrez2symbol={}, symbol2entrez={}, entrez2synonyms={},
                 synonyms2entrez={}, errors=[]):
        self.entrez2symbol = entrez2symbol
        self.symbol2entrez = symbol2entrez
        self.entrez2synonyms = entrez2synonyms
        self.synonyms2entrez = synonyms2entrez
    #end
#end class


def getMapping():
    entrez2symbol = {}
    symbol2entrez = {}
    entrez2synonyms = {}
    synonyms2entrez = {}

    for fname in ['Homo_sapiens.gene_info.gz']:  #, 'Mus_musculus.gene_info.gz', 'Rattus_norvegicus.gene_info.gz']:
        #for fname in ['gene_info_small']:
        #fp = open(fname, 'r')

        # Download gzip file.
        web_fp = urllib.urlopen('%s%s%s' % (NCBI_URL, NCBI_ORGANISM_GENE_DATA, fname))
        fp = gzip.GzipFile(fileobj=StringIO.StringIO(web_fp.read()))
        print 'Reading file "%s"' % fname

        ln = 0
        while True:
            ln += 1
            if ln % 100000 == 0:
                print 'at line %d' % ln
            line = fp.readline().strip()
            if not line:  #EOF
                break
            elts = line.split('\t')
            if elts[0].startswith('#'):
                continue

            ## if there is header, check it
            #if elts[0] == FORMAT_LINE_START:
                #if len(elts) < 3 or elts[ENTREZ_ID_COLNO+1] != ENTREZ_ID or elts[GENE_SYMBOL_COLNO+1] != GENE_SYMBOL or \
                   #elts[SYNONYMS_COLNO+1] != SYNONYMS:
                    #raise TypeError('Input file is not an Entrez gene info file.')
                #continue

            entid = int(elts[ENTREZ_ID_COLNO])
            if entid in entrez2symbol or entid in entrez2synonyms:
                print 'duplicate GeneID (Entrez ID): %d' % entid
                continue

            # get gene symbol
            symbol = elts[GENE_SYMBOL_COLNO].lower()
            if symbol == NEWENTRY:
                continue
            # we ignore symbols which are not unique
            if symbol in symbol2entrez:
                print 'Not unique: ', symbol, entid
                eid = symbol2entrez[symbol]
                del entrez2symbol[eid]
                del symbol2entrez[symbol]
                continue
            entrez2symbol[entid] = symbol
            symbol2entrez[symbol] = entid

            # get gene synonyms
            syns = [s.strip().lower() for s in elts[SYNONYMS_COLNO].split('|')]
            syns = [s for s in syns if s != EMPTY]
            # we ignore synonyms which are not unique
            entrez2synonyms[entid] = []
            for s in syns:
                if s in synonyms2entrez:
                    e = synonyms2entrez[s]
                    entrez2synonyms[e].remove(s)
                    del synonyms2entrez[s]
                else:
                    synonyms2entrez[s] = entid
                    entrez2synonyms[entid].append(s)
            #end
        #end while
        fp.close()
        print 'Done.'
    #end for

    print len(synonyms2entrez), len(entrez2synonyms)
    return Mappings(entrez2symbol=entrez2symbol, symbol2entrez=symbol2entrez,
                    entrez2synonyms=entrez2synonyms, synonyms2entrez=synonyms2entrez)
#end


def mappingModuleAll(a):
    #a = cPickle.load(open('mappingsHMR.pickle'))
    assert(isinstance(a, Mappings))
    ofp = open('mappings/mappingsFull.py', 'w')
    ofp.write('from pickle import load\n')
    ofp.write('from cStringIO import StringIO\n\n')
    for attr in ['symbol2entrez', 'synonyms2entrez', 'entrez2symbol', 'entrez2synonyms']:
        outS = cStringIO.StringIO()
        d = getattr(a, attr)
        pickle.dump(d, outS)
        outS.flush()
        #ofp.write("%s = load(StringIO('''%s'''))\n" % (attr, b64encode(outS.getvalue())))
        ofp.write("%s = load(StringIO('''%s'''))\n" % (attr, outS.getvalue()))
    ofp.close()
#end


mapp = getMapping()
fp = open('entrez2symbol.pickle', 'wb')
cPickle.dump(mapp.entrez2symbol, fp, cPickle.HIGHEST_PROTOCOL)
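For orientation, a sketch of the record format the parsing loop above consumes. The row is a shortened, made-up example in the gene_info layout (tax_id, GeneID, Symbol, LocusTag, Synonyms, ...); synonyms are '|'-separated and '-' marks an empty field:

line = '9606\t3958\tLGALS3\t-\tCBP35|GAL3|MAC2'
elts = line.split('\t')
entid = int(elts[1])        # ENTREZ_ID_COLNO   -> 3958
symbol = elts[2].lower()    # GENE_SYMBOL_COLNO -> 'lgals3'
syns = [s.strip().lower() for s in elts[4].split('|') if s != '-']
print entid, symbol, syns   # 3958 lgals3 ['cbp35', 'gal3', 'mac2']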
workflows/bio3graph/data/make_OA_id_dict.py · new file (0 → 100644) · view file @ 3cbea1e5

import urllib2
import cPickle
from os.path import join, abspath, normpath, dirname

FILE_LIST_URL = 'ftp://ftp.ncbi.nlm.nih.gov/pub/pmc/file_list.txt'

dname = dirname(__file__)
mapping = {}
fp = urllib2.urlopen(FILE_LIST_URL)
lines = fp.readlines()
for line in lines:
    elts = line.split('\t')
    if len(elts) != 3:
        continue
    pmid = elts[2].strip().replace('PMC', '')
    mapping[pmid] = None

fp = open(normpath(join(dname, 'OA_dict.pickle')), 'wb')   # binary mode for pickle
cPickle.dump(mapping, fp, cPickle.HIGHEST_PROTOCOL)
fp.close()
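The resulting pickle is effectively a set of open-access PMC ids stored as dictionary keys. A minimal lookup sketch (the id is illustrative):

import cPickle

# keys are PMC ids with the 'PMC' prefix stripped; values are all None,
# so the dict acts as a set with O(1) membership tests
oa = cPickle.load(open('OA_dict.pickle', 'rb'))
print '3315246' in oa   # True if PMC3315246 is in the open-access subset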