Commit d541498a authored by Renato Caminha Juaçaba Neto

scripts update

parent f74988ac
@@ -11,7 +11,7 @@ import seaborn as sb
from matplotlib.backends.backend_pdf import PdfPages
from subprocess import Popen, PIPE
sb.set(context='paper', font_scale=1.4, style='ticks', palette='Dark2',
sb.set(context='paper', font_scale=1.4, style='ticks', palette='Paired',
rc={'lines.linewidth': 1.5, 'lines.markersize': 10})
pd.set_option('display.max_rows', 30)
@@ -97,7 +97,7 @@ fullStreamHitsDF = pd.concat(statistics_dataframes_with_params[1], ignore_index=
fullL3DF = pd.concat(statistics_dataframes_with_params[2], ignore_index=True, sort=True)
fullConsumerStatsDF = pd.concat(statistics_dataframes_with_params[3], ignore_index=True, sort=True)
fullStreamDelaysDF = pd.concat(statistics_dataframes_with_params[4], ignore_index=True, sort=True)
fullStreamDistancesDF = pd.concat(statistics_dataframes_with_params[5], ignore_index=True, sort=True)
node_type_order = ("Consumer",
"Border Router")#,
@@ -148,31 +148,32 @@ logger.info("Plotting cache size graphs...")
mask = (fullCsDF.Type == "CacheSizeByte") & \
~(fullCsDF.NodeType == "Producer")
maskedData = fullCsDF.loc[mask, ['NodeType', 'Packets', 'nConsumers', 'AppScenario', 'seed']]
# maskedData.loc[:, 'NodeType'].cat.remove_unused_categories(inplace=True)
# aggMaskedData = maskedData.groupby(['NodeType', 'nConsumers', 'AppScenario', 'seed'], as_index=False).mean()
maskedData = fullCsDF.loc[mask, ['NodeType', 'nConsumers', 'AppScenario', 'seed', 'Packets']]
aggMaskedData = maskedData.groupby(['NodeType', 'nConsumers', 'AppScenario', 'seed'], as_index=False).mean()
# duo_relplot(title="Individual node content store usage", single_pdf=pdf,
# xlabel="Number of consumer", ylabel="Data in content store (KB)",
# kind="line", data=aggMaskedData, style="NodeType", hue="NodeType", hue_order=node_type_order,
# legend='brief', row_order=scenarioOrder, y="Packets", x="nConsumers", row="AppScenario")
duo_catplot(title="Individual node content store usage", xlabel="Number of consumers", **violin_params,
ylabel="Data in content store (KB)", single_pdf=pdf, kind="violin", data=maskedData, hue="NodeType",
hue_order=node_type_order, y="Packets", x="nConsumers", row="AppScenario", row_order=scenarioOrder)
mask = (fullCsDF.Type.str.contains("CacheSizeMbyte"))
maskedData = fullCsDF.loc[mask, ["nConsumers", "AppScenario", "Packets", "seed"]]
# maskedData.loc[:, 'NodeType'].cat.remove_unused_categories(inplace=True)
aggMaskedData = maskedData.groupby(["nConsumers", "AppScenario", "seed"], as_index=False).sum()
# single_relplot(title="Sum of data in all content stores", xlabel="Number of consumers", ylabel="Megabytes",
# single_pdf=pdf, kind="line", data=aggMaskedData, y="Packets", x="nConsumers", hue_order=scenarioOrder,
# style="AppScenario", hue="AppScenario")
for n, g in aggMaskedData.groupby('NodeType'):
single_catplot(title="%s - Individual node content store usage" % n, xlabel="Number of consumers", single_pdf=pdf,
ylabel="Data in content store (KB)", kind="violin", data=g, hue="AppScenario", y="Packets",
hue_order=scenarioOrder, x="nConsumers", **violin_params)
single_catplot(title="Sum of data in all content stores", xlabel="Number of consumers", ylabel="Megabytes",
kind="violin", data=aggMaskedData, y="Packets", x="nConsumers", hue="AppScenario",
hue_order=scenarioOrder, **violin_params)
# mask = (fullCsDF.Type.str.contains("CacheSizeMbyte"))
# maskedData = fullCsDF.loc[mask, ["nConsumers", "AppScenario", "Packets", "seed"]]
# # maskedData.loc[:, 'NodeType'].cat.remove_unused_categories(inplace=True)
# aggMaskedData = maskedData.groupby(["nConsumers", "AppScenario", "seed"], as_index=False).sum()
#
# # single_relplot(title="Sum of data in all content stores", xlabel="Number of consumers", ylabel="Megabytes",
# # single_pdf=pdf, kind="line", data=aggMaskedData, y="Packets", x="nConsumers", hue_order=scenarioOrder,
# # style="AppScenario", hue="AppScenario")
#
# single_catplot(title="Sum of data in all content stores", xlabel="Number of consumers", ylabel="Megabytes",
# kind="violin", data=aggMaskedData, y="Packets", x="nConsumers", hue="AppScenario",
# hue_order=scenarioOrder, **violin_params)
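The recurring shape of this script is: select columns, aggregate once per seed, then plot the across-seed distribution as violins. A minimal self-contained sketch of that pattern with synthetic data; `single_catplot`/`duo_catplot` are the repo's own wrappers, so plain `seaborn.catplot` stands in for them here:

```python
# Sketch of the aggregate-then-violin pattern used throughout this script.
# Data is synthetic; column names mirror the frames above.
import numpy as np
import pandas as pd
import seaborn as sb

rng = np.random.default_rng(0)
toy = pd.DataFrame({
    'NodeType': rng.choice(['Consumer', 'Border Router'], size=200),
    'nConsumers': rng.choice([10, 20, 40], size=200),
    'AppScenario': rng.choice(['A', 'B'], size=200),
    'seed': rng.integers(0, 5, size=200),
    'Packets': rng.exponential(50, size=200),
})

# One mean per (grouping keys, seed): each seed contributes a single point,
# and the violin then shows the distribution across seeds.
agg = toy.groupby(['NodeType', 'nConsumers', 'AppScenario', 'seed'],
                  as_index=False).mean()

sb.catplot(kind='violin', data=agg, x='nConsumers', y='Packets',
           hue='NodeType', row='AppScenario', cut=0)
```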
logger.info("Plotting L3 graphs...")
@@ -183,16 +184,16 @@ maskedData = fullL3DF.loc[mask, :]
# maskedData.loc[:, 'NodeType'].cat.remove_unused_categories(inplace=True)
aggMaskedData = maskedData.loc[:, ["Node", "NodeType", "AppScenario", "nConsumers", "seed", "KilobytesRaw"]]\
.groupby(["Node", "NodeType", "AppScenario", "nConsumers", "seed"], as_index=False).sum()#\
# .groupby(["NodeType", "AppScenario", "nConsumers", "seed"], as_index=False).mean()
.groupby(["Node", "NodeType", "AppScenario", "nConsumers", "seed"], as_index=False).sum()\
.groupby(["NodeType", "AppScenario", "nConsumers", "seed"], as_index=False).mean()
# duo_relplot(title="Border router traffic load", xlabel="Number of consumers", ylabel="Data transmitted (KB)",
# single_pdf=pdf, kind="line", data=aggMaskedData, style="NodeType", row="AppScenario", row_order=scenarioOrder,
# hue_order=node_type_order, hue="NodeType", x="nConsumers", y="KilobytesRaw")
duo_catplot(title="Border router traffic load", xlabel="Number of consumers", ylabel="Data transmitted (KB)",
single_pdf=pdf, kind="violin", data=aggMaskedData, row="AppScenario", row_order=scenarioOrder,
y="KilobytesRaw", x="nConsumers", **violin_params)
single_catplot(title="Border router traffic load", xlabel="Number of consumers", ylabel="Data transmitted (KB)",
single_pdf=pdf, kind="violin", data=aggMaskedData, hue="AppScenario", hue_order=scenarioOrder,
y="KilobytesRaw", x="nConsumers", **violin_params)
#, hue="NodeType", hue_order=node_type_order)
# duo_catplot(title="Border router traffic load", xlabel="Number of consumers", ylabel="Data transmitted (KB)",
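The chained groupby above first sums traffic per individual node, then averages those per-node totals within each node type. A toy illustration of why the two stages give a different number than a single flat mean (column names mirror the L3 frame; the data is synthetic):

```python
import pandas as pd

# Two interfaces on node 'br-0', one on 'br-1', all Border Routers.
l3 = pd.DataFrame({
    'Node': ['br-0', 'br-0', 'br-1'],
    'NodeType': ['Border Router'] * 3,
    'KilobytesRaw': [10.0, 30.0, 20.0],
})

# Stage 1: total traffic per node -> br-0: 40, br-1: 20.
per_node = l3.groupby(['Node', 'NodeType'], as_index=False).sum()

# Stage 2: mean of the per-node totals per node type -> 30.
per_type = per_node.groupby('NodeType', as_index=False)['KilobytesRaw'].mean()

# A single mean over the raw rows would give (10 + 30 + 20) / 3 = 20
# instead, silently averaging interfaces rather than nodes.
print(per_type)
```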
@@ -218,26 +219,26 @@ logger.info("Plotting consumer metrics...")
mask = (fullConsumerStatsDF.Metric.str.contains("InterestSatisfaction"))
maskedData = fullConsumerStatsDF.loc[mask, ['nConsumers', 'Value', 'AppScenario', 'seed']]
# aggMaskedData = maskedData.groupby(['nConsumers', 'AppScenario', 'seed'], as_index=False).mean()
aggMaskedData = maskedData.groupby(['nConsumers', 'AppScenario', 'seed'], as_index=False).mean()
# single_relplot(title="Interest Satisfaction", xlabel="Number of consumers", ylabel="Data packets received (%)",
# single_pdf=pdf, kind='line', data=aggMaskedData, x="nConsumers", y='Value', hue="AppScenario",
# hue_order=scenarioOrder, style="AppScenario")
single_catplot(title="Interest Satisfaction", xlabel="Number of consumers", ylabel="Data packets received (%)",
single_pdf=pdf, kind='violin', data=maskedData, x="nConsumers", y='Value', hue="AppScenario",
hue_order=scenarioOrder, **violin_params)
single_pdf=pdf, kind='violin', data=aggMaskedData, x="nConsumers", y='Value', hue="AppScenario",
hue_order=scenarioOrder, facet_kws={'ylim':(0, 110)}, **violin_params)
mask = (fullConsumerStatsDF.Metric.str.contains("FirstDataDelayNorm"))
maskedData = fullConsumerStatsDF.loc[mask, ['AppScenario', 'nConsumers', 'Value', 'seed']]
# aggMaskedData = maskedData.groupby(['AppScenario', 'nConsumers', 'seed'], as_index=False).mean()
aggMaskedData = maskedData.groupby(['AppScenario', 'nConsumers', 'seed'], as_index=False).mean()
# single_relplot(title="Normalized stream setup delay ", xlabel="Number of consumers", ylabel="Normalized delay",
# single_pdf=pdf, kind='line', data=aggMaskedData, x="nConsumers", y='Value', hue="AppScenario",
# hue_order=scenarioOrder, style="AppScenario")
single_catplot(title="Normalized stream setup delay ", xlabel="Number of consumers", ylabel="Normalized delay",
single_pdf=pdf, kind='violin', data=maskedData, x="nConsumers", y='Value', hue="AppScenario",
single_pdf=pdf, kind='violin', data=aggMaskedData, x="nConsumers", y='Value', hue="AppScenario",
hue_order=scenarioOrder, **violin_params)
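seaborn's `catplot` forwards `facet_kws` to the underlying `FacetGrid`, which is presumably how the wrapper pins the satisfaction axis to 0–110% above. A minimal stand-alone equivalent with synthetic percentages:

```python
import pandas as pd
import seaborn as sb

# Synthetic satisfaction values; the wrapper's extra styling is omitted.
data = pd.DataFrame({
    'nConsumers': [10, 10, 20, 20] * 5,
    'AppScenario': ['A', 'B'] * 10,
    'Value': [95, 88, 91, 80] * 5,
})

# facet_kws is passed through to FacetGrid, fixing the y-axis so that
# 100% sits below the top of the plot.
sb.catplot(kind='violin', data=data, x='nConsumers', y='Value',
           hue='AppScenario', facet_kws={'ylim': (0, 110)})
```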
@@ -258,6 +259,8 @@ logger.info("Plotting stream hits...")
mask = (fullStreamHitsDF.NodeType.str.contains("Border Router")) & \
(fullStreamHitsDF.HitType.str.contains("NewStreams"))
maskedData = fullStreamHitsDF.loc[mask, ["nConsumers", "seed", "HitCount"]]
maskedData['HitCount'] = maskedData.HitCount.astype('int32')
# aggMaskedData = maskedData.groupby(["nConsumers", "seed"], as_index=False).mean()
# single_relplot(title="Streams created by border routers", ylabel="Streams created", xlabel="Number of consumers",
@@ -281,6 +284,15 @@ single_catplot(title="Streams created by border routers", ylabel="Streams create
# x="nConsumers", y='Delay', style='AppScenario')
logger.info("Plotting stream delay distances...")
maskedData = fullStreamDistancesDF.loc[:, ['AppScenario', 'seed', 'HopCount']]
avgData = maskedData.groupby(['AppScenario', 'seed'], as_index=False).mean()
single_catplot(title='Distance between consumers and producers', xlabel="", ylabel="Hop count",
single_pdf=pdf, kind='violin', data=avgData, x='AppScenario', y='HopCount', **violin_params)
logger.info("Done plotting!!!")
if params.single_pdf: pdf.close()
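For reference, the single-PDF flow the script relies on is matplotlib's `PdfPages`: open once, `savefig` per figure, `close` at the end. A minimal sketch (the output filename and figure contents here are hypothetical):

```python
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# Each savefig call appends one page; close() finalizes the file.
pdf = PdfPages('plots.pdf')
fig, ax = plt.subplots()
ax.plot([0, 1], [0, 1])
pdf.savefig(fig)
plt.close(fig)
pdf.close()
```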
@@ -43,7 +43,7 @@ def get_node_type(node_name):
def read_statistics(dir_path='', read_l3_dataset=True, read_cs_dataset=True, read_stream_hits_dataset=True,
read_consumer_dataset=True, read_delay_metrics=True):
read_consumer_dataset=True, read_delay_metrics=True, read_stream_distances=True):
logger = logging.getLogger('ReadStatistics')
logger.info("Starting")
@@ -58,8 +58,7 @@ def read_statistics(dir_path='', read_l3_dataset=True, read_cs_dataset=True, rea
if read_cs_dataset and path.isfile(path.join(dir_path, "cs-trace.trace")):
logger.info("Reading content store metrics")
cs_dataset = pd.read_csv(path.join(dir_path, "cs-trace.trace"), sep="\s+")
# cs_dataset['Type'] = cs_dataset.loc[:, 'Type'].astype('category')
cs_dataset['NodeType'] = pd.Series(map(get_node_type, cs_dataset['Node']))#, dtype='category')
cs_dataset['NodeType'] = cs_dataset.Node.apply(get_node_type)
cache_size_mask = cs_dataset['Type'].str.contains('CacheSizeByte')
tmp = cs_dataset[cache_size_mask].copy(deep=True)
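`get_node_type` is defined earlier in this file; given the labels used by the plotting script ("Consumer", "Border Router", "Producer"), it plausibly dispatches on a node-name prefix. A hypothetical sketch, not the repo's actual implementation:

```python
def get_node_type(node_name):
    # Hypothetical prefix-based mapping; the real function may use
    # different naming conventions for the simulated nodes.
    if node_name.startswith('con'):
        return 'Consumer'
    if node_name.startswith('br'):
        return 'Border Router'
    if node_name.startswith('prod'):
        return 'Producer'
    return 'Other'

# Vectorized application, as in the diff above:
# cs_dataset['NodeType'] = cs_dataset.Node.apply(get_node_type)
```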
@@ -74,12 +73,7 @@ def read_statistics(dir_path='', read_l3_dataset=True, read_cs_dataset=True, rea
if read_stream_hits_dataset and path.isfile(path.join(dir_path, "stream-hits.trace")):
logger.info("Reading stream hits")
stream_hits_dataset = pd.read_csv(path.join(dir_path, "stream-hits.trace"), sep='\s+')
if (stream_hits_dataset.size != 0):
# stream_hits_dataset['Type'] = stream_hits_dataset['Type'].astype('category')
stream_hits_dataset['NodeType'] = pd.Series(map(get_node_type, stream_hits_dataset['Node']))#,
#dtype='category')
else:
stream_hits_dataset = None
stream_hits_dataset['NodeType'] = stream_hits_dataset.Node.apply(get_node_type)
else:
stream_hits_dataset = None
@@ -87,8 +81,7 @@ def read_statistics(dir_path='', read_l3_dataset=True, read_cs_dataset=True, rea
if read_l3_dataset and path.isfile(path.join(dir_path, "l3-rate-trace.trace")):
logger.info("Reading L3 metrics")
l3_dataset = pd.read_csv(path.join(dir_path, "l3-rate-trace.trace"), sep="\s+")
# l3_dataset['Type'] = l3_dataset['Type'].astype('category')
l3_dataset['NodeType'] = pd.Series(map(get_node_type, l3_dataset['Node']))#, dtype='category')
l3_dataset['NodeType'] = l3_dataset.Node.apply(get_node_type)
l3_dataset['Megabytes'] = l3_dataset['Kilobytes'].divide(1000)
l3_dataset['MegabytesRaw'] = l3_dataset['KilobytesRaw'].divide(1000)
else:
@@ -98,8 +91,7 @@ def read_statistics(dir_path='', read_l3_dataset=True, read_cs_dataset=True, rea
if read_consumer_dataset and path.isfile(path.join(dir_path, "consumer-metrics.trace")):
logger.info("Reading consumer metrics")
consumer_dataset = pd.read_csv(path.join(dir_path, "consumer-metrics.trace"), sep="\s+")
# l3_dataset['Type'] = l3_dataset['Type'].astype('category')
consumer_dataset['NodeType'] = pd.Series(map(get_node_type, consumer_dataset['Node']))#, dtype='category')
consumer_dataset['NodeType'] = consumer_dataset.Node.apply(get_node_type)
else:
consumer_dataset = None
@@ -107,13 +99,21 @@ def read_statistics(dir_path='', read_l3_dataset=True, read_cs_dataset=True, rea
if read_delay_metrics and path.isfile(path.join(dir_path, "stream-delays.trace")):
logger.info("Reading stream delays")
stream_delays = pd.read_csv(path.join(dir_path, "stream-delays.trace"), sep="\s+")
# l3_dataset['Type'] = l3_dataset['Type'].astype('category')
stream_delays['NodeType'] = pd.Series(map(get_node_type, consumer_dataset['Node']))#, dtype='category')
stream_delays['NodeType'] = stream_delays.Node.apply(get_node_type)
else:
stream_delays = None
### Delay distances
if read_stream_distances and path.isfile(path.join(dir_path, "stream-distances.trace")):
logger.info("Reading stream distances")
stream_distances = pd.read_csv(path.join(dir_path, "stream-distances.trace"), sep="\s+")
stream_distances['NodeType'] = stream_distances.Node.apply(get_node_type)
else:
stream_distances = None
logger.info("Finished reading statistics")
return sim_parameters, cs_dataset, stream_hits_dataset, l3_dataset, consumer_dataset, stream_delays
return sim_parameters, cs_dataset, stream_hits_dataset, l3_dataset, \
consumer_dataset, stream_delays, stream_distances
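With the stream-distances frame appended, `read_statistics` now returns seven values. A caller-side sketch of unpacking them (the directory path and printed columns are hypothetical):

```python
# Unpack all seven frames; any dataset whose trace file is absent
# (or whose read_* flag is False) comes back as None.
(sim_parameters, cs_dataset, stream_hits_dataset, l3_dataset,
 consumer_dataset, stream_delays, stream_distances) = read_statistics(
    dir_path='results/run-0', read_stream_distances=True)

if stream_distances is not None:
    print(stream_distances[['AppScenario', 'HopCount']].head())
```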
def get_iqr_outliers(series):