Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Q
quarter_car_model_data_preparation
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
mibaj
quarter_car_model_data_preparation
Commits
a37c48f9
Commit
a37c48f9
authored
4 years ago
by
mibaj
Browse files
Options
Downloads
Patches
Plain Diff
Update transform_to_window_dataset.py
parent
d9be7324
Branches
master
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
util_scripts/transform_to_window_dataset.py
+47
-71
47 additions, 71 deletions
util_scripts/transform_to_window_dataset.py
with
47 additions
and
71 deletions
util_scripts/transform_to_window_dataset.py
+
47
−
71
View file @
a37c48f9
"""
A script to apply the sliding window approach on input time series data. Creates fully prepared datasets for analysis.
@author: Milena Bajic (DTU Compute)
"""
import
sys
,
os
,
glob
,
time
import
argparse
import
pickle
import
shutil
import
random
import
pandas
as
pd
import
numpy
as
np
import
matplotlib.pyplot
as
plt
from
multiprocessing
import
Pool
,
cpu_count
# save def. width, height, severity and speed too
class
Window_dataset
():
def
__init__
(
self
,
input_dir
,
filestring
,
win_size
=
2
,
out_dir
=
''
,
is_test
=
True
):
# Initial processing time
t0
=
time
.
time
()
# Get from input
...
...
@@ -55,28 +62,10 @@ class Window_dataset():
print
(
'
Number of split dataframes: {0}
'
.
format
(
self
.
n_splits
))
print
(
'
Number of processes: {0}
'
.
format
(
self
.
n_processes
))
# Prepare chunks
#print('Preparing chunks')
#self.chunks = [ self.split_input_dataframes[x:x+self.n_processes]
#for x in range(0, len(self.split_input_dataframes), self.n_processes)]
#print('Number of chunks: {0}'.format( len(self.chunks)))
for
df_i
,
df
in
list
(
enumerate
(
self
.
split_input_dataframes
)):
print
(
'
===> Passing df:
'
,
df_i
)
df
.
reset_index
(
inplace
=
True
,
drop
=
True
)
self
.
make_sliding_window_df
(
df_i
,
df
)
'''
pool = Pool(processes=self.n_processes)
pass_chunk = list(enumerate(chunk))
pass_chunk = [(chunk_number,) + c for c in pass_chunk] # [ (chunk_number, df_number, chunk_data) ]
print(
'
Chunk data prepared, starting pool map..
'
)
pool.map(self.make_sliding_window_df, pass_chunk)
pool.close()
pool.join()
'''
#sys.exit(0)
dt
=
round
(
time
.
time
()
-
t0
,
1
)
print
(
'
Time to process: {0} s
'
.
format
(
dt
))
...
...
@@ -86,31 +75,23 @@ class Window_dataset():
print
(
'
Loading: {0}
'
.
format
(
filename
))
with
open
(
filename
,
"
rb
"
)
as
f
:
df
=
pickle
.
load
(
f
)
return
df
def remove_samples_with_zero_counts(self, input_dataframe):
    """Drop samples whose ``type`` entry contains no non-zero value.

    Samples with too narrow defects can end up with no point "caught" in
    type and severity; such rows carry an all-zero ``type`` sequence and
    are removed here.

    The input frame is left untouched: no helper column is added to it and
    nothing is dropped from it in place.

    Args:
        input_dataframe: DataFrame with a ``type`` column whose cells are
            array-likes accepted by ``np.count_nonzero``.

    Returns:
        A new DataFrame holding only the rows with at least one non-zero
        ``type`` value, re-indexed from 0.
    """
    # One boolean per row: does this sample contain any non-zero label?
    has_defect = input_dataframe['type'].apply(
        lambda labels: np.count_nonzero(labels) > 0
    )
    # Force a bool dtype so an empty frame is still treated as a row mask
    # (an empty object-dtype Series would not be).
    row_mask = has_defect.astype(bool)
    # Boolean indexing returns a new frame, so the caller's data is not
    # mutated and no chained in-place operation on a slice is needed.
    return input_dataframe[row_mask].reset_index(drop=True)
def
make_sliding_window_df
(
self
,
df_i
,
input_dataframe_part
):
# Making sliding window (each window: constant in distance, variable length, slide by 1 point)
print
(
'
Making sliding window
'
)
#try: # parallel mode, pass data in chunks
#chunk_i, df_i, input_dataframe_part = inp
#print('Chunk_i: ', chunk_i)
#print('df_i: ',df_i)
window_df
=
pd
.
DataFrame
([],
columns
=
self
.
window_columns
)
#print('Data: ',type(input_dataframe_part))
# Fill pd with windows from initial one
# Fill Dataframe with windows from initial one
for
index
,
row
in
input_dataframe_part
.
iterrows
():
if
(
index
%
500
==
0
):
print
(
'
Processing row: {0}/{1}
'
.
format
(
index
,
input_dataframe_part
.
shape
[
0
]))
...
...
@@ -129,8 +110,8 @@ class Window_dataset():
end_index
=
np
.
where
(
row
.
distance
>
100
-
self
.
win_size
)[
0
][
0
]
-
1
#
#print(end_index, row.distance[end_index]) # end index in the whole row (so the last sample is 2m)
# Loop over windows
for
i
in
range
(
0
,
end_index
+
1
):
# to include the last window
# Loop over
the
windows
for
i
in
range
(
0
,
end_index
+
1
):
try
:
# Get min and max index of this window
window_start_meters
=
row
.
distance
[
i
]
...
...
@@ -148,10 +129,8 @@ class Window_dataset():
# Put this window into row df data
for
col
in
self
.
window_columns
:
#print('Col: ',col)
if
col
==
'
window_class
'
:
# compute window class column
unique_classes
=
np
.
unique
(
row
[
'
type
'
][
i
:
window_end_index
])
#possible are only 1-label windows or windows with 0 (no defect) and 1 defect
#print('uniq: ',unique_classes)
if
len
(
unique_classes
)
==
1
:
row_df
.
at
[
i
,
col
]
=
unique_classes
[
0
]
elif
len
(
unique_classes
)
==
2
:
...
...
@@ -162,14 +141,12 @@ class Window_dataset():
row_df
.
at
[
i
,
col
]
=
row
[
col
][
i
:
window_end_index
]
else
:
row_df
.
at
[
i
,
col
]
=
row
[
col
]
#float or string, just repeat
#sys.exit(0)
except
:
pass
#print(row_df.window_class)
return
row_df
def
save_pickle
(
self
,
df
,
out_dir
,
df_type
):
print
(
'
Saving {0} as pickle.
'
.
format
(
df_type
))
pickle_name
=
out_dir
+
'
/
'
+
df_type
+
'
_windows.pkl
'
df
.
to_pickle
(
pickle_name
)
...
...
@@ -177,39 +154,38 @@ class Window_dataset():
return
#===============================#
# ============================= #
# =========SETUP ============== #
# ============================= #
if
__name__
==
"
__main__
"
:
cluster
=
True
is_test
=
False
filetype
=
'
train
'
home
=
os
.
path
.
expanduser
(
'
~
'
)
parser
=
argparse
.
ArgumentParser
(
description
=
'
Please provide command line arguments.
'
)
parser
.
add_argument
(
'
--test
'
,
default
=
False
,
help
=
'
If test is true, will process 100 rows only (use for testing purposes).
'
)
parser
.
add_argument
(
'
--filetype
'
,
default
=
'
train
'
,
help
=
'
Choose between train, test or valid. This file will be processed and output created.
'
)
parser
.
add_argument
(
'
--input_dir
'
,
default
=
'
{0}/quarter_car_model_data_preparation/data/Golden-car-simulation-August-2020/train-val-test-normalized
'
.
format
(
home
),
help
=
'
Input directory containing single-defect .mat files.
'
)
parser
.
add_argument
(
'
--output_dir_base
'
,
default
=
'
{0}/quarter_car_model_data_preparation/data/Golden-car-simulation-August-2020
'
.
format
(
home
),
help
=
'
Directory base where a new directory with output files will be created.
'
)
#===============================#
# ============================= #
args
=
parser
.
parse_args
()
# Input dir
# =======#
if
cluster
:
input_dir
=
'
/dtu-compute/mibaj/Golden-car-simulation-August-2020/train-val-test-normalized
'
else
:
input_dir
=
'
/Users/mibaj/quarter-car-simulation-data-analysis/data/Golden-car-simulation-August-2020/train-val-test-normalized
'
input_dir
=
args
.
input_dir
output_dir_base
=
args
.
output_dir_base
is_test
=
args
.
test
filetype
=
args
.
filetype
# Output directory
# =======#
out_dir
=
'
{0}-split-into-windows
'
.
format
(
input_dir
)
# Make output directory
out_dir
=
'
{0}/train-val-test-normalized-split-into-windows
'
.
format
(
output_dir_base
)
if
not
os
.
path
.
exists
(
out_dir
):
os
.
makedirs
(
out_dir
)
if
not
os
.
path
.
exists
(
out_dir
+
'
/
'
+
str
(
filetype
)):
os
.
makedirs
(
out_dir
+
'
/
'
+
str
(
filetype
))
# T
es
t
# Proc
es
s
# ======#
if
is_test
:
test
=
Window_dataset
(
input_dir
,
'
test
'
,
out_dir
=
out_dir
,
is_test
=
is_test
)
sys
.
exit
(
0
)
# Datasets
# ====================== #
result
=
Window_dataset
(
input_dir
,
filetype
,
out_dir
=
out_dir
+
'
/
'
+
str
(
filetype
),
is_test
=
is_test
)
result
=
Window_dataset
(
input_dir
,
'
test
'
,
out_dir
=
out_dir
,
is_test
=
is_test
)
else
:
result
=
Window_dataset
(
input_dir
,
filetype
,
out_dir
=
out_dir
+
'
/
'
+
str
(
filetype
))
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment