Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Physionet Challenge 2020
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Admin message
Maintenance moved to Monday 17.3. at 13:00. ETA 60 - 90 minutes.
Show more breadcrumbs
Joaquin Rives Gambin
Physionet Challenge 2020
Commits
145415f7
Commit
145415f7
authored
5 years ago
by
Joaquin Rives Gambin
Browse files
Options
Downloads
Patches
Plain Diff
reshape_segmented_arrays function added to preprocess_and_segmentation.py
parent
9d10ad10
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
preprocess_and_segmentation.py
+181
-8
181 additions, 8 deletions
preprocess_and_segmentation.py
preprocessor.py
+1
-1
1 addition, 1 deletion
preprocessor.py
with
182 additions
and
9 deletions
preprocess_and_segmentation.py
+
181
−
8
View file @
145415f7
...
...
@@ -5,8 +5,6 @@ import re
import matplotlib.pyplot as plt

from preprocessor import preprocess_input_data

# Location of the raw PhysioNet 2020 training records.
data_dir = 'sample_of_data/Training_WFDB'
fs = 500  # sampling frequency, Hz

# Standard 12-lead ECG lead names; dict keys of each loaded record use these.
lead_labels = ['I', 'II', 'III', 'aVR', 'aVL', 'aVF',
               'V1', 'V2', 'V3', 'V4', 'V5', 'V6']
...
...
@@ -84,12 +82,13 @@ def window_stack(a, win_width, overlap):
def segmenting_data(dict_of_data, seg_width, overlap_perc):
    """Segment every lead signal of one record into overlapping windows.

    Args:
        dict_of_data: dict mapping lead names (see ``lead_labels``) to 1-D
            signal arrays, plus an 'info' entry with the record metadata.
        seg_width: window width in samples.
        overlap_perc: fractional overlap between consecutive windows,
            forwarded to ``window_stack``.

    Returns:
        dict with the same lead keys mapped to 2-D arrays of stacked
        windows, and 'info' repeated once per produced window so labels
        stay aligned with segments.
    """
    segmented_signals = {}
    ignore_keys = ['info']

    # dict.keys() is a set-like view, so subtracting the ignore list
    # yields only the lead-signal keys.
    for key in dict_of_data.keys() - ignore_keys:
        segmented_signals[key] = window_stack(dict_of_data[key], seg_width,
                                              overlap=overlap_perc)

    # Repeat the record metadata once per segment.
    # NOTE(review): assumes every lead yields the same number of segments,
    # so counting segments on the first lead is representative.
    segmented_signals['info'] = np.repeat(
        dict_of_data['info'], len(segmented_signals[lead_labels[0]]))

    return segmented_signals
...
...
@@ -108,8 +107,167 @@ def segment_all_dict_data(data_dict, seg_width, overlap_perc):
return
segmented_dict_of_data
# def mad(my_segment, theta=10):
# my_median = np.median(my_segment)
#
# ########################### find the outliers
#
# MedianAD = theta * (np.median(np.abs(my_segment - my_median)))
#
# MedianAD_flag = np.abs(my_segment - my_median)
#
# outliers = np.where(MedianAD_flag > MedianAD, 1, 0)
#
# ########################## get sign of the data
#
# sign_of_my_segment = np.where(my_segment > 0, 1, -1)
#
# ########################## replace the ones positive with my_median and the negative ones with -my_median
#
# cleaned_segment = my_segment.copy()
#
# outliers_to_replace = outliers * sign_of_my_segment
#
# cleaned_segment[np.where(outliers_to_replace == +1)] = abs(MedianAD)
#
# cleaned_segment[np.where(outliers_to_replace == -1)] = -abs(MedianAD)
#
# #########################
#
# return cleaned_segment
def reshape_segmented_arrays(input_dict, shuffle_IDs=True, shuffle_segments=True,
                             segment_standardization_flag=True):
    """Stack per-record segmented leads into flat training arrays.

    Args:
        input_dict: dict mapping record IDs to per-record dicts, each
            holding one 2-D array (n_segments, seg_width) per lead plus an
            'info' entry with one label row per segment.
        shuffle_IDs: if True, shuffle the order of records before stacking.
        shuffle_segments: if True, shuffle the segment order within each
            record (consistently across leads).
        segment_standardization_flag: if True, z-score every segment
            independently (requires scikit-learn).

    Returns:
        Tuple ``(array_of_segments, array_of_labels, array_of_IDs)`` where
        segments has shape (n_total_segments, seg_width, n_leads) and the
        other two have shape (n_total_segments, 1).
    """
    from random import shuffle

    list_of_swapped_stack = []
    list_of_ID_arrays = []
    list_of_label_arrays = []

    for key in input_dict.keys():
        print(key)

        dict_data = input_dict[key]
        ID = key

        # All lead matrices of this record (everything except metadata).
        data_list = [v for k, v in dict_data.items() if k != 'info']

        # Shape: (n_leads, n_segments, seg_width).
        data_stacked_array = np.stack(data_list, axis=0)

        # Shuffle the segment axis consistently across leads.
        # BUGFIX: the original used np.random.randint(0, n, n), which
        # samples indices WITH replacement and thus duplicates/drops
        # segments; a true permutation preserves every segment exactly once.
        if shuffle_segments:
            random_indices = np.random.permutation(data_stacked_array.shape[1])
            data_stacked_array = data_stacked_array[:, random_indices, :]

        # Reorder axes to (n_segments, seg_width, n_leads).
        swapped_stack = np.swapaxes(np.swapaxes(data_stacked_array, 0, 2), 0, 1)

        # One ID per segment; 'info' already holds one label per segment.
        ID_for_segments = np.repeat(ID, swapped_stack.shape[0])
        label_for_segments = dict_data['info']

        list_of_swapped_stack.append(swapped_stack)
        list_of_ID_arrays.append(ID_for_segments)
        list_of_label_arrays.append(label_for_segments)

    # Shuffle the order of records, keeping the three lists in lockstep.
    if shuffle_IDs:
        perm = list(range(len(list_of_ID_arrays)))
        shuffle(perm)
        list_of_swapped_stack = [list_of_swapped_stack[i] for i in perm]
        list_of_ID_arrays = [list_of_ID_arrays[i] for i in perm]
        list_of_label_arrays = [list_of_label_arrays[i] for i in perm]

    # Concatenate everything along the segment axis.
    array_of_segments = np.concatenate(list_of_swapped_stack, axis=0)
    array_of_IDs = np.concatenate(list_of_ID_arrays, axis=0)[:, np.newaxis]
    array_of_labels = np.concatenate(list_of_label_arrays, axis=0)[:, np.newaxis]

    # Standardize every (segment, lead) trace independently.
    if segment_standardization_flag:

        def segment_standardization(my_segment):
            # Lazy import keeps scikit-learn optional when the flag is off.
            from sklearn.preprocessing import StandardScaler

            scaler = StandardScaler()
            normalized_segment = scaler.fit_transform(my_segment[:, np.newaxis])
            return normalized_segment.ravel()

        print("Standardizing...")
        array_of_segments = np.apply_along_axis(segment_standardization, 1,
                                                array_of_segments)

    print('shape of the array of segments is :', array_of_segments.shape)
    print('shape of the array of IDs is :', array_of_IDs.shape)
    print('shape of the array of labels is :', array_of_labels.shape)

    return (array_of_segments, array_of_labels, array_of_IDs)
def plot_segment(inputarray, seg_indx, axis1, axis2):
    """Plot channels [axis1:axis2) of segment ``seg_indx`` and return the figure."""
    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes.plot(inputarray[seg_indx, :, axis1:axis2])
    plt.show()
    return figure
if __name__ == '__main__':
    data_dir = 'data/Training_WFDB'

    # load data
    data = load_data(data_dir)

    # NOTE(review): the source capture collapses a diff hunk here
    # (presumably preprocessing of `data`) that is not visible — confirm
    # against the repository before relying on this reconstruction.

    # segment signal: 500-sample windows with 50% overlap
    data = segment_all_dict_data(data, 500, 0.5)

    subj2_leadII = data['A0002']['II']

    # reshape to arrays of (segments, labels, IDs)
    arr_of_segments, arr_of_labels, arr_of_IDs = reshape_segmented_arrays(
        data,
        shuffle_IDs=True,
        shuffle_segments=True,
        # outlier_rejection_flag=False,
        segment_standardization_flag=True)

    # Plot segment examples
    # plot_segment(arr_of_segments, 0, 0, 1)

    # Check the class balance of the diagnosis labels
    from collections import Counter

    labels = [i[0]['Dx'] for i in arr_of_labels]
    label_count = Counter(labels)
    print(label_count)

    plt.plot(subj2_leadII[1])
    plt.plot(subj2_leadII[2])
    plt.show()

    # plt.bar(label_count.keys(), label_count.values())
    # plt.show()
This diff is collapsed.
Click to expand it.
preprocessor.py
+
1
−
1
View file @
145415f7
...
...
@@ -3,7 +3,7 @@ import copy
FS = 500  # sampling frequency, Hz

# Band-pass filter corner frequencies and order for preprocessing.
lf_filter = 0.5  # Hz
hf_filter = 30  # Hz (this commit raises the high cutoff from 20 Hz)
order_filter = 4
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment