Looking for patterns in grand mal seizures¶

Author: James Twose Date: 20-03-2021

Context¶

This data set is 12 months of seizure frequency data of a 20 year old male that lives in Iowa City, Iowa. He sufferers from grand mal seizures that result from Tubular Sclerosis that affects his brain. This is my nephew Michael, and I love him, and I want to access the smartest and best data resources to help provide insight and maybe support that can give him some relief.

I am providing this data in hopes of creating a contest that can help identify and predict a pattern to his seizures based on this frequency data and state (sleep/awake), and environmental data including weather, allergens, temperature, and other potential sources.

About Tubular Sclerosis¶

Tubular Sclerosis is an uncommon genetic disorder that causes noncancerous (benign) tumors — unexpected overgrowths of normal tissue — to develop in many parts of the body.

[1]:

# utility packages
from sinfo import sinfo

# data manipulation packages
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# plotting packages
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import seaborn as sns

#analysis packages
from scipy.spatial import distance_matrix

from jmspack.NLTSA import (ts_levels,
                           distribution_uniformity,
                           fluctuation_intensity,
                           complexity_resonance,
                           complexity_resonance_diagram,
                          cumulative_complexity_peaks,
                          cumulative_complexity_peaks_plot)

from dtw import dtw, rabinerJuangStepPattern

from pyrqa.time_series import TimeSeries
from pyrqa.settings import Settings
from pyrqa.analysis_type import Classic
from pyrqa.neighbourhood import FixedRadius
from pyrqa.metric import EuclideanMetric
from pyrqa.computation import RQAComputation
from pyrqa.computation import RPComputation
from pyrqa.image_generator import ImageGenerator

Importing the dtw module. When using in academic works please cite:
  T. Giorgino. Computing and Visualizing Dynamic Time Warping Alignments in R: The dtw Package.
  J. Stat. Soft., doi:10.18637/jss.v031.i07.

[2]:

sinfo(write_req_file=False)

-----
dtw         1.1.6
jmspack     0.0.2
matplotlib  3.3.4
numpy       1.19.2
pandas      1.2.3
pyrqa       NA
scipy       1.6.1
seaborn     0.11.1
sinfo       0.3.1
sklearn     0.23.2
-----
IPython             7.21.0
jupyter_client      6.1.7
jupyter_core        4.7.1
jupyterlab          3.0.10
notebook            6.2.0
-----
Python 3.8.8 (default, Feb 24 2021, 13:46:16) [Clang 10.0.0 ]
macOS-10.16-x86_64-i386-64bit
12 logical CPU cores, i386
-----
Session information updated at 2021-03-21 21:16

[3]:

interesting_weather_columns = ['NAME', 'SNOW', 'PRCP',
 'TMAX',
 'TMIN',
 'timestamp']

[4]:

weather_df = (pd.read_csv("iowa_weather.csv")
              .assign(timestamp = lambda x : pd.to_datetime(x["DATE"]))
             .loc[:, interesting_weather_columns]
             )

[5]:

mal_df = (pd.read_excel("MB_SeizureRecords.xlsx", index_col=0)
          .loc[1:72, :]
          .assign(timestamp = lambda x : pd.to_datetime(x["DATE"]))
          .loc[:, ["timestamp", "STATE"]]
          .reset_index(drop=True)
          .assign(grand_mal = 1)
         )

mal_df.columns = [x.lower() for x in mal_df.columns]

[6]:

all_times = pd.date_range(mal_df["timestamp"].min(),
                          mal_df["timestamp"].max(), freq = "d")

[7]:

troublesome_ts = (mal_df
 .groupby("timestamp")
 .count()
 .loc[:, "grand_mal"]
 .sort_values(ascending=False)
 .head(9)
 .index
 .tolist()
)

[8]:

both_ts = (mal_df[mal_df["timestamp"].isin(troublesome_ts)]
 .value_counts()
 .reset_index()
 .groupby("timestamp")
 .count()
 .sort_values("state")
 .tail(4)
 .index
 .tolist()
)

[9]:

mals = (mal_df
 .groupby("timestamp")
 .count()
 .loc[:, ["grand_mal"]]
)

states = (mal_df
 .groupby("timestamp")
 .head(1)
 .set_index("timestamp")
 .loc[:, ["state"]]
)

_df = pd.merge(mals, states, left_index=True, right_index=True)

_df.loc[both_ts, "state"] = "both"

[10]:

_df = _df.reindex(all_times)

[11]:

_df.loc[_df["grand_mal"].isna(), "grand_mal"] = 0

[12]:

df = pd.merge(_df,
         weather_df[weather_df["NAME"].str.contains("IOWA CITY, IA US")],
         how="left",
         left_index=True,
         right_on="timestamp"
        ).set_index("timestamp")

[13]:

_ = plt.figure(figsize=(30, 5))
_ = sns.heatmap(df.drop(["state", "NAME"], axis=1).T)

RQA plotting¶

[14]:

scale_cols = interesting_weather_columns[1:-1] + ["grand_mal"]

[15]:

scale_cols

[15]:

['SNOW', 'PRCP', 'TMAX', 'TMIN', 'grand_mal']

[33]:

# for feature in scale_cols:
# # feature = "grand_mal"
# # feature = "TMIN"

#     data_points = df[feature].values.tolist()

#     time_series = TimeSeries(data_points,
#                              embedding_dimension=1,
#                              time_delay=1)
#     settings = Settings(time_series,
#                         analysis_type=Classic,
#                         neighbourhood=FixedRadius(0.65),
#                         similarity_measure=EuclideanMetric,
#                         theiler_corrector=1)
#     computation = RQAComputation.create(settings,
#                                         verbose=False)
#     result = computation.run()

#     computation = RPComputation.create(settings)
#     result = computation.run()
#     ImageGenerator.save_recurrence_plot(result.recurrence_matrix_reverse,
#                                         f'Img/{feature}_recurrence_plot.png')

TS_levels¶

[17]:

df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 359 entries, 2017-05-21 to 2018-05-14
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   grand_mal  359 non-null    float64
 1   state      60 non-null     object
 2   NAME       359 non-null    object
 3   SNOW       359 non-null    float64
 4   PRCP       359 non-null    float64
 5   TMAX       358 non-null    float64
 6   TMIN       357 non-null    float64
dtypes: float64(5), object(2)
memory usage: 30.5+ KB

[18]:

def scale_df(x):
    return pd.DataFrame(MinMaxScaler().fit_transform(x), columns=x.columns, index=x.index)

[19]:

df[scale_cols] = scale_df(df[scale_cols])

[20]:

feature_selection = scale_cols #+ ["grand_mal"]
ts_df = df.drop(["state", "NAME"], axis=1).dropna().reset_index()

[21]:

all_ts_levels = pd.DataFrame()
for column in feature_selection:
    ts_levels_df, fig, ax = ts_levels(ts=ts_df[column],
                                  ts_x=None,
                                  criterion="mse",
                                  max_depth=10,
                                  min_samples_leaf=15,
                                  min_samples_split=2,
                                  max_leaf_nodes=30,
                                  plot=False,
                                  equal_spaced=True,
                                  n_x_ticks=10)
    _ = ts_levels_df.set_index("t_steps", inplace=True)
    ts_levels_df.columns = [f"{column}_{x}" for x in ts_levels_df.columns.tolist()]
    all_ts_levels = pd.concat([all_ts_levels, ts_levels_df], axis=1)

[22]:

plot_df = (pd.merge(ts_df["timestamp"].reset_index(drop=True),
         all_ts_levels,
         left_index=True,
         right_index=True)
       .melt(id_vars="timestamp"))

[23]:

ax = sns.scatterplot(x="timestamp",
                  y="value",
                  hue="variable",
                  palette="gist_rainbow",
                  data=plot_df[plot_df["variable"].str.contains("grand_mal")],
                 )
ax = sns.lineplot(x="timestamp",
                  y="value",
                  hue="variable",
                  palette="gist_rainbow",
                  data=plot_df[plot_df["variable"].str.contains("grand_mal")],
                 )

[24]:

plt.figure(figsize=(30, 10))
height=1
ax = sns.scatterplot(x="timestamp",
                  y="value",
                  hue="variable",
                  palette="gist_rainbow",
                  data=plot_df[plot_df["variable"].str.contains("original_ts")],
                  legend=False,
                  linestyle='dashed',
                 )
ax = sns.lineplot(x="timestamp",
                  y="value",
                  hue="variable",
                  palette="gist_rainbow",
                  data=plot_df[plot_df["variable"].str.contains("ts_levels")],
                 )
_ = ax.set(xlabel='Date', ylabel='Scaled Values',
           title=f'Weather and grand mal occurence ts levels')

Dynamic time warping of ts levels time series¶

[25]:

template = plot_df[plot_df["variable"].str.contains("grand_mal_ts_levels")].value.values
query = plot_df[plot_df["variable"].str.contains("PRCP_ts_levels")].value.values

# template = plot_df[plot_df["variable"].str.contains("TMAX_ts_levels")].value.values
# query = plot_df[plot_df["variable"].str.contains("TMIN_ts_levels")].value.values

# template = plot_df[plot_df["variable"].str.contains("TMAX_ts_levels")].value.values
# query = plot_df[plot_df["variable"].str.contains("TMAX_ts_levels")].value.values

[26]:

len(query), len(template)

[26]:

(356, 356)

[27]:

## Find the best match with the canonical recursion formula
alignment = dtw(query, template, keep_internals=True)

## Display the warping curve, i.e. the alignment curve
_ = alignment.plot(type="threeway")

## Align and plot with the Rabiner-Juang type VI-c unsmoothed recursion
dtw(query, template, keep_internals=True,
    step_pattern=rabinerJuangStepPattern(1, "c"))\
    .plot(type="twoway",offset=-2)

# ## See the recursion relation, as formula and diagram
# print(rabinerJuangStepPattern(6,"c"))
# rabinerJuangStepPattern(6,"c").plot()

alignment.distance

[27]:

10.25423849434186

[28]:

ax = sns.lineplot(x="timestamp",
                  y="value",
#                   hue="variable",
                  palette="gist_rainbow",
                  data=plot_df[plot_df["variable"].str.contains("grand_mal_ts_levels")],
                 )

ax = sns.lineplot(x="timestamp",
                  y="value",
#                   hue="variable",
                  palette="gist_rainbow",
                  data=plot_df[plot_df["variable"].str.contains("PRCP_ts_levels")],
                 )

_ = ax.set(xlabel='Date', ylabel='Scaled Values',
           title=f'Weather and grand mal occurence ts levels')

Dynamic Complexity¶

[29]:

fluctuation_intensity_df = fluctuation_intensity(df=ts_df.set_index("timestamp"),
                                                  win=7,
                                                  xmin=0,
                                                  xmax=1,
                                                  col_first=1,
                                                  col_last=ts_df.shape[1]-1)

distribution_uniformity_df = distribution_uniformity(df=ts_df.set_index("timestamp"),
                                                  win=7,
                                                  xmin=0,
                                                  xmax=1,
                                                  col_first=1,
                                                  col_last=ts_df.shape[1]-1)

complexity_resonance_df = complexity_resonance(fluctuation_intensity_df, distribution_uniformity_df)

[30]:

_ = complexity_resonance_diagram(fluctuation_intensity_df, plot_title='Fluctuation Intensity Diagram', figsize=(30, 7))
_ = complexity_resonance_diagram(distribution_uniformity_df, plot_title='Distribution Uniformity Diagram', figsize=(30, 7))
_ = complexity_resonance_diagram(complexity_resonance_df, figsize=(30, 7))

[31]:

cumulative_complexity_peaks_df, significant_peaks_df = cumulative_complexity_peaks(df=complexity_resonance_df,
                            significant_level_item = 0.001,
                            significant_level_time = 0.05,)

[32]:

_ = cumulative_complexity_peaks_plot(cumulative_complexity_peaks_df,
                                     significant_peaks_df,
                                    figsize=(20,5),
                                    height_ratios=[1,4])