Update thresholds in main

9a02f9c5 · s202576 · 89416530 · 9a02f9c5
Commit 9a02f9c5 authored Feb 20, 2022 by s202576
--- a/Event_detection/main.ipynb
+++ b/Event_detection/main.ipynb
@@ -3790,20 +3790,69 @@
    "# Classify events"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "id": "83dda611",
+   "metadata": {},
+   "source": [
+    "## Step 0: Define known thresholds"
+   ]
+  },
  {
   "cell_type": "code",
   "execution_count": 17,
+   "id": "1846d15b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def carpenter(a):\n",
+    "    \"\"\"\n",
+    "    A function that takes in amplitude (deg) and returns duration (ms).\n",
+    "    \"\"\"\n",
+    "    return 21 + 2.2*a"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
   "id": "22575f46",
   "metadata": {},
   "outputs": [],
   "source": [
-    "# Define thresholds\n",
+    "# Fixations:\n",
    "min_fix_dur = 0.1\n",
-    "# blink_down_dur = 0.1\n",
-    "# blink_up_dur = 0.22\n",
+    "\n",
+    "# Blinks:\n",
    "avg_blink_dur = 0.1\n",
    "max_blink_dur = 0.32\n",
-    "max_sac_dur = 0.4"
+    "# blink_down_dur = 0.1\n",
+    "# blink_up_dur = 0.22\n",
+    "\n",
+    "# Saccades:\n",
+    "# Calculate maximum saccade duration using Carpenter's relation between saccade amplitude and duration, and the\n",
+    "# maximum possible span in the human visual field.\n",
+    "max_horiz_deg = 180\n",
+    "max_vert_deg = 200\n",
+    "max_amp = np.sqrt(180**2 + 200**2)\n",
+    "max_sac_dur = np.ceil(carpenter(max_amp))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "0b59405a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "613.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(max_sac_dur)"
   ]
  },
  {
@@ -3882,8 +3931,8 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 19,
-   "id": "0d6cac8b",
+   "execution_count": null,
+   "id": "b643bd10",
   "metadata": {},
   "outputs": [],
   "source": [
@@ -3891,8 +3940,16 @@
    "df['event'] = np.where(df.event == 'fix', np.where(df.iss > 100, 'other', df.event), df.event)\n",
    "\n",
    "# Create events df\n",
-    "events_df = make_events_df(df)\n",
-    "\n",
+    "events_df = make_events_df(df)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "id": "0d6cac8b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
    "# Combine non-fixation events into neighboring fixations if they consist only of Nan values,\n",
    "# last shorter than the average duration of blinks, and have an amplitude between neighboring fixation\n",
    "# centers less than the dispersion threshold\n",
 %% Cell type:code id:b0b3bba9 tags:
  
 ``` python
 import numpy as np
 import matplotlib
 %matplotlib notebook
 import matplotlib.pyplot as plt
 import os
 import sys
 import pickle
 from struct import *
 import pandas as pd
 import seaborn as sns
 sns.set_style("white")
 import warnings
 warnings.filterwarnings("ignore")
 ```
  
 %% Cell type:code id:019054a2 tags:
  
 ``` python
 #
 # DO NOT TOUCH. Define colors and states for plotting events.
 #
 states = ['fix', 'sac', 'smp', 'blink', 'other']
 colors = {'fix':'red', 'sac':'lime', 'smp':'purple', 'blink':'dodgerblue', 'other':'grey'}
 borders = {'fix':'magenta', 'sac':'green', 'smp':'blue', 'blink':'royalblue', 'other':'black'}
 ```
  
 %% Cell type:markdown id:9ede396d tags:
  
 ## Set desired parameters
  
 %% Cell type:code id:5a764090 tags:
  
 ``` python
 #
 # Change parameters here.
 #
 DEVICE = 'varjo' # choose between ['PI', 'varjo']
  
 #
 # If device is 'PI'...
 #
 PARTICIPANT = 'P18' # string, choose between list of participants below
 START, END = 1000, 2000 # set where you want subsample of graph to be (used in visuals)
  
 #
 # Else if device is 'varjo'...
 #
 EYE = 'right' # choose between ['right', 'left']
 FILE = 'look_around' # choose between ['blinks', 'look_around', 'fix_rotate_cw', 'fix_rotate_ccw']
 ```
  
 %% Cell type:code id:dc50b2f6 tags:
  
 ``` python
 #
 # DO NOT TOUCH. These parameters are set based on chosen params above.
 #
 if DEVICE == 'PI':
    EYE = 'combined'
    SAMPLE_RATE = 66
 elif DEVICE == 'varjo':
    PARTICIPANT = 'varjo'
    SAMPLE_RATE = 200
 else: SAMPLE_RATE = None
 ```
  
 %% Cell type:code id:da053c5b tags:
  
 ``` python
 #
 # List of available PI participants to choose from (and their corresponding data length).
 #
 # Part. Length of data
 # P2    9524
 # P3    25640
 # P4    21023
 # P5    16177
 # P8    12640
 # P9    16417
 # P11    8000
 # P12    9571
 # P13    8658
 # P14    13707
 # P15    16950
 # P17    6377
 # P18    12669
 # P19    22098
 # P21    9325
 # P24    9945
 ```
  
 %% Cell type:markdown id:3883d6d0 tags:
  
 ## Define functions
  
 %% Cell type:markdown id:cf60eeae tags:
  
 ### functions used to clean and manipulate the gaze data
  
 %% Cell type:code id:e244f3d3 tags:
  
 ``` python
 def clean_data(df):
    """
    Takes in a dataframe and returns a subset where rows with intersample speed over 1000 deg/s become Nan.
    """
    assert 'iss' in list(df.columns), "'iss' is not a column in the dataframe"
  
    # Label data where the intersample speed is above the physiological threshold of 1000 deg/s as noise.
    for idx, row in df[~np.isnan(df.iss)].iterrows():
        if row.iss <= 0 or row.iss > 1000:
            df.loc[idx, 'x_deg':'iss'] = np.NAN
  
    # Trim edges, which are usually noise that's not relevant to the actual experiment
    if DEVICE == 'varjo':
        df = df[110:len(df)-150] # number of samples to trim equivalent to 10 second
    elif DEVICE == 'PI':
        trim = int(10 * SAMPLE_RATE) # number of samples to trim equivalent to 10 second
        df = df[trim:len(df)-trim]
  
    # reset row indices
    df.reset_index(drop=True, inplace=True)
  
    return df
 ```
  
 %% Cell type:markdown id:26978116 tags:
  
 ### functions used in dispersion (I-DT)
  
 %% Cell type:code id:94fe353c tags:
  
 ``` python
 def fillWindow(j):
    """
    Given a starting index, returns a dictionary of dataframe indices:rows that spans at least a specified
    duration.
    """
    win = {j:df.loc[j]}
    start = df.loc[j, 'time']
    dur = 0
  
    # Add row to window until duration is at least minimum fixation duration (100 ms).
    while dur < idt_dur_thresh:
        if j == len(df)-1:
            return None
        j += 1
        win[j] = df.loc[j]
        # only recalculate duration if possible (never end in row with None value)
        if ~np.isnan(df.loc[j, 'x_deg']) and ~np.isnan(df.loc[j, 'y_deg']):
            dur = df.loc[j, 'time'] - start
  
    return win
  
  
 def dispersion(win):
    """
    Given a list of dataframe rows with columns 'x_deg' and 'y_deg', returns the euclidean distance between
    the minimum and maximum x's and y's.
    """
    if win == None:
        return idt_disp_thresh+1
  
    minx = maxx = win[0].x_deg
    miny = maxy = win[0].y_deg
  
    for p in win:
        if ~np.isnan(p.x_deg) and ~np.isnan(p.y_deg):
            minx = min(minx, p.x_deg)
            maxx = max(maxx, p.x_deg)
            miny = min(miny, p.y_deg)
            maxy = max(maxy, p.y_deg)
  
    max_d = np.sqrt((maxx - minx)**2 + (maxy - miny)**2)
  
    return max_d
 ```
  
 %% Cell type:markdown id:17565435 tags:
  
 ### functions used to create and adjust events_df
  
 %% Cell type:code id:68eab97e tags:
  
 ``` python
 def make_events_df(df):
    """
    Creates a dataframe of events with features given a dataframe with 'x_deg', 'y_deg', 'isd', 'iss', 'isv',
    'event', and timestamp 'time'.
    """
    eventStream = []
    prev_row, prev_idx = [], []
  
    for idx, row in df.iterrows():
  
        # ignore row if NAN values exist
        if any(row.values != row.values):
            continue
  
        if len(prev_row) == 0:
            prev_row = row
            prev_idx = idx
            stream = [prev_row]
            stream_i = [prev_idx]
            continue
  
        stream.append(prev_row)
        stream_i.append(prev_idx)
  
        # keep increasing the stream so long as the event is the same
        if row.event == prev_row.event:
            prev_row = row
            prev_idx = idx
        # if the event type changes, calculate the features
        else:
            duration = stream[-1].time - stream[0].time
            avg_x = np.average([r.x_deg for r in stream])
            avg_y = np.average([r.y_deg for r in stream])
            amplitude = np.sqrt((stream[-1].x_deg-stream[0].x_deg)**2 + (stream[-1].y_deg-stream[0].y_deg)**2)
            max_disp = max([np.sqrt((r.x_deg-avg_x)**2 + (r.y_deg-avg_y)**2) for r in stream])
            max_speed = max([r.iss for r in stream])
            avg_speed = np.average([r.iss for r in stream])
            std_speed = np.std([r.iss for r in stream])
            # if the event is a fixation and of duration less than the minimum fixation duration,
            # then combine it with the next event
            if prev_row.event == 'fix' and duration < 0.08:
                # relabel from fix to 'other'
                for r in stream:
                    r.event = row.event
                stream.append(prev_row)
                stream_i.append(prev_idx)
                prev_row = row
                prev_idx = idx
            # otherwise log the event to stream
            else:
                eventStream.append({'event':prev_row.event,
                                    'start_i':stream_i[0],
                                    'end_i':stream_i[-1],
                                    'amplitude':amplitude,
                                    'duration':duration,
                                    'center':(avg_x,avg_y),
                                    'max_disp':max_disp,
                                    'max_speed':max_speed,
                                    'avg_speed':avg_speed,
                                    'std_speed':std_speed})
                prev_row = row
                prev_idx = idx
                stream = [prev_row]
                stream_i = [prev_idx]
  
    return pd.DataFrame(eventStream)
  
 def adjust_amp_dur(events_df):
    """
    Adjusts amplitudes of non-fixations to be the euclidean distance from the prev fixation center to the center
    of the next fixation. Also adjusts duration of non-fixations to be time from end of prev fixation to start
    of next fixation.
    """
    for idx, row in events_df.iterrows():
        if idx == 0 or idx == len(events_df)-1:
            continue
  
        prev_row = events_df.loc[idx-1]
        next_row = events_df.loc[idx+1]
  
        if row.event!= 'fix' and prev_row.event == 'fix' and next_row.event == 'fix':
            events_df.loc[idx,'amplitude'] = np.sqrt((next_row.center[0]-prev_row.center[0])**2+(next_row.center[1]-prev_row.center[1])**2)
            events_df.loc[idx,'duration'] = df.loc[next_row.start_i,'time']-df.loc[prev_row.end_i,'time']
    return events_df
  
 def map_to_stream(seq, df):
    df['event'] = None
    for i, row in seq.iterrows():
        df.loc[row.start_i:row.end_i+1, 'event'] = row.event
    return df
 ```
  
 %% Cell type:markdown id:aaa00306 tags:
  
 ### functions used in plotting
  
 %% Cell type:code id:a281e60a tags:
  
 ``` python
 def makePlot(df, show_events=[]):
  
    fig, ax = plt.subplots(figsize=(10,5))
    ax2 = ax.twinx()  # twin object for two different y-axes on the same plot
  
    # Plot position.
    ax2.plot(df.time, df.x_deg, label='x', linewidth=0.6)
    ax2.plot(df.time, df.y_deg, label='y', linewidth=0.6)
    ax2.set_ylabel('deg', color='orange', fontsize=10)
  
    # Plot speed.
    ax.plot(df.time, df.iss, color='grey', label='iss')
    ax.set_ylabel('Intersample Speed (deg/s)', fontsize=10)
    ax.set_xlabel('Time (s)')
  
    # Plot each event a separate color.
    upper_bound = min(df[~np.isnan(df.iss)].iss)-20
    lower_bound = upper_bound-20
    if len(show_events) > 0:
        for event in show_events:
            ax.plot(df.time, np.where(df['event']==event,df.iss,None), '+',
                    color=colors[event], alpha=0.5, label=event)
            ax.fill_between(df.time, lower_bound, upper_bound, where= df.event == event,
                            color=colors[event], alpha=0.5)
  
    # labels
    ax.legend(loc='upper left')
    ax2.legend(loc='upper right')
  
    ax.set_title('Intersample Speed vs Time')
  
    lower = min(min(df[~np.isnan(df.x_deg)].x_deg), min(df[~np.isnan(df.y_deg)].y_deg)) - 5
    upper = max(max(df[~np.isnan(df.x_deg)].x_deg), max(df[~np.isnan(df.y_deg)].y_deg)) + 5
  
    ax2.set_yticks([i for i in range(int(lower), int(upper), 2)])
    ax2.grid(linewidth=0.5)
  
    plt.show()
  
  
 def plot_hists(df, events_df, norm = False):
    df = map_to_stream(events_df, df)
  
    features = ['iss', 'avg_speed', 'max_speed', 'amplitude', 'duration', 'max_disp']
    pos = {0:(0,0), 1:(1,0), 2:(0,1), 3:(1,1), 4:(2,0), 5:(2,1)}
    states = events_df.event.unique()
  
    fig, axes = plt.subplots(nrows = 3, ncols = 2, figsize = (10,16))
  
    for feat in features:
  
        row, col = pos[features.index(feat)]
  
        if feat == 'iss':
            for state in [s for s in states if s != 'noise']:
                try:
                    axes[row][col].hist(df[df.event == state][feat], bins = 50, alpha = 0.5, density=True, label = state)
                except: pass
            axes[row][col].set_title(str(feat)+' by event')
            axes[row][col].set_xlabel(feat)
            axes[row][col].set_ylabel('probability density')
            axes[row][col].legend()
  
        elif norm:
            for state in [s for s in states if s != 'noise']:
                try:
                    axes[row][col].hist(normalize(events_df[events_df.event == state][feat]), bins = 50, density=True, alpha = 0.5, label = state)
                except: pass
            axes[row][col].set_title(str(feat)+' by event')
            axes[row][col].set_xlabel(feat)
            axes[row][col].set_ylabel('probability density')
            axes[row][col].legend()
  
        else:
            for state in [s for s in states if s != 'noise']:
                try:
                    axes[row][col].hist(events_df[events_df.event == state][feat], bins = 50, alpha = 0.5, density=True, label = state)
                except: pass
            axes[row][col].set_title(str(feat)+' by event')
            axes[row][col].set_xlabel(feat)
            axes[row][col].set_ylabel('probability density')
            axes[row][col].legend()
  
    plt.show()
 ```
  
 %% Cell type:markdown id:c814ad50 tags:
  
 # Step 0: Collect and pre-process the data
  
 %% Cell type:code id:083b7025 tags:
  
 ``` python
 # All participants with a calibration folder and either PVL or CVL
 participants = {'P2':'CVL', 'P3':'CVL', 'P4':'Other', 'P5':'PVL', 'P6':'CVL', 'P8':'CVL', 'P9':'CVL', 'P10':'CVL',
                'P11':'PVL', 'P12':'PVL', 'P13':'CVL', 'P14':'PVL', 'P15':'PVL', 'P16':'PVL', 'P17':'PVL',
                'P18':'CVL', 'P19':'PVL', 'P21':'CVL', 'P22':'PVL', 'P24':'PVL', 'P25':'CVL', 'varjo':'None'}
  
 tasks = []
  
 # Take only valid datasets
 isCalibBad = ['P3_sandwich', 'P3_cereal', 'P3_walk', 'P4_sandwich', 'P4_cereal', 'P4_walk', 'P11_sandwich',
             'P11_cereal', 'P11_walk', 'P13_sandwich', 'P13_cereal', 'P13_walk', 'P14_sandwich' 'P14_walk',
             'P16_sandwich', 'P18_sandwich', 'P18_cereal', 'P18_walk', 'P19_sandwich', 'P19_walk',
             'P21_sandwich', 'P21_walk']
 maybe_isCalibBad = ['P12_sandwich', 'P12_cereal', 'P12_walk', 'P17_cereal', 'P17_walk', 'P25_cereal']
 isOptimal = ['P6_walk', 'P14_cereal', 'P16_cereal', 'P16_walk', 'P17_sandwich', 'P19_cereal', 'P21_cereal',
            'P22_cereal', 'P22_walk', 'P24_cereal', 'P25_sandwich', 'P25_walk']
  
 ```
  
 %% Cell type:code id:296e2d27 tags:
  
 ``` python
 data = {}
 round_to = 3
  
 if DEVICE == 'PI':
    calib_excluded = pd.read_csv('/data/IBOSTesting/PIDataCalibrated_new/Analysis/1Poly/calibratedData/calibedGaze_Fiona_walk_all_headCorrected.csv')
    p_list = calib_excluded.subID.unique()
 elif DEVICE == 'varjo':
    p_list = ['varjo']
 else:
    raise Exception("Device must be specified as either 'PI' or 'varjo'.")
  
 for participant in p_list:
  
    if DEVICE == 'PI':
  
        temp = calib_excluded[calib_excluded.subID == participant]
  
        subID = participant
        vision_loss = participants[subID]
        task_type = 'walk'
  
        time, x_deg, y_deg = temp['timestamp(s)'].to_numpy(), temp['gaze_x_deg_calib_excluded'].to_numpy(), temp['gaze_y_deg_calib_excluded'].to_numpy()
        time  = time-time[0]
        azimuth = temp['eyeInWorldFicksAz_calib_excluded'].to_numpy()
        polar = temp['eyeInWorldFicksPol_calib_excluded'].to_numpy()
  
        x_deg = azimuth
        y_deg = polar
  
        x_deg[~np.isnan(x_deg)] = np.rad2deg(np.unwrap(np.deg2rad(x_deg[~np.isnan(x_deg)])))
        y_deg[~np.isnan(y_deg)] = np.rad2deg(np.unwrap(np.deg2rad(y_deg[~np.isnan(y_deg)])))
  
    elif DEVICE == 'varjo':
  
        subID = participant
        vision_loss = participants[subID]
  
        if FILE == 'blinks':
            temp = pd.read_csv('varjoData/Blinking-Every-Seconds-For-10-Seconds.csv')
            task_type = 'blinking (20 sec)'
        elif FILE == 'look_around':
            temp = pd.read_csv('varjoData/Looking-Around-For-20-Seconds.csv')
            task_type = 'looking around (20 sec)'
        elif FILE == 'fix_rotate_cw':
            temp = pd.read_csv('varjoData/Fixating-On-Thumb-While-Rotating-Clockwise.csv')
            task_type = 'fixating on thumb while rotating clockwise'
        elif FILE == 'fix_rotate_ccw':
            temp = pd.read_csv('varjoData/Fixating-On-Thumb-While-Rotating-Counter-Clockwise.csv')
            task_type = 'fixating on thumb while rotating counter clockwse'
        else:
            raise Exception("No varjo file specific. Should specify 'blinking' or 'look_around'.")
  
        if EYE == 'right':
            time, x_deg, y_deg = temp['CaptureTime'].to_numpy(), temp['RightAngle_X'].to_numpy(), temp['RightAngle_Y'].to_numpy()
        elif EYE == 'left':
            time, x_deg, y_deg = temp['CaptureTime'].to_numpy(), temp['LeftAngle_X'].to_numpy(), temp['LeftAngle_Y'].to_numpy()
        else:
            raise Exception("Running varjo data but no eye specified. Should specify 'right' or 'left'.")
        time  = (time-time[0])*10**-9
        x_deg[~np.isnan(x_deg)] = np.rad2deg(np.unwrap(np.deg2rad(x_deg[~np.isnan(x_deg)])))
        y_deg[~np.isnan(y_deg)] = np.rad2deg(np.unwrap(np.deg2rad(y_deg[~np.isnan(y_deg)])))
  
    assert len(time) == len(x_deg) and len(x_deg) == len(y_deg), "Vectors of mismatched length."
  
    # calculate intersample features (note that velocity includes direction, and speed is just magnitude)
    isd = []
    iss = []
    isv = []
  
    for i in range(1, len(time)):
        # intersample displacement and velocity should only be calculated for non-NAN values
        if np.isnan(x_deg[i-1]) or np.isnan(x_deg[i]) or np.isnan(y_deg[i-1]) or np.isnan(y_deg[i]):
            isd.append(np.NAN)
            iss.append(np.NAN)
            isv.append(np.NAN)
        else:
            d = np.sqrt((x_deg[i]-x_deg[i-1])**2+(y_deg[i]-y_deg[i-1])**2)
            v = d/(time[i]-time[i-1])
            isd.append(d)
            iss.append(v)
  
            d_sign = (x_deg[i]-x_deg[i-1])+(y_deg[i]-y_deg[i-1])
            v_sign = d_sign/(time[i]-time[i-1])
            isv.append(v_sign)
  
    isd.append(np.NAN)
    iss.append(np.NAN)
    isv.append(np.NAN)
  
    isd = np.array(isd)
    iss = np.array(iss)
    isv = np.array(isv)
  
    # Remove noise
    # Remove data where iss >= 1000 deg/s and iss <= 0 deg/s.
    df = clean_data(pd.DataFrame({'time':time,
                                    'x_deg':x_deg,
                                    'y_deg':y_deg,
                                    'isd':isd,
                                    'iss':iss,
                                    'isv':isv
                                    }))
  
    data[subID] = df
 ```
  
 %% Cell type:code id:2b781cd0 tags:
  
 ``` python
 print("Part.", "Length of data")
 for key, value in data.items():
    print(f"{key}    {len(value)}")
 ```
  
 %% Output
  
    Part. Length of data
    varjo    2009
  
 %% Cell type:markdown id:507d070b tags:
  
 ## Visualize the features
  
 %% Cell type:code id:cf354486 tags:
  
 ``` python
 # visual check of normalized gaze features
  
 print(f"WITH UNWRAP \nParticipant: {PARTICIPANT} with vision loss: {participants[PARTICIPANT]}")
  
 # get dataframe for chosen participant
 df = data[PARTICIPANT]
  
 # graph the full dataset if using a varjo 20 sec file
 if DEVICE == 'varjo':
    START = 0
    END = len(df)+1
  
 plt.figure(figsize=(10,6)) # Set the plot size
  
 # Plot position
 plt.subplot(2,1,1)
 plt.plot((df.time-df.time[0])[START:END], df.x_deg[START:END], label = 'x pos')
 plt.plot((df.time-df.time[0])[START:END], df.y_deg[START:END], label = 'y pos')
 plt.ylabel('position \n (deg)', fontsize=15)
 plt.xlabel('time (s)', fontsize = 15)
 plt.legend(loc='upper right')
 plt.title('Gaze position over time', fontsize=15)
  
 # Plot speed
 plt.subplot(2,1,2)
 plt.plot((df.time-df.time[0])[START:END], df.iss[START:END], label='intersample speed')
 plt.ylabel('speed \n (deg/s)', fontsize=15)
 plt.xlabel('time (s)', fontsize=15)
 plt.legend(loc='upper right')
 plt.title('Intersample speed over time', fontsize = 15)
  
 plt.subplots_adjust(hspace=0.6)
  
 plt.show()
 ```
  
 %% Output
  
    WITH UNWRAP
    Participant: varjo with vision loss: None
  
  
  
 %% Cell type:code id:30c973fa tags:
  
 ``` python
 plt.figure()
 plt.scatter(df.x_deg[START:END], df.y_deg[START:END], s=5)
 plt.ylabel('y-position (deg)', fontsize=15)
 plt.xlabel('x-position (deg)', fontsize=15)
 plt.title('Gaze position', fontsize = 15)
 plt.show()
 ```
  
 %% Output
  
  
  
 %% Cell type:code id:80ff4607 tags:
  
 ``` python
 plt.figure()
 sns.distplot(df[~np.isnan(df.iss)].iss, len(df)//10)
 plt.title("Spread of intersample speed")
 plt.ylabel("Count")
 plt.xlabel("iss (deg/s)")
 plt.show()
 ```
  
 %% Output
  
  
  
 %% Cell type:code id:073426ce tags:
  
 ``` python
 df.head()
 ```
  
 %% Output
  
         time     x_deg     y_deg       isd        iss       isv
    0  1.2104 -1.711354  5.685386  0.097937   9.793714   0.96850
    1  1.2204 -1.637429  5.621146  0.083103   4.155173  -5.85895
    2  1.2404 -1.691506  5.558044  0.107905  10.790498 -12.85510
    3  1.2504 -1.714667  5.452654  0.160179  16.017857 -20.01640
    4  1.2604 -1.761719  5.299542  0.152059  15.205869 -18.92760
  
 %% Cell type:markdown id:a9175b38 tags:
  
 # Classify events
  
+%% Cell type:markdown id:83dda611 tags:
+
+## Step 0: Define known thresholds
+
+%% Cell type:code id:1846d15b tags:
+
+``` python
+def carpenter(a):
+    """
+    A function that takes in amplitude (deg) and returns duration (ms).
+    """
+    return 21 + 2.2*a
+```
+
 %% Cell type:code id:22575f46 tags:
  
 ``` python
-# Define thresholds
+# Fixations:
 min_fix_dur = 0.1
-# blink_down_dur = 0.1
-# blink_up_dur = 0.22
+
+# Blinks:
 avg_blink_dur = 0.1
 max_blink_dur = 0.32
-max_sac_dur = 0.4
+# blink_down_dur = 0.1
+# blink_up_dur = 0.22
+
+# Saccades:
+# Calculate maximum saccade duration using Carpenter's relation between saccade amplitude and duration, and the
+# maximum possible span in the human visual field.
+max_horiz_deg = 180
+max_vert_deg = 200
+max_amp = np.sqrt(180**2 + 200**2)
+max_sac_dur = np.ceil(carpenter(max_amp))
 ```
  
+%% Cell type:code id:0b59405a tags:
+
+``` python
+print(max_sac_dur)
+```
+
+%% Output
+
+    613.0
+
 %% Cell type:markdown id:ebb4c2c6 tags:
  
 ## Step 1: Detect fixations using I-DT
  
 %% Cell type:code id:04c40f74 tags:
  
 ``` python
 # Run I-DT
 df['event'] = 'other'
  
 idt_dur_thresh = min_fix_dur
 idt_disp_thresh = df[~np.isnan(df.isd)].isd.mean() + df[~np.isnan(df.isd)].isd.std() # should be ~2 deg (more w/ noise)
 print("dispersion threshold (deg):", idt_disp_thresh)
  
 i = 0
 while i < len(df):
  
    # Fill the window with samples of at least 100 ms (min fix threshold).
    if ~np.isnan(df.loc[i, 'x_deg']) and ~np.isnan(df.loc[i, 'y_deg']):
        window = fillWindow(i)
    else:
        i += 1
        continue
  
    if window == None:
        i += 1
        continue
  
    d = dispersion(list(window.values()))
  
    if d <= idt_disp_thresh:
        # We are in a fixation, but need to grow it
        # until we move above the threshold.
  
        k = list(window.keys())[-1]+1
        if k == len(df):
            for key, value in window.items():
                df.loc[key, 'event'] = 'fix'
            i = k
            continue
  
        d = dispersion(list(window.values()).extend(df.loc[k]))
  
        while k < len(df) and d <= idt_disp_thresh:
            window[k] = df.loc[k]
            k += 1
            d = dispersion(list(window.values()).extend(df.loc[k]))
  
        i = k
  
        for key, value in window.items():
            df.loc[key, 'event'] = 'fix'
  
    else:
        # Not a duration of min 100 ms and within ~2 deg
        i += 1
 ```
  
 %% Output
  
    dispersion threshold (deg): 1.8770788515550427
  
-%% Cell type:code id:0d6cac8b tags:
+%% Cell type:code id:b643bd10 tags:
  
 ``` python
 # Check: relabel fixation points as other where intersample speed is greater than 100 deg/s
 df['event'] = np.where(df.event == 'fix', np.where(df.iss > 100, 'other', df.event), df.event)
  
 # Create events df
 events_df = make_events_df(df)
+```
  
+%% Cell type:code id:0d6cac8b tags:
+
+``` python
 # Combine non-fixation events into neighboring fixations if they consist only of Nan values,
 # last shorter than the average duration of blinks, and have an amplitude between neighboring fixation
 # centers less than the dispersion threshold
 for idx, row in events_df.iterrows():
    if idx == 0 or idx >= len(events_df)-1:
        continue
    prev_row = events_df.loc[idx-1]
    next_row = events_df.loc[idx+1]
    if row.event == 'other' and prev_row.event == 'fix' and next_row.event == 'fix':
        amp = np.sqrt((next_row.center[0]-prev_row.center[0])**2+(next_row.center[1]-prev_row.center[1])**2)
        dur = next_row.start_i - prev_row.end_i
        if dur < avg_blink_dur and amp <= idt_disp_thresh and row.amplitude == 0.0:
            events_df.loc[idx, 'event'] = 'fix'
  
 # Map the changes to the dataset, recreate events df based on updated dataset, and
 # adjust amplitude and duration of non-fixation events.
 df = map_to_stream(events_df, df)
 events_df = make_events_df(df)
 events_df = adjust_amp_dur(events_df)
 ```
  
 %% Cell type:markdown id:6b7da211 tags:
  
 ## Step 2: Classify blinks within non-fixations
  
 %% Cell type:code id:cd85434a tags:
  
 ``` python
 def length_of_nans(start, end):
    """
    A function that returns the duration and start and end indices of the longest Nan sequence in a window.
    """
    i = start+1
    nan_spans = []
    nan_span = False
    while i < end+1:
        s0 = i-1
        while np.isnan(df.loc[i,'x_deg']) and np.isnan(df.loc[i,'y_deg']) and i < end+1:
            s1 = i
            i += 1
            nan_span = True
        if nan_span:
            nan_spans.append([s0, s1])
        nan_span = False
        i += 1
    max_dur = 0.0
    max_loc = [None,None]
    for s in nan_spans:
        dur = df.loc[s[1],'time']-df.loc[s[0],'time']
        if dur > max_dur:
            max_dur = dur
            max_loc = [s[0],s[1]]
    return max_dur, max_loc
 ```
  
 %% Cell type:code id:6dcde51a tags:
  
 ``` python
 vel_thresh = df[df.event=='other'].iss.mean()
 print(f"(+/-) vel thresh (deg/s): {vel_thresh}")
  
 buffer = int(np.ceil(0.02 * SAMPLE_RATE)) # number of samples to trim equivalent to 20 ms
  
 for idx, row in events_df[events_df.event == 'other'].iterrows():
    v = df.loc[row.start_i:row.end_i+1, 'isv'].to_numpy()
  
    v_min = min(v)
    start = np.nanargmin(v)+row.start_i
    v_max = max(v)
    end = np.nanargmax(v)+row.start_i
  
    max_nan_dur, max_nan_locs = length_of_nans(row.start_i, row.end_i)
  
    # if there's a hole of at least 100 ms, classify as blink
    if max_nan_dur >= avg_blink_dur*0.75:
        try:
            df.loc[max_nan_locs[0]-buffer:max_nan_locs[1]+buffer+1,'event'] = 'blink'
        except:
            try: # if at the end of the dataframe
                df.loc[max_nan_locs[0]-buffer:max_nan_locs[1]+1,'event'] = 'blink'
            except:# if at the beginning of the dataframe
                df.loc[max_nan_locs[0]:max_nan_locs[1]+buffer+1,'event'] = 'blink'
    # or if the event has a large negative velocity followed by a large positive velocity
    elif v_min < -vel_thresh and v_max > vel_thresh and start <= end:
        # then there likely exists a blink within the event
        # because a blink will resemble a very fast downward movement followed by a very fast upward movement
        try:
            df.loc[start-buffer:end+buffer+1,'event'] = 'blink'
        except:
            try: # if at the end of the dataframe
                df.loc[start-buffer:end+1,'event'] = 'blink'
            except:# if at the beginning of the dataframe
                df.loc[start:end+buffer+1,'event'] = 'blink'
  
 events_df = make_events_df(df)
 ```
  
 %% Output
  
    (+/-) vel thresh (deg/s): 109.9350340126173
  
 %% Cell type:markdown id:2b5196ca tags:
  
 ## Step 3: For each blink, determine if it happened during a saccade, smooth pursuit, or fixation and label accordingly
  
 %% Cell type:code id:0f7c4df3 tags:
  
 ``` python
 max_speed_thresh = events_df[events_df.event != 'blink'].max_speed.mean()
 avg_speed_thresh = events_df[events_df.event != 'blink'].avg_speed.mean()
 print('max speed thresh (deg/s):', max_speed_thresh)
 print('avg speed thresh (deg/s):', avg_speed_thresh)
  
 for idx, row in events_df[events_df.event=='blink'].iterrows():
    if idx <= 1 or idx >= len(events_df)-2:
        continue
  
    prev_prev_row = events_df.loc[idx-2]
    prev_row = events_df.loc[idx-1]
    next_row = events_df.loc[idx+1]
    next_next_row = events_df.loc[idx+2]
  
    # For blinks in the sequence Fix -> Other -> Blink -> Other -> Fix
    if prev_prev_row.event=='fix' and prev_row.event=='other' and next_row.event=='other' and next_next_row.event=='fix':
        d = np.sqrt((next_next_row.center[0]-prev_prev_row.center[0])**2+(next_next_row.center[1]-prev_prev_row.center[1])**2)
        dur = df.loc[next_row.end_i, 'time'] - df.loc[prev_row.start_i, 'time']
  
        if d <= idt_disp_thresh and dur <= max_blink_dur:
            blink_start = min([i for i, r in df.loc[prev_row.start_i:next_row.end_i+1].iterrows() if r.iss >= 100])
            blink_end = max([i for i, r in df.loc[prev_row.start_i:next_row.end_i+1].iterrows() if r.iss >= 100])
            df.loc[prev_row.start_i:next_row.end_i+1,'event'] = 'fix'
            df.loc[blink_start:blink_end,'event'] = 'blink'
            # print(np.round(d, 3), df.loc[prev_row.start_i,'time'], df.loc[next_row.end_i+1,'time'])
        elif dur > max_sac_dur:
            df.loc[prev_row.start_i:row.start_i,'event']='smp'
            df.loc[next_row.start_i:next_row.end_i+1,'event']='smp'
        else:
            df.loc[prev_row.start_i:row.start_i,'event']='sac'
            df.loc[next_row.start_i:next_row.end_i+1,'event']='sac'
  
    # For blinks in the sequence Fix -> Blink -> Other -> Fix
    elif prev_row.event=='fix' and next_row.event=='other' and next_next_row.event=='fix':
        d = np.sqrt((next_next_row.center[0]-prev_row.center[0])**2+(next_next_row.center[1]-prev_row.center[1])**2)
        dur = df.loc[next_row.end_i, 'time'] - df.loc[row.start_i, 'time']
  
        if d <= idt_disp_thresh and dur <= max_blink_dur:
            blink_end = max([i for i, r in df.loc[row.start_i:next_row.end_i+1].iterrows() if r.iss >= 100])
            df.loc[row.start_i:blink_end+1,'event'] = 'blink'
            df.loc[blink_end+1:blink_end,'event'] = 'fix'
            # print(np.round(d, 3), df.loc[prev_row.start_i,'time'], df.loc[next_row.end_i+1,'time'])
        elif dur > max_sac_dur:
            df.loc[next_row.start_i:next_row.end_i+1,'event']='smp'
        else:
            df.loc[next_row.start_i:next_row.end_i+1,'event']='sac'
  
 events_df = make_events_df(df)
 ```
  
 %% Output
  
    max speed thresh (deg/s): 184.65622061141264
    avg speed thresh (deg/s): 69.0977354153344
  
 %% Cell type:markdown id:d862ebdf tags:
  
 ## Step 4: Label remaining events as smooth pursuit, saccade, or other
  
 %% Cell type:code id:bc5705a1 tags:
  
 ``` python
 events_df = adjust_amp_dur(events_df)
 ```
  
 %% Cell type:code id:fa7fe23f tags:
  
 ``` python
 avg_speed_thresh = events_df[events_df.event=='other'].avg_speed.mean()
 std_speed_thresh = events_df[events_df.event=='other'].std_speed.describe()['25%']
 print("avg speed thresh (deg/s):", avg_speed_thresh)
 print("25th percentile of std speed (deg/s):", std_speed_thresh)
  
 for idx, row in events_df[events_df.event=='other'].iterrows():
    if row.avg_speed < avg_speed_thresh and row.std_speed < std_speed_thresh and row.std_speed != 0.0:
        df.loc[row.start_i:row.end_i,'event'] = 'smp'
    elif row.duration <= max_sac_dur:
        df.loc[row.start_i:row.end_i,'event'] = 'sac'
  
 events_df = make_events_df(df)
 events_df = adjust_amp_dur(events_df)
 ```
  
 %% Output
  
    avg speed thresh (deg/s): 118.7274424710976
    25th percentile of std speed (deg/s): 76.07774965945468
  
 %% Cell type:markdown id:f3f0a9c0 tags:
  
 # Visualize Results
  
 %% Cell type:code id:9a60167a tags:
  
 ``` python
 print(f"Participant: {PARTICIPANT} \n task: {task_type} \n vision loss: {participants[PARTICIPANT]}")
 print(f"Device: {DEVICE} \n Sample Rate: {SAMPLE_RATE} \n Eye: {EYE}")
 ```
  
 %% Output
  
    Participant: varjo
     task: looking around (20 sec)
     vision loss: None
    Device: varjo
     Sample Rate: 200
     Eye: right
  
 %% Cell type:code id:fc7a24f5 tags:
  
 ``` python
 # visualize a subsample
 df = map_to_stream(events_df, df)
 temp = df.copy()[START:END]
 makePlot(temp, show_events=['fix', 'smp', 'sac', 'blink', 'other'])
 ```
  
 %% Output
  
  
  
 %% Cell type:code id:ed83bb26 tags:
  
 ``` python
 plot_hists(df, events_df)
 ```
  
 %% Output
  
  
  
 %% Cell type:markdown id:394ae6c3 tags:
  
 ### Check Carpenter's Theorem
  
 %% Cell type:code id:4b86166d tags:
  
 ``` python
 carp_thresh = 1
  
 def plot_carpenter(seqDF, show_events=[]):
  
    # plot Carpenter's Theorem
    plt.figure(figsize=(10,5))
    x = np.linspace(min(seqDF.amplitude), max(seqDF.amplitude))
    y = 21 + 2.2*x
    plt.plot(x, y, color = 'green', label = 'D = 21 + 2.2A')
  
    # plot error bounds (20%)
    y_u = 21*(1+carp_thresh) + 2.2*x
    y_l = 21*(1-carp_thresh) + 2.2*x
    plt.plot(x, y_u, color = 'black', linestyle = 'dashed', linewidth = 0.7)
    plt.plot(x, y_l, color = 'black', linestyle = 'dashed', linewidth = 0.7, label = 'error bound')
  
    # plot events
    for event in show_events:
        seq = seqDF[seqDF['event']==event]    # convert duration from s to ms
        plt.scatter(seq.amplitude,
                    seq.duration*1000,
                    color = colors[event],
                    edgecolors=borders[event],
                    label = event,
                    alpha = 0.5)
    plt.legend(loc = 'upper right')
    plt.xlabel('Amplitude (deg)')
    plt.ylabel('Duration (ms)')
    plt.title('Amplitude vs Duration of Events')
    plt.show()
  
 plot_carpenter(events_df, show_events=['fix', 'smp', 'sac', 'blink', 'other'])
 ```
  
 %% Output
  
  
  
 %% Cell type:code id:da656983 tags:
  
 ``` python
 events_df['error'] = abs(events_df.duration*1000 - (21 + 2.2 * events_df.amplitude)) / (21 + 2.2 * events_df.amplitude)
  
 plt.figure()
 for state in states:
    try:
        sns.distplot(events_df[events_df.event==state]['error'], bins=100, label=state)
    except: pass
 plt.title('Spread of Carpenter Error')
 plt.xlabel('error')
 plt.ylabel('count')
 plt.legend()
 plt.show()
  
 events_df['error'].describe()
 ```
  
 %% Output
  
  
  
    count    123.000000
    mean       3.572066
    std        3.197198
    min        0.392409
    25%        1.455644
    50%        3.152569
    75%        3.818000
    max       17.037557
    Name: error, dtype: float64
  
 %% Cell type:code id:188bf462 tags:
  
 ``` python
 for state in states:
    print(state, np.round(events_df[events_df.event==state]['error'].mean(),2))
 ```
  
 %% Output
  
    fix 4.48
    sac 1.53
    smp 5.1
    blink 3.1
    other 6.57
  
 %% Cell type:code id:ef15461a tags:
  
 ``` python
 bins = 100
  
 # plot histogram
 plt.figure()
 for state in states:
    sns.distplot(df[df.event == state][~df.iss.isna()].iss, bins=bins, color=colors[state], label = state)
 plt.title('Intersample Speed by Event')
 plt.xlabel('Intersample Speed (deg/s)')
 plt.legend()
 plt.show()
 ```
  
 %% Output
  
  
  
 %% Cell type:code id:8165b098 tags:
  
 ``` python
 for state in states:
    print(f"Percentage of {state} events: {np.round(len(events_df[events_df.event==state])/len(events_df)*100,1)}")
 ```
  
 %% Output
  
    Percentage of fix events: 46.3
    Percentage of sac events: 34.1
    Percentage of smp events: 11.4
    Percentage of blink events: 4.1
    Percentage of other events: 4.1
  
 %% Cell type:code id:37337f21 tags:
  
 ``` python
 bins = 20
  
 fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(10,15))
 pos = {0:(0,0), 1:(1,0), 2:(0,1), 3:(1,1), 4:(0,2), 5:(1,2)}
  
 for i in range(len(states)):
    a = events_df[events_df.event == states[i]].max_disp
    b = events_df[events_df.event == states[i]].max_speed
  
    col, row = pos[i]
    axes[row][col].hist2d(a,b,bins=bins)
    axes[row][col].set_title(states[i])
    axes[row][col].set_xlabel('Max Dispersion (deg)')
    axes[row][col].set_ylabel('Max Speed (deg/s)')
  
 plt.show()
 ```
  
 %% Output
  
  
  
 %% Cell type:code id:9cbe1fb8 tags:
  
 ``` python
 bins = 20
  
 fig, axes = plt.subplots(nrows=3, ncols=2, figsize=(10,15))
 pos = {0:(0,0), 1:(1,0), 2:(0,1), 3:(1,1), 4:(0,2), 5:(1,2)}
  
 for i in range(len(states)):
    a = events_df[events_df.event == states[i]].amplitude
    b = events_df[events_df.event == states[i]].duration
  
    col, row = pos[i]
    axes[row][col].hist2d(a,b,bins=bins)
    axes[row][col].set_title(states[i])
    axes[row][col].set_xlabel('Amplitude (deg)')
    axes[row][col].set_ylabel('Duration (s)')
  
 plt.show()
 ```
  
 %% Output
  
  
  
 %% Cell type:code id:04ec5e7f tags:
  
 ``` python
 for state in states:
  
    print("\n" + state)
    print('********************')
    print('avg amp (deg):', np.round(np.average(events_df[events_df.event==state].amplitude),2))
    print("avg dur (s):", np.round(np.average(events_df[events_df.event==state].duration),2))
    print("avg amp/dur:", np.round(np.average(events_df[events_df.event==state].amplitude)/np.average(events_df[events_df.event==state].duration),3), "deg/s")
  
    print("\navg max disp (deg):", np.round(np.average(events_df[events_df.event==state].max_disp),2))
    print("avg max speed (deg/s):", np.round(np.average(events_df[events_df.event==state].max_speed),2))
    print("avg avg disp/speed:", np.round(np.average(events_df[events_df.event==state].max_disp)/np.average(events_df[events_df.event==state].max_speed),3), "s")
 ```
  
 %% Output
  
    
    fix
    ********************
    avg amp (deg): 1.34
    avg dur (s): 0.13
    avg amp/dur: 10.133 deg/s
    
    avg max disp (deg): 0.96
    avg max speed (deg/s): 52.57
    avg avg disp/speed: 0.018 s
    
    sac
    ********************
    avg amp (deg): 17.23
    avg dur (s): 0.15
    avg amp/dur: 118.013 deg/s
    
    avg max disp (deg): 10.36
    avg max speed (deg/s): 335.63
    avg avg disp/speed: 0.031 s
    
    smp
    ********************
    avg amp (deg): 8.24
    avg dur (s): 0.27
    avg amp/dur: 30.513 deg/s
    
    avg max disp (deg): 5.73
    avg max speed (deg/s): 189.47
    avg avg disp/speed: 0.03 s
    
    blink
    ********************
    avg amp (deg): 18.49
    avg dur (s): 0.28
    avg amp/dur: 65.073 deg/s
    
    avg max disp (deg): 12.51
    avg max speed (deg/s): 397.12
    avg avg disp/speed: 0.031 s
    
    other
    ********************
    avg amp (deg): 25.48
    avg dur (s): 0.56
    avg amp/dur: 45.822 deg/s
    
    avg max disp (deg): 19.92
    avg max speed (deg/s): 403.84
    avg avg disp/speed: 0.049 s
  
 %% Cell type:code id:be559960 tags:
  
 ``` python
 ```
  
 %% Cell type:code id:c8e6de1d tags:
  
 ``` python
 ```