Upload dualmicro_functions.py

d1ecc0df · glia · a5b6f589 · d1ecc0df
Commit d1ecc0df authored 11 months ago by glia
--- a/dualmicro_functions.py
+++ b/dualmicro_functions.py
+# -*- coding: utf-8 -*-
+"""
+Functions for the analysis of two-brain microstates
+
+@author: Qianliang Li (glia@dtu.dk)
+"""
+import os
+import numpy as np
+import pandas as pd
+import mne
+import mat73
+import matplotlib.pyplot as plt
+import nolds
+
+from eeg_microstates3 import (locmax, T_empirical, H_1, H_2)
+
+def load_epoch_from_fieldtrip(i, files, event_id):
+    """Loads the preprocessed EEG data """
+    # Get environmental variables
+    Subject_id = os.environ.get('Subject_id')
+    # Load the file
+    tmp_dict = mat73.loadmat(files[i])
+    s_id = Subject_id[i]
+    eeg = tmp_dict["eeg_ppn"+str(s_id)[-1]+"_seg"]
+    # Get the epoched time series data (epoch, ch, time)
+    data = np.stack(eeg["trial"])
+    # Re-scale data
+    data *= 1e-6 # micro Volt to Volt
+    # Create info instance
+    ch_names = [ele[0] for ele in eeg["hdr"]["label"]]
+    sfreq = int(eeg["fsample"])
+    eeg_info = mne.create_info(ch_names, sfreq,ch_types="eeg")
+    # Update filter info
+    mne.filter._filt_update_info(eeg_info, True, 1.0, 40)
+    # Make events array (epoch number, _, event id)
+    events = np.zeros((len(data),3)).astype(int)
+    events[:,0] = np.arange(0,len(data))
+    events[:,2] = eeg["trialinfo"][:,0]
+    # Save trialinfo object for synchronization of pairs based on timings
+    trialinfo = eeg["trialinfo"][:,0:3]
+    # Create the Epochs
+    epoch = mne.EpochsArray(data, eeg_info, events, 0, event_id)
+    # Set montage
+    epoch.set_montage("biosemi64")
+    return epoch, trialinfo
+
+# The function takes the data as a numpy array (n_t, n_ch)
+def prepare_1P_micro_arr(i, ppn2_correction, sfreq, freq_range=None, standardize:bool=True):
+    """ Prepare single-brain EEG data for microstate analysis """
+    # Get environmental variables
+    Subject_id = os.environ.get('Subject_id')
+    # Load epochs
+    epoch, trialinfo = load_epoch_from_fieldtrip(i)
+    # Ensure it is averaged referenced
+    epoch = epoch.set_eeg_reference("average")
+    # Get numpy arrays
+    micro_data = epoch.get_data() # get data
+    if standardize == True:
+        # Standardize data - will make EEG amplitudes comparable between
+        micro_data = micro_data - micro_data.mean(axis=1, keepdims=True)
+        micro_data /= micro_data.std(axis=1, keepdims=True)
+    # Correct the event_id for ppn2 in each pair, to swap all 6 with 4
+    # and 7 with 5 and vice versa
+    if str(Subject_id[i])[-1]=="2":
+        trialinfo_df = pd.DataFrame(trialinfo.copy())
+        trialinfo_df.iloc[:,0].replace(ppn2_correction, inplace=True)
+        trialinfo = trialinfo_df.to_numpy()
+
+    # Transform data to correct shape
+    arr_shape = micro_data.shape # get shape
+    micro_data = micro_data.swapaxes(1,2) # swap ch and time axis
+    micro_data = micro_data.reshape(arr_shape[0]*arr_shape[2],arr_shape[1]) # reshape by combining epochs and times
+    
+    # Filter the data
+    if freq_range == None:
+        micro_data_filtered = micro_data # do not perform filtering
+    elif not freq_range == None:
+        # Notice it is done only after combining epochs and time, as filter length
+        # would be too long for 1s epochs. The filter function wants time on the last axis
+        micro_data = micro_data.transpose()
+        micro_data_filtered = mne.filter.filter_data(micro_data, sfreq, freq_range[0], freq_range[1])
+        # Reverse the shape
+        micro_data_filtered = micro_data_filtered.transpose()
+    
+    return micro_data_filtered, trialinfo
+
+def plot_microstates(n_maps, maps, gev, epoch_info):
+    """ Plot the microstates """
+    fig, axarr = plt.subplots(1, n_maps, figsize=(5*n_maps,5))
+    fig.patch.set_facecolor('white')
+    for imap in range(n_maps):
+        mne.viz.plot_topomap(maps[imap,:], pos = epoch_info, axes = axarr[imap]) # plot
+        axarr[imap].set_title("GEV: {:.2f}".format(gev[imap]), fontsize=16, fontweight="bold") # title
+    fig.suptitle("Microstates: {}".format(n_maps), fontsize=20, fontweight="bold")
+    plt.tight_layout()
+    return fig
+
+def reorder_microstate_results(new_order, maps, gev, m_labels):
+    """ Function to re-order microstates based on manual order """
+    reordered_maps = maps[new_order,:]
+    reordered_gev = gev[new_order]
+    # Make directory to find and replace map labels
+    dic0 = {value:key for key, value in enumerate(new_order)}
+    reordered_m_labels = np.array([dic0.get(n, n) for n in m_labels]) # re-order labels
+    return reordered_maps, reordered_gev, reordered_m_labels
+
+def microstate_run_length_encoding(m_labels):
+    """ Take a 1D stream of microstates and returns the label and duration """
+    # Adapted from RLE Matlab code by Abdulrahman Ikram Siddiq, 2011
+    # Initialize
+    counter = 0
+    label = [9999] * len(m_labels)
+    duration = [9999] * len(m_labels)
+    # Starting the lists
+    label[counter] = m_labels[counter] # first element
+    duration[counter] = 1 # first element
+    for i in range(1,len(m_labels)):
+        # Add 1 to duration, if the next element is the same as current microstate
+        if m_labels[i-1] == m_labels[i]:
+            duration[counter] += 1
+        # If it is not the same, get the new microstate label and restart duration
+        else:
+            counter += 1
+            label[counter] = m_labels[i]
+            duration[counter] = 1
+    # Trim the unused parts of the list
+    label = label[:counter+1]
+    duration = duration[:counter+1]
+    assert not any(np.array(label) == 9999) # all fillers removed
+    assert not any(np.array(duration) == 9999) # all fillers removed
+    
+    return np.array(label), np.array(duration)
+
+def single_micro_fit_all_feature_computation(i, n_maps, microstate_results, trialinfo_list, sfreq, event_id,
+                                             single_brain_event_id):
+    """
+    Estimates the common (intrabrain) microstate features:
+        1. Average duration a given microstate remains stable (Dur)
+        2. Frequency occurrence, independent of individual duration (Occ)
+            Average number of times a microstate becomes dominant per second
+        3. Ratio of total Time Covered (TCo)
+        4. Transition probabilities (TMx)
+        5. Ratio of shannon entropy relative to theoretical max chaos (Ent)
+    
+    Parameters
+    ----------
+    i : int
+        The index.
+    n_maps : int
+        The number of maps (clusters) used.
+    microstate_results : list
+        The estimated microstates.
+    trialinfo_list : list
+        List with trial informations.
+    sfreq : int
+        The sampling frequency.
+    event_id : dict
+        The mappings between event id and condition.
+    single_brain_event_id : dict
+        The renamed mappings between event id and condition for single-brain
+        analysis.
+
+    Returns
+    -------
+    m_labels : np.array
+        The microstate sequence (labels) time series in the format (epoch, time).
+    events : pd.DataFrame
+        The events corresponding to each epoch.
+    MFeatures : list
+        List of arrays of each microstate feature.
+
+    """
+    # Get environmental variables
+    Subject_id = os.environ.get('Subject_id')
+    # Get the microstate labels
+    sub_idx = microstate_results[5]
+    subject_indices = sub_idx[i], sub_idx[i+1]
+    m_labels = microstate_results[1][subject_indices[0]:subject_indices[1]]
+    
+    # Get the trialinfo with conditions
+    # Notice that ppn2 events have been corrected, so we are using separate
+    # observer and actor conditions
+    # and also separate follower and leader conditions
+    Subject0, trialinfo = trialinfo_list[i]
+    assert Subject_id[i] == Subject0
+    assert len(trialinfo) == len(m_labels)/sfreq
+    # Convert to dataframe
+    event_id_inv = {v: k for k, v in event_id.items()} # Inverse the event id
+    events = pd.DataFrame(trialinfo,columns=["Event_id","Trial_number","Trial_start_time"])
+    events["Event_label"] = events["Event_id"].replace(event_id_inv)
+    events = events.reset_index().rename(columns={"index":"Epoch_idx"})
+    
+    # Reshape m_labels to (epoch, time)
+    m_labels = m_labels.reshape(len(trialinfo),sfreq)
+    
+    # Remove pre-trial epochs
+    pre_trial_epochs = events["Trial_start_time"] < 0
+    m_labels = m_labels[np.invert(pre_trial_epochs)]
+    events = events.loc[np.invert(pre_trial_epochs)].reset_index(drop=True)
+    events["Epoch_idx"] = events.index
+    
+    # Initialize arrays
+    Dur_arr = np.zeros((len(single_brain_event_id),n_maps)); Dur_arr.fill(np.nan)
+    Occ_arr = np.zeros((len(single_brain_event_id),n_maps)); Occ_arr.fill(np.nan)
+    TCo_arr = np.zeros((len(single_brain_event_id),n_maps)); TCo_arr.fill(np.nan)
+    TMx_arr = np.zeros((len(single_brain_event_id),n_maps,n_maps))
+    Ent_arr = np.zeros(len(single_brain_event_id))
+    for e in range(len(single_brain_event_id)):
+        ev_idx = list(single_brain_event_id.values())[e]
+        ep_idx = events["Epoch_idx"][events["Event_id"] == ev_idx]
+        trial_numbers0 = np.unique(events["Trial_number"][ep_idx])
+        if len(trial_numbers0) > 8:
+            # Compute the first 8 trials and the last 8 separately, before
+            # taking the average to be consistent with asymmetric trials
+            Dur_arr0 = np.zeros((2,n_maps))
+            Occ_arr0 = np.zeros((2,n_maps))
+            TCo_arr0 = np.zeros((2,n_maps))
+            TMx_arr0 = np.zeros((2,n_maps,n_maps))
+            Ent_arr0 = np.zeros(2)
+            trial_numbers_split = np.array_split(trial_numbers0, 2)
+            # Array_split is used in cases where a trial might have been
+            # dropped, hence the split is only 8/7
+            for s in range(len(trial_numbers_split)):
+                ep_idx0 = events.loc[(events["Event_id"] == ev_idx)&
+                                    (events["Trial_number"].isin(trial_numbers_split[s])),
+                                    "Epoch_idx"]
+                m_labels0 = m_labels[ep_idx0,:]
+                m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1])
+                # Compute duration, occurrence and time covered
+                l_, d_ = microstate_run_length_encoding(m_labels0_flat)
+                # For each microstate
+                for ii in range(n_maps):
+                    if np.isnan(np.nanmean(d_[l_==ii])):
+                        # The specific microstate did not occur at all for this event
+                        Dur_arr0[s,ii] = 0
+                        Occ_arr0[s,ii] = 0
+                        TCo_arr0[s,ii] = 0
+                    else:
+                        Dur_arr0[s,ii] = np.mean(d_[l_==ii]) * 1000/sfreq # convert to ms
+                        Occ_arr0[s,ii] = len(d_[l_==ii])/len(d_) * sfreq
+                        TCo_arr0[s,ii] = np.sum(d_[l_==ii])/np.sum(d_)
+                # Compute transition matrix
+                TMx_arr0[s] = T_empirical(m_labels0_flat, n_maps)
+                # Compute Shannon Entropy relative to max
+                Ent_arr0[s] = H_1(m_labels0_flat, n_maps)/np.log(float(n_maps))
+            # Average over the splits
+            Dur_arr[e] = np.mean(Dur_arr0, axis=0)
+            Occ_arr[e] = np.mean(Occ_arr0, axis=0)
+            TCo_arr[e] = np.mean(TCo_arr0, axis=0)
+            TMx_arr[e] = np.mean(TMx_arr0, axis=0)
+            Ent_arr[e] = np.mean(Ent_arr0, axis=0)
+            
+        else:
+            m_labels0 = m_labels[ep_idx,:]
+            m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1])
+            # Compute duration, occurrence and time covered
+            l_, d_ = microstate_run_length_encoding(m_labels0_flat)
+            # For each microstate
+            for ii in range(n_maps):
+                if np.isnan(np.nanmean(d_[l_==ii])):
+                    # The specific microstate did not occur at all for this event
+                    Dur_arr[e,ii] = 0
+                    Occ_arr[e,ii] = 0
+                    TCo_arr[e,ii] = 0
+                else:
+                    Dur_arr[e,ii] = np.mean(d_[l_==ii]) * 1000/sfreq # convert to ms
+                    Occ_arr[e,ii] = len(d_[l_==ii])/len(d_) * sfreq
+                    TCo_arr[e,ii] = np.sum(d_[l_==ii])/np.sum(d_)
+            # Compute transition matrix
+            TMx_arr[e] = T_empirical(m_labels0_flat, n_maps)
+            # Compute Shannon Entropy relative to max
+            Ent_arr[e] = H_1(m_labels0_flat, n_maps)/np.log(float(n_maps))
+    # Save all microstate features together
+    MFeatures = [Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr]
+    return m_labels, events, MFeatures
+
+def interbrain_microstate_run_length_encoding(m_labels1, m_labels2):
+    """
+    Takes two 1D streams of microstate labels and returns the label
+    and duration for the common microstate. If there are no common
+    microstate, the label -1 is returned with a corresponding duration.
+    
+    Adapted from RLE Matlab code by Abdulrahman Ikram Siddiq, 2011
+    """
+    assert len(m_labels1) == len(m_labels2), "The provided microstate labels do not have equal length"
+    # Initialize
+    counter = 0
+    label = [9999] * len(m_labels1)
+    duration = [9999] * len(m_labels1)
+    # Starting the lists
+    if m_labels1[counter] == m_labels2[counter]:
+        label[counter] = m_labels1[counter]
+    else:
+        label[counter] = -1
+    duration[counter] = 1 # first element
+    for i in range(1,len(m_labels1)):
+        # If the previous timepoint was not common microstate, then add
+        # 1 to the duration if the next timepoint is also not common
+        if label[counter] == -1:
+            if m_labels1[i] != m_labels2[i]:
+                duration[counter] += 1
+            # If it becomes a common microstate, then get new microstate
+            # label and restart duration
+            elif m_labels1[i] == m_labels2[i]:
+                counter += 1
+                label[counter] = m_labels1[i]
+                duration[counter] = 1
+        # If the previous time point was a common microstate, then add
+        # 1 to duration if the next time point is also common and the same label
+        elif label[counter] != -1:
+            if m_labels1[i] == m_labels2[i]:
+                if label[counter] == m_labels1[i]:
+                    duration[counter] += 1
+                # If still common, but a new label for both timeseries
+                # then restart with new label and duration
+                else:
+                    counter += 1
+                    label[counter] = m_labels1[i]
+                    duration[counter] = 1
+            # If the two timeseries have different microstates, then
+            # set lable to -1 and restart duration
+            elif m_labels1[i] != m_labels2[i]:
+                counter += 1
+                label[counter] = -1
+                duration[counter] = 1
+    
+    # Trim the unused parts of the list
+    label = label[:counter+1]
+    duration = duration[:counter+1]
+    assert not any(np.array(label) == 9999) # all fillers removed
+    assert not any(np.array(duration) == 9999) # all fillers removed
+    
+    return np.array(label), np.array(duration)
+
+def interbrain_T_matrix(m_labels1, m_labels2, n_maps):
+    """
+    Takes two 1D microstate labels and returns the transition matrix
+    The first row and column correspond to label -1, which indicates
+    no common microstate
+    
+    Adapted from von Wegner F and Laufs H, 2018
+    """
+    assert len(m_labels1) == len(m_labels2), "The provided microstate labels do not have equal length"
+    T = np.zeros((n_maps+1, n_maps+1))
+    n = len(m_labels1)
+    for i in range(n-1):
+        # If common microstate, then use that label
+        if m_labels1[i] == m_labels2[i]:
+            row_idx = m_labels1[i]+1
+        # Or set row as 0, the not common microstate
+        else:
+            row_idx = 0
+        # If next state is also common, set it to that state
+        if m_labels1[i+1] == m_labels2[i+1]:
+            col_idx = m_labels1[i+1]+1
+        # Or set col as 0, if it is not common microstate
+        else:
+            col_idx = 0
+        # Add 1 count to the transition matrix
+        T[row_idx,col_idx] += 1.0
+    assert n - np.sum(T) == 1 # check whether all transitions have been counted
+    # Normalize row sums to 1
+    T /= T.sum(axis=1, keepdims=True)
+    
+    return T
+
+def interbrain_microstate_feature_computation(i, n_maps, microstate_results, trialinfo_list, sfreq, event_id,
+                                              collapsed_event_id):
+    """
+    Interbrain features:
+        1. Average duration of common interbrain microstates (IBDur)
+        2. Frequency occurrence of common interbrain microstates in the pair (IBOcc)
+        3. Ratio of total time covered by interbrain common microstates in the pair (IBCov)
+        4. Transition probability towards common interbrain microstates in the pair (IBTMx)
+        5. Ratio of joint shannon entropy relative to theoretical max chaos (IBEnt)
+    
+    Parameters
+    ----------
+    i : int
+        The index.
+    n_maps : int
+        The number of maps (clusters) used.
+    microstate_results : list
+        The estimated microstates.
+    trialinfo_list : list
+        List with trial informations.
+    sfreq : int
+        The sampling frequency.
+    event_id : dict
+        The mappings between event id and condition.
+    collapsed_brain_event_id : dict
+        The renamed mappings between event id and collapsed conditions
+
+    Returns
+    -------
+    m_labels : list of two np.array
+        The microstate sequence (labels) time series in the format (epoch, time).
+    events : list of two pd.DataFrame
+        The events corresponding to each epoch.
+    MFeatures : list
+        List of arrays of each microstate feature.
+        
+    """
+    # Here i refers to the pair in range(n_subjects//2)
+    sub_idx = microstate_results[5]
+    
+    # Get the microstate labels and events for participant 1
+    subject_indices1 = sub_idx[2*i], sub_idx[2*i+1]
+    m_labels1 = microstate_results[1][subject_indices1[0]:subject_indices1[1]]
+    # Get the trialinfo with conditions
+    Subject1, trialinfo1 = trialinfo_list[2*i]
+    # Convert to dataframe
+    event_id_inv = {v: k for k, v in event_id.items()} # Inverse the event id
+    
+    events1 = pd.DataFrame(trialinfo1,columns=["Event_id","Trial_number","Trial_start_time"])
+    events1["Event_label"] = events1["Event_id"].replace(event_id_inv)
+    events1 = events1.reset_index().rename(columns={"index":"Epoch_idx"})
+    # Reshape m_labels to (epoch, time)
+    m_labels1 = m_labels1.reshape(len(trialinfo1),sfreq)
+    
+    # Get the microstate labels and events for participant 2
+    subject_indices2 = sub_idx[2*i+1], sub_idx[2*i+2]
+    m_labels2 = microstate_results[1][subject_indices2[0]:subject_indices2[1]]
+    # Get the trialinfo with conditions
+    Subject2, trialinfo2 = trialinfo_list[2*i+1]
+    # Convert to dataframe
+    events2 = pd.DataFrame(trialinfo2,columns=["Event_id","Trial_number","Trial_start_time"])
+    events2["Event_label"] = events2["Event_id"].replace(event_id_inv)
+    events2 = events2.reset_index().rename(columns={"index":"Epoch_idx"})
+    # Reshape m_labels to (epoch, time)
+    m_labels2 = m_labels2.reshape(len(trialinfo2),sfreq)
+    
+    # check that the participants from a pair was loaded
+    assert Subject2-1 == Subject1
+    # Check that the same amount of event types are present
+    assert trialinfo1[-1,1] == trialinfo2[-1,1]
+    # Synchronize the events from the pair based on timing info
+    # By trimming the epochs to only include the epochs that are present
+    # in both participants of the pair
+    # Initialize with an array filled with a unique number not in use
+    sync_m_labels1 = np.zeros_like(m_labels1); sync_m_labels1.fill(9999)
+    sync_m_labels2 = np.zeros_like(m_labels2); sync_m_labels2.fill(9999)
+    for t in np.unique(trialinfo1[:,1]):
+        t_idx1 = np.where(trialinfo1[:,1] == t)[0]
+        t_idx2 = np.where(trialinfo2[:,1] == t)[0]
+        # Get the timings for the epochs for the specific trial t
+        t_timings1 = trialinfo1[t_idx1,2]
+        t_timings2 = trialinfo2[t_idx2,2]
+        timings_intersect = np.intersect1d(t_timings1,t_timings2)
+        # Get the indices where the timings matches
+        t_idx_match1 = t_idx1[pd.Series(t_timings1).isin(timings_intersect)]
+        t_idx_match2 = t_idx2[pd.Series(t_timings2).isin(timings_intersect)]
+        # Get the actual values from the synchronized epochs
+        sync_m_labels1[t_idx_match1] = m_labels1[t_idx_match1]
+        sync_m_labels2[t_idx_match2] = m_labels2[t_idx_match2]
+    # Find the epochs that were asynchronous, which have to be trimmed
+    asynch_epochs1 = np.unique(np.where(sync_m_labels1==9999)[0])
+    asynch_epochs2 = np.unique(np.where(sync_m_labels2==9999)[0])
+    # Trim/delete the asynchronous epochs
+    sync_m_labels1 = np.delete(sync_m_labels1,asynch_epochs1,axis=0)
+    sync_m_labels2 = np.delete(sync_m_labels2,asynch_epochs2,axis=0)
+    assert len(sync_m_labels1) == len(sync_m_labels2) # check the amount of synchronized epochs are equal
+    # Fix events
+    sync_events1 = events1.drop(asynch_epochs1,axis=0).reset_index(drop=True)
+    sync_events2 = events2.drop(asynch_epochs2,axis=0).reset_index(drop=True)
+    
+    # Since they are synchronized already, just take the first and fix
+    # epoch idx column with the new idx for the synchronized labels
+    events = sync_events1
+    events["Epoch_idx"] = events.index
+    # Notice that for the intrabrain fit all alpha v6 I only corrected
+    # ppn2. So by taking ppn1 I get the original event labels.
+    
+    # Update in v7
+    # # Collapse event_id 6 and 7 to 4 and 5
+    # events.loc[events["Event_id"] == 6,["Event_id","Event_label"]] = [4, "observe, actor"]
+    # events.loc[events["Event_id"] == 7,["Event_id","Event_label"]] = [5, "imitate, leader"]
+
+    # Due to the hard constraint for both participants being in the same
+    # microstate. It does not matter who will be treated as ppn1 and ppn2
+    # since the prototypical microstate topographies are exactly the same!
+    
+    # Remove pre-trial epochs
+    pre_trial_epochs = events["Trial_start_time"] < 0
+    sync_m_labels1 = sync_m_labels1[np.invert(pre_trial_epochs)]
+    sync_m_labels2 = sync_m_labels2[np.invert(pre_trial_epochs)]
+    events = events.loc[np.invert(pre_trial_epochs)].reset_index(drop=True)
+    events["Epoch_idx"] = events.index
+    
+    # Pre-allocate memory
+    Dur_arr = np.zeros((len(event_id),n_maps+1)); Dur_arr.fill(np.nan)
+    Occ_arr = np.zeros((len(event_id),n_maps+1)); Occ_arr.fill(np.nan)
+    TCo_arr = np.zeros((len(event_id),n_maps+1)); TCo_arr.fill(np.nan)
+    TMx_arr = np.zeros((len(event_id),n_maps+1,n_maps+1))
+    Ent_arr = np.zeros(len(event_id))
+    for e in range(len(event_id)):
+        ev_idx = list(event_id.values())[e]
+        ep_idx = events["Epoch_idx"][events["Event_id"] == ev_idx]
+        trial_numbers0 = np.unique(events["Trial_number"][ep_idx])
+        if len(trial_numbers0) > 8:
+            # Compute the first 8 trials and the last 8 separately, before
+            # taking the average to be consistent with asymmetric trials
+            Dur_arr0 = np.zeros((2,n_maps+1))
+            Occ_arr0 = np.zeros((2,n_maps+1))
+            TCo_arr0 = np.zeros((2,n_maps+1))
+            TMx_arr0 = np.zeros((2,n_maps+1,n_maps+1))
+            Ent_arr0 = np.zeros(2)
+            trial_numbers_split = np.array_split(trial_numbers0, 2)
+            # Array_split is used in cases where a trial might have been
+            # dropped, hence the split is only 8/7
+            for s in range(len(trial_numbers_split)):
+                ep_idx0 = events.loc[(events["Event_id"] == ev_idx)&
+                                    (events["Trial_number"].isin(trial_numbers_split[s])),
+                                    "Epoch_idx"]
+                # Get the microstate labels
+                m_labels10 = sync_m_labels1[ep_idx0,:]
+                m_labels20 = sync_m_labels2[ep_idx0,:]
+                # Flatten the labels
+                m_labels10_flat = m_labels10.reshape(m_labels10.shape[0]*m_labels10.shape[1])
+                m_labels20_flat = m_labels20.reshape(m_labels20.shape[0]*m_labels20.shape[1])
+                # Compute average duration of common microstate
+                # Output: label and duration of common microstate. Label -1 is used
+                # for not common microstate
+                l_, d_ = interbrain_microstate_run_length_encoding(m_labels10_flat,m_labels20_flat)
+                # For each microstate
+                for ii in range(n_maps+1):
+                    if np.isnan(np.nanmean(d_[l_==ii-1])):
+                        # The specific microstate did not occur at all for this event
+                        Dur_arr0[s,ii] = 0
+                        Occ_arr0[s,ii] = 0
+                        TCo_arr0[s,ii] = 0
+                    else:
+                        Dur_arr0[s,ii] = np.mean(d_[l_==ii-1]) * 1000/sfreq # convert to ms
+                        Occ_arr0[s,ii] = len(d_[l_==ii-1])/len(d_) * sfreq
+                        TCo_arr0[s,ii] = np.sum(d_[l_==ii-1])/np.sum(d_)
+                # Compute transition matrix
+                TMx_arr0[s] = interbrain_T_matrix(m_labels10_flat,m_labels20_flat,n_maps)
+                # Compute Joint Shannon Entropy relative to max
+                # Max is the sum of max individual entropies
+                Ent_arr0[s] = H_2(m_labels10_flat,m_labels20_flat,n_maps)/(2*np.log(float(n_maps)))
+            # Average over the splits
+            Dur_arr[e] = np.mean(Dur_arr0, axis=0)
+            Occ_arr[e] = np.mean(Occ_arr0, axis=0)
+            TCo_arr[e] = np.mean(TCo_arr0, axis=0)
+            TMx_arr[e] = np.mean(TMx_arr0, axis=0)
+            Ent_arr[e] = np.mean(Ent_arr0, axis=0)
+            
+        else:
+            # Get the microstate labels
+            m_labels10 = sync_m_labels1[ep_idx,:]
+            m_labels20 = sync_m_labels2[ep_idx,:]
+            # Flatten the labels
+            m_labels10_flat = m_labels10.reshape(m_labels10.shape[0]*m_labels10.shape[1])
+            m_labels20_flat = m_labels20.reshape(m_labels20.shape[0]*m_labels20.shape[1])
+            # Compute average duration of common microstate
+            # Output: label and duration of common microstate. Label -1 is used
+            # for not common microstate
+            l_, d_ = interbrain_microstate_run_length_encoding(m_labels10_flat,m_labels20_flat)
+            # For each microstate
+            for ii in range(n_maps+1):
+                if np.isnan(np.nanmean(d_[l_==ii-1])):
+                    # The specific microstate did not occur at all for this event
+                    Dur_arr[e,ii] = 0
+                    Occ_arr[e,ii] = 0
+                    TCo_arr[e,ii] = 0
+                else:
+                    Dur_arr[e,ii] = np.mean(d_[l_==ii-1]) * 1000/sfreq # convert to ms
+                    Occ_arr[e,ii] = len(d_[l_==ii-1])/len(d_) * sfreq
+                    TCo_arr[e,ii] = np.sum(d_[l_==ii-1])/np.sum(d_)
+            # Compute transition matrix
+            TMx_arr[e] = interbrain_T_matrix(m_labels10_flat,m_labels20_flat,n_maps)
+            # Compute Joint Shannon Entropy relative to max
+            # Max is the sum of max individual entropies
+            Ent_arr[e] = H_2(m_labels10_flat,m_labels20_flat,n_maps)/(2*np.log(float(n_maps)))
+            
+    # Combine all features in a list
+    MFeatures = [Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr]
+    
+    # Update in v7 - Collapse after computations in single events
+    Dur_arr2 = np.zeros((len(collapsed_event_id),n_maps+1))
+    Occ_arr2 = np.zeros((len(collapsed_event_id),n_maps+1))
+    TCo_arr2 = np.zeros((len(collapsed_event_id),n_maps+1))
+    TMx_arr2 = np.zeros((len(collapsed_event_id),n_maps+1,n_maps+1))
+    Ent_arr2 = np.zeros(len(collapsed_event_id))
+    MFeatures2 = [Dur_arr2,Occ_arr2,TCo_arr2,TMx_arr2,Ent_arr2]
+    
+    for f in range(len(MFeatures2)):
+        tmp_feat = MFeatures[f]
+        tmp_feat2 = MFeatures2[f]
+        for e in range(len(collapsed_event_id)):
+            ee = list(collapsed_event_id.keys())[e]
+            if (ee == 'observer_actor'):
+                old_ev_idx1 = list(event_id.keys()).index("observe, actor")
+                old_ev_idx2 = list(event_id.keys()).index("observe, observer")
+                tmp_feat2[e] = np.mean([tmp_feat[old_ev_idx1],tmp_feat[old_ev_idx2]], axis=0)
+            elif ee == 'follower_leader':
+                old_ev_idx1 = list(event_id.keys()).index("imitate, leader")
+                old_ev_idx2 = list(event_id.keys()).index("imitate, follower")
+                tmp_feat2[e] = np.mean([tmp_feat[old_ev_idx1],tmp_feat[old_ev_idx2]], axis=0)
+            else:
+                new_ev_idx = list(collapsed_event_id.values())[e]
+                old_ev_idx = list(event_id.values()).index(new_ev_idx)
+                tmp_feat2[e] = tmp_feat[old_ev_idx]
+    
+    return [sync_m_labels1, sync_m_labels2], [sync_events1, sync_events2], MFeatures2
+
+
+def get_synch_events_from_pairs(i, trialinfo1, trialinfo2, event_id):
+    """ Get the synchronized events for each pair """
+    # Get environmental variables
+    Subject_id = os.environ.get('Subject_id')
+    # Check that the participants from a pair was loaded
+    assert Subject_id[2*i+1]-1 == Subject_id[2*i]
+    # Synchronize the events from the pair based on timing info
+    # By trimming the epochs to only include the epochs that are present
+    # in both participants of the pair
+    event_id_inv = {v: k for k, v in event_id.items()} # Inverse the event id
+    events1 = pd.DataFrame(trialinfo1,columns=["Event_id","Trial_number","Trial_start_time"])
+    events1["Event_label"] = events1["Event_id"].replace(event_id_inv)
+    events1 = events1.reset_index().rename(columns={"index":"Epoch_idx"})
+    
+    events2 = pd.DataFrame(trialinfo2,columns=["Event_id","Trial_number","Trial_start_time"])
+    events2["Event_label"] = events2["Event_id"].replace(event_id_inv)
+    events2 = events2.reset_index().rename(columns={"index":"Epoch_idx"})
+    # Concatenate all the synched events
+    sync_events1 = np.zeros((len(events1)))
+    sync_events2 = np.zeros((len(events2)))
+    for t in np.unique(trialinfo1[:,1]):
+        t_idx1 = np.where(trialinfo1[:,1] == t)[0]
+        t_idx2 = np.where(trialinfo2[:,1] == t)[0]
+        # Get the timings for the epochs for the specific trial t
+        t_timings1 = trialinfo1[t_idx1,2]
+        t_timings2 = trialinfo2[t_idx2,2]
+        timings_intersect = np.intersect1d(t_timings1,t_timings2)
+        # Get the indices where the timings matches
+        t_idx_match1 = t_idx1[pd.Series(t_timings1).isin(timings_intersect)]
+        t_idx_match2 = t_idx2[pd.Series(t_timings2).isin(timings_intersect)]
+        # Save the trials that should be kept
+        sync_events1[t_idx_match1] = 1
+        sync_events2[t_idx_match2] = 1
+    # Find the epochs that were asynchronous, which have to be trimmed
+    asynch_epochs1 = np.where(sync_events1==0)[0]
+    asynch_epochs2 = np.where(sync_events2==0)[0]
+    # Trim/delete the asynchronous epochs
+    events1 = events1.drop(asynch_epochs1,axis=0).reset_index(drop=True)
+    events2 = events2.drop(asynch_epochs2,axis=0).reset_index(drop=True)
+    assert len(events1) == len(events2) # check the amount of synchronized epochs are equal
+    return events1, events2
+
+def prepare_2P_micro_arr_collapsed_events(i, sfreq, event_id, freq_range=None, standardize:bool=True):
+    # Here i refers to the pair in range(n_subjects//2)
+    # Load epochs
+    epoch1, trialinfo1 = load_epoch_from_fieldtrip(2*i)
+    epoch2, trialinfo2 = load_epoch_from_fieldtrip(2*i+1)
+    # Ensure it is averaged referenced
+    epoch1 = epoch1.set_eeg_reference("average")
+    epoch2 = epoch2.set_eeg_reference("average")
+    # Get numpy arrays
+    micro_data1 = epoch1.get_data() # get data
+    micro_data2 = epoch2.get_data() # get data
+    # Get only the synchronized epochs
+    events = get_synch_events_from_pairs(i, trialinfo1, trialinfo2, event_id)
+    micro_data1 = micro_data1[events[0]["Epoch_idx"]]
+    micro_data2 = micro_data2[events[1]["Epoch_idx"]]
+    assert micro_data1.shape == micro_data2.shape
+    # Get event labels for each epoch
+    # Since they are already synchronized I just take the first
+    event_labels0 = events[0]["Event_label"]
+    if standardize == True:
+        # Standardize data - will make EEG amplitudes comparable between
+        # recordings/trials. Standardized along the ch axis, meaning the mean
+        # and std of the topomap at each timepoint is normalized
+        # It will be normalized separately for each participant
+        # in order to ensure both participants are treated equally by the
+        # kmeans
+        micro_data10 = micro_data1 - micro_data1.mean(axis=1, keepdims=True)
+        micro_data10 /= micro_data10.std(axis=1, keepdims=True)
+        micro_data20 = micro_data2 - micro_data2.mean(axis=1, keepdims=True)
+        micro_data20 /= micro_data20.std(axis=1, keepdims=True)
+    elif standardize == False:
+        micro_data10 = micro_data1
+        micro_data20 = micro_data2
+    # Concatenate along the channel axis, to treat the paired EEG as one entity
+    micro_data = np.concatenate([micro_data10,micro_data20],axis=1)
+
+    # Flip the micro_data between ppn1 and ppn2 to fix first microstate
+    # always being observer/follower, and second microstate being actor/leader
+    cond6_idx = event_labels0 == "observe, observer"
+    cond7_idx = event_labels0 == "imitate, follower"
+    cond67_idx = cond6_idx+cond7_idx
+    micro_data[cond67_idx] = np.concatenate([micro_data20[cond67_idx],
+                                            micro_data10[cond67_idx]],axis=1)
+    # Transform data to correct shape
+    arr_shape = micro_data.shape # get shape
+    micro_data = micro_data.swapaxes(1,2) # swap ch and time axis
+    micro_data = micro_data.reshape(arr_shape[0]*arr_shape[2],arr_shape[1]) # reshape by combining epochs and times
+    
+    # Filter the data
+    if freq_range == None:
+        micro_data_filtered = micro_data # do not perform filtering
+    elif not freq_range == None:
+        # Notice it is done only after combining epochs and time, as filter length
+        # would be too long for 1s epochs. The filter function wants time on the last axis
+        micro_data = micro_data.transpose()
+        micro_data_filtered = mne.filter.filter_data(micro_data, sfreq, freq_range[0], freq_range[1])
+        # Reverse the shape
+        micro_data_filtered = micro_data_filtered.transpose()
+    
+    return micro_data_filtered, [trialinfo1, trialinfo2], events
+    
+def plot_dualmicro(n_maps, maps, gev, epoch_info, vlims=(None, None)):
+    # Split the map from the two participants
+    n_channels = epoch_info["nchan"]
+    maps1, maps2, _ = np.split(maps, [n_channels,2*n_channels], axis=1)
+    fig, axarr = plt.subplots(2, n_maps, figsize=(20,10))
+    fig.patch.set_facecolor('white')
+    for imap in range(n_maps):
+        mne.viz.plot_topomap(maps1[imap,:], pos = epoch_info, vlim = vlims, axes = axarr[0,imap]) # plot
+        axarr[0,imap].set_title("GEV: {:.2f}".format(gev[imap]), fontsize=16, fontweight="bold") # title
+        mne.viz.plot_topomap(maps2[imap,:], pos = epoch_info, vlim = vlims, axes = axarr[1,imap])
+    fig.suptitle("Microstates: {}".format(n_maps), fontsize=20, fontweight="bold")
+    plt.tight_layout()
+    return fig
+
+def sign_swap_microstates(sign_swap, maps, n_maps, n_channels):
+    # Split maps
+    maps1, maps2, _ = np.split(maps, [n_channels,2*n_channels], axis=1)
+    for m in range(n_maps):
+        # Sign swap each
+        maps1[m] *= sign_swap[0][m]
+        maps2[m] *= sign_swap[1][m]
+    # Concatenate maps
+    maps = np.concatenate([maps1,maps2],axis=1)
+    return maps
+
+def dualmicro_fit_all_feature_computation(i, n_maps, microstate_results, trialinfo_list, sfreq, event_id,
+                                              collapsed_event_id):
+    """
+    Overview of common microstate features:
+        1. Average duration a given microstate remains stable (Dur)
+        2. Frequency occurrence, independent of individual duration (Occ)
+            Average number of times a microstate becomes dominant per second
+        3. Ratio of total Time Covered (TCo)
+        4. Transition probabilities (TMx)
+        5. Ratio of shannon entropy relative to theoretical max chaos (Ent)
+    
+    Parameters
+    ----------
+    i : int
+        The index.
+    n_maps : int
+        The number of maps (clusters) used.
+    microstate_results : list
+        The estimated microstates.
+    trialinfo_list : list
+        List with trial informations.
+    sfreq : int
+        The sampling frequency.
+    event_id : dict
+        The mappings between event id and condition.
+    collapsed_brain_event_id : dict
+        The renamed mappings between event id and collapsed conditions
+
+    Returns
+    -------
+    m_labels : list of two np.array
+        The microstate sequence (labels) time series in the format (epoch, time).
+    events : list of two pd.DataFrame
+        The events corresponding to each epoch.
+    MFeatures : list
+        List of arrays of each microstate feature.
+
+    """
+    
+    pair_idx = microstate_results[5]
+    pair_indices = pair_idx[i], pair_idx[i+1]
+    m_labels = microstate_results[1][pair_indices[0]:pair_indices[1]]
+    
+    events = trialinfo_list[2][i]
+    # Since they are synchronized already, just take the first and fix
+    # epoch idx column with the new idx for the synchronized labels
+    events = events[0]
+    
+    # Update in v7
+    # # Collapse event_id 6 and 7 to 4 and 5
+    # events.loc[events["Event_id"] == 6,["Event_id","Event_label"]] = [4, "observe, actor"]
+    # events.loc[events["Event_id"] == 7,["Event_id","Event_label"]] = [5, "imitate, leader"]
+    # The microstate clustering was performed on flipped (collapsed) events,
+    # but I will compute the features on the 8 trials to avoid the flipping
+    # effect and then collapse by averaging afterwards
+    
+    # Reshape m_labels to (epoch, time)
+    m_labels = m_labels.reshape(len(events),sfreq)
+    
+    # Remove pre-trial epochs
+    pre_trial_epochs = events["Trial_start_time"] < 0
+    m_labels = m_labels[np.invert(pre_trial_epochs)]
+    events = events.loc[np.invert(pre_trial_epochs)].reset_index(drop=True)
+    events["Epoch_idx"] = events.index
+    
+    # Pre-allocate memory
+    Dur_arr = np.zeros((len(event_id),n_maps)); Dur_arr.fill(np.nan)
+    Occ_arr = np.zeros((len(event_id),n_maps)); Occ_arr.fill(np.nan)
+    TCo_arr = np.zeros((len(event_id),n_maps)); TCo_arr.fill(np.nan)
+    TMx_arr = np.zeros((len(event_id),n_maps,n_maps))
+    Ent_arr = np.zeros(len(event_id))
+    for e in range(len(event_id)):
+        ev_idx = list(event_id.values())[e]
+        ep_idx = events["Epoch_idx"][events["Event_id"] == ev_idx]
+        trial_numbers0 = np.unique(events["Trial_number"][ep_idx])
+        if len(trial_numbers0) > 8:
+            # Compute the first 8 trials and the last 8 separately, before
+            # taking the average to be consistent with asymmetric trials
+            Dur_arr0 = np.zeros((2,n_maps))
+            Occ_arr0 = np.zeros((2,n_maps))
+            TCo_arr0 = np.zeros((2,n_maps))
+            TMx_arr0 = np.zeros((2,n_maps,n_maps))
+            Ent_arr0 = np.zeros(2)
+            trial_numbers_split = np.array_split(trial_numbers0, 2)
+            # Array_split is used in cases where a trial might have been
+            # dropped, hence the split is only 8/7
+            for s in range(len(trial_numbers_split)):
+                ep_idx0 = events.loc[(events["Event_id"] == ev_idx)&
+                                    (events["Trial_number"].isin(trial_numbers_split[s])),
+                                    "Epoch_idx"]
+                m_labels0 = m_labels[ep_idx0,:]
+                m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1])
+                # Compute duration, occurrence and time covered
+                l_, d_ = microstate_run_length_encoding(m_labels0_flat)
+                # For each microstate
+                for ii in range(n_maps):
+                    if np.isnan(np.nanmean(d_[l_==ii])):
+                        # The specific microstate did not occur at all for this event
+                        Dur_arr0[s,ii] = 0
+                        Occ_arr0[s,ii] = 0
+                        TCo_arr0[s,ii] = 0
+                    else:
+                        Dur_arr0[s,ii] = np.mean(d_[l_==ii]) * 1000/sfreq # convert to ms
+                        Occ_arr0[s,ii] = len(d_[l_==ii])/len(d_) * sfreq
+                        TCo_arr0[s,ii] = np.sum(d_[l_==ii])/np.sum(d_)
+                # Compute transition matrix
+                TMx_arr0[s] = T_empirical(m_labels0_flat, n_maps)
+                # Compute Shannon Entropy relative to max
+                Ent_arr0[s] = H_1(m_labels0_flat, n_maps)/np.log(float(n_maps))
+            # Average over the splits
+            Dur_arr[e] = np.mean(Dur_arr0, axis=0)
+            Occ_arr[e] = np.mean(Occ_arr0, axis=0)
+            TCo_arr[e] = np.mean(TCo_arr0, axis=0)
+            TMx_arr[e] = np.mean(TMx_arr0, axis=0)
+            Ent_arr[e] = np.mean(Ent_arr0, axis=0)
+            
+        else:
+            m_labels0 = m_labels[ep_idx,:]
+            m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1])
+            # Compute duration, occurrence and time covered
+            l_, d_ = microstate_run_length_encoding(m_labels0_flat)
+            # For each microstate
+            for ii in range(n_maps):
+                if np.isnan(np.nanmean(d_[l_==ii])):
+                    # The specific microstate did not occur at all for this event
+                    Dur_arr[e,ii] = 0
+                    Occ_arr[e,ii] = 0
+                    TCo_arr[e,ii] = 0
+                else:
+                    Dur_arr[e,ii] = np.mean(d_[l_==ii]) * 1000/sfreq # convert to ms
+                    Occ_arr[e,ii] = len(d_[l_==ii])/len(d_) * sfreq
+                    TCo_arr[e,ii] = np.sum(d_[l_==ii])/np.sum(d_)
+            # Compute transition matrix
+            TMx_arr[e] = T_empirical(m_labels0_flat, n_maps)
+            # Compute Shannon Entropy relative to max
+            Ent_arr[e] = H_1(m_labels0_flat, n_maps)/np.log(float(n_maps))
+    # Combine all features in a list
+    MFeatures = [Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr]
+    
+    # Update in v7 - Collapse after computations in single events
+    Dur_arr2 = np.zeros((len(collapsed_event_id),n_maps))
+    Occ_arr2 = np.zeros((len(collapsed_event_id),n_maps))
+    TCo_arr2 = np.zeros((len(collapsed_event_id),n_maps))
+    TMx_arr2 = np.zeros((len(collapsed_event_id),n_maps,n_maps))
+    Ent_arr2 = np.zeros(len(collapsed_event_id))
+    MFeatures2 = [Dur_arr2,Occ_arr2,TCo_arr2,TMx_arr2,Ent_arr2]
+    
+    for f in range(len(MFeatures2)):
+        tmp_feat = MFeatures[f]
+        tmp_feat2 = MFeatures2[f]
+        for e in range(len(collapsed_event_id)):
+            ee = list(collapsed_event_id.keys())[e]
+            if (ee == 'observer_actor'):
+                old_ev_idx1 = list(event_id.keys()).index("observe, actor")
+                old_ev_idx2 = list(event_id.keys()).index("observe, observer")
+                tmp_feat2[e] = np.mean([tmp_feat[old_ev_idx1],tmp_feat[old_ev_idx2]], axis=0)
+            elif ee == 'follower_leader':
+                old_ev_idx1 = list(event_id.keys()).index("imitate, leader")
+                old_ev_idx2 = list(event_id.keys()).index("imitate, follower")
+                tmp_feat2[e] = np.mean([tmp_feat[old_ev_idx1],tmp_feat[old_ev_idx2]], axis=0)
+            else:
+                new_ev_idx = list(collapsed_event_id.values())[e]
+                old_ev_idx = list(event_id.values()).index(new_ev_idx)
+                tmp_feat2[e] = tmp_feat[old_ev_idx]
+    
+    return m_labels, events, MFeatures2
+
+def load_microstate_arrays(i, standardize:bool=True):
+    """ Loads the EEG data and convert to np.array """
+    epoch1, trialinfo1 = load_epoch_from_fieldtrip(i)
+    # Ensure it is averaged referenced
+    epoch1 = epoch1.set_eeg_reference("average")
+    # Get numpy arrays
+    micro_data1 = epoch1.get_data() # get data
+    if standardize == True:
+        # Standardize data - will make EEG amplitudes comparable between
+        # recordings/trials. Standardized along the ch axis, meaning the mean
+        # and std of the topomap at each timepoint is normalized
+        # It will be normalized separately for each participant
+        micro_data10 = micro_data1 - micro_data1.mean(axis=1, keepdims=True)
+        micro_data10 /= micro_data10.std(axis=1, keepdims=True)
+    elif standardize == False:
+        micro_data10 = micro_data1
+    return micro_data10, trialinfo1
+
+def get_synch_events_from_pseudo_pairs(trialinfo1, trialinfo2, event_id):
+    """
+    Takes two subject indices from a pseudo pair and align their events (based
+    on when they occur, e.g. first coupled in ppn1 with first coupled in ppn2)
+    After their microstates have been synchronized, the microstate features
+    are computed to serve as baseline as a non-interacting pair
+    """
+    # Synchronize the events from the pair based on timing info
+    # By trimming the epochs to only include the epochs that are present
+    # in both participants of the pair
+    event_id_inv = {v: k for k, v in event_id.items()} # Inverse the event id
+    events1 = pd.DataFrame(trialinfo1,columns=["Event_id","Trial_number","Trial_start_time"])
+    events1["Event_label"] = events1["Event_id"].replace(event_id_inv)
+    events1 = events1.reset_index().rename(columns={"index":"Epoch_idx"})
+    
+    events2 = pd.DataFrame(trialinfo2,columns=["Event_id","Trial_number","Trial_start_time"])
+    events2["Event_label"] = events2["Event_id"].replace(event_id_inv)
+    events2 = events2.reset_index().rename(columns={"index":"Epoch_idx"})
+    
+    # I should just make sure to align the timings of the trials for each event type
+    counter_idx = 0
+    # Initialize events as dataframe filled with unique number
+    sync_events1 = events1.applymap(lambda x: 9999)
+    sync_events2 = events2.applymap(lambda x: 9999)
+
+    for e in range(len(event_id)):
+        # Get event number for the specific event
+        ev_idx = list(event_id.values())[e]
+        # Get the indices corresponding to the event for each participant
+        ep_idx1 = events1["Epoch_idx"][events1["Event_id"] == ev_idx]
+        ep_idx2 = events2["Epoch_idx"][events2["Event_id"] == ev_idx]
+        # Get the first trial from ppn1 and ppn2 that correspond to the event
+        event_trial_idx1 = np.unique(trialinfo1[ep_idx1][:,1])
+        event_trial_idx2 = np.unique(trialinfo2[ep_idx2][:,1])
+        if len(event_trial_idx1) != len(event_trial_idx2):
+            print(f"Not equal number of {list(event_id.keys())[e]} trials")
+            min_trials = np.min([len(event_trial_idx1),len(event_trial_idx2)])
+            print(f"Using the first {min_trials} trials")
+            event_trial_idx1 = event_trial_idx1[:min_trials]
+            event_trial_idx2 = event_trial_idx2[:min_trials]
+
+        for t1, t2 in zip(event_trial_idx1,event_trial_idx2):
+            t_idx1 = np.where(trialinfo1[:,1] == t1)[0]
+            t_idx2 = np.where(trialinfo2[:,1] == t2)[0]
+            # Get the timings for the epochs for the specific trial t
+            t_timings1 = trialinfo1[t_idx1,2]
+            t_timings2 = trialinfo2[t_idx2,2]
+            timings_intersect = np.intersect1d(t_timings1,t_timings2)
+            # Get the indices where the timings matches
+            t_idx_match1 = t_idx1[pd.Series(t_timings1).isin(timings_intersect)]
+            t_idx_match2 = t_idx2[pd.Series(t_timings2).isin(timings_intersect)]
+            # Save the event info
+            sync_events1.iloc[counter_idx:(counter_idx+len(timings_intersect))] = events1.iloc[t_idx_match1]
+            sync_events2.iloc[counter_idx:(counter_idx+len(timings_intersect))] = events2.iloc[t_idx_match2]
+            # Move counter
+            counter_idx += len(timings_intersect)
+    # Find the epochs that were asynchronous, which have to be trimmed
+    asynch_epochs1 = np.unique(np.where(sync_events1==9999)[0])
+    asynch_epochs2 = np.unique(np.where(sync_events2==9999)[0])
+    # Fix events
+    sync_events1 = sync_events1.drop(asynch_epochs1,axis=0).reset_index(drop=True)
+    sync_events2 = sync_events2.drop(asynch_epochs2,axis=0).reset_index(drop=True)
+    assert len(sync_events1) == len(sync_events2) # check the amount of synchronized epochs are equal
+    return [sync_events1, sync_events2]
+
+def combine_two_person_microstate_arrays(micro_data1, micro_data2, events, sfreq, freq_range=None):
+    """ Combine two EEG array data into one array for microstate estimation """
+    # Get the synchronized epochs
+    micro_data1 = micro_data1[events[0]["Epoch_idx"]]
+    micro_data2 = micro_data2[events[1]["Epoch_idx"]]
+    # Get event labels for each epoch
+    # Since they are already synchronized I just take the first
+    event_labels0 = events[0]["Event_label"]
+    # Concatenate along the channel axis, to treat the paired EEG as one entity
+    micro_data = np.concatenate([micro_data1,micro_data2],axis=1)
+    
+    ### Update in v4.1
+    # Flip the micro_data between ppn1 and ppn2 to fix first microstate
+    # always being observer/follower, and second microstate being actor/leader
+    cond6_idx = event_labels0 == "observe, observer"
+    cond7_idx = event_labels0 == "imitate, follower"
+    cond67_idx = cond6_idx+cond7_idx
+    micro_data[cond67_idx] = np.concatenate([micro_data2[cond67_idx],
+                                            micro_data1[cond67_idx]],axis=1)
+    # Transform data to correct shape
+    arr_shape = micro_data.shape # get shape
+    micro_data = micro_data.swapaxes(1,2) # swap ch and time axis
+    micro_data = micro_data.reshape(arr_shape[0]*arr_shape[2],arr_shape[1]) # reshape by combining epochs and times
+    # Filter the data in alpha
+    # Notice it is done only after combining epochs and time, as filter length
+    # would be too long for 1s epochs. The filter function wants time on the last axis
+    micro_data = micro_data.transpose()
+    micro_data_alpha = mne.filter.filter_data(micro_data, sfreq, freq_range[0], freq_range[1])
+    # Reverse the shape
+    micro_data_alpha = micro_data_alpha.transpose()
+    
+    return micro_data_alpha
+
+def pseudo_pair_dualmicro_backfitting(micro_data, prototype_map, events, n_maps, sfreq):
+    """ Backfit microstate labels based on prototype map previously determined """
+    assert prototype_map.shape[0] == n_maps, "Template map size is not equal to n_maps"
+    # Load micro data
+    micro_data0 = micro_data
+    n_ch = micro_data.shape[1]
+    # Estimate global explained variance (GEV) by the averaged template maps
+    # on the global field potential peaks in our data
+    # Find GFPs
+    gfp = np.std(micro_data0, axis=1)
+    gfp_peaks = locmax(gfp)
+    gfp_values = gfp[gfp_peaks]
+    gfp2 = np.sum(gfp_values**2) # normalizing constant in GEV
+    # Calculate spatial correlation
+    # Using absolute value of the topographies to obtain polarity invariance
+    # Since we are working on two-person microstates, we also need to test
+    # the different configurations. There are 4 in total, but we only need to try 2
+    tmp_maps = np.split(prototype_map, [n_ch//2,n_ch], axis=1)
+    all_polarity_combinations = [np.concatenate([tmp_maps[0],tmp_maps[1]],axis=1),
+                                 np.concatenate([tmp_maps[0],-tmp_maps[1]],axis=1)]
+    C_arr = np.zeros((micro_data0.shape[0],n_maps,len(all_polarity_combinations)))
+    for p in range(len(all_polarity_combinations)):
+        C_arr[:,:,p] = np.dot(micro_data0, all_polarity_combinations[p].T)
+        # rescale cov
+        C_arr[:,:,p] /= (n_ch*np.outer(gfp, np.std(prototype_map, axis=1)))
+    # Get C as the highest correlation independent of polarity
+    # Take max for all polarity configurations and then argmax to find label
+    C = np.sqrt(np.max(C_arr**2,axis=2)) # notice sign is lost here, but it is only used as C^2 later so it is fine
+    L = np.argmax(C**2, axis=1)
+
+    L_gfp = L[gfp_peaks]
+    C_gfp = C[gfp_peaks]
+
+    gev = np.zeros(n_maps)
+    for k in range(n_maps):
+        r = L_gfp==k
+        gev[k] = np.sum(gfp_values[r]**2 * C_gfp[r,k]**2)/gfp2
+
+    # Reshape labels back to epoch, time
+    L_reshaped = L.reshape(len(events[0]),sfreq)
+
+    return L_reshaped, gev
+
+def dualmicro_fit_all_pseudo_pair_feature_computation(i, n_maps, backfit_results, sfreq,
+                                                      event_id, collapsed_event_id):
+    """
+    Overview of common microstate features:
+        1. Average duration a given microstate remains stable (Dur)
+        2. Frequency occurrence, independent of individual duration (Occ)
+            Average number of times a microstate becomes dominant per second
+        3. Ratio of total Time Covered (TCo)
+        4. Transition probabilities (TMx)
+        5. Ratio of shannon entropy relative to theoretical max chaos (Ent)
+    
+    Parameters
+    ----------
+    i : int
+        The index.
+    n_maps : int
+        The number of maps (clusters) used.
+    backfit_results : list
+        The estimated back-fitted microstates.
+    sfreq : int
+        The sampling frequency.
+    event_id : dict
+        The mappings between event id and condition.
+    collapsed_brain_event_id : dict
+        The renamed mappings between event id and collapsed conditions
+
+    Returns
+    -------
+    m_labels : list of two np.array
+        The microstate sequence (labels) time series in the format (epoch, time).
+    events : list of two pd.DataFrame
+        The events corresponding to each epoch.
+    MFeatures : list
+        List of arrays of each microstate feature.
+
+    """
+    m_labels = backfit_results[1][i]
+    events = backfit_results[3][i]
+    
+    # Since they are synchronized already, just take the first and fix
+    # epoch idx column with the new idx for the synchronized labels
+    events = events[0]
+    
+    # Update in v7
+    # # Collapse event_id 6 and 7 to 4 and 5
+    # events.loc[events["Event_id"] == 6,["Event_id","Event_label"]] = [4, "observe, actor"]
+    # events.loc[events["Event_id"] == 7,["Event_id","Event_label"]] = [5, "imitate, leader"]
+    # The microstate clustering was performed on flipped (collapsed) events,
+    # but I will compute the features on the 8 trials to avoid the flipping
+    # effect and then collapse by averaging afterwards
+    
+    # Reshape m_labels to (epoch, time)
+    m_labels = m_labels.reshape(len(events),sfreq)
+    
+    # Remove pre-trial epochs
+    pre_trial_epochs = events["Trial_start_time"] < 0
+    m_labels = m_labels[np.invert(pre_trial_epochs)]
+    events = events.loc[np.invert(pre_trial_epochs)].reset_index(drop=True)
+    events["Epoch_idx"] = events.index
+    
+    # Pre-allocate memory
+    Dur_arr = np.zeros((len(event_id),n_maps)); Dur_arr.fill(np.nan)
+    Occ_arr = np.zeros((len(event_id),n_maps)); Occ_arr.fill(np.nan)
+    TCo_arr = np.zeros((len(event_id),n_maps)); TCo_arr.fill(np.nan)
+    TMx_arr = np.zeros((len(event_id),n_maps,n_maps))
+    Ent_arr = np.zeros(len(event_id))
+    for e in range(len(event_id)):
+        ev_idx = list(event_id.values())[e]
+        ep_idx = events["Epoch_idx"][events["Event_id"] == ev_idx]
+        trial_numbers0 = np.unique(events["Trial_number"][ep_idx])
+        if len(trial_numbers0) > 8:
+            # Compute the first 8 trials and the last 8 separately, before
+            # taking the average to be consistent with asymmetric trials
+            Dur_arr0 = np.zeros((2,n_maps))
+            Occ_arr0 = np.zeros((2,n_maps))
+            TCo_arr0 = np.zeros((2,n_maps))
+            TMx_arr0 = np.zeros((2,n_maps,n_maps))
+            Ent_arr0 = np.zeros(2)
+            trial_numbers_split = np.array_split(trial_numbers0, 2)
+            # Array_split is used in cases where a trial might have been
+            # dropped, hence the split is only 8/7
+            for s in range(len(trial_numbers_split)):
+                ep_idx0 = events.loc[(events["Event_id"] == ev_idx)&
+                                    (events["Trial_number"].isin(trial_numbers_split[s])),
+                                    "Epoch_idx"]
+                m_labels0 = m_labels[ep_idx0,:]
+                m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1])
+                # Compute duration, occurrence and time covered
+                l_, d_ = microstate_run_length_encoding(m_labels0_flat)
+                # For each microstate
+                for ii in range(n_maps):
+                    if np.isnan(np.nanmean(d_[l_==ii])):
+                        # The specific microstate did not occur at all for this event
+                        Dur_arr0[s,ii] = 0
+                        Occ_arr0[s,ii] = 0
+                        TCo_arr0[s,ii] = 0
+                    else:
+                        Dur_arr0[s,ii] = np.mean(d_[l_==ii]) * 1000/sfreq # convert to ms
+                        Occ_arr0[s,ii] = len(d_[l_==ii])/len(d_) * sfreq
+                        TCo_arr0[s,ii] = np.sum(d_[l_==ii])/np.sum(d_)
+                # Compute transition matrix
+                TMx_arr0[s] = T_empirical(m_labels0_flat, n_maps)
+                # Compute Shannon Entropy relative to max
+                Ent_arr0[s] = H_1(m_labels0_flat, n_maps)/np.log(float(n_maps))
+            # Average over the splits
+            Dur_arr[e] = np.mean(Dur_arr0, axis=0)
+            Occ_arr[e] = np.mean(Occ_arr0, axis=0)
+            TCo_arr[e] = np.mean(TCo_arr0, axis=0)
+            TMx_arr[e] = np.mean(TMx_arr0, axis=0)
+            Ent_arr[e] = np.mean(Ent_arr0, axis=0)
+            
+        else:
+            m_labels0 = m_labels[ep_idx,:]
+            m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1])
+            # Compute duration, occurrence and time covered
+            l_, d_ = microstate_run_length_encoding(m_labels0_flat)
+            # For each microstate
+            for ii in range(n_maps):
+                if np.isnan(np.nanmean(d_[l_==ii])):
+                    # The specific microstate did not occur at all for this event
+                    Dur_arr[e,ii] = 0
+                    Occ_arr[e,ii] = 0
+                    TCo_arr[e,ii] = 0
+                else:
+                    Dur_arr[e,ii] = np.mean(d_[l_==ii]) * 1000/sfreq # convert to ms
+                    Occ_arr[e,ii] = len(d_[l_==ii])/len(d_) * sfreq
+                    TCo_arr[e,ii] = np.sum(d_[l_==ii])/np.sum(d_)
+            # Compute transition matrix
+            TMx_arr[e] = T_empirical(m_labels0_flat, n_maps)
+            # Compute Shannon Entropy relative to max
+            Ent_arr[e] = H_1(m_labels0_flat, n_maps)/np.log(float(n_maps))
+    # Combine all features in a list
+    MFeatures = [Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr]
+    
+    # Update in v7 - Collapse after computations in single events
+    Dur_arr2 = np.zeros((len(collapsed_event_id),n_maps))
+    Occ_arr2 = np.zeros((len(collapsed_event_id),n_maps))
+    TCo_arr2 = np.zeros((len(collapsed_event_id),n_maps))
+    TMx_arr2 = np.zeros((len(collapsed_event_id),n_maps,n_maps))
+    Ent_arr2 = np.zeros(len(collapsed_event_id))
+    MFeatures2 = [Dur_arr2,Occ_arr2,TCo_arr2,TMx_arr2,Ent_arr2]
+    
+    for f in range(len(MFeatures2)):
+        tmp_feat = MFeatures[f]
+        tmp_feat2 = MFeatures2[f]
+        for e in range(len(collapsed_event_id)):
+            ee = list(collapsed_event_id.keys())[e]
+            if (ee == 'observer_actor'):
+                old_ev_idx1 = list(event_id.keys()).index("observe, actor")
+                old_ev_idx2 = list(event_id.keys()).index("observe, observer")
+                tmp_feat2[e] = np.mean([tmp_feat[old_ev_idx1],tmp_feat[old_ev_idx2]], axis=0)
+            elif ee == 'follower_leader':
+                old_ev_idx1 = list(event_id.keys()).index("imitate, leader")
+                old_ev_idx2 = list(event_id.keys()).index("imitate, follower")
+                tmp_feat2[e] = np.mean([tmp_feat[old_ev_idx1],tmp_feat[old_ev_idx2]], axis=0)
+            else:
+                new_ev_idx = list(collapsed_event_id.values())[e]
+                old_ev_idx = list(event_id.values()).index(new_ev_idx)
+                tmp_feat2[e] = tmp_feat[old_ev_idx]
+    
+    return m_labels, events, MFeatures2
+
+def label_to_walk_direction(labels):
+    directions = np.zeros(len(labels))
+    directions[pd.Series(labels).isin([0,1,2,3])] = 1 # A/B/C/D assigned to 1
+    directions[pd.Series(labels).isin([4,5,6,7])] = -1 # E/F/G/H assigned to -1
+    assert np.sum(directions==0)==0
+    return directions
+
+def compute_dualmicro_DFA(i, microstate_results, trialinfo_list, sfreq, window_sizes, event_id, collapsed_event_id, overlap=True):
+    """
+    See Hardstone et al, 2012 for more info
+    Perform DFA
+        1 Compute cumulative sum of time series to create signal profile
+        2 Define set of window sizes (see below)
+        3 Remove the linear trend using least-squares for each window
+        4 Calculate standard deviation for each window and take the mean
+        5 Plot fluctuation function (Standard deviation) as function
+          for all window sizes, on double logarithmic scale
+        6 The DFA exponent alpha correspond to Hurst exponent
+          f(L) = sd = L^alpha (with alpha as linear coefficient in log plot)
+      
+    Parameters
+    ----------
+    i : int
+        The index.
+    microstate_results : list
+        The estimated microstates.
+    trialinfo_list : list
+        List with trial informations.
+    sfreq : int
+        The sampling frequency.
+    window_sizes : np.array
+        Window sizes to compute DFA over.
+    event_id : dict
+        The mappings between event id and condition.
+    collapsed_brain_event_id : dict
+        The renamed mappings between event id and collapsed conditions
+    overlap : bool, optional
+        Boolean to determine whether to use overlapping windows. The default is True.
+
+    Returns
+    -------
+    dfa_array : np.array
+        The Hurst Exponents.
+    fluctuations : np.array
+        The fluctuations estimated at different window sizes.
+
+    """
+    # Load all microstate results    
+    pair_idx = microstate_results[5]
+    pair_indices = pair_idx[i], pair_idx[i+1]
+    m_labels = microstate_results[1][pair_indices[0]:pair_indices[1]]
+
+    events = trialinfo_list[2][i]
+    # Since they are synchronized already, just take the first and fix
+    # epoch idx column with the new idx for the synchronized labels
+    events = events[0]
+    
+    # Reshape m_labels to (epoch, time)
+    m_labels = m_labels.reshape(len(events),sfreq)
+    
+    # Remove pre-trial epochs
+    pre_trial_epochs = events["Trial_start_time"] < 0
+    m_labels = m_labels[np.invert(pre_trial_epochs)]
+    events = events.loc[np.invert(pre_trial_epochs)].reset_index(drop=True)
+    events["Epoch_idx"] = events.index
+    
+    # Pre-allocate memory
+    dfa_array = np.zeros((len(event_id)))
+    dfa_array[:] = np.nan
+    fluctuations = np.zeros((len(event_id),len(window_sizes)))
+    fluctuations[:] = np.nan
+    # As I am working with linear regression on log-values, I can just take
+    # the avg of the x, y coordinates and the coefficients will correspond
+    # to the avg for each separate linear regression
+
+    for e in range(len(event_id)):
+        ev_idx = list(event_id.values())[e]
+        ep_idx = events["Epoch_idx"][events["Event_id"] == ev_idx]
+        trial_numbers0 = np.unique(events["Trial_number"][ep_idx])
+        if len(trial_numbers0) > 8:
+            # Compute the first 8 trials and the last 8 separately, before
+            # taking the average to be consistent with asymmetric trials
+            dfa0 = []
+            fluct0 = np.zeros((2,len(window_sizes)))
+            trial_numbers_split = np.array_split(trial_numbers0, 2)
+            # Array_split is used in cases where a trial might have been
+            # dropped, hence the split is only 8/7
+            for s in range(len(trial_numbers_split)):
+                ep_idx0 = events.loc[(events["Event_id"] == ev_idx)&
+                                    (events["Trial_number"].isin(trial_numbers_split[s])),
+                                    "Epoch_idx"]
+                m_labels0 = m_labels[ep_idx0,:]
+                m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1])
+                # Convert the labels to the directons for computation of cumulative
+                # sum in order to get the signal profile
+                data = label_to_walk_direction(m_labels0_flat)
+                # plt.plot(np.arange(1,len(data)+1,1),np.cumsum(data)) # plot signal profile
+                # Estimate and save DFA
+                dfa, fluctuations_2d = nolds.dfa(data,nvals=window_sizes,overlap=overlap,debug_data=True)
+                dfa0.append(dfa)
+                fluct0[s] = fluctuations_2d[1]
+            # Average the two DFA estimations
+            dfa_array[e] = np.mean(dfa0)
+            fluctuations[e,:] = np.mean(fluct0, axis=0)
+            
+        else:
+            m_labels0 = m_labels[ep_idx,:]
+            m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1])
+            # Convert the labels to the directons for computation of cumulative
+            # sum in order to get the signal profile
+            data = label_to_walk_direction(m_labels0_flat)
+            # Estimate and save DFA
+            dfa_array[e],fluctuations_2d = nolds.dfa(data,nvals=window_sizes,overlap=overlap,debug_data=True)
+            fluctuations[e,:] = fluctuations_2d[1]
+    
+    # Update in DFA v2 - Collapse after computation of DFA
+    dfa_array2 = np.zeros((len(collapsed_event_id)))
+    dfa_array2[:] = np.nan
+    fluctuations2 = np.zeros((len(collapsed_event_id),len(window_sizes)))
+    fluctuations2[:] = np.nan
+    for e in range(len(collapsed_event_id)):
+        ee = list(collapsed_event_id.keys())[e]
+        if ee == 'observer_actor':
+            old_ev_idx1 = list(event_id.keys()).index("observe, actor")
+            old_ev_idx2 = list(event_id.keys()).index("observe, observer")
+            dfa_array2[e] = np.mean([dfa_array[old_ev_idx1],dfa_array[old_ev_idx2]])
+            fluctuations2[e] = np.mean([fluctuations[old_ev_idx1],fluctuations[old_ev_idx2]], axis=0)
+        elif ee == 'follower_leader':
+            old_ev_idx1 = list(event_id.keys()).index("imitate, leader")
+            old_ev_idx2 = list(event_id.keys()).index("imitate, follower")
+            dfa_array2[e] = np.mean([dfa_array[old_ev_idx1],dfa_array[old_ev_idx2]])
+            fluctuations2[e] = np.mean([fluctuations[old_ev_idx1],fluctuations[old_ev_idx2]], axis=0)
+        else:
+            new_ev_idx = list(collapsed_event_id.values())[e]
+            old_ev_idx = list(event_id.values()).index(new_ev_idx)
+            dfa_array2[e] = dfa_array[old_ev_idx]
+            fluctuations2[e] = fluctuations[old_ev_idx]
+    
+    return dfa_array2, fluctuations2
+
+
+def compute_dualmicro_DFA_pseudo(i, backfit_results, sfreq, window_sizes, event_id, collapsed_event_id, overlap=True):
+    """
+    See Hardstone et al, 2012 for more info
+    Perform DFA
+        1 Compute cumulative sum of time series to create signal profile
+        2 Define set of window sizes (see below)
+        3 Remove the linear trend using least-squares for each window
+        4 Calculate standard deviation for each window and take the mean
+        5 Plot fluctuation function (Standard deviation) as function
+          for all window sizes, on double logarithmic scale
+        6 The DFA exponent alpha correspond to Hurst exponent
+          f(L) = sd = L^alpha (with alpha as linear coefficient in log plot)
+      
+    Parameters
+    ----------
+    i : int
+        The index.
+    backfit_results : list
+        The estimated back-fitted microstates.
+    sfreq : int
+        The sampling frequency.
+    window_sizes : np.array
+        Window sizes to compute DFA over.
+    event_id : dict
+        The mappings between event id and condition.
+    collapsed_brain_event_id : dict
+        The renamed mappings between event id and collapsed conditions
+    overlap : bool, optional
+        Boolean to determine whether to use overlapping windows. The default is True.
+
+    Returns
+    -------
+    dfa_array : np.array
+        The Hurst Exponents.
+    fluctuations : np.array
+        The fluctuations estimated at different window sizes.
+
+    """
+    m_labels = backfit_results[1][i]
+    events = backfit_results[3][i]
+    
+    # Since they are synchronized already, just take the first and fix
+    # epoch idx column with the new idx for the synchronized labels
+    events = events[0]
+    
+    # Update in DFA v2
+    # # Collapse event_id 6 and 7 to 4 and 5
+    # events.loc[events["Event_id"] == 6,["Event_id","Event_label"]] = [4, "observe, actor"]
+    # events.loc[events["Event_id"] == 7,["Event_id","Event_label"]] = [5, "imitate, leader"]
+    # The microstate clustering was performed on flipped (collapsed) events,
+    # but I will compute the DFA on the 8 trials to avoid the flipping
+    # effect on the linear detrending, and then collapse by averaging
+    # after DFA computation
+
+    # Reshape m_labels to (epoch, time)
+    m_labels = m_labels.reshape(len(events),sfreq)
+    
+    # Remove pre-trial epochs
+    pre_trial_epochs = events["Trial_start_time"] < 0
+    m_labels = m_labels[np.invert(pre_trial_epochs)]
+    events = events.loc[np.invert(pre_trial_epochs)].reset_index(drop=True)
+    events["Epoch_idx"] = events.index
+    
+    # Pre-allocate memory
+    dfa_array = np.zeros((len(event_id)))
+    dfa_array[:] = np.nan
+    fluctuations = np.zeros((len(event_id),len(window_sizes)))
+    fluctuations[:] = np.nan
+    # As I am working with linear regression on log-values, I can just take
+    # the avg of the x, y coordinates and the coefficients will correspond
+    # to the avg for each separate linear regression
+
+    for e in range(len(event_id)):
+        ev_idx = list(event_id.values())[e]
+        ep_idx = events["Epoch_idx"][events["Event_id"] == ev_idx]
+        trial_numbers0 = np.unique(events["Trial_number"][ep_idx])
+        if len(trial_numbers0) > 8:
+            # Compute the first 8 trials and the last 8 separately, before
+            # taking the average to be consistent with asymmetric trials
+            dfa0 = []
+            fluct0 = np.zeros((2,len(window_sizes)))
+            trial_numbers_split = np.array_split(trial_numbers0, 2)
+            # Array_split is used in cases where a trial might have been
+            # dropped, hence the split is only 8/7
+            for s in range(len(trial_numbers_split)):
+                ep_idx0 = events.loc[(events["Event_id"] == ev_idx)&
+                                    (events["Trial_number"].isin(trial_numbers_split[s])),
+                                    "Epoch_idx"]
+                m_labels0 = m_labels[ep_idx0,:]
+                m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1])
+                # Convert the labels to the directons for computation of cumulative
+                # sum in order to get the signal profile
+                data = label_to_walk_direction(m_labels0_flat)
+                # plt.plot(np.arange(1,len(data)+1,1),np.cumsum(data)) # plot signal profile
+                # Estimate and save DFA
+                dfa, fluctuations_2d = nolds.dfa(data,nvals=window_sizes,overlap=overlap,debug_data=True)
+                dfa0.append(dfa)
+                fluct0[s] = fluctuations_2d[1]
+            # Average the two DFA estimations
+            dfa_array[e] = np.mean(dfa0)
+            fluctuations[e,:] = np.mean(fluct0, axis=0)
+            
+        else:
+            m_labels0 = m_labels[ep_idx,:]
+            m_labels0_flat = m_labels0.reshape(m_labels0.shape[0]*m_labels0.shape[1])
+            # Convert the labels to the directons for computation of cumulative
+            # sum in order to get the signal profile
+            data = label_to_walk_direction(m_labels0_flat)
+            # Estimate and save DFA
+            dfa_array[e],fluctuations_2d = nolds.dfa(data,nvals=window_sizes,overlap=overlap,debug_data=True)
+            fluctuations[e,:] = fluctuations_2d[1]
+    
+    # Update in DFA v2 - Collapse after computation of DFA
+    dfa_array2 = np.zeros((len(collapsed_event_id)))
+    dfa_array2[:] = np.nan
+    fluctuations2 = np.zeros((len(collapsed_event_id),len(window_sizes)))
+    fluctuations2[:] = np.nan
+    for e in range(len(collapsed_event_id)):
+        ee = list(collapsed_event_id.keys())[e]
+        if ee == 'observer_actor':
+            old_ev_idx1 = list(event_id.keys()).index("observe, actor")
+            old_ev_idx2 = list(event_id.keys()).index("observe, observer")
+            dfa_array2[e] = np.mean([dfa_array[old_ev_idx1],dfa_array[old_ev_idx2]])
+            fluctuations2[e] = np.mean([fluctuations[old_ev_idx1],fluctuations[old_ev_idx2]], axis=0)
+        elif ee == 'follower_leader':
+            old_ev_idx1 = list(event_id.keys()).index("imitate, leader")
+            old_ev_idx2 = list(event_id.keys()).index("imitate, follower")
+            dfa_array2[e] = np.mean([dfa_array[old_ev_idx1],dfa_array[old_ev_idx2]])
+            fluctuations2[e] = np.mean([fluctuations[old_ev_idx1],fluctuations[old_ev_idx2]], axis=0)
+        else:
+            new_ev_idx = list(collapsed_event_id.values())[e]
+            old_ev_idx = list(event_id.values()).index(new_ev_idx)
+            dfa_array2[e] = dfa_array[old_ev_idx]
+            fluctuations2[e] = fluctuations[old_ev_idx]
+    
+    return dfa_array2, fluctuations2
+
+def similar_interbrain_microstates(x, y):
+    assert len(x) == len(y)
+    ratio_in_similar_state = sum(x == y)/len(x)
+    return ratio_in_similar_state
+
+def shift_label_time_series(x, y, shift):
+    assert len(x) == len(y)
+    x = pd.Series(x)
+    y = pd.Series(y)
+    # Shift the data
+    y = y.shift(shift)
+    # Drop the NaN
+    drop_idx = np.where(y.isna())[0]
+    y = y.drop(drop_idx)
+    x = x.drop(drop_idx)
+    assert len(y) == len(x)
+    assert sum(y.isna()) == 0
+    return x.to_numpy().astype(int), y.to_numpy().astype(int)
+
+def shifted_interbrain_microstate_feature_computation(i, n_maps, microstate_results,
+       trialinfo_list, sfreq, event_id, collapsed_event_id, lag_search_range, lag_interval):
+    """
+    Compute inter-brain features, but for time-lagged microstate label time series.
+    Similar to cross-correlation, but maximizing the amount of synchronized
+    (similar) microstates at a given lag
+    
+    Parameters
+    ----------
+    i : int
+        The index.
+    n_maps : int
+        The number of maps (clusters) used.
+    microstate_results : list
+        The estimated microstates.
+    trialinfo_list : list
+        List with trial informations.
+    sfreq : int
+        The sampling frequency.
+    event_id : dict
+        The mappings between event id and condition.
+    collapsed_brain_event_id : dict
+        The renamed mappings between event id and collapsed conditions
+    lag_search_range : float
+        The lag in both directions the data is being shifted
+    lag_interval : np.array
+        The sample points corresponding to the lags the data is being shifted
+    
+    Returns
+    -------
+    m_labels : list of two np.array
+        The microstate sequence (labels) time series in the format (epoch, time).
+    events : list of two pd.DataFrame
+        The events corresponding to each epoch.
+    MFeatures : list
+        List of arrays of each microstate feature.
+    shift_info : list
+        List containing the information regarding the shift, e.g. the cross-
+        similarity at different time lags and the optimal time lag
+    
+    """
+    
+    # Here i refers to the pair in range(n_subjects//2)
+    sub_idx = microstate_results[5]
+    
+    # Get the microstate labels and events for participant 1
+    subject_indices1 = sub_idx[2*i], sub_idx[2*i+1]
+    m_labels1 = microstate_results[1][subject_indices1[0]:subject_indices1[1]]
+    # Get the trialinfo with conditions
+    Subject1, trialinfo1 = trialinfo_list[2*i]
+    # Convert to dataframe
+    event_id_inv = {v: k for k, v in event_id.items()} # Inverse the event id
+    events1 = pd.DataFrame(trialinfo1,columns=["Event_id","Trial_number","Trial_start_time"])
+    events1["Event_label"] = events1["Event_id"].replace(event_id_inv)
+    events1 = events1.reset_index().rename(columns={"index":"Epoch_idx"})
+    # Reshape m_labels to (epoch, time)
+    m_labels1 = m_labels1.reshape(len(trialinfo1),sfreq)
+    
+    # Get the microstate labels and events for participant 2
+    subject_indices2 = sub_idx[2*i+1], sub_idx[2*i+2]
+    m_labels2 = microstate_results[1][subject_indices2[0]:subject_indices2[1]]
+    # Get the trialinfo with conditions
+    Subject2, trialinfo2 = trialinfo_list[2*i+1]
+    # Convert to dataframe
+    events2 = pd.DataFrame(trialinfo2,columns=["Event_id","Trial_number","Trial_start_time"])
+    events2["Event_label"] = events2["Event_id"].replace(event_id_inv)
+    events2 = events2.reset_index().rename(columns={"index":"Epoch_idx"})
+    # Reshape m_labels to (epoch, time)
+    m_labels2 = m_labels2.reshape(len(trialinfo2),sfreq)
+    
+    # check that the participants from a pair was loaded
+    assert Subject2-1 == Subject1
+    # Check that the same amount of event types are present
+    assert trialinfo1[-1,1] == trialinfo2[-1,1]
+    # Synchronize the events from the pair based on timing info
+    # By trimming the epochs to only include the epochs that are present
+    # in both participants of the pair
+    # Initialize with an array filled with a unique number not in use
+    sync_m_labels1 = np.zeros_like(m_labels1); sync_m_labels1.fill(9999)
+    sync_m_labels2 = np.zeros_like(m_labels2); sync_m_labels2.fill(9999)
+    for t in np.unique(trialinfo1[:,1]):
+        t_idx1 = np.where(trialinfo1[:,1] == t)[0]
+        t_idx2 = np.where(trialinfo2[:,1] == t)[0]
+        # Get the timings for the epochs for the specific trial t
+        t_timings1 = trialinfo1[t_idx1,2]
+        t_timings2 = trialinfo2[t_idx2,2]
+        timings_intersect = np.intersect1d(t_timings1,t_timings2)
+        # Get the indices where the timings matches
+        t_idx_match1 = t_idx1[pd.Series(t_timings1).isin(timings_intersect)]
+        t_idx_match2 = t_idx2[pd.Series(t_timings2).isin(timings_intersect)]
+        # Get the actual values from the synchronized epochs
+        sync_m_labels1[t_idx_match1] = m_labels1[t_idx_match1]
+        sync_m_labels2[t_idx_match2] = m_labels2[t_idx_match2]
+    # Find the epochs that were asynchronous, which have to be trimmed
+    asynch_epochs1 = np.unique(np.where(sync_m_labels1==9999)[0])
+    asynch_epochs2 = np.unique(np.where(sync_m_labels2==9999)[0])
+    # Trim/delete the asynchronous epochs
+    sync_m_labels1 = np.delete(sync_m_labels1,asynch_epochs1,axis=0)
+    sync_m_labels2 = np.delete(sync_m_labels2,asynch_epochs2,axis=0)
+    assert len(sync_m_labels1) == len(sync_m_labels2) # check the amount of synchronized epochs are equal
+    # Fix events
+    sync_events1 = events1.drop(asynch_epochs1,axis=0).reset_index(drop=True)
+    sync_events2 = events2.drop(asynch_epochs2,axis=0).reset_index(drop=True)
+    
+    # Since they are synchronized already, just take the first and fix
+    # epoch idx column with the new idx for the synchronized labels
+    events = sync_events1
+    events["Epoch_idx"] = events.index
+    # Notice that for the intrabrain fit all alpha v6 I only corrected
+    # ppn2. So by taking ppn1 I get the original event labels.
+    
+    # Remove pre-trial epochs
+    pre_trial_epochs = events["Trial_start_time"] < 0
+    sync_m_labels1 = sync_m_labels1[np.invert(pre_trial_epochs)]
+    sync_m_labels2 = sync_m_labels2[np.invert(pre_trial_epochs)]
+    events = events.loc[np.invert(pre_trial_epochs)].reset_index(drop=True)
+    events["Epoch_idx"] = events.index
+    
+    # Determine the optimal lag for highest interbrain state synchrony
+    # For each condition - using the collapsed dictionary
+    # To get one value of optimal lag for e.g. follower-leader and observer-actor
+    # Collapse event_id 6 and 7 to 4 and 5
+    events_collapsed = events.copy()
+    events_collapsed.loc[events_collapsed["Event_id"] == 6,["Event_id","Event_label"]] = [4, "observe, actor"]
+    events_collapsed.loc[events_collapsed["Event_id"] == 7,["Event_id","Event_label"]] = [5, "imitate, leader"]
+    normal_events_to_collapsed_map = {"1":1, "2":2, "3":3, "4":4, "5":5, "6":4, "7":5, "8":8}
+    
+    shift_info = [0]*len(collapsed_event_id)
+    for e in range(len(collapsed_event_id)):
+        ev_idx = list(collapsed_event_id.values())[e]
+        ep_idx = events_collapsed["Epoch_idx"][events_collapsed["Event_id"] == ev_idx]
+        trial_numbers0 = np.unique(events_collapsed["Trial_number"][ep_idx])
+        
+        lts1 = sync_m_labels1[ep_idx,:]
+        lts2 = sync_m_labels2[ep_idx,:]
+        # Flatten the labels
+        lts1_flat = pd.Series(lts1.ravel())
+        lts2_flat = pd.Series(lts2.ravel())
+        # Compute cross similarity
+        cross_similarity = [lts1_flat.corr(lts2_flat.shift(lag), method=similar_interbrain_microstates) for lag in lag_interval]
+        # Original ratio of time in similar states (time = 0)
+        t_zero_ratio_in_similar_microstate = cross_similarity[lag_search_range]
+        # The optimal lag
+        opt_lag = lag_interval[np.argmax(cross_similarity)]
+        opt_ratio_in_similar_microstate = cross_similarity[np.argmax(cross_similarity)]
+        # Shift the label time series with the optimal lag prior to computation of interbrain features
+        # Save the features
+        shift_info[e] = [t_zero_ratio_in_similar_microstate, opt_lag, opt_ratio_in_similar_microstate, cross_similarity]
+    
+    # Pre-allocate memory
+    Dur_arr = np.zeros((len(event_id),n_maps+1)); Dur_arr.fill(np.nan)
+    Occ_arr = np.zeros((len(event_id),n_maps+1)); Occ_arr.fill(np.nan)
+    TCo_arr = np.zeros((len(event_id),n_maps+1)); TCo_arr.fill(np.nan)
+    TMx_arr = np.zeros((len(event_id),n_maps+1,n_maps+1))
+    Ent_arr = np.zeros(len(event_id))
+    for e in range(len(event_id)):
+        ev_idx = list(event_id.values())[e]
+        ep_idx = events["Epoch_idx"][events["Event_id"] == ev_idx]
+        trial_numbers0 = np.unique(events["Trial_number"][ep_idx])
+        # Get the opt_lag from collapsed events
+        ev_collapsed_idx = normal_events_to_collapsed_map[str(ev_idx)]
+        shift_info_idx = list(collapsed_event_id.values()).index(ev_collapsed_idx)
+        opt_lag = shift_info[shift_info_idx][1]
+        
+        if len(trial_numbers0) > 8:
+            # Compute the first 8 trials and the last 8 separately, before
+            # taking the average to be consistent with asymmetric trials
+            Dur_arr0 = np.zeros((2,n_maps+1))
+            Occ_arr0 = np.zeros((2,n_maps+1))
+            TCo_arr0 = np.zeros((2,n_maps+1))
+            TMx_arr0 = np.zeros((2,n_maps+1,n_maps+1))
+            Ent_arr0 = np.zeros(2)
+            trial_numbers_split = np.array_split(trial_numbers0, 2)
+            # Array_split is used in cases where a trial might have been
+            # dropped, hence the split is only 8/7
+            for s in range(len(trial_numbers_split)):
+                ep_idx0 = events.loc[(events["Event_id"] == ev_idx)&
+                                    (events["Trial_number"].isin(trial_numbers_split[s])),
+                                    "Epoch_idx"]
+                # Get the microstate labels
+                m_labels10 = sync_m_labels1[ep_idx0,:]
+                m_labels20 = sync_m_labels2[ep_idx0,:]
+                # Flatten the labels
+                m_labels10_flat = m_labels10.reshape(m_labels10.shape[0]*m_labels10.shape[1])
+                m_labels20_flat = m_labels20.reshape(m_labels20.shape[0]*m_labels20.shape[1])
+                # Shift the label time-series to maximize interbrain synchrony
+                m_labels10_shifted, m_labels20_shifted = shift_label_time_series(m_labels10_flat, m_labels20_flat, opt_lag)
+                
+                # Compute average duration of common microstate
+                # Output: label and duration of common microstate. Label -1 is used
+                # for not common microstate
+                l_, d_ = interbrain_microstate_run_length_encoding(m_labels10_shifted,m_labels20_shifted)
+                # For each microstate
+                for ii in range(n_maps+1):
+                    if np.isnan(np.nanmean(d_[l_==ii-1])):
+                        # The specific microstate did not occur at all for this event
+                        Dur_arr0[s,ii] = 0
+                        Occ_arr0[s,ii] = 0
+                        TCo_arr0[s,ii] = 0
+                    else:
+                        Dur_arr0[s,ii] = np.mean(d_[l_==ii-1]) * 1000/sfreq # convert to ms
+                        Occ_arr0[s,ii] = len(d_[l_==ii-1])/len(d_) * sfreq
+                        TCo_arr0[s,ii] = np.sum(d_[l_==ii-1])/np.sum(d_)
+                # Compute transition matrix
+                TMx_arr0[s] = interbrain_T_matrix(m_labels10_shifted,m_labels20_shifted,n_maps)
+                # Compute Joint Shannon Entropy relative to max
+                # Max is the sum of max individual entropies
+                Ent_arr0[s] = H_2(m_labels10_shifted,m_labels20_shifted,n_maps)/(2*np.log(float(n_maps)))
+            # Average over the splits
+            Dur_arr[e] = np.mean(Dur_arr0, axis=0)
+            Occ_arr[e] = np.mean(Occ_arr0, axis=0)
+            TCo_arr[e] = np.mean(TCo_arr0, axis=0)
+            TMx_arr[e] = np.mean(TMx_arr0, axis=0)
+            Ent_arr[e] = np.mean(Ent_arr0, axis=0)
+            
+        else:
+            # Get the microstate labels
+            m_labels10 = sync_m_labels1[ep_idx,:]
+            m_labels20 = sync_m_labels2[ep_idx,:]
+            # Flatten the labels
+            m_labels10_flat = m_labels10.reshape(m_labels10.shape[0]*m_labels10.shape[1])
+            m_labels20_flat = m_labels20.reshape(m_labels20.shape[0]*m_labels20.shape[1])
+            # Shift the label time-series to maximize interbrain synchrony
+            m_labels10_shifted, m_labels20_shifted = shift_label_time_series(m_labels10_flat, m_labels20_flat, opt_lag)
+            
+            # Compute average duration of common microstate
+            # Output: label and duration of common microstate. Label -1 is used
+            # for not common microstate
+            l_, d_ = interbrain_microstate_run_length_encoding(m_labels10_shifted,m_labels20_shifted)
+            # For each microstate
+            for ii in range(n_maps+1):
+                if np.isnan(np.nanmean(d_[l_==ii-1])):
+                    # The specific microstate did not occur at all for this event
+                    Dur_arr[e,ii] = 0
+                    Occ_arr[e,ii] = 0
+                    TCo_arr[e,ii] = 0
+                else:
+                    Dur_arr[e,ii] = np.mean(d_[l_==ii-1]) * 1000/sfreq # convert to ms
+                    Occ_arr[e,ii] = len(d_[l_==ii-1])/len(d_) * sfreq
+                    TCo_arr[e,ii] = np.sum(d_[l_==ii-1])/np.sum(d_)
+            # Compute transition matrix
+            TMx_arr[e] = interbrain_T_matrix(m_labels10_shifted,m_labels20_shifted,n_maps)
+            # Compute Joint Shannon Entropy relative to max
+            # Max is the sum of max individual entropies
+            Ent_arr[e] = H_2(m_labels10_shifted,m_labels20_shifted,n_maps)/(2*np.log(float(n_maps)))
+            
+    # Combine all features in a list
+    MFeatures = [Dur_arr,Occ_arr,TCo_arr,TMx_arr,Ent_arr]
+    
+    # Update in v7 - Collapse after computations in single events
+    Dur_arr2 = np.zeros((len(collapsed_event_id),n_maps+1))
+    Occ_arr2 = np.zeros((len(collapsed_event_id),n_maps+1))
+    TCo_arr2 = np.zeros((len(collapsed_event_id),n_maps+1))
+    TMx_arr2 = np.zeros((len(collapsed_event_id),n_maps+1,n_maps+1))
+    Ent_arr2 = np.zeros(len(collapsed_event_id))
+    MFeatures2 = [Dur_arr2,Occ_arr2,TCo_arr2,TMx_arr2,Ent_arr2]
+    
+    for f in range(len(MFeatures2)):
+        tmp_feat = MFeatures[f]
+        tmp_feat2 = MFeatures2[f]
+        for e in range(len(collapsed_event_id)):
+            ee = list(collapsed_event_id.keys())[e]
+            if (ee == 'observer_actor'):
+                old_ev_idx1 = list(event_id.keys()).index("observe, actor")
+                old_ev_idx2 = list(event_id.keys()).index("observe, observer")
+                tmp_feat2[e] = np.mean([tmp_feat[old_ev_idx1],tmp_feat[old_ev_idx2]], axis=0)
+            elif ee == 'follower_leader':
+                old_ev_idx1 = list(event_id.keys()).index("imitate, leader")
+                old_ev_idx2 = list(event_id.keys()).index("imitate, follower")
+                tmp_feat2[e] = np.mean([tmp_feat[old_ev_idx1],tmp_feat[old_ev_idx2]], axis=0)
+            else:
+                new_ev_idx = list(collapsed_event_id.values())[e]
+                old_ev_idx = list(event_id.values()).index(new_ev_idx)
+                tmp_feat2[e] = tmp_feat[old_ev_idx]
+    
+    return [sync_m_labels1, sync_m_labels2], [sync_events1, sync_events2], MFeatures2, shift_info
\ No newline at end of file