DuguidLab · Aidan-MT · Mar 7, 2022 · Mar 8, 2022 · Mar 11, 2022 · Mar 12, 2022
diff --git a/pixtools/clusters/noise_analysis_SD_kmeans_Clustering.py b/pixtools/clusters/noise_analysis_SD_kmeans_Clustering.py
@@ -0,0 +1,109 @@
+# Unlike the other noise plot file (by channel), this file clusters the SDs, allowing for a mean square analysis.
+# If there are distinct clusters that can be separated by depth, this indicates a clear relationship between depth and noise.
+
+# First import required packages
+import sys
+import json
+
+from turtle import fd
+from channeldepth import *
+from channeldepth import *
+from sklearn.cluster import KMeans
+
+from pixels import Experiment
+from pixels.behaviours.leverpush import LeverPush
+from pixels.behaviours.pushpull import PushPull
+from pixels.behaviours.reach import Reach
+from pixels.behaviours.no_behaviour import NoBehaviour
+
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import datetime
+import matplotlib.pyplot as plt
+
+from pixtools import clusters
+from pixtools import utils
+
+def noise_per_channeldepth(myexp):
+    """
+    Collects the per-channel noise SDs of every recording into one long dataframe.
+
+    myexp: the experiment defined in base.py; its sessions are iterated and it is passed
+        to meta_spikeglx(myexp, 0) to obtain the channel x/y coordinates.
+
+    Returns a dataframe with the columns "SDs", "session", "x" and "y" and a fresh
+    0..n-1 index, holding one row per channel for every noise_{i}.json file of every
+    session (rows of all files are kept, not only those of a session's last file).
+    The frame is empty when the experiment yields no noise files.
+    """
+    columns = ["SDs", "session", "x", "y"]
+    depths = meta_spikeglx(myexp, 0).to_dataframe()
+    coords = depths[["x", "y"]]  # Generic x and y coords, reused for every session.
+    tot_noise = []  # One dataframe per noise file, across all sessions.
+
+    for session in myexp:
+        for i in range(len(session.files)):
+            path = session.processed / f"noise_{i}.json"
+            with path.open() as noise_file:
+                ses_noise = json.load(noise_file)
+
+            sds = ses_noise["SDs"][0:-1]  # Drop the last entry (the sync channel).
+            chan_coords = coords.iloc[: len(sds)]  # SD number j belongs to coords row j.
+            tot_noise.append(pd.DataFrame({
+                "SDs": sds,
+                "session": session.name,
+                "x": chan_coords["x"].to_numpy(),
+                "y": chan_coords["y"].to_numpy(),
+            }))
+
+    # pd.concat raises on an empty list, so return an empty frame explicitly.
+    return pd.concat(tot_noise, ignore_index=True) if tot_noise else pd.DataFrame(columns=columns)
+
+
+#Now determine the optimal number of clusters to use in the K-means analysis by producing elbow plots
+def elbowplot(data, myexp):
+    """
+    Shows one K-means elbow plot per session to help choose the number of clusters.
+
+    For each session the noise SDs are clustered with k = 1..9 (capped at the number of
+    samples, because KMeans cannot fit more clusters than samples) and the inertia of
+    each fit is plotted against k. The optimal number of clusters is the point at which
+    the elbow bends, i.e. where additional clusters no longer explain much more variance.
+
+    data: The dataframe, as formatted by noise_per_channeldepth(); its "session" and
+        "SDs" columns are used.
+
+    myexp: The experiment, defined in base.py containing the session information.
+
+    Sessions without any rows in data are skipped. Every figure is displayed with
+    plt.show(); nothing is returned.
+    """
+    max_clusters = 9
+
+    for session in myexp:
+        name = session.name
+        ses_data = data.loc[data["session"] == name]
+        # KMeans wants a 2D (n_samples, n_features) array: one noise value per row.
+        sds = ses_data["SDs"].values.reshape(-1, 1)
+        if sds.size == 0:
+            continue  # Nothing to cluster; fitting an empty array would raise.
+
+        k = range(1, min(max_clusters, len(sds)) + 1)
+        sum_of_squares = []  # Inertia (sum of squared centroid distances) per k.
+        for num_clusters in k:
+            kmeans = KMeans(n_clusters=num_clusters)
+            kmeans.fit(sds)
+            sum_of_squares.append(kmeans.inertia_)
+
+        # Inertia should level off once extra centroids stop explaining more of the data.
+        _, ax = plt.subplots()
+        ax.plot(k, sum_of_squares, "bx-")  # bx gives blue x as each point.
+        ax.set_xlabel("Putative Number of Clusters")
+        ax.set_ylabel("Sum of Squares Distances/Inertia")
+        ax.set_title(
+            f"Determining Optimal Number of Clusters for Analysis - Session {name}"
+        )
+        plt.show()
+
+
diff --git a/pixtools/clusters/noise_analysis_sd_kmeans_clustering.py b/pixtools/clusters/noise_analysis_sd_kmeans_clustering.py
@@ -0,0 +1,109 @@
+# Unlike the other noise plot file (by channel), this file clusters the SDs, allowing for a mean square analysis.
+# If there are distinct clusters that can be separated by depth, this indicates a clear relationship between depth and noise.
+
+# First import required packages
+import sys
+import json
+
+from turtle import fd
+from channeldepth import *
+from channeldepth import *
+from sklearn.cluster import KMeans
+
+from pixels import Experiment
+from pixels.behaviours.leverpush import LeverPush
+from pixels.behaviours.pushpull import PushPull
+from pixels.behaviours.reach import Reach
+from pixels.behaviours.no_behaviour import NoBehaviour
+
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import datetime
+import matplotlib.pyplot as plt
+
+from pixtools import clusters
+from pixtools import utils
+
+def noise_per_channeldepth(myexp):
+    """
+    Collects the per-channel noise SDs of every recording into one long dataframe.
+
+    myexp: the experiment defined in base.py; its sessions are iterated and it is passed
+        to meta_spikeglx(myexp, 0) to obtain the channel x/y coordinates.
+
+    Returns a dataframe with the columns "SDs", "session", "x" and "y" and a fresh
+    0..n-1 index, holding one row per channel for every noise_{i}.json file of every
+    session (rows of all files are kept, not only those of a session's last file).
+    The frame is empty when the experiment yields no noise files.
+    """
+    columns = ["SDs", "session", "x", "y"]
+    depths = meta_spikeglx(myexp, 0).to_dataframe()
+    coords = depths[["x", "y"]]  # Generic x and y coords, reused for every session.
+    tot_noise = []  # One dataframe per noise file, across all sessions.
+
+    for session in myexp:
+        for i in range(len(session.files)):
+            path = session.processed / f"noise_{i}.json"
+            with path.open() as noise_file:
+                ses_noise = json.load(noise_file)
+
+            sds = ses_noise["SDs"][0:-1]  # Drop the last entry (the sync channel).
+            chan_coords = coords.iloc[: len(sds)]  # SD number j belongs to coords row j.
+            tot_noise.append(pd.DataFrame({
+                "SDs": sds,
+                "session": session.name,
+                "x": chan_coords["x"].to_numpy(),
+                "y": chan_coords["y"].to_numpy(),
+            }))
+
+    # pd.concat raises on an empty list, so return an empty frame explicitly.
+    return pd.concat(tot_noise, ignore_index=True) if tot_noise else pd.DataFrame(columns=columns)
+
+
+#Now determine the optimal number of clusters to use in the K-means analysis by producing elbow plots
+def elbowplot(data, myexp):
+    """
+    Shows one K-means elbow plot per session to help choose the number of clusters.
+
+    For each session the noise SDs are clustered with k = 1..9 (capped at the number of
+    samples, because KMeans cannot fit more clusters than samples) and the inertia of
+    each fit is plotted against k. The optimal number of clusters is the point at which
+    the elbow bends, i.e. where additional clusters no longer explain much more variance.
+
+    data: The dataframe, as formatted by noise_per_channeldepth(); its "session" and
+        "SDs" columns are used.
+
+    myexp: The experiment, defined in base.py containing the session information.
+
+    Sessions without any rows in data are skipped. Every figure is displayed with
+    plt.show(); nothing is returned.
+    """
+    max_clusters = 9
+
+    for session in myexp:
+        name = session.name
+        ses_data = data.loc[data["session"] == name]
+        # KMeans wants a 2D (n_samples, n_features) array: one noise value per row.
+        sds = ses_data["SDs"].values.reshape(-1, 1)
+        if sds.size == 0:
+            continue  # Nothing to cluster; fitting an empty array would raise.
+
+        k = range(1, min(max_clusters, len(sds)) + 1)
+        sum_of_squares = []  # Inertia (sum of squared centroid distances) per k.
+        for num_clusters in k:
+            kmeans = KMeans(n_clusters=num_clusters)
+            kmeans.fit(sds)
+            sum_of_squares.append(kmeans.inertia_)
+
+        # Inertia should level off once extra centroids stop explaining more of the data.
+        _, ax = plt.subplots()
+        ax.plot(k, sum_of_squares, "bx-")  # bx gives blue x as each point.
+        ax.set_xlabel("Putative Number of Clusters")
+        ax.set_ylabel("Sum of Squares Distances/Inertia")
+        ax.set_title(
+            f"Determining Optimal Number of Clusters for Analysis - Session {name}"
+        )
+        plt.show()
+
+
diff --git a/pixtools/clusters/unit_depths.py b/pixtools/clusters/unit_depths.py
@@ -19,17 +19,17 @@ def unit_depths(exp):
 
     for s, session in enumerate(exp):
         session_depths = {}
-
+        rec_num=0
         for rec_num, probe_depth in enumerate(session.get_probe_depth()):
             rec_depths = {}
-            rec_info = info[s][rec_num]
+            rec_info = info[s]
             id_key = 'id' if 'id' in rec_info else 'cluster_id'  # Depends on KS version
 
             for unit in rec_info[id_key]:
                 unit_info = rec_info.loc[rec_info[id_key] == unit].iloc[0].to_dict()
                 rec_depths[unit] = probe_depth - unit_info["depth"]
 
-            session_depths[rec_num] = pd.DataFrame(rec_depths, index=["depth"])
+            session_depths[0] = pd.DataFrame(rec_depths, index=["depth"])
 
         depths.append(pd.concat(session_depths, axis=1, names=["rec_num", "unit"]))
 

diff --git a/pixtools/responsiveness/CI_Analysis_pointplot.py b/pixtools/responsiveness/CI_Analysis_pointplot.py
@@ -0,0 +1,81 @@
+def significance_extraction(CI):
+    """
+    This function takes the output of the get_aligned_spike_rate_CI method under the myexp
+    class and extracts the significant units, returning a dataframe in the same format.
+
+    A unit (column) is significant when its confidence interval does not straddle zero,
+    i.e. its 2.5th percentile is above zero or its 97.5th percentile is below zero.
+
+    CI: The dataframe created by the CI calculation previously mentioned. It must have
+        rows labelled 2.5 and 97.5 and one column per unit, keyed by three column levels.
+
+    Returns the significant columns of CI, in their original order, with the column
+    levels named "session", "unit" and "rec_num". If no unit is significant the result
+    has the same rows but no columns.
+    """
+    lower = CI.loc[2.5]
+    upper = CI.loc[97.5]
+
+    # One boolean per column; comparisons against NaN are False, so units without a
+    # valid interval are never reported as significant.
+    significant = ((lower > 0) | (upper < 0)).to_numpy()
+
+    # Boolean selection returns a new frame, so renaming its levels leaves CI untouched.
+    sigs = CI.loc[:, significant]
+    # NOTE(review): level names are kept from the previous implementation; confirm that
+    # the order (session, unit, rec_num) matches the column levels of the CI dataframe.
+    sigs.columns = sigs.columns.set_names(["session", "unit", "rec_num"])
+
+    return sigs
+
+def percentile_plot(CIs, sig_CIs, exp, sig_only = False, dir_ascending = False):
+    """
+    This function takes the CI data and significant values and plots them relative to zero,
+    showing one pointplot per session. Percentiles may be plotted in ascending or
+    descending order.
+
+    CIs: The output of the get_aligned_spike_rate_CI function, i.e., bootstrapped confidence
+        intervals for spike rates relative to two points.
+
+    sig_CIs: The output of the significance_extraction function, i.e., the units from the
+        bootstrapping analysis whose confidence intervals do not straddle zero.
+
+    exp: The experiment to analyse, defined in base.py; iterated for its sessions.
+
+    sig_only: Whether to plot only the significant values obtained from the bootstrapping
+        analysis (True/False). Any other value raises TypeError.
+
+    dir_ascending: Whether to plot the values in ascending order (True/False)
+    """
+    # Pick the dataset first so that a bad flag fails before any work is done.
+    if sig_only is True:
+        source = sig_CIs
+    elif sig_only is False:
+        source = CIs
+    else:
+        raise TypeError("Sig_only argument must be a boolean operator (True/False)")
+
+    # This module has no top-level imports; wrap is assumed to be textwrap.wrap.
+    from textwrap import wrap
+    import matplotlib.pyplot as plt
+    import seaborn as sns
+
+    # Sort the data into long form by value; the "index" column is then renumbered to
+    # follow that order, which allows ordered plotting.
+    data = source.reset_index().melt("percentile").sort_values("value", ascending=dir_ascending)
+    data = data.reset_index()
+    data["index"] = range(data.shape[0])
+
+    # Plot this data for the experimental sessions as a pointplot.
+    for s, session in enumerate(exp):
+        ses_data = data.loc[data.session == s]
+        p = sns.pointplot(
+            x="unit", y="value", data=ses_data,
+            order=ses_data["unit"].unique(),  # unique values prevent double plotting
+            join=False, legend=None)
+        p.set_xlabel("Unit")
+        p.set_ylabel("Confidence Interval")
+        p.set(xticklabels=[])
+        p.axhline(0)
+        plt.suptitle("\n".join(wrap(f"Confidence Intervals By Unit - Grasp vs. Baseline - Session {session.name}")))
+        plt.show()