Skip to content
Open
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
109 changes: 109 additions & 0 deletions pixtools/clusters/noise_analysis_SD_kmeans_Clustering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Unlike the other noise-plot file (by channel), this file clusters the SDs, allowing for a mean-square analysis
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please could you move this module into a new folder pixtools/noise/ rather than pixtools/clusters/? 'clusters' here refers to clusters of spikes as determined by kilosort, not clustering analyses.

# If distinct clusters can be separated by depth, this indicates a clear relationship between depth and noise

# First import required packages
import sys
import json

from turtle import fd
from channeldepth import *
from channeldepth import *
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please could you remove this turtle import? It isn't used in this module, and in any case isn't a dependency of pixels so it would be good to keep those limited to what is already needed.

Also, what is channeldepth here? I don't see the module.

from sklearn.cluster import KMeans

from pixels import Experiment
from pixels.behaviours.leverpush import LeverPush
from pixels.behaviours.pushpull import PushPull
from pixels.behaviours.reach import Reach
from pixels.behaviours.no_behaviour import NoBehaviour

import numpy as np
import pandas as pd
import seaborn as sns
import datetime
import matplotlib.pyplot as plt

from pixtools import clusters
from pixtools import utils

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you remove imports that are not needed in this file? None of those behaviour classes are needed

def noise_per_channeldepth(myexp):
    """
    Function extracts the noise for each channel, combining this into a dataframe

    For each session in the experiment, every `noise_{i}.json` file in the session's
    processed folder is read (one per entry in `session.files`) and its "SDs" values
    are paired with the x/y coordinates of the matching channel.

    myexp: the experiment defined in base.py, will extract the depth information from here.

    Returns a dataframe with the columns "SDs", "session", "x" and "y", holding one row
    per channel per noise file, with a fresh 0..n-1 index.

    Raises ValueError (from pd.concat) if the experiment yields no noise files at all.
    """
    # The same generic x and y coords are reused for every session and file.
    depths = meta_spikeglx(myexp, 0).to_dataframe()
    coords = depths[["x", "y"]]

    # NOTE(review): the source this was recovered from had lost its indentation; it is
    # assumed here that every noise file of every session is accumulated - confirm.
    per_file = []
    for session in myexp:
        for i in range(len(session.files)):
            path = session.processed / f"noise_{i}.json"
            with path.open() as handle:
                ses_noise = json.load(handle)

            # Drop the last entry: per the original author it is the sync channel,
            # leaving the first 384 recording channels.
            sds = ses_noise["SDs"][:-1]
            n_channels = len(sds)

            # Build the whole file's table in one go rather than one single-row
            # dataframe per channel, which is far slower to construct and concatenate.
            per_file.append(
                pd.DataFrame(
                    {
                        "SDs": sds,
                        "session": session.name,
                        "x": coords["x"].iloc[:n_channels].to_numpy(),
                        "y": coords["y"].iloc[:n_channels].to_numpy(),
                    }
                )
            )

    # ignore_index gives every row a unique label; previously all rows were labelled 0.
    return pd.concat(per_file, ignore_index=True)


#Now determine the optimal number of clusters to use in the K-means analysis by producing elbow plots
def elbowplot(data, myexp, max_clusters=9):
    """
    Plot one elbow graph per session to help choose the number of clusters for K-means.

    For every session in the experiment, K-means is fitted to that session's noise
    values (the "SDs" column) for 1..max_clusters clusters, and the inertia of each fit
    is plotted against the number of clusters. Should take the optimal number of
    clusters as the point at which the elbow bends, i.e. the boundary where additional
    clusters no longer explain much more variance in the data.

    data: The dataframe, as formatted by noise_per_channeldepth()

    myexp: The experiment, defined in base.py containing the session information.

    max_clusters: The largest number of clusters to try (default 9, i.e. k = 1..9).

    Raises ValueError if max_clusters is smaller than 1.
    """
    if max_clusters < 1:
        raise ValueError("max_clusters must be at least 1")

    cluster_counts = range(1, max_clusters + 1)

    for session in myexp:
        name = session.name

        # Column vector of this session's noise values: KMeans.fit needs 2D input.
        session_sds = data.loc[data["session"] == name, "SDs"].values.reshape(-1, 1)

        # Inertia is the sum of squared distances to the closest centroid; it should
        # level off once further centroids stop explaining the remaining variability.
        inertias = [
            KMeans(n_clusters=k).fit(session_sds).inertia_ for k in cluster_counts
        ]

        _, ax = plt.subplots()
        ax.plot(cluster_counts, inertias, "bx-")  # bx gives blue x as each point.
        ax.set_xlabel("Putative Number of Clusters")
        ax.set_ylabel("Sum of Squares Distances/Inertia")
        ax.set_title(
            f"Determining Optimal Number of Clusters for Analysis - Session {name}"
        )

        # NOTE(review): shown once per session so each session gets its own figure;
        # the recovered source had lost its indentation - confirm this placement.
        plt.show()


109 changes: 109 additions & 0 deletions pixtools/clusters/noise_analysis_sd_kmeans_clustering.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
# Unlike the other noise-plot file (by channel), this file clusters the SDs, allowing for a mean-square analysis
# If distinct clusters can be separated by depth, this indicates a clear relationship between depth and noise

# First import required packages
import sys
import json

from turtle import fd
from channeldepth import *
from channeldepth import *
from sklearn.cluster import KMeans

from pixels import Experiment
from pixels.behaviours.leverpush import LeverPush
from pixels.behaviours.pushpull import PushPull
from pixels.behaviours.reach import Reach
from pixels.behaviours.no_behaviour import NoBehaviour

import numpy as np
import pandas as pd
import seaborn as sns
import datetime
import matplotlib.pyplot as plt

from pixtools import clusters
from pixtools import utils

def noise_per_channeldepth(myexp):
    """
    Function extracts the noise for each channel, combining this into a dataframe

    For each session in the experiment, every `noise_{i}.json` file in the session's
    processed folder is read (one per entry in `session.files`) and its "SDs" values
    are paired with the x/y coordinates of the matching channel.

    myexp: the experiment defined in base.py, will extract the depth information from here.

    Returns a dataframe with the columns "SDs", "session", "x" and "y", holding one row
    per channel per noise file, with a fresh 0..n-1 index.

    Raises ValueError (from pd.concat) if the experiment yields no noise files at all.
    """
    # The same generic x and y coords are reused for every session and file.
    depths = meta_spikeglx(myexp, 0).to_dataframe()
    coords = depths[["x", "y"]]

    # NOTE(review): the source this was recovered from had lost its indentation; it is
    # assumed here that every noise file of every session is accumulated - confirm.
    per_file = []
    for session in myexp:
        for i in range(len(session.files)):
            path = session.processed / f"noise_{i}.json"
            with path.open() as handle:
                ses_noise = json.load(handle)

            # Drop the last entry: per the original author it is the sync channel,
            # leaving the first 384 recording channels.
            sds = ses_noise["SDs"][:-1]
            n_channels = len(sds)

            # Build the whole file's table in one go rather than one single-row
            # dataframe per channel, which is far slower to construct and concatenate.
            per_file.append(
                pd.DataFrame(
                    {
                        "SDs": sds,
                        "session": session.name,
                        "x": coords["x"].iloc[:n_channels].to_numpy(),
                        "y": coords["y"].iloc[:n_channels].to_numpy(),
                    }
                )
            )

    # ignore_index gives every row a unique label; previously all rows were labelled 0.
    return pd.concat(per_file, ignore_index=True)


#Now determine the optimal number of clusters to use in the K-means analysis by producing elbow plots
# Review comment (Member): You have this function twice?
def elbowplot(data, myexp, max_clusters=9):
    """
    Plot one elbow graph per session to help choose the number of clusters for K-means.

    For every session in the experiment, K-means is fitted to that session's noise
    values (the "SDs" column) for 1..max_clusters clusters, and the inertia of each fit
    is plotted against the number of clusters. Should take the optimal number of
    clusters as the point at which the elbow bends, i.e. the boundary where additional
    clusters no longer explain much more variance in the data.

    data: The dataframe, as formatted by noise_per_channeldepth()

    myexp: The experiment, defined in base.py containing the session information.

    max_clusters: The largest number of clusters to try (default 9, i.e. k = 1..9).

    Raises ValueError if max_clusters is smaller than 1.
    """
    if max_clusters < 1:
        raise ValueError("max_clusters must be at least 1")

    cluster_counts = range(1, max_clusters + 1)

    for session in myexp:
        name = session.name

        # Column vector of this session's noise values: KMeans.fit needs 2D input.
        session_sds = data.loc[data["session"] == name, "SDs"].values.reshape(-1, 1)

        # Inertia is the sum of squared distances to the closest centroid; it should
        # level off once further centroids stop explaining the remaining variability.
        inertias = [
            KMeans(n_clusters=k).fit(session_sds).inertia_ for k in cluster_counts
        ]

        _, ax = plt.subplots()
        ax.plot(cluster_counts, inertias, "bx-")  # bx gives blue x as each point.
        ax.set_xlabel("Putative Number of Clusters")
        ax.set_ylabel("Sum of Squares Distances/Inertia")
        ax.set_title(
            f"Determining Optimal Number of Clusters for Analysis - Session {name}"
        )

        # NOTE(review): shown once per session so each session gets its own figure;
        # the recovered source had lost its indentation - confirm this placement.
        plt.show()


6 changes: 3 additions & 3 deletions pixtools/clusters/unit_depths.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,17 @@ def unit_depths(exp):

for s, session in enumerate(exp):
session_depths = {}

rec_num=0
for rec_num, probe_depth in enumerate(session.get_probe_depth()):
rec_depths = {}
rec_info = info[s][rec_num]
rec_info = info[s]
id_key = 'id' if 'id' in rec_info else 'cluster_id' # Depends on KS version

for unit in rec_info[id_key]:
unit_info = rec_info.loc[rec_info[id_key] == unit].iloc[0].to_dict()
rec_depths[unit] = probe_depth - unit_info["depth"]

session_depths[rec_num] = pd.DataFrame(rec_depths, index=["depth"])
session_depths[0] = pd.DataFrame(rec_depths, index=["depth"])

depths.append(pd.concat(session_depths, axis=1, names=["rec_num", "unit"]))

Expand Down
81 changes: 81 additions & 0 deletions pixtools/responsiveness/CI_Analysis_pointplot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# Review comment (Member): It seems that the functions in this module are duplicated
# elsewhere in one of your own scripts. What's the difference between them?
def significance_extraction(CI):
    """
    This function takes the output of the get_aligned_spike_rate_CI method under the myexp class and extracts any significant values, returning a dataframe in the same format.

    A column counts as significant when its confidence interval does not straddle
    zero, i.e. its 2.5 percentile is above zero or its 97.5 percentile is below zero.

    CI: The dataframe created by the CI calculation previously mentioned. It must be
    indexed by percentile, with rows labelled 2.5 and 97.5.

    Returns a dataframe holding only the significant columns of CI, in their original
    order, with the column levels named "session", "unit" and "rec_num". If no column
    is significant, a dataframe with CI's index and no columns is returned.
    """
    # This module has no import block of its own, so pandas is brought in locally.
    import pandas as pd

    # NOTE(review): level names are kept as in the original code; confirm that they
    # match the actual order of the column levels in CI.
    level_names = ["session", "unit", "rec_num"]

    # Keep each significant column together with its column key (its location in CI).
    significant = [
        (key, column)
        for key, column in CI.items()
        if column.loc[2.5] > 0 or column.loc[97.5] < 0
    ]

    if not significant:
        # pd.concat raises on an empty list, so build the empty result explicitly.
        return pd.DataFrame(
            index=CI.index,
            columns=pd.MultiIndex.from_arrays([[], [], []], names=level_names),
        )

    keys = [key for key, _ in significant]
    columns = [column for _, column in significant]

    # Convert the list back to a dataframe, using the stored keys to index its columns.
    return pd.concat(columns, axis=1, keys=keys, names=level_names)

def _ci_long_form(frame, ascending):
    """Melt a CI dataframe to long form, sorted by value, with an ordered "index" column."""
    long_form = (
        frame.reset_index()
        .melt("percentile")
        .sort_values("value", ascending=ascending)
        .reset_index()
    )
    # Overwrite the "index" column created by reset_index() with the sorted position,
    # to allow ordered plotting.
    long_form["index"] = range(len(long_form))
    return long_form


def percentile_plot(CIs, sig_CIs, exp, sig_only=False, dir_ascending=False):
    """
    This function takes the CI data and significant values and plots them relative to zero.
    May specify if percentiles should be plotted in ascending or descending order.
    One pointplot is drawn per session in exp.

    CIs: The output of the get_aligned_spike_rate_CI function, i.e., bootstrapped confidence intervals for spike rates relative to two points.

    sig_CIs: The output of the significance_extraction function, i.e., the units from the bootstrapping analysis whose confidence intervals do not straddle zero

    exp: The experimental session to analyse, defined in base.py

    sig_only: Whether to plot only the significant values obtained from the bootstrapping analysis (True/False)

    dir_ascending: Whether to plot the values in ascending order (True/False)

    Raises TypeError if sig_only is neither True nor False.
    """
    # Pick the dataset first, so that a bad sig_only fails before any work is done.
    if sig_only is True:
        source = sig_CIs
    elif sig_only is False:
        source = CIs
    else:
        raise TypeError("Sig_only argument must be a boolean operator (True/False)")

    # This module has no import block of its own, so the plotting dependencies are
    # brought in locally.
    from textwrap import wrap

    import matplotlib.pyplot as plt
    import seaborn as sns

    # Only the dataset that will actually be plotted is converted to long form.
    data = _ci_long_form(source, dir_ascending)

    # Plot this data for the experimental sessions as a pointplot.
    for s, session in enumerate(exp):
        name = session.name
        session_data = data.loc[data.session == s]

        # Plots in the order of the units as previously sorted; unique values prevent
        # double plotting.
        # NOTE(review): `join` is reported as deprecated in recent seaborn releases -
        # check the pinned version before changing this call.
        p = sns.pointplot(
            x="unit",
            y="value",
            data=session_data,
            order=session_data["unit"].unique(),
            join=False,
            legend=None,
        )

        p.set_xlabel("Unit")
        p.set_ylabel("Confidence Interval")
        p.set(xticklabels=[])
        p.axhline(0)
        # Wraps the title of the plot to fit on the page.
        plt.suptitle("\n".join(wrap(f"Confidence Intervals By Unit - Grasp vs. Baseline - Session {name}")))

        # NOTE(review): shown once per session so sessions are not drawn on top of each
        # other; the recovered source had lost its indentation - confirm this placement.
        plt.show()
Loading