-
Notifications
You must be signed in to change notification settings - Fork 2
Updated pixels analysis #1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 6 commits
e0f43d7
1bdfe1e
c714062
5d752d2
2b890cd
0002ff3
3bb076c
f9f7256
ab5255f
c7f5f7b
be089ce
f543b03
8aaa5ec
9e45ffb
7883abd
8ede878
7c37a03
266c567
49d6914
cbb794e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,178 @@ | ||
| # Unlike other noiseplot file (by channel) this file will cluster the SDs, allowing for a mean square analysis | ||
| # If there are distinct clusters able to be seperated by depth, it will indicate that there is a clear relationship to noise | ||
| # TODO: Test Functionality after noise analysis is complete, push to pixtools. | ||
|
|
||
| # First import required packages | ||
| import sys | ||
| import json | ||
|
|
||
| sys.path.insert( | ||
| 0, "/home/s1735718/PixelsAnalysis/pixels" | ||
| ) # Line will allow me to debug base.py code locally rather than github copy | ||
| sys.path.append("/home/s1735718/PixelsAnalysis/pixels-analysis") | ||
|
|
||
| from turtle import fd | ||
| from base import * | ||
| from channeldepth import * | ||
| from channeldepth import * | ||
| from sklearn.cluster import KMeans | ||
|
|
||
| from pixels import Experiment | ||
| from pixels.behaviours.leverpush import LeverPush | ||
| from pixels.behaviours.pushpull import PushPull | ||
| from pixels.behaviours.reach import Reach | ||
| from pixels.behaviours.no_behaviour import NoBehaviour | ||
|
|
||
| import numpy as np | ||
| import pandas as pd | ||
| import seaborn as sns | ||
| import datetime | ||
| import matplotlib.pyplot as plt | ||
|
|
||
| from pixtools import clusters | ||
| from pixtools import utils | ||
|
|
||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. can you remove imports that are not needed in this file? None of those behaviour classes are needed |
||
| # Now we shall read in the experimental data, defined below | ||
| # Remember, this selection is defined as exps, NOT myexp which is defined in the normal base file. Check this before continuing! | ||
|
|
||
| # First define the experiment to analyse and sessions | ||
| noise_tests = Experiment( | ||
| ["noisetest1"], | ||
| NoBehaviour, | ||
| "~/duguidlab/visuomotor_control/neuropixels", | ||
| ) | ||
|
|
||
| noise_test_unchanged = Experiment( | ||
| ["noisetest_unchanged"], | ||
| NoBehaviour, | ||
| "~/duguidlab/visuomotor_control/neuropixels", | ||
| ) | ||
|
|
||
| noise_test_nopi = Experiment( | ||
| ["noisetest_nopi"], | ||
| NoBehaviour, | ||
| "~/duguidlab/visuomotor_control/neuropixels", | ||
| ) | ||
|
|
||
| noise_test_no_caps = Experiment( | ||
| "VR50", | ||
| Reach, | ||
| "~/duguidlab/visuomotor_control/neuropixels", | ||
| ) | ||
|
|
||
| # This contains the list of experiments we want to plot noise for, here only interested in the reaching task | ||
| # Remember to change this in base.py | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Likewise the |
||
| exps = {"reaching": myexp} | ||
|
|
||
|
|
||
| def noise_per_channeldepth(exp_list, myexp): | ||
| """ | ||
| Function takes the experiments defined above (as exps dictionary) and extracts the noise for each channel, combining this into a dataframe | ||
|
|
||
| exp_list: the experiment dictionary defined above. | ||
|
|
||
| myexp: the experiment defined in base.py, will extract the depth information from here. | ||
| """ | ||
| noise = [] # Create the empty array to hold the noise information | ||
|
|
||
| for name, exp in exp_list.items(): | ||
| for session in exp: | ||
| for i in range(len(session.files)): | ||
| path = session.processed / f"noise_{i}.json" | ||
| with path.open() as fd: | ||
| ses_noise = json.load(fd) | ||
| date = datetime.datetime.strptime(session.name[:6], "%y%m%d") | ||
| for SD in ses_noise["SDs"]: | ||
| noise.append((session.name, date, name, SD)) | ||
|
|
||
| df = pd.DataFrame(noise, columns=["session", "date", "project", "SDs"]) | ||
|
|
||
| # Now that we have saved the noise for this experiment, must import the depth information and concatenate | ||
| depth = meta_spikeglx(myexp, 0) | ||
| depth = depth.to_dataframe() | ||
|
|
||
| df2 = pd.concat([df, depth], axis=1) | ||
|
|
||
| return df2 | ||
|
|
||
|
|
||
| # Now that we have both noise and depth information for every channel, we may perform a means clustering | ||
| # Initiate the kmeans class, describing the parameters of our analysis | ||
| # First double check how many clusters are required to best describe the data | ||
| # Reshape the array to allow clustering | ||
| df2 = noise_per_channeldepth(exps, myexp) | ||
|
|
||
| # Now determine the optimal number of clusters | ||
| def elbowplot(data, myexp): | ||
|
|
||
| """ | ||
|
|
||
| This function takes data formatted according to the function above, containing the noise values for all channels | ||
| Will iterate through each experimental session, producing the appropriate graph. Should take the optimal number of clusters as the point at which the elbow bends. | ||
| This point is defined as the boundary where additional clusters no longer explain much more variance in the data. | ||
|
|
||
| data: The dataframe, as formatted by noise_per_channel() | ||
|
|
||
| myexp: The experiment, defined in base.py containing the session information. | ||
|
|
||
| """ | ||
|
|
||
| for s, session in enumerate(myexp): | ||
| name = session.name | ||
| ses_data = data.loc[data["session"] == name] | ||
| df3 = ses_data["SDs"].values.reshape( | ||
| -1, 1 | ||
| ) # Just gives all noise values, for each session | ||
| Sum_of_squares = [] # create an empty list to store these in. | ||
|
|
||
| k = range(1, 10) | ||
| for num_clusters in k: | ||
| kmeans = KMeans(n_clusters=num_clusters) | ||
| kmeans.fit(df3) | ||
| Sum_of_squares.append(kmeans.inertia_) | ||
|
|
||
| fig, ax = plt.subplots() | ||
|
|
||
| # This code will plot the elbow graph to give an overview of the variance in the data explained by the varying the number of clusters | ||
| # This gives the distance from the centroids, as a measure of the variability explained | ||
| # We want this to drop off indicating that there is no remaining data explained by further centroid inclusion | ||
|
|
||
| # Figure has two rows, one columns, this is the first plot | ||
| plt.plot(k, Sum_of_squares, "bx-") # bx gives blue x as each point. | ||
| plt.xlabel("Putative Number of Clusters") | ||
| plt.ylabel("Sum of Squares Distances/Inertia") | ||
| plt.title( | ||
| f"Determining Optimal Number of Clusters for Analysis - Session {name}" | ||
| ) | ||
|
|
||
| plt.show() | ||
|
|
||
|
|
||
| # elbowplot(df2, myexp) | ||
| # Must now define kmeans parameters based on elbow analysis! | ||
| # Seems that 2 clusters is still optimal across datasets | ||
|
|
||
| kmeans = KMeans( | ||
| init="random", # Initiate the iterative analysis with random centres | ||
| n_clusters=2, # How many clusters to bin the data into, based on the elbow analysis! | ||
| n_init=10, # Number of centroids to generate initially | ||
| max_iter=300, # Max number of iterations before ceasing analysis | ||
| random_state=42, # The random number seed for centroid generation, can really be anything for our purposes | ||
| ) | ||
|
|
||
|
|
||
| for s, session in enumerate(myexp): | ||
| name = session.name | ||
|
|
||
| df2 = df2.loc[data["session"] == name] | ||
| x_kmeans = kmeans.fit(df2) | ||
| y_means = kmeans.fit_predict(df2) | ||
|
|
||
| # Now plot the kmeans analysis | ||
| # Remember we use our original data (df2) but use the df3 analysis to generate the labels | ||
| plt.scatter(df2["y"], df2["SDs"], c=y_means, cmap="viridis") | ||
| plt.xlabel("Probe Channel Y-Coordinate") | ||
| plt.ylabel("Channel Noise (SD)") | ||
| plt.title(f"{myexp[0].name} Channel Noise k-Mean Clustering Analysis") | ||
|
|
||
| plt.show() | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| def significance_extraction(CI): | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems that the functions in this module are duplicated elsewhere in one of your own scripts. What's the difference between them? |
||
| """ | ||
| This function takes the output of the get_aligned_spike_rate_CI method under the myexp class and extracts any significant values, returning a dataframe in the same format. | ||
|
|
||
| CI: The dataframe created by the CI calculation previously mentioned | ||
|
|
||
| """ | ||
|
|
||
| sig = [] | ||
| keys=[] | ||
| rec_num = 0 | ||
|
|
||
| #This loop iterates through each column, storing the data as un, and the location as s | ||
| for s, unit in CI.items(): | ||
| #Now iterate through each recording, and unit | ||
| #Take any significant values and append them to lists. | ||
| if unit.loc[2.5] > 0 or unit.loc[97.5] < 0: | ||
| sig.append(unit) #Append the percentile information for this column to a list | ||
| keys.append(s) #append the information containing the point at which the iteration currently stands | ||
|
|
||
|
|
||
| #Now convert this list to a dataframe, using the information stored in the keys list to index it | ||
| sigs = pd.concat( | ||
| sig, axis = 1, copy = False, | ||
| keys=keys, | ||
| names=["session", "unit", "rec_num"] | ||
| ) | ||
|
|
||
| return sigs | ||
|
|
||
| def percentile_plot(CIs, sig_CIs, exp, sig_only = False, dir_ascending = False): | ||
| """ | ||
|
|
||
| This function takes the CI data and significant values and plots them relative to zero. | ||
| May specify if percentiles should be plotted in ascending or descending order. | ||
|
|
||
| CIs: The output of the get_aligned_spike_rate_CI function, i.e., bootstrapped confidence intervals for spike rates relative to two points. | ||
|
|
||
| sig_CIs: The output of the significance_extraction function, i.e., the units from the bootstrapping analysis whose confidence intervals do not straddle zero | ||
|
|
||
| exp: The experimental session to analyse, defined in base.py | ||
|
|
||
| sig_only: Whether to plot only the significant values obtained from the bootstrapping analysis (True/False) | ||
|
|
||
| dir_ascending: Whether to plot the values in ascending order (True/False) | ||
|
|
||
| """ | ||
| #First sort the data into long form for both datasets, by percentile | ||
| CIs_long = CIs.reset_index().melt("percentile").sort_values("value", ascending= dir_ascending) | ||
| CIs_long = CIs_long.reset_index() | ||
| CIs_long["index"] = pd.Series(range(0, CIs_long.shape[0]))#reset the index column to allow ordered plotting | ||
|
|
||
| CIs_long_sig = sig_CIs.reset_index().melt("percentile").sort_values("value", ascending=dir_ascending) | ||
| CIs_long_sig = CIs_long_sig.reset_index() | ||
| CIs_long_sig["index"] = pd.Series(range(0, CIs_long_sig.shape[0])) | ||
|
|
||
| #Now select if we want only significant values plotted, else raise an error. | ||
| if sig_only is True: | ||
| data = CIs_long_sig | ||
|
|
||
| elif sig_only is False: | ||
| data = CIs_long | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe move the dataframe wrangling into the if statement? That way |
||
|
|
||
| else: | ||
| raise TypeError("Sig_only argument must be a boolean operator (True/False)") | ||
|
|
||
| #Plot this data for the experimental sessions as a pointplot. | ||
| for s, session in enumerate(exp): | ||
| name = session.name | ||
|
|
||
| p = sns.pointplot( | ||
| x="unit", y = "value", data = data.loc[(data.session == s)], | ||
| order = data.loc[(data.session == s)]["unit"].unique(), join = False, legend = None) #Plots in the order of the units as previously set, uses unique values to prevent double plotting | ||
|
|
||
| p.set_xlabel("Unit") | ||
| p.set_ylabel("Confidence Interval") | ||
| p.set(xticklabels=[]) | ||
| p.axhline(0) | ||
| plt.suptitle("\n".join(wrap(f"Confidence Intervals By Unit - Grasp vs. Baseline - Session {name}"))) #Wraps the title of the plot to fit on the page. | ||
|
|
||
| plt.show() | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,3 @@ | ||
| { | ||
| "python.formatting.provider": "black" | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please could you move this module into a new folder
pixtools/noise/rather thanpixtools/clusters/? 'clusters' here refers to clusters of spikes as determined by kilosort, not clustering analyses.