Source code for cicada.preprocessing.cicada_data_to_nwb

import yaml
import os
from cicada.preprocessing.utils import class_name_to_module_name, get_subfiles, get_subdirs
import importlib
from pynwb import NWBHDF5IO
from pynwb import NWBFile
from pynwb.file import Subject
from pynwb.epoch import TimeIntervals
from datetime import datetime
from dateutil.tz import tzlocal
import numpy as np
import hdf5storage

"""
Load data files and create the NWB file
"""

def filter_list_of_files(dir_path, files, extensions, directory=None):
    """
    Take a list of file names and filter it: with no extensions (empty list or None),
    remove the entries whose name starts with "."; with a list of extensions, keep only
    the files with one of these extensions. It returns a new list of full paths.

    Args:
        dir_path (str): Path of the directory containing the files
        files (list): List of file names to be filtered
        extensions (list): File extensions to use as a filter
        directory (str or list): Directory keyword(s), used to identify sub-directories
            in which to look for files with the given extensions

    Examples:
        >>> filter_list_of_files("/data", ["file1.py", "file2.c", "file3.h"], ["py"])
        ['/data/file1.py']
    """
    filtered_list = []
    if directory:
        # if directory is a list, it means we're going through a list of directories,
        # following the depth order
        if isinstance(directory, str):
            directories_keywords = [directory]
        else:
            directories_keywords = directory
        directories = []
        for dir_keyword in directories_keywords:
            for (dirpath, dirnames, filenames) in os.walk(dir_path):
                for dir_name in dirnames:
                    if dir_keyword in dir_name:
                        directories.append(dir_name)
                # only first level
                break
        if len(directories) > 0:
            files = []
            # looking for files in the matching directories
            for dir_name in directories:
                subfiles = get_subfiles(os.path.join(dir_path, dir_name))
                # using full paths
                subfiles = [os.path.join(dir_path, dir_name, f) for f in subfiles]
                files.extend(subfiles)
        else:
            files = []
    else:
        # full path option
        files = [os.path.join(dir_path, f) for f in files]
    if not extensions:
        filtered_list = [file for file in files if not os.path.basename(file).startswith(".")]
        return filtered_list
    for extension in extensions:
        filtered_list.extend([file for file in files
                              if os.path.basename(file).endswith("." + extension)
                              and (not os.path.basename(file).startswith("."))])
    return filtered_list
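
# A minimal usage sketch for filter_list_of_files; the directory and file names below
# are hypothetical, and the function is assumed to run against an existing directory.
def _example_filter_list_of_files():
    data_dir = "/data/mouse_1_session_1"
    files = get_subfiles(data_dir)
    # keep only the .mat files, returned as full paths, hidden files excluded
    mat_files = filter_list_of_files(dir_path=data_dir, files=files, extensions=["mat"])
    print(mat_files)  # e.g. ['/data/mouse_1_session_1/traces.mat']
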
def filter_list_according_to_keywords(list_to_filter, keywords, keywords_to_exclude):
    """
    Conditional loop to remove all files or directories not containing any of the
    keywords, or containing one of the excluded keywords. In-place list modification.

    Args:
        list_to_filter (list): List containing all files/directories (full paths) to be filtered
        keywords (list): If an entry doesn't contain any of these keywords, remove it from the list
        keywords_to_exclude (list): If an entry contains one of these keywords, remove it from the list

    Examples:
        >>> files = ["file1.py", "file2.c", "file2.h"]
        >>> filter_list_according_to_keywords(files, ["2"], ["h"])
        >>> files
        ['file2.c']
    """
    counter = 0
    while counter < len(list_to_filter):
        delete = True
        file_name = os.path.basename(list_to_filter[counter])
        # the file should contain at least one keyword
        for keyword in keywords:
            # files starting with "." are always removed
            if (not file_name.startswith(".")) and (keyword.lower() in file_name.lower()):
                delete = False
        if delete:
            del list_to_filter[counter]
        if (not delete) and keywords_to_exclude:
            delete = False
            for keyword_to_exclude in keywords_to_exclude:
                # we lower both, to make the comparison case-insensitive
                if keyword_to_exclude.lower() in file_name.lower():
                    delete = True
            if delete:
                del list_to_filter[counter]
        if not delete:
            counter += 1
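
# A minimal sketch of the in-place keyword filtering; the file names are hypothetical.
def _example_filter_list_according_to_keywords():
    candidates = ["suite2p_traces.mat", "raw_movie.tif", "suite2p_spikes.mat"]
    # keep the entries containing "suite2p", then drop those containing "spikes"
    filter_list_according_to_keywords(candidates, keywords=["suite2p"],
                                      keywords_to_exclude=["spikes"])
    print(candidates)  # -> ['suite2p_traces.mat']
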
def create_nwb_file(subject_data_yaml_file, session_data_yaml_file):
    """
    Create an NWB file object using all metadata contained in the YAML files

    Args:
        subject_data_yaml_file (str): Absolute path to the YAML file containing the subject metadata
        session_data_yaml_file (str): Absolute path to the YAML file containing the session metadata
    """
    subject_data_yaml = None
    with open(subject_data_yaml_file, 'r') as stream:
        subject_data_yaml = yaml.load(stream, Loader=yaml.FullLoader)
    if subject_data_yaml is None:
        print(f"Issue while reading the file {subject_data_yaml_file}")
        return None
    session_data_yaml = None
    with open(session_data_yaml_file, 'r') as stream:
        session_data_yaml = yaml.load(stream, Loader=yaml.FullLoader)
    if session_data_yaml is None:
        print(f"Issue while reading the file {session_data_yaml_file}")
        return None

    keys_kwargs_subject = ["age", "weight", "genotype", "strain", "subject_id", "species",
                           "sex", "date_of_birth"]
    kwargs_subject = dict()
    for key in keys_kwargs_subject:
        kwargs_subject[key] = subject_data_yaml.get(key)
        if kwargs_subject[key] is not None:
            kwargs_subject[key] = str(kwargs_subject[key])
    if kwargs_subject.get("date_of_birth") is not None:
        kwargs_subject["date_of_birth"] = datetime.strptime(kwargs_subject["date_of_birth"],
                                                            '%m/%d/%Y')
    print(f'kwargs_subject {kwargs_subject}')
    subject = Subject(**kwargs_subject)

    #####################################
    # ###   creating the NWB file    ###
    #####################################
    keys_kwargs_nwb_file = ["session_description", "identifier", "session_id",
                            "session_start_time", "experimenter", "experiment_description",
                            "institution", "keywords", "notes", "pharmacology", "protocol",
                            "related_publications", "source_script", "source_script_file_name",
                            "surgery", "virus", "stimulus_notes", "slices", "lab"]
    kwargs_nwb_file = dict()
    for key in keys_kwargs_nwb_file:
        kwargs_nwb_file[key] = session_data_yaml.get(key)
        if kwargs_nwb_file[key] is not None:
            if not isinstance(kwargs_nwb_file[key], list):
                kwargs_nwb_file[key] = str(kwargs_nwb_file[key])
    # every key is set by the loop above (possibly to None), so the presence of the
    # mandatory fields is checked on the values, not on the keys
    if kwargs_nwb_file.get("session_description") is None:
        print(f"session_description is needed in the file {session_data_yaml_file}")
        return None
    if kwargs_nwb_file.get("identifier") is None:
        print(f"identifier is needed in the file {session_data_yaml_file}")
        return None
    if kwargs_nwb_file.get("session_start_time") is None:
        print(f"session_start_time is needed in the file {session_data_yaml_file}")
        return None
    kwargs_nwb_file["session_start_time"] = datetime.strptime(kwargs_nwb_file["session_start_time"],
                                                              '%m/%d/%y %H:%M:%S')
    print(f"kwargs_nwb_file['session_start_time'] {kwargs_nwb_file['session_start_time']}")
    if kwargs_nwb_file.get("session_id") is None:
        kwargs_nwb_file["session_id"] = kwargs_nwb_file["identifier"]

    # #### arguments that are not in the yaml file (yet ?)
    # file_create_date, timestamps_reference_time=None, acquisition=None, analysis=None, stimulus=None,
    # stimulus_template=None, epochs=None, epoch_tags=set(), trials=None, invalid_times=None,
    # time_intervals=None, units=None, modules=None, electrodes=None,
    # electrode_groups=None, ic_electrodes=None, sweep_table=None, imaging_planes=None,
    # ogen_sites=None, devices=None
    kwargs_nwb_file["subject"] = subject
    kwargs_nwb_file["file_create_date"] = datetime.now(tzlocal())
    # TODO: See how to load invalid_times, from the yaml file ?
    # kwargs_nwb_file["invalid_times"] = invalid_times
    print(f'kwargs_nwb_file {kwargs_nwb_file}')
    nwb_file = NWBFile(**kwargs_nwb_file)
    # nwb_file.invalid_times = TimeIntervals(name="invalid_times",
    #                                        description="Time intervals to be removed from analysis")
    return nwb_file
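
# A minimal sketch of create_nwb_file, assuming write access to tmp_dir; the metadata
# values are hypothetical, and the date strings follow the strptime patterns used above
# ('%m/%d/%Y' for date_of_birth, '%m/%d/%y %H:%M:%S' for session_start_time).
def _example_create_nwb_file(tmp_dir):
    subject_yaml = os.path.join(tmp_dir, "subject_data.yaml")
    session_yaml = os.path.join(tmp_dir, "session_data.yaml")
    with open(subject_yaml, 'w') as stream:
        yaml.dump({"subject_id": "mouse_1", "species": "Mus musculus",
                   "sex": "M", "date_of_birth": "12/25/2018"}, stream)
    with open(session_yaml, 'w') as stream:
        yaml.dump({"session_description": "test session",
                   "identifier": "mouse_1_session_1",
                   "session_start_time": "12/25/19 14:00:00"}, stream)
    nwb_file = create_nwb_file(subject_data_yaml_file=subject_yaml,
                               session_data_yaml_file=session_yaml)
    print(nwb_file.identifier)  # -> mouse_1_session_1
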
def convert_data_to_nwb(data_to_convert_dir, default_convert_to_nwb_yml_file, nwb_files_dir):
    """
    Convert all data located in data_to_convert_dir to the NWB format, then create the file.
    Use the YAML files contained in data_to_convert_dir to drive the conversion.
    A YAML file with "session_data" in its name and one with "subject_data" in its name
    must be in the directory, otherwise nothing will happen. A YAML file with "abf" in its
    name will need to be present to convert the abf data.

    Args:
        data_to_convert_dir (str): Absolute path to the directory containing all the data
        default_convert_to_nwb_yml_file (str): Absolute path to the default YAML file used
            to convert to NWB
        nwb_files_dir (str): Absolute path to the directory where to save the NWB file created
    """
    # Get all files and directories present in the path
    files = get_subfiles(data_to_convert_dir)
    dirs = get_subdirs(data_to_convert_dir)
    files = files + dirs

    with open(default_convert_to_nwb_yml_file, 'r') as stream:
        default_config_data_for_conversion = yaml.safe_load(stream)

    config_data_for_conversion = dict()
    # Look for another YAML file containing the keywords, extensions and keywords to exclude
    for file in files:
        if not (file.endswith(".yaml") or file.endswith(".yml")):
            continue
        if file.startswith("."):
            continue
        if "create_nwb_data" in file:
            print("################# create_nwb_data found in file")
            with open(os.path.join(data_to_convert_dir, file), 'r') as stream:
                config_data_for_conversion = yaml.safe_load(stream)

    if len(config_data_for_conversion) == 0:
        config_data_for_conversion = default_config_data_for_conversion
    # If 2 files are provided, the one given by the user takes priority.
    # For now we just take the information from the new file, so the next lines are commented:
    # if default_config_data_for_conversion is not None:
    #     difference = set(list(default_config_data_for_conversion.keys())) - \
    #                  set(list(config_data_for_conversion.keys()))
    #     for arg in list(difference):
    #         config_data_for_conversion[arg] = default_config_data_for_conversion[arg]

    # First we create the nwb file, because it will be needed for everything.
    # The class ConvertToNWB will create the nwb file, based on the 2 yaml files
    create_nwb_data = config_data_for_conversion.pop("CreateNWB")
    session_data_yaml_file = None
    subject_data_yaml_file = None
    for arg in create_nwb_data:
        # If no extension is provided it means we are looking for a directory, so we filter
        # the list of files and directories to only contain directories
        filtered_list = filter_list_of_files(dir_path=data_to_convert_dir, files=files,
                                             extensions=create_nwb_data[arg].get("extension"))
        # Conditional loop to remove all files or directories not containing the keywords
        # or containing excluded keywords
        filter_list_according_to_keywords(list_to_filter=filtered_list,
                                          keywords=create_nwb_data[arg].get("keyword"),
                                          keywords_to_exclude=create_nwb_data[arg].get("keyword_to_exclude"))
        print("Files to pass for " + arg + ": " + str(filtered_list))
        if len(filtered_list) == 0:
            continue
        # If files were found respecting every element, keep them to pass them as arguments
        # (filter_list_of_files already returns full paths)
        if arg == "session_data_yaml":
            session_data_yaml_file = filtered_list[0]
        elif arg == "subject_data_yaml":
            subject_data_yaml_file = filtered_list[0]

    if subject_data_yaml_file is None:
        print(f"Conversion of data in {data_to_convert_dir} not possible, no yaml file found "
              f"with 'subject_data' in its name.")
        return
    if session_data_yaml_file is None:
        print(f"Conversion of data in {data_to_convert_dir} not possible, no yaml file found "
              f"with 'session_data' in its name.")
        return

    nwb_file = create_nwb_file(subject_data_yaml_file=subject_data_yaml_file,
                               session_data_yaml_file=session_data_yaml_file)
    if nwb_file is None:
        return

    # contains the instances of the converter classes, so we can get values from them
    converter_dict = dict()
    order_list = []
    # TODO: See to change it so only classes in order would be instantiated
    if config_data_for_conversion.get("order"):
        order_list = config_data_for_conversion.pop("order")
    class_names_list = list(config_data_for_conversion.keys())
    # putting them in the right order
    class_names_list = order_list + list(set(class_names_list) - set(order_list))
    for class_name in class_names_list:
        if class_name not in config_data_for_conversion:
            # in case a class in order would not have been added to the yaml file
            continue
        keys = list(config_data_for_conversion[class_name].keys())
        if len(keys) == 0:
            continue
        first_key = keys[0]
        if isinstance(first_key, int):
            # means we need to create more than one instance of this class
            for key, config_dict in config_data_for_conversion[class_name].items():
                create_convert_class(class_name, config_dict, converter_dict, nwb_file,
                                     default_convert_to_nwb_yml_file, files, data_to_convert_dir)
        else:
            create_convert_class(class_name, config_data_for_conversion[class_name], converter_dict,
                                 nwb_file, default_convert_to_nwb_yml_file, files, data_to_convert_dir)

    # Create the NWB file in nwb_files_dir, adding the time of creation to the name
    # to make sure we don't erase another one
    time_str = datetime.now().strftime("%Y_%m_%d.%H-%M-%S")
    nwb_name = nwb_file.identifier + "_" + time_str + ".nwb"
    print(f"Before NWBHDF5IO write: nwb_file.epoch_tags {nwb_file.epoch_tags}")
    with NWBHDF5IO(os.path.join(nwb_files_dir, nwb_name), 'w') as io:
        io.write(nwb_file)
    print("NWB file created at : " + str(os.path.join(nwb_files_dir, nwb_name)))
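
# A minimal sketch of a full conversion run; the three paths are hypothetical and must
# point to an existing data directory, a default YAML configuration and an output directory.
def _example_convert_data_to_nwb():
    convert_data_to_nwb(data_to_convert_dir="/data/mouse_1_session_1",
                        default_convert_to_nwb_yml_file="/config/convert_to_nwb_default.yaml",
                        nwb_files_dir="/data/nwb_files")
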
def create_convert_class(class_name, config_dict, converter_dict, nwb_file, yaml_path, files, dir_path):
    """
    Instantiate the converter class class_name and call its convert method with the
    arguments built from config_dict.

    Args:
        class_name (str): Name of the converter class to instantiate
        config_dict (dict): Description of the arguments of the convert method, as read from the YAML file
        converter_dict (dict): Converter instances already created, indexed by class name
        nwb_file (NWBFile): NWB file object to fill
        yaml_path (str): Path to the YAML configuration file, used in error messages
        files (list): Names of the files and directories present in dir_path
        dir_path (str): Absolute path to the directory containing the data

    Returns:
        None
    """
    # Get the class then instantiate it
    module_name = class_name_to_module_name(class_name=class_name)
    module_imported = importlib.import_module("cicada.preprocessing." + module_name)
    class_instance = getattr(module_imported, class_name)
    converter = class_instance(nwb_file)
    # Initialize a dict to contain the arguments to call convert
    arg_dict = {}
    print("Class name : " + str(class_name))
    # Loop through all arguments of the convert method of the corresponding class
    for arg in config_dict:
        if not isinstance(config_dict[arg], dict):
            # in this case we keep the actual value
            continue
        if config_dict[arg].get("from_other_converter"):
            # means we get the argument value from an instance of a converter,
            # a value should be indicated
            attribute_name = config_dict[arg].get("value")
            if attribute_name is None:
                raise Exception(f"A value argument should be indicated for {class_name} argument {arg} "
                                f"in the yaml file {yaml_path}")
            converter_name = config_dict[arg].get("from_other_converter")
            if converter_name not in converter_dict:
                raise Exception(f"No convert class by the name {converter_name} has been instantiated")
            if isinstance(attribute_name, list):
                attribute_name = attribute_name[0]
            if not isinstance(attribute_name, str):
                raise Exception(f"{attribute_name} is not a string for {class_name} argument {arg} "
                                f"in the yaml file {yaml_path}")
            arg_dict[arg] = getattr(converter_dict[converter_name], attribute_name)
        # If a value is found and no extension is given, it means the argument is not a file
        # but a string/int/etc.
        elif (config_dict[arg].get("value") is not None) and \
                ((not isinstance(config_dict[arg].get("value"), list)) or
                 (isinstance(config_dict[arg].get("value"), list) and
                  len(config_dict[arg].get("value")) > 0)) \
                and not config_dict[arg].get("extension") and \
                (not config_dict[arg].get("keyword") or (not config_dict[arg].get("keyword_to_exclude"))):
            value = config_dict[arg].get("value")
            # transforming a list of 1 element into this element
            if isinstance(value, list) and len(value) == 1:
                value = value[0]
            arg_dict[arg] = value
        else:
            # If no extension is provided it means we are looking for a directory,
            # so we filter the list of files and directories to only contain directories
            filtered_list = filter_list_of_files(files=files, dir_path=dir_path,
                                                 extensions=config_dict[arg].get("extension"),
                                                 directory=config_dict[arg].get("dir"))
            # Conditional loop to remove all files or directories not containing the keywords
            # or containing excluded keywords
            if "keyword" in config_dict[arg]:
                filter_list_according_to_keywords(list_to_filter=filtered_list,
                                                  keywords=config_dict[arg].get("keyword"),
                                                  keywords_to_exclude=config_dict[arg].get("keyword_to_exclude"))
            print("Files to pass for " + arg + ": " + str(filtered_list))
            # If files were found respecting every element, pass their full paths as arguments
            # (filter_list_of_files already returns full paths)
            if filtered_list:
                file_name = filtered_list[0]
                arg_dict[arg] = file_name
                if (file_name.endswith("mat") or file_name.endswith("npz")) and \
                        config_dict[arg].get("value"):
                    if file_name.endswith("npz"):
                        file_name = os.path.basename(arg_dict[arg])
                        arg_dict[arg] = np.load(arg_dict[arg])
                        try:
                            arg_dict[arg] = arg_dict[arg][config_dict[arg].get("value")]
                        except KeyError:
                            raise Exception(f'{config_dict[arg].get("value")} is not a valid key of '
                                            f'{file_name}. Valid keys are: {list(arg_dict[arg].keys())}')
                    else:
                        arg_dict[arg] = hdf5storage.loadmat(arg_dict[arg])
                        if len(arg_dict[arg][config_dict[arg].get("value")]) == 1:
                            arg_dict[arg] = arg_dict[arg][config_dict[arg].get("value")][0]
                        else:
                            arg_dict[arg] = arg_dict[arg][config_dict[arg].get("value")]
                        # another option would be to pass the file_name plus the value field:
                        # arg_dict[arg] = [arg_dict[arg]] + list(config_dict[arg].get("value"))
            # If no file was found, set the argument to None
            else:
                arg_dict[arg] = None
    # if there is more than one instance of class_name, the last one will be the one kept
    # in converter_dict, useful if another class has a field "from_other_converter"
    converter_dict[class_name] = converter
    converter.convert(**arg_dict)
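
# A minimal sketch of the three ways an argument of a convert method can be described in
# the YAML file, written as the config_dict that create_convert_class receives; the
# argument names, converter name and .mat key below are hypothetical.
_example_config_dict = {
    # plain value, passed through unchanged
    "sampling_rate": {"value": 20.0},
    # file looked up in dir_path by extension and keyword; for .mat/.npz files the
    # "value" field is the key to load from the file
    "spike_nums": {"extension": ["mat"], "keyword": ["spikes"], "value": "spike_nums"},
    # value read from an attribute of a previously instantiated converter
    "frames_timestamps": {"from_other_converter": "ConvertAbfToNWB", "value": "timestamps"},
}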