import os
import pandas as pd
from loguru import logger
class MoveFilesToHiveboxLocation:
"""
This class is designed to solve `Issue #306 <https://github.com/ASU-CS-Research/AppMAIS/issues/306>`_.
The issue is that file paths were previously tied to both the hivebox location and the bee colony generation,
and we want to decouple these files from the colony population while retaining compatibility with existing
applications. This is done by renaming and moving the files, leaving symbolic links behind so there is a
record of every file at its previous location.
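
Example (assumes the old directories exist under the base location; the hive names below are taken from the
``__main__`` block at the bottom of this module)::

    mover = MoveFilesToHiveboxLocation(old_directories=['AppMAIS2RB', 'AppMAIS2RC'])
    mover.move_and_link()  # files end up under AppMAIS2R/, with symlinks left at the old paths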
"""
def __init__(self, old_directories: list):
"""
Args:
old_directories: List of directories to move files out of.
"""
# All directories are of the form 'AppMAIS' + Int + ('R' or 'L') + ('B' or 'C')
self._old_directories = old_directories
# Create a list of unique, new directories by removing the population indicator from the old directories
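# e.g. 'AppMAIS2RB' and 'AppMAIS2RC' both map to 'AppMAIS2R'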
self._new_directories = list(set([name[:-1] for name in old_directories]))
self._base_location = os.path.abspath('/usr/local/bee/appmais/')
self._sensor_directories = ['airquality', 'temp', 'scale', 'cpu', 'audio', 'video']
def move_and_link(self):
"""
Move each file from the old location to the new one. Where that file was, replace it with a symbolic link to
the file's new location.
Raises:
OSError: Raised when the new locations could not be created.
"""
for new_directory in self._new_directories:
# Get all the old directories that are being combined into the new directory
directories_to_concatenate = []
for directory in self._old_directories:
if directory[:-1] == new_directory:
directories_to_concatenate.append(directory)
logger.info(f'\nConcatenating {directories_to_concatenate} to {new_directory}.')
# Create list of dates in all concatenation directories
dates = self._get_dates(directories_to_concatenate)
logger.info(f'Found dates: {dates}.')
# Create directories for files to be moved to
try:
self._build_directories(new_directory, dates)
logger.info(f'\nAll new directories for {new_directory} built.')
except OSError as e:
raise OSError(f'{new_directory} failed to be built.\n{e}')
# Move files from old to new, leaving symlinks at the old locations
for directory in directories_to_concatenate:
self._move_and_symlink_files(directory, new_directory)
logger.info(f'\nAll files in {directory} moved and linked to {new_directory}.')
logger.info(f'\nAll files successfully moved to {new_directory}.')
logger.info('\nAll files moved and linked.')
def _build_directories(self, new_directory: str, dates: list):
"""
Ensure the new directory and its date and sensor subdirectories exist, creating any that do not.
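The resulting layout is ``<base_location>/<new_directory>/<date>/<sensor>/``.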
Args:
new_directory (str): Location to create new directory.
dates (list): List of needed date subdirectories.
"""
path = os.path.join(self._base_location, new_directory)
# Create new directory if it doesn't exist
if os.path.exists(path):
logger.info(f'{new_directory} directory exists.')
else:
logger.info(f'{new_directory} directory does not exist. Creating...')
os.mkdir(path)
for date in dates:
path_to_date = os.path.join(path, date)
# Create new subdirectory for the date if it doesn't exist
if not os.path.exists(path_to_date):
logger.info(f'Creating new directory in {new_directory} for date: {date}')
os.mkdir(path_to_date)
# Create new subdirectories for each sensor if they don't exist
for sensor in self._sensor_directories:
path_to_sensor = os.path.join(path_to_date, sensor)
if not os.path.exists(path_to_sensor):
logger.info(f'Creating new directory in {new_directory}/{date} for sensor: {sensor}')
os.mkdir(path_to_sensor)
def _get_dates(self, directories: list):
"""
Get a unique list of all date subdirectories contained within the provided directories.
Args:
directories (list): A list of all target directories.
Returns:
dates (list): List of unique dates from all directories.
"""
dates = []
# With multiple source directories, merge their date listings; a single directory's listing is used as-is
if len(directories) != 1:
for directory in directories:
# Add all dates for this directory to the full date list
path_to_dir = os.path.join(self._base_location, directory)
for date in os.listdir(path_to_dir):
dates.append(date)
else:
path_to_dir = os.path.join(self._base_location, directories[0])
dates = os.listdir(path_to_dir)
# Make sure dates only contains unique values
dates = list(set(dates))
return dates
def _move_and_symlink_files(self, root_directory: str, target_directory: str):
"""
Move all files from old directory to new directory. For each file, put a symlink to its new location in the
old directory.
Args:
root_directory (str): The old location of files, where they will be replaced by symlinks.
target_directory (str): The new location the files will be moved to.
Raises:
FileExistsError: Raised when a file already exists at the new location.
FileNotFoundError: Raised when either the old or new file location cannot be found.
OSError: Raised when the old file fails to be moved to the new location, or when a symlink at the old
location already exists.
ValueError: Raised when values in .csv files are not of the same type, causing concatenation to fail.
"""
root_path = os.path.join(self._base_location, root_directory)
target_path = os.path.join(self._base_location, target_directory)
# Cycle through each date and all subdirectories, copying files and leaving symlinks
for date in os.listdir(root_path):
date_path = os.path.join(root_path, date)
for sensor in os.listdir(date_path):
sensor_path = os.path.join(date_path, sensor)
for filename in os.listdir(sensor_path):
# Get path to old and new locations
old_file_path = os.path.join(sensor_path, filename)
# Rename the file by replacing the old hive name (everything before the first '@') with the new directory name
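# e.g. 'AppMAIS2RB@<rest of name>' becomes 'AppMAIS2R@<rest of name>' when target_directory is 'AppMAIS2R'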
name_split = filename.split('@', 1)
new_file_name = target_directory + '@' + name_split[1]
new_file_path = os.path.join(target_path, date, sensor, new_file_name)
# Check that locations exist. If they somehow do not, halt execution.
if not os.path.exists(old_file_path):
raise FileNotFoundError(f'Could not find old file: {old_file_path}.')
path_to_new_directory = os.path.join(target_path, date, sensor)
if not os.path.exists(path_to_new_directory):
raise FileNotFoundError(f'Could not find path to new file location: {path_to_new_directory}.')
# Check that there's nothing already at the new location.
# If there is, and it's a .csv file, concatenate the two.
# If there is a file, but it is not a .csv, halt execution.
# If there is no file, move the old file to the new location
if os.path.exists(new_file_path):
if new_file_path.endswith('.csv'):
try:
self._concatenate_csv(old_file_path, new_file_path, sensor)
logger.info(f'Concatenated {old_file_path} and {new_file_path}')
except ValueError as e:
raise ValueError(f'{old_file_path} and {new_file_path} failed to concatenate.\n{e}')
else:
raise FileExistsError(f'{new_file_path} already exists.')
else:
# Move the file from the old location to the new location
try:
os.rename(old_file_path, new_file_path)
logger.info(f'Moved file from {old_file_path} to {new_file_path}.')
except OSError as e:
raise OSError(f'Failed to move file from {old_file_path} to {new_file_path}.\n{e}')
# Create a symlink to the new location at the old location
try:
os.symlink(new_file_path, old_file_path)
logger.info(f'Symbolic link created at {old_file_path} to {new_file_path}.')
except OSError as e:
raise OSError(f'Failed to create symlink at {old_file_path} pointing to {new_file_path}.\n{e}')
@staticmethod
def _concatenate_csv(old_file_path: str, new_file_path: str, sensor: str):
"""
Concatenate a pair of .csv files.
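Duplicate rows are dropped, the merged result is written back to the new location, and the old file is removed.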
Args:
old_file_path (str): Path to the old file.
new_file_path (str): Path to the file in the new location. The contents of the old and new files are
concatenated and written to this path.
sensor (str): Name of the sensor for which the .csv is being concatenated.
"""
# List of column names depending on sensor - used for sorting
column_names = {
"airquality": ["1-timestamp", "2-pm10", "3-pm25"], # ?
"temp": ["1-timestamp", "2-temperature", "3-humidity"],
"scale": ["1-timestamp", "2-weight"],
"cpu": ["1-timestamp", "2-cpu", "3-memory"],
"audio": ["1-timestamp", "2-temperature", "3-voltage"],
"video": ["1-timestamp", "2-file_size"]
}
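# The numeric prefixes keep the timestamp column first when columns are sorted by name below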
# Read the .csv files into dataframes, labeling the columns depending on which sensor the file is for
old_file = pd.read_csv(old_file_path, names=column_names[sensor])
new_file = pd.read_csv(new_file_path, names=column_names[sensor])
concatenated_csv = pd.concat([old_file, new_file],
ignore_index=True, sort=True)
concatenated_csv.sort_index(axis=1, inplace=True)
# Sort the concatenated dataframe by the column names
concatenated_csv.sort_values(by=list(column_names[sensor]), axis=0, inplace=True)
# Drop any duplicate rows
concatenated_csv.drop_duplicates(inplace=True)
# Reset the index of the dataframe
concatenated_csv.reset_index(drop=True, inplace=True)
# Place concatenated file in new location
concatenated_csv.to_csv(new_file_path, na_rep='nan', header=False, index=False)
# Remove file in old location
os.remove(old_file_path)
def main(target_directories: list):
"""
Main method to run the class.
"""
move_files = MoveFilesToHiveboxLocation(old_directories=target_directories)
move_files.move_and_link()
if __name__ == '__main__':
target_dirs = ['AppMAIS1LB', 'AppMAIS2RB', 'AppMAIS2RC', 'AppMAIS3LB', 'AppMAIS3RB', 'AppMAIS4LB', 'AppMAIS4RB',
'AppMAIS5LB', 'AppMAIS6RB', 'AppMAIS6RC', 'AppMAIS8LB', 'AppMAIS9LB']
main(target_dirs)