Source code for move_files_to_hivebox_location

import os

import pandas as pd
from loguru import logger

class MoveFilesToHiveboxLocation:
    """
    Solves `Issue #306 <https://github.com/ASU-CS-Research/AppMAIS/issues/306>`_.

    Previously, file locations were tied to both the physical location and the
    bee colony generation (population). This class decouples the files from the
    population while retaining compatibility with existing applications: files
    are renamed and moved to population-agnostic directories, and symbolic links
    are left behind so there is a record of every file at its previous location.
    """

    def __init__(self, old_directories: list):
        """
        Args:
            old_directories: List of directories to move files out of. All
                names are of the form 'AppMAIS' + <int> + ('R' or 'L') +
                ('B' or 'C'), where the final letter is the population
                indicator.
        """
        self._old_directories = old_directories
        # Strip the trailing population indicator and deduplicate to obtain the
        # new, population-agnostic directory names. Sorted so the result is
        # deterministic (the original list(set(...)) order was arbitrary).
        self._new_directories = sorted({name[:-1] for name in old_directories})
        self._base_location = os.path.abspath('/usr/local/bee/appmais/')
        self._sensor_directories = ['airquality', 'temp', 'scale', 'cpu', 'audio', 'video']

    def _build_directories(self, new_directory: str, dates: list):
        """
        Verify that the new directory and its date/sensor subdirectories exist.
        Create any that are missing.

        Args:
            new_directory (str): Name of the new directory to create under the
                base location.
            dates (list): List of needed date subdirectories.
        """
        path = os.path.join(self._base_location, new_directory)
        # Create the new top-level directory if it doesn't exist.
        if os.path.exists(path):
            logger.info(f'{new_directory} directory exists.')
        else:
            logger.info(f'{new_directory} directory does not exist. Creating...')
            os.mkdir(path)
        for date in dates:
            path_to_date = os.path.join(path, date)
            # Create the date subdirectory if it doesn't exist.
            if not os.path.exists(path_to_date):
                logger.info(f'Creating new directory in {new_directory} for date: {date}')
                os.mkdir(path_to_date)
            # Ensure a subdirectory exists for every sensor under this date.
            for sensor in self._sensor_directories:
                path_to_sensor = os.path.join(path_to_date, sensor)
                if not os.path.exists(path_to_sensor):
                    # Fixed: the original message hard-coded a backslash
                    # separator, which is wrong on the Linux hosts implied by
                    # the /usr/local/bee base path.
                    logger.info(
                        f'Creating new directory in {os.path.join(new_directory, date)} for sensor: {sensor}'
                    )
                    os.mkdir(path_to_sensor)

    def _get_dates(self, directories: list):
        """
        Get a unique list of all date subdirectory names contained within the
        provided directories.

        Args:
            directories (list): A list of all target directories.

        Returns:
            list: Sorted list of unique dates found across all directories.
        """
        # The original implementation special-cased len(directories) == 1, but
        # both branches reduced to the same set union over os.listdir results;
        # a single loop suffices. Sorted for deterministic output (the original
        # order depended on set iteration order).
        dates = set()
        for directory in directories:
            path_to_dir = os.path.join(self._base_location, directory)
            dates.update(os.listdir(path_to_dir))
        return sorted(dates)

    @staticmethod
    def _concatenate_csv(old_file_path: str, new_file_path: str, sensor: str):
        """
        Concatenate a pair of .csv files, writing the merged, sorted,
        deduplicated result to the new location and removing the old file.

        Args:
            old_file_path (str): Path to the old file.
            new_file_path (str): Path to the file in the new location. Both old
                and new will be concatenated to this new location.
            sensor (str): Name of the sensor for which the .csv is being
                concatenated; selects the column schema below.
        """
        # Column names per sensor; the numeric prefixes give a stable column
        # sort order. Files are headerless, so names are assigned on read.
        column_names = {
            # NOTE(review): original carried a '# ?' here — confirm the
            # pm10/pm25 column ordering against the air-quality recorder.
            "airquality": ["1-timestamp", "2-pm10", "3-pm25"],
            "temp": ["1-timestamp", "2-temperature", "3-humidity"],
            "scale": ["1-timestamp", "2-weight"],
            "cpu": ["1-timestamp", "2-cpu", "3-memory"],
            "audio": ["1-timestamp", "2-temperature", "3-voltage"],
            "video": ["1-timestamp", "2-file_size"]
        }
        names = column_names[sensor]
        # Read both headerless .csv files, labeling columns by sensor schema.
        old_file = pd.read_csv(old_file_path, names=names)
        new_file = pd.read_csv(new_file_path, names=names)
        concatenated_csv = pd.concat([old_file, new_file], ignore_index=True, sort=True)
        concatenated_csv.sort_index(axis=1, inplace=True)
        # Sort rows by timestamp (then remaining columns) and drop duplicate
        # rows introduced by overlapping recording windows.
        concatenated_csv.sort_values(by=names, axis=0, inplace=True)
        concatenated_csv.drop_duplicates(inplace=True)
        concatenated_csv.reset_index(drop=True, inplace=True)
        # Place the concatenated file in the new location, then remove the old.
        concatenated_csv.to_csv(new_file_path, na_rep='nan', header=False, index=False)
        os.remove(old_file_path)
def main(target_directories: list):
    """
    Entry point: construct the mover for the given hive directories and run
    the move-and-link migration.

    Args:
        target_directories: Hive directory names (e.g. 'AppMAIS1LB') whose
            files should be relocated.
    """
    mover = MoveFilesToHiveboxLocation(old_directories=target_directories)
    mover.move_and_link()


if __name__ == '__main__':
    target_dirs = [
        'AppMAIS1LB', 'AppMAIS2RB', 'AppMAIS2RC', 'AppMAIS3LB', 'AppMAIS3RB',
        'AppMAIS4LB', 'AppMAIS4RB', 'AppMAIS5LB', 'AppMAIS6RB', 'AppMAIS6RC',
        'AppMAIS8LB', 'AppMAIS9LB',
    ]
    main(target_dirs)