import os
import pandas as pd
from loguru import logger
class MoveFilesToHiveboxLocation:
"""
This class is designed to solve `Issue #306 <https://github.com/ASU-CS-Research/AppMAIS/issues/306>`_.
The issue is that file paths were previously tied to both the hivebox location and the bee colony generation,
and we want to decouple these files from the colony population while retaining compatibility with existing
applications. This is done by renaming and moving the files, leaving symbolic links behind so there is a
record of every file at its previous location.
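
Example (assumes the old directories exist under the base location; the hive names below are taken from the
``__main__`` block at the bottom of this module)::

    mover = MoveFilesToHiveboxLocation(old_directories=['AppMAIS2RB', 'AppMAIS2RC'])
    mover.move_and_link()  # files end up under AppMAIS2R/, with symlinks left at the old paths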
"""
def __init__(self, old_directories: list):
"""
Args:
old_directories: List of directories to move files out of.
"""
# All directories are of the form 'AppMAIS' + Int + ('R' or 'L') + ('B' or 'C')
self._old_directories = old_directories
# Create a list of unique, new directories by removing the population indicator from the old directories
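# e.g. 'AppMAIS2RB' and 'AppMAIS2RC' both map to 'AppMAIS2R'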
self._new_directories = list(set([name[:-1] for name in old_directories]))
self._base_location = os.path.abspath('/usr/local/bee/appmais/')
self._sensor_directories = ['airquality', 'temp', 'scale', 'cpu', 'audio', 'video']
def move_and_link(self):
"""
Move each file from the old location to the new one. Where that file was, replace it with a symbolic link to
the file's new location.
Raises:
OSError: Raised when the new locations could not be created.
"""
for new_directory in self._new_directories:
# Get all the old directories that are being combined into the new directory
directories_to_concatenate = []
for directory in self._old_directories:
if directory[:-1] == new_directory:
directories_to_concatenate.append(directory)
logger.info(f'\nConcatenating {directories_to_concatenate} to {new_directory}.')
# Create list of dates in all concatenation directories
dates = self._get_dates(directories_to_concatenate)
logger.info(f'Found dates: {dates}.')
# Create directories for files to be moved to
try:
self._build_directories(new_directory, dates)
logger.info(f'\nAll new directories for {new_directory} built.')
except OSError as e:
raise OSError(f'{new_directory} failed to be built.\n{e}')
# Move files from old to new, leaving symlinks at the old locations
for directory in directories_to_concatenate:
self._move_and_symlink_files(directory, new_directory)
logger.info(f'\nAll files in {directory} moved and linked to {new_directory}.')
logger.info(f'\nAll files successfully moved to {new_directory}.')
logger.info('\nAll files moved and linked.')
def _build_directories(self, new_directory: str, dates: list):
"""
Ensure the new directory and its date and sensor subdirectories exist, creating any that do not.
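The resulting layout is ``<base_location>/<new_directory>/<date>/<sensor>/``.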
Args:
new_directory (str): Location to create new directory.
dates (list): List of needed date subdirectories.
"""
path = os.path.join(self._base_location, new_directory)
# Create new directory if it doesn't exist
if os.path.exists(path):
logger.info(f'{new_directory} directory exists.')
else:
logger.info(f'{new_directory} directory does not exist. Creating...')
os.mkdir(path)
for date in dates:
path_to_date = os.path.join(path, date)
# Create new subdirectory for the date if it doesn't exist
if not os.path.exists(path_to_date):
logger.info(f'Creating new directory in {new_directory} for date: {date}')
os.mkdir(path_to_date)
# Create new subdirectories for each sensor if they don't exist
for sensor in self._sensor_directories:
path_to_sensor = os.path.join(path_to_date, sensor)
if not os.path.exists(path_to_sensor):
logger.info(f'Creating new directory in {new_directory}/{date} for sensor: {sensor}')
os.mkdir(path_to_sensor)
def _get_dates(self, directories: list):
"""
Get a unique list of all date subdirectories contained within the provided directories.
Args:
directories (list): A list of all target directories.
Returns:
dates (list): List of unique dates from all directories.
"""
dates = []
# With multiple source directories, merge their date listings; a single directory's listing is used as-is
if len(directories) != 1:
for directory in directories:
# Add all dates for this directory to the full date list
path_to_dir = os.path.join(self._base_location, directory)
for date in os.listdir(path_to_dir):
dates.append(date)
else:
path_to_dir = os.path.join(self._base_location, directories[0])
dates = os.listdir(path_to_dir)
# Make sure dates only contains unique values
dates = list(set(dates))
return dates
def _move_and_symlink_files(self, root_directory: str, target_directory: str):
"""
Move all files from old directory to new directory. For each file, put a symlink to its new location in the
old directory.
Args:
root_directory (str): The old location of files, where they will be replaced by symlinks.
target_directory (str): The new location the files will be moved to.
Raises:
FileExistsError: Raised when a file already exists at the new location.
FileNotFoundError: Raised when either the old or new file location cannot be found.
OSError: Raised when the old file fails to be moved to the new location, or when a symlink at the old
location already exists.
ValueError: Raised when values in .csv files are not of the same type, causing concatenation to fail.
"""
root_path = os.path.join(self._base_location, root_directory)
target_path = os.path.join(self._base_location, target_directory)
# Cycle through each date and all subdirectories, copying files and leaving symlinks
for date in os.listdir(root_path):
date_path = os.path.join(root_path, date)
for sensor in os.listdir(date_path):
sensor_path = os.path.join(date_path, sensor)
for filename in os.listdir(sensor_path):
# Get path to old and new locations
old_file_path = os.path.join(sensor_path, filename)
# Rename the file by replacing the old hive name (everything before the first '@') with the new directory name
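# e.g. 'AppMAIS2RB@<rest of name>' becomes 'AppMAIS2R@<rest of name>' when target_directory is 'AppMAIS2R'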
name_split = filename.split('@', 1)
new_file_name = target_directory + '@' + name_split[1]
new_file_path = os.path.join(target_path, date, sensor, new_file_name)
# Check that locations exist. If they somehow do not, halt execution.
if not os.path.exists(old_file_path):
raise FileNotFoundError(f'Could not find old file: {old_file_path}.')
path_to_new_directory = os.path.join(target_path, date, sensor)
if not os.path.exists(path_to_new_directory):
raise FileNotFoundError(f'Could not find path to new file location: {path_to_new_directory}.')
# Check that there's nothing already at the new location.
# If there is, and it's a .csv file, concatenate the two.
# If there is a file, but it is not a .csv, halt execution.
# If there is no file, move the old file to the new location
if os.path.exists(new_file_path):
if new_file_path.endswith('.csv'):
try:
self._concatenate_csv(old_file_path, new_file_path, sensor)
logger.info(f'Concatenated {old_file_path} and {new_file_path}')
except ValueError as e:
raise ValueError(f'{old_file_path} and {new_file_path} failed to concatenate.\n{e}')
else:
raise FileExistsError(f'{new_file_path} already exists.')
else:
# Move the file from the old location to the new location
try:
os.rename(old_file_path, new_file_path)
logger.info(f'Moved file from {old_file_path} to {new_file_path}.')
except OSError as e:
raise OSError(f'Failed to move file from {old_file_path} to {new_file_path}.\n{e}')
# Create a symlink to the new location at the old location
try:
os.symlink(new_file_path, old_file_path)
logger.info(f'Symbolic link created at {old_file_path} to {new_file_path}.')
except OSError as e:
raise OSError(f'Failed to create symlink at {old_file_path} pointing to {new_file_path}.\n{e}')
@staticmethod
def _concatenate_csv(old_file_path: str, new_file_path: str, sensor: str):
"""
Concatenate a pair of .csv files.
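Duplicate rows are dropped, the merged result is written back to the new location, and the old file is removed.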
Args:
old_file_path (str): Path to the old file.
new_file_path (str): Path to the file in the new location. The contents of the old and new files are
concatenated and written to this path.
sensor (str): Name of the sensor for which the .csv is being concatenated.
"""
# List of column names depending on sensor - used for sorting
column_names = {
"airquality": ["1-timestamp", "2-pm10", "3-pm25"], # ?
"temp": ["1-timestamp", "2-temperature", "3-humidity"],
"scale": ["1-timestamp", "2-weight"],
"cpu": ["1-timestamp", "2-cpu", "3-memory"],
"audio": ["1-timestamp", "2-temperature", "3-voltage"],
"video": ["1-timestamp", "2-file_size"]
}
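# The numeric prefixes keep the timestamp column first when columns are sorted by name below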
# Read the .csv files into dataframes, labeling the columns depending on which sensor the file is for
old_file = pd.read_csv(old_file_path, names=column_names[sensor])
new_file = pd.read_csv(new_file_path, names=column_names[sensor])
concatenated_csv = pd.concat([old_file, new_file],
ignore_index=True, sort=True)
concatenated_csv.sort_index(axis=1, inplace=True)
# Sort the concatenated dataframe by the column names
concatenated_csv.sort_values(by=list(column_names[sensor]), axis=0, inplace=True)
# Drop any duplicate rows
concatenated_csv.drop_duplicates(inplace=True)
# Reset the index of the dataframe
concatenated_csv.reset_index(drop=True, inplace=True)
# Place concatenated file in new location
concatenated_csv.to_csv(new_file_path, na_rep='nan', header=False, index=False)
# Remove file in old location
os.remove(old_file_path)
def main(target_directories: list):
"""
Main method to run the class.
"""
move_files = MoveFilesToHiveboxLocation(old_directories=target_directories)
move_files.move_and_link()
if __name__ == '__main__':
target_dirs = ['AppMAIS1LB', 'AppMAIS2RB', 'AppMAIS2RC', 'AppMAIS3LB', 'AppMAIS3RB', 'AppMAIS4LB', 'AppMAIS4RB',
'AppMAIS5LB', 'AppMAIS6RB', 'AppMAIS6RC', 'AppMAIS8LB', 'AppMAIS9LB']
main(target_dirs)