diff options
Diffstat (limited to 'tom_harley_solution.py')
-rw-r--r-- | tom_harley_solution.py | 259 |
1 files changed, 259 insertions, 0 deletions
# Recovered from: diff --git a/tom_harley_solution.py b/tom_harley_solution.py
# (new file mode 100644, index 0000000..d950532)
"""
Please write your name here: Tom Harley
"""

from datetime import timedelta, datetime
from contextlib import contextmanager
from collections import namedtuple
import csv
import re

# Ambiguous 12-hour clock values are disambiguated in half-day steps.
_HALF_DAY = timedelta(hours=12)
_ONE_HOUR = timedelta(hours=1)

# Delimiters tried first when sniffing; ':' '.' and '-' are deliberately
# absent because they occur inside the time / break-note fields themselves.
_COMMON_DELIMITERS = ",;\t|"


def parse_time(time_str):
    """
    Parse a loosely formatted clock time.

    Accepted shapes (surrounding/internal whitespace and letter case are
    ignored): ``"18"``, ``"18:30"``, ``"18.30"``, ``"6PM"``, ``"6:30PM"``,
    ``"6.30 pm"``.

    :param time_str: textual time
    :type time_str: str
    :return: the parsed time on strptime's default date (1900-01-01)
    :rtype: datetime
    :raises ValueError: if the text matches none of the accepted shapes
    """
    # Drop every whitespace character so "4 PM" parses like "4PM"; strptime's
    # %p is case-insensitive, but the 'M' probe below is not, hence upper().
    time_str = "".join(time_str.split()).upper()

    if 'M' in time_str:
        hour_format = "%I"
        ampm_format = "%p"
    else:
        hour_format = "%H"
        ampm_format = ""

    if '.' in time_str:
        minute_format = ".%M"
    elif ':' in time_str:
        minute_format = ":%M"
    else:
        minute_format = ""

    format_str = f"{hour_format}{minute_format}{ampm_format}"
    return datetime.strptime(time_str, format_str)


def time_hour(hour):
    """
    Return the datetime for ``hour`` o'clock on the parsing base date.

    Values of 24 and above roll over onto the following day(s) instead of
    raising, which keeps the helper usable for shifts that cross midnight.

    :param hour: whole hour, counted from midnight of 1900-01-01
    :return: datetime at that hour
    :rtype: datetime
    """
    return datetime(1900, 1, 1) + timedelta(hours=int(hour))


def hours(start_time, end_time):
    """
    Yield ``(hour_start, hour_end)`` for every clock hour touched by a shift.

    The first pair starts at ``start_time`` rounded down to the hour and the
    last pair is the one containing the final worked minute, so a 09:00-17:00
    shift yields eight pairs (09-10 ... 16-17) and a 09:00-17:30 shift nine.
    Nothing is yielded when ``end_time`` is not after ``start_time``.

    :param start_time: shift start
    :type start_time: datetime
    :param end_time: shift end (exclusive)
    :type end_time: datetime
    """
    hour_start = start_time.replace(minute=0, second=0, microsecond=0)
    while hour_start < end_time:
        hour_end = hour_start + _ONE_HOUR
        yield (hour_start, hour_end)
        hour_start = hour_end


@contextmanager
def opencsv(path_to_csv, lookahead_size=1024):
    """
    Open a CSV file and yield a ``csv.reader`` positioned on the first data row.

    The dialect and the presence of a header row are guessed by
    ``csv.Sniffer`` from the first ``lookahead_size`` characters; a detected
    header row is skipped. The file is closed when the ``with`` block exits.

    :param path_to_csv: path of the file to read
    :param lookahead_size: number of characters handed to the sniffer
    :raises csv.Error: if the sniffer cannot determine the dialect
    """
    with open(path_to_csv, newline="") as csvfile:
        sniffer = csv.Sniffer()
        sample = csvfile.read(lookahead_size)

        try:
            # Prefer conventional delimiters so that ':' or '.' inside the
            # time columns can never be mistaken for the field separator.
            dialect = sniffer.sniff(sample, delimiters=_COMMON_DELIMITERS)
        except csv.Error:
            # Unusual separator: fall back to the unrestricted guess.
            dialect = sniffer.sniff(sample)
        header = sniffer.has_header(sample)

        csvfile.seek(0)

        reader = csv.reader(csvfile, dialect)

        if header:
            # skip header if it exists
            next(reader)

        yield reader


def _resolve_after(moment, reference, allow_equal):
    """Push ``moment`` forward in 12-hour steps (at most 24h) past ``reference``."""
    for _ in range(2):
        if moment > reference or (allow_equal and moment == reference):
            break
        moment = moment + _HALF_DAY
    return moment


def _overlap_minutes(first_start, first_end, second_start, second_end):
    """Return the number of minutes shared by two intervals (0 if disjoint)."""
    shared = min(first_end, second_end) - max(first_start, second_start)
    return max(shared.total_seconds() / 60, 0)


def process_shifts(path_to_csv):
    """
    Compute the labour cost of every clock hour from the shifts file.

    Each row must hold, in this order: ``break_notes`` (``"start-end"``),
    ``end_time``, ``pay_rate`` (per hour) and ``start_time``. Times may omit
    AM/PM; an end/break time that would otherwise precede the time it has to
    follow is moved forward in 12-hour steps (so ``9`` - ``5`` means
    09:00-17:00, and ``22:00`` - ``2:00`` ends the next day). Breaks are
    unpaid. Blank rows are ignored.

    :param path_to_csv: The path to the work_shift.csv
    :type string:
    :return: A dictionary with time as key (string) with format %H:%M
        (e.g. "18:00") and cost as value (Number)
        For example, it should be something like :
        {
            "17:00": 50,
            "22:00": 40,
        }
        In other words, for the hour beginning at 17:00, labour cost was
        50 pounds
    :rtype dict:
    :raises ValueError: if a row is malformed or its break does not fit
        inside the shift
    """
    result = dict()

    with opencsv(path_to_csv) as reader:
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines
                continue

            break_notes, end_time, pay_rate, start_time = row
            break_start, break_end = map(parse_time, break_notes.split('-'))

            start_time = parse_time(start_time)
            end_time = parse_time(end_time)
            pay_rate = float(pay_rate)

            # shift passes over midday/midnight
            end_time = _resolve_after(end_time, start_time, False)
            # break starts in PM (or after midnight)
            break_start = _resolve_after(break_start, start_time, True)
            # break passes over midday/midnight
            break_end = _resolve_after(break_end, break_start, False)

            # Explicit check instead of assert: asserts vanish under -O.
            if not (start_time < end_time
                    and start_time <= break_start < break_end <= end_time):
                raise ValueError(
                    f"inconsistent shift/break times in row {row!r}"
                )

            for hour_start, hour_end in hours(start_time, end_time):
                # Minutes of this hour inside the shift, minus the part of
                # the break that falls into the same hour. The break lies
                # within the shift, so the difference is never negative.
                paid_minutes = (
                    _overlap_minutes(hour_start, hour_end,
                                     start_time, end_time)
                    - _overlap_minutes(hour_start, hour_end,
                                       break_start, break_end)
                )

                hour_str = datetime.strftime(hour_start, "%H:%M")
                hour_cost = pay_rate * paid_minutes / 60
                result[hour_str] = result.get(hour_str, 0) + hour_cost

    return result


def process_sales(path_to_csv):
    """
    Sum the sales of every clock hour from the transactions file.

    Each row must hold, in this order: ``amount`` and ``time``. Blank rows
    are ignored. Hours without any transaction do not appear in the result.

    :param path_to_csv: The path to the transactions.csv
    :type string:
    :return: A dictionary with time (string) with format %H:%M as key
        (e.g. "18:00") and the total sales of that hour as value (float)
        For example, it should be something like :
        {
            "17:00": 250,
            "22:00": 0,
        },
        This means, for the hour beginning at 17:00, the sales were 250
        dollars and for the hour beginning at 22:00, the sales were 0.
    :rtype dict:
    """
    result = dict()

    with opencsv(path_to_csv) as reader:
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines
                continue

            amount, time = row
            amount = float(amount)
            time = parse_time(time)

            hour_str = datetime.strftime(time, "%H:00")
            result[hour_str] = result.get(hour_str, 0) + amount

    return result


def compute_percentage(shifts, sales):
    """
    Express the labour cost of each hour as a percentage of its sales.

    :param shifts: labour cost per hour, as returned by process_shifts
    :type shifts: dict
    :param sales: sales per hour, as returned by process_sales
    :type sales: dict
    :return: A dictionary with time as key (string) with format %H:%M and
        percentage of labour cost per sales as value (float),
        If the sales are null, then return -cost instead of percentage
        For example, it should be something like :
        {
            "17:00": 20,
            "22:00": -40,
        }
    :rtype: dict
    """
    result = dict()

    # things cannot be sold while there are no clerks working, so it should be
    # safe to use the keys from the shift calculations dict.
    for hour_str, cost in shifts.items():
        # A missing hour and an explicit 0 both mean "no sales".
        hour_sales = sales.get(hour_str, 0)

        if hour_sales == 0:
            result[hour_str] = - cost
        else:
            result[hour_str] = cost / hour_sales * 100

    return result


def best_and_worst_hour(percentages):
    """
    Pick the most and the least cost-efficient hour.

    A non-negative value is a labour cost percentage, where lower is better.
    A negative value is ``-cost`` of an hour without any sales; such hours
    are worse than every hour that had sales, and the more they cost the
    worse they are. Ties go to the hour that appears first.

    Args:
        percentages: output of compute_percentage
    Return: list of strings, the first element should be the best hour,
        the second (and last) element should be the worst hour. Hour are
        represented by string with format %H:%M
        e.g. ["18:00", "20:00"]
    Raises:
        ValueError: if percentages is empty

    """
    if not percentages:
        raise ValueError("percentages must contain at least one hour")

    def badness(hour_str):
        value = percentages[hour_str]
        if value < 0:
            # no sales at all: rank after every selling hour, by cost
            return (1, -value)
        return (0, value)

    best = min(percentages, key=badness)
    worst = max(percentages, key=badness)

    return [best, worst]


def main(path_to_shifts, path_to_sales):
    """
    Do not touch this function, but you can look at it, to have an idea of
    how your data should interact with each other
    """

    shifts_processed = process_shifts(path_to_shifts)
    sales_processed = process_sales(path_to_sales)
    percentages = compute_percentage(shifts_processed, sales_processed)
    best_hour, worst_hour = best_and_worst_hour(percentages)
    return best_hour, worst_hour


if __name__ == '__main__':
    # You can change this to test your code, it will not be used
    path_to_sales = "./transactions.csv"
    path_to_shifts = "./work_shifts.csv"
    best_hour, worst_hour = main(path_to_shifts, path_to_sales)
    print(best_hour, worst_hour)

# Please write your name here: Tom Harley