aboutsummaryrefslogblamecommitdiff
path: root/tom_harley_solution.py
blob: d9505328b26fad3e9a1a7e5bc2781a79595aa09d (plain) (tree)


































































































































































































































































                                                                              
"""
Please write your name here: Tom Harley
"""

from datetime import timedelta, datetime
from contextlib import contextmanager
from collections import namedtuple
import csv
import re

def parse_time(time_str):
    """
    Parse a loosely formatted clock time into a ``datetime``.

    The accepted shape is an hour, optionally followed by ``.MM`` or ``:MM``,
    optionally followed by an AM/PM marker, for example ``"15"``,
    ``"18.30"``, ``"10:00"``, ``"4PM"``, ``"4.10pm"`` or ``"4 PM"``.
    Without a marker the hour is read on the 24-hour clock.

    :param time_str: the textual time; all whitespace in it is ignored
    :type time_str: str
    :return: the parsed time, carried on ``strptime``'s default date
        (1900-01-01)
    :rtype: datetime
    :raises ValueError: if the text does not match the format derived from it
    """
    # Drop every whitespace character so " 15 " and "4 PM" parse exactly like
    # "15" and "4PM".
    compact = "".join(time_str.split())

    # The AM/PM marker is the only place a letter "M" can legitimately show
    # up; look for it case-insensitively so "4pm" also selects the 12-hour
    # format (strptime itself matches %p regardless of case).
    if 'M' in compact.upper():
        hour_format = "%I"
        ampm_format = "%p"
    else:
        hour_format = "%H"
        ampm_format = ""

    # Minutes may be separated from the hour by either a dot or a colon.
    if '.' in compact:
        minute_format = ".%M"
    elif ':' in compact:
        minute_format = ":%M"
    else:
        minute_format = ""

    format_str = f"{hour_format}{minute_format}{ampm_format}"
    return datetime.strptime(compact, format_str)

def time_hour(hour):
    """
    Build the ``datetime`` that marks the start of a clock hour.

    :param hour: the hour of the day on the 24-hour clock (anything whose
        text form ``strptime`` accepts for ``%H``)
    :return: that hour, at minute zero, on ``strptime``'s default date
    :rtype: datetime
    :raises ValueError: if the value is not a valid ``%H`` hour
    """
    hour_text = format(hour)
    return datetime.strptime(hour_text, "%H")

def hours(start_time, end_time):
    """
    Yield the clock hours that a time span touches.

    Every yielded item is a ``(hour_start, hour_end)`` pair of datetimes
    exactly one hour apart, both on the hour.  The first pair is the hour
    containing ``start_time``; the last pair is the hour containing the final
    instant before ``end_time``, so a span ending mid-hour still yields that
    partial hour and a span ending exactly on the hour does not spill into
    the next one.

    :param start_time: beginning of the span
    :type start_time: datetime
    :param end_time: end of the span (exclusive)
    :type end_time: datetime
    :return: generator of ``(hour_start, hour_end)`` tuples; empty when
        ``end_time`` is not after ``start_time``
    """
    one_hour = timedelta(hours=1)

    # Snap back to the top of the hour the span starts in.
    current = start_time.replace(minute=0, second=0, microsecond=0)

    # Step with timedelta arithmetic rather than hour numbers so the final
    # hour is included and a span ending in the 23:xx hour can use the next
    # midnight as its upper boundary.
    while current < end_time:
        following = current + one_hour
        yield (current, following)
        current = following

@contextmanager
def opencsv(path_to_csv, lookahead_size=1024):
    """
    Open a CSV file and hand back a reader positioned on the first data row.

    The first ``lookahead_size`` characters are given to ``csv.Sniffer`` to
    guess the dialect and whether the file starts with a header row.  When a
    header is detected it is consumed before the reader is yielded.  The file
    stays open for the duration of the ``with`` block and is closed afterwards.

    :param path_to_csv: path of the CSV file to read
    :param lookahead_size: number of characters sampled for sniffing
    :return: context manager yielding a ``csv.reader``
    """
    with open(path_to_csv, newline="") as handle:
        # Peek at the beginning of the file so the Sniffer can guess its
        # layout.
        preview = handle.read(lookahead_size)
        detector = csv.Sniffer()

        detected_dialect = detector.sniff(preview)
        starts_with_header = detector.has_header(preview)

        # Rewind: the reader must see the file from its very first character.
        handle.seek(0)

        rows = csv.reader(handle, detected_dialect)

        if starts_with_header:
            # discard the header row so callers only receive data
            next(rows)

        yield rows

def process_shifts(path_to_csv):
    """
    Compute the labour cost incurred during each clock hour.

    Every CSV row is unpacked as ``break_notes, end_time, pay_rate,
    start_time``.  ``break_notes`` holds the break as ``<start>-<end>``.
    Times without an AM/PM marker may be written on the 12-hour clock, so
    they are moved forward by twelve hours where that is needed to keep the
    shift end after the shift start, the break start not before the shift
    start, and the break end after the break start.

    For each clock hour of a shift the paid time is the part of that hour
    inside the shift minus the part inside the break; the cost is the hourly
    pay rate pro-rated by those paid minutes.  Costs of all rows are summed
    per hour.

    :param path_to_csv: The path to the work_shift.csv
    :type string:
    :return: A dictionary with time as key (string) with format %H:%M
        (e.g. "18:00") and cost as value (Number)
    For example, it should be something like :
    {
        "17:00": 50,
        "22:00": 40,
    }
    In other words, for the hour beginning at 17:00, labour cost was
    50 pounds
    :rtype dict:
    :raises ValueError: if, after the twelve-hour adjustments, a row's break
        does not lie inside its shift or a span is empty
    """
    half_day = timedelta(hours=12)

    with opencsv(path_to_csv) as reader:
        result = dict()

        for break_notes, end_time, pay_rate, start_time in reader:
            break_start, break_end = map(parse_time, break_notes.split('-'))

            start_time = parse_time(start_time)
            end_time = parse_time(end_time)

            pay_rate = float(pay_rate)

            if not start_time < end_time:
                # shift passes over midday/midnight
                end_time = end_time + half_day

            if not start_time <= break_start:
                # break starts in PM
                break_start = break_start + half_day

            if not break_start < break_end:
                # break passes over midday/midnight
                break_end = break_end + half_day

            # Explicit check instead of assert: asserts vanish under
            # ``python -O`` and would let inconsistent rows through.
            if not start_time <= break_start < break_end <= end_time:
                raise ValueError(
                    "inconsistent shift row: "
                    f"shift {start_time:%H:%M}-{end_time:%H:%M}, "
                    f"break {break_start:%H:%M}-{break_end:%H:%M}"
                )

            for hour_start, hour_end in hours(start_time, end_time):
                # Interval overlap covers every case uniformly: shift
                # starting or ending mid-hour, break starting and/or ending
                # mid-hour, and hours entirely before or after the break.
                worked_minutes = _overlap_minutes(
                    hour_start, hour_end, start_time, end_time)
                break_minutes = _overlap_minutes(
                    hour_start, hour_end, break_start, break_end)
                paid_minutes = worked_minutes - break_minutes

                hour_str = datetime.strftime(hour_start, "%H:%M")
                hour_cost = pay_rate / 60 * paid_minutes
                result[hour_str] = result.get(hour_str, 0) + hour_cost

        return result


def _overlap_minutes(first_start, first_end, second_start, second_end):
    """Return the whole minutes shared by two datetime intervals (0 if none)."""
    shared = min(first_end, second_end) - max(first_start, second_start)
    # A negative difference means the intervals do not intersect at all.
    return max(shared, timedelta(0)) // timedelta(minutes=1)

def process_sales(path_to_csv):
    """
    Total the sales recorded in each clock hour.

    Every CSV row is unpacked as ``amount, time``; the amount is added to the
    bucket of the hour in which the transaction happened.  Hours without any
    transaction do not appear in the result.

    :param path_to_csv: The path to the transactions.csv
    :type string:
    :return: A dictionary with the hour as key (string with format %H:%M,
        e.g. "18:00") and the summed sales of that hour as value (float)
    For example, it should be something like :
    {
        "17:00": 250,
        "18:00": 120.5,
    }
    This means, for the hour beginning at 17:00, the sales were 250.

    :rtype dict:
    """
    totals = {}

    with opencsv(path_to_csv) as rows:
        for raw_amount, raw_time in rows:
            value = float(raw_amount)

            # Truncate the transaction time to the hour it falls in.
            bucket = parse_time(raw_time).strftime("%H:00")

            totals[bucket] = totals.get(bucket, 0) + value

    return totals

def compute_percentage(shifts, sales):
    """
    Express each hour's labour cost as a percentage of that hour's sales.

    Only hours present in ``shifts`` are reported; hours that appear solely
    in ``sales`` are ignored, on the premise that nothing is sold while no
    one is working.

    :param shifts: labour cost per hour, keyed by "%H:%M" strings
    :type shifts: dict
    :param sales: sales per hour, keyed by "%H:%M" strings
    :type sales: dict
    :return: A dictionary with time as key (string) with format %H:%M and
    percentage of labour cost per sales as value (float).
    If an hour has no sales (missing or zero), the value is -cost instead
    of a percentage.
    For example, it should be something like :
    {
        "17:00": 20,
        "22:00": -40,
    }
    :rtype: dict
    """
    percentages = {}

    for hour, cost in shifts.items():
        # A missing hour is treated exactly like an hour with zero sales.
        revenue = sales.get(hour, 0)
        percentages[hour] = -cost if revenue == 0 else cost / revenue * 100

    return percentages

def best_and_worst_hour(percentages):
    """
    Pick the hours with the highest and the lowest value.

    Args:
    percentages: output of compute_percentage
    Return: list of two strings with format %H:%M, e.g. ["18:00", "20:00"].
    The first element is the hour with the largest value (reported as the
    best hour), the second is the hour with the smallest value (reported as
    the worst hour).  On ties the hour that comes first in the mapping wins.
    Raises StopIteration when the mapping is empty.

    """
    # Touch the first key up front so that an empty mapping fails with
    # StopIteration instead of the ValueError max()/min() would raise.
    next(iter(percentages))

    value_of = percentages.get
    top_hour = max(percentages, key=value_of)
    bottom_hour = min(percentages, key=value_of)

    return [top_hour, bottom_hour]

def main(path_to_shifts, path_to_sales):
    """
    Do not touch this function, but you can look at it, to have an idea of
    how your data should interact with each other

    Runs the whole pipeline: the shifts file and the sales file are each
    reduced to a per-hour dictionary, the two are combined by
    compute_percentage, and best_and_worst_hour picks two hours from the
    result.

    :param path_to_shifts: path handed to process_shifts
    :param path_to_sales: path handed to process_sales
    :return: tuple (best_hour, worst_hour)
    """

    shifts_processed = process_shifts(path_to_shifts)
    sales_processed = process_sales(path_to_sales)
    percentages = compute_percentage(shifts_processed, sales_processed)
    # best_and_worst_hour returns a two-element list; unpack it into names.
    best_hour, worst_hour = best_and_worst_hour(percentages)
    return best_hour, worst_hour

if __name__ == '__main__':
    # Manual run against the sample files in the working directory; the
    # paths may be changed freely for local testing.
    shifts_file, sales_file = "./work_shifts.csv", "./transactions.csv"
    best, worst = main(shifts_file, sales_file)
    print(best, worst)

# Please write your name here: Tom Harley