aboutsummaryrefslogtreecommitdiff
path: root/tom_harley_solution.py
diff options
context:
space:
mode:
authorTom Harley2020-01-25 03:56:59 +0000
committerTom Harley2020-01-25 03:56:59 +0000
commit5f26da2fa9013030df6c71316d9773c4d052044e (patch)
treee96f318a18da964603fbf80220b838e1b4880eb7 /tom_harley_solution.py
downloadtenzo-master.tar.gz
tenzo-master.zip

Initial commit

HEADmaster
Diffstat (limited to 'tom_harley_solution.py')
-rw-r--r--tom_harley_solution.py259
1 files changed, 259 insertions, 0 deletions
diff --git a/tom_harley_solution.py b/tom_harley_solution.py
new file mode 100644
index 0000000..d950532
--- /dev/null
+++ b/tom_harley_solution.py
@@ -0,0 +1,259 @@
+"""
+Please write your name here: Tom Harley
+"""
+
+from datetime import timedelta, datetime
+from contextlib import contextmanager
+from collections import namedtuple
+import csv
+import re
+
def parse_time(time_str):
    """Parse a loosely formatted time of day into a ``datetime``.

    The format is inferred from the text itself: an hour, optionally
    followed by minutes after a ``.`` or ``:`` separator, optionally
    followed by an AM/PM marker.  Examples: ``"18"``, ``"18:30"``,
    ``"18.30"``, ``"4PM"``, ``"4.30 pm"``.

    :param time_str: textual time of day
    :type time_str: str
    :return: the parsed time on ``strptime``'s default date (1900-01-01)
    :rtype: datetime
    :raises ValueError: if the text does not match the inferred format
    """
    # Remove every whitespace character ("4 PM" -> "4PM") and normalise the
    # case so the AM/PM marker is recognised however it was typed.
    time_str = "".join(time_str.split()).upper()

    if time_str.endswith(("AM", "PM")):
        # 12-hour clock with an explicit marker
        hour_format, ampm_format = "%I", "%p"
    else:
        # no marker: treat the hour as a 24-hour clock value
        hour_format, ampm_format = "%H", ""

    if '.' in time_str:
        minute_format = ".%M"
    elif ':' in time_str:
        minute_format = ":%M"
    else:
        minute_format = ""

    format_str = f"{hour_format}{minute_format}{ampm_format}"
    return datetime.strptime(time_str, format_str)
+
def time_hour(hour):
    """Return the ``datetime`` for the start of the given hour of the day.

    :param hour: hour of the day, anything whose text form ``%H`` accepts
                 (e.g. ``5`` or ``"17"``)
    :return: that hour on ``strptime``'s default date (1900-01-01)
    :rtype: datetime
    :raises ValueError: if the value is not a valid ``%H`` hour
    """
    return datetime.strptime(f"{hour}", "%H")
+
def hours(start_time, end_time):
    """Yield the clock-hour slots that overlap ``[start_time, end_time)``.

    Each item is a ``(slot_start, slot_end)`` pair of datetimes exactly one
    hour apart, aligned to the top of the hour.  The first slot is the hour
    containing ``start_time``; the last slot is the one containing the final
    instant before ``end_time`` (so a partially worked last hour is
    included).  Nothing is yielded when ``end_time <= start_time``.

    :param start_time: beginning of the period
    :type start_time: datetime
    :param end_time: end of the period (exclusive)
    :type end_time: datetime
    :return: generator of ``(datetime, datetime)`` pairs
    """
    one_hour = timedelta(hours=1)

    # Align to the top of the hour in which the period begins.  Stepping
    # with timedelta (instead of rebuilding from an hour number) keeps the
    # date part intact, so a period running past midnight also works.
    slot_start = start_time.replace(minute=0, second=0, microsecond=0)

    while slot_start < end_time:
        slot_end = slot_start + one_hour
        yield (slot_start, slot_end)
        slot_start = slot_end
+
@contextmanager
def opencsv(path_to_csv, lookahead_size=1024):
    """Open a CSV file and yield a ``csv.reader`` positioned on its data rows.

    The first ``lookahead_size`` characters are handed to ``csv.Sniffer`` to
    guess the dialect and whether the file starts with a header row; a
    detected header row is skipped.  The file is closed when the ``with``
    block that uses this context manager exits.

    :param path_to_csv: path of the CSV file to read
    :param lookahead_size: number of characters sampled for sniffing
    :return: context manager yielding a ``csv.reader``
    """
    with open(path_to_csv, newline="") as csvfile:
        sample = csvfile.read(lookahead_size)
        csvfile.seek(0)

        sniffer = csv.Sniffer()
        try:
            dialect = sniffer.sniff(sample)
            header = sniffer.has_header(sample)
        except csv.Error:
            # The sniffer gives up on empty files and on samples where no
            # delimiter can be determined; fall back to the default
            # comma-separated dialect and assume there is no header.
            dialect = csv.excel
            header = False

        reader = csv.reader(csvfile, dialect)

        if header:
            # skip header if it exists (the default guards against an
            # unexpectedly exhausted reader)
            next(reader, None)

        yield reader
+
def _overlap_minutes(first_start, first_end, second_start, second_end):
    """Return how many minutes two datetime intervals have in common."""
    shared = min(first_end, second_end) - max(first_start, second_start)
    return max(shared.total_seconds(), 0) / 60


def process_shifts(path_to_csv):
    """Compute the labour cost of every clock hour from a shifts CSV.

    Each data row is read as ``break_notes, end_time, pay_rate, start_time``
    (in that column order).  ``break_notes`` is a ``start-end`` pair of
    times; all times are parsed with ``parse_time``.  Times that appear to
    run backwards are assumed to be 12-hour values without an AM/PM marker
    and are shifted forward by twelve hours.

    :param path_to_csv: The path to the work_shift.csv
    :type path_to_csv: str
    :return: A dictionary with time as key (string) with format %H:%M
        (e.g. "18:00") and cost as value (Number)
        For example, it should be something like :
        {
            "17:00": 50,
            "22:00": 40,
        }
        In other words, for the hour beginning at 17:00, labour cost was
        50 pounds
    :rtype: dict
    :raises ValueError: if a row's shift or break times are inconsistent
        even after the twelve-hour adjustment
    """
    half_day = timedelta(hours=12)
    result = dict()

    with opencsv(path_to_csv) as reader:
        for row in reader:
            if not row:
                # csv.reader yields an empty list for a blank line
                continue

            break_notes, end_time, pay_rate, start_time = row
            break_start, break_end = map(parse_time, break_notes.split('-'))

            start_time = parse_time(start_time)
            end_time = parse_time(end_time)

            pay_rate = float(pay_rate)

            if not start_time < end_time:
                # shift passes over midday/midnight
                end_time = end_time + half_day

            if not start_time <= break_start:
                # break starts in PM
                break_start = break_start + half_day

            if not break_start < break_end:
                # break passes over midday/midnight
                break_end = break_end + half_day

            # Explicit checks rather than assert: asserts are stripped
            # when Python runs with -O.
            if not (start_time < end_time
                    and break_start < break_end
                    and start_time <= break_start
                    and break_end <= end_time):
                raise ValueError(f"inconsistent shift/break times in row {row!r}")

            for hour_start, hour_end in hours(start_time, end_time):
                # Minutes of this hour inside the shift, minus the minutes
                # of this hour inside the break.  The break lies within the
                # shift (checked above), so the difference is never negative.
                worked_minutes = _overlap_minutes(
                    start_time, end_time, hour_start, hour_end)
                break_minutes = _overlap_minutes(
                    break_start, break_end, hour_start, hour_end)
                paid_minutes = worked_minutes - break_minutes

                hour_str = datetime.strftime(hour_start, "%H:%M")
                hour_cost = pay_rate / 60 * paid_minutes
                result[hour_str] = result.get(hour_str, 0) + hour_cost

    return result
+
def process_sales(path_to_csv):
    """Sum the sales of every clock hour from a transactions CSV.

    Each data row is read as ``amount, time`` (in that column order); the
    time is parsed with ``parse_time`` and the amount is added to the total
    of the hour in which the sale happened.  Hours without any transaction
    do not appear in the result.

    :param path_to_csv: The path to the transactions.csv
    :type path_to_csv: str
    :return: A dictionary with the start of the hour as key (string, format
        %H:%M, e.g. "18:00") and the total sales of that hour as value
        (float)
        For example, it should be something like :
        {
            "17:00": 250.0,
            "18:00": 120.5,
        }
        This means, for the hour beginning at 17:00, the sales were 250.
    :rtype: dict
    """
    result = dict()

    with opencsv(path_to_csv) as reader:
        for row in reader:
            if not row:
                # csv.reader yields an empty list for a blank line
                continue

            amount, time = row
            sale_time = parse_time(time)

            # bucket by the hour the sale happened in
            hour_str = datetime.strftime(sale_time, "%H:00")
            result[hour_str] = result.get(hour_str, 0) + float(amount)

    return result
+
def compute_percentage(shifts, sales):
    """Express each hour's labour cost as a percentage of that hour's sales.

    :param shifts: labour cost per hour, keyed by "%H:%M" strings
    :type shifts: dict
    :param sales: sales per hour, keyed by "%H:%M" strings
    :type sales: dict
    :return: A dictionary with time as key (string) with format %H:%M and
        percentage of labour cost per sales as value (float),
        If the sales are null, then return -cost instead of percentage
        For example, it should be something like :
        {
            "17:00": 20,
            "22:00": -40,
        }
    :rtype: dict
    """
    result = dict()

    # things cannot be sold while there are no clerks working, so it should be
    # safe to use the keys from the shift calculations dict.
    for hour_str, cost in shifts.items():
        hour_sales = sales.get(hour_str)

        if not hour_sales:
            # no entry, an explicit None, or zero sales: no ratio can be
            # computed, so report the cost of the hour as a negative number
            result[hour_str] = - cost
        else:
            result[hour_str] = cost / hour_sales * 100

    return result
+
def best_and_worst_hour(percentages):
    """Pick the most and the least profitable hour.

    ``percentages`` holds two kinds of values (see ``compute_percentage``):
    a value >= 0 is labour cost as a percentage of sales, where lower is
    better; a negative value is ``-cost`` for an hour without sales, where a
    larger cost is worse.  Any hour without sales ranks below every hour
    with sales.

    Args:
        percentages: output of compute_percentage
    Return: list of strings, the first element should be the best hour,
        the second (and last) element should be the worst hour. Hour are
        represented by string with format %H:%M
        e.g. ["18:00", "20:00"]
    Raises:
        ValueError: if ``percentages`` is empty

    """
    if not percentages:
        raise ValueError("percentages must contain at least one hour")

    def badness(hour_str):
        # Sort key: hours with sales come first, ordered by rising
        # percentage; hours without sales follow, ordered by rising cost.
        value = percentages[hour_str]
        if value < 0:
            return (1, -value)
        return (0, value)

    best = min(percentages, key=badness)
    worst = max(percentages, key=badness)

    return [best, worst]
+
def main(path_to_shifts, path_to_sales):
    """
    Do not touch this function, but you can look at it, to have an idea of
    how your data should interact with each other

    :param path_to_shifts: path of the shifts CSV, given to process_shifts
    :param path_to_sales: path of the transactions CSV, given to
        process_sales
    :return: tuple ``(best_hour, worst_hour)`` as produced by
        best_and_worst_hour
    """

    shifts_processed = process_shifts(path_to_shifts)
    sales_processed = process_sales(path_to_sales)
    percentages = compute_percentage(shifts_processed, sales_processed)
    best_hour, worst_hour = best_and_worst_hour(percentages)
    return best_hour, worst_hour
+
if __name__ == '__main__':
    # You can change this to test your code, it will not be used
    # Script entry point: run the whole pipeline on the CSV files in the
    # current directory and print the best and the worst hour.
    path_to_sales = "./transactions.csv"
    path_to_shifts = "./work_shifts.csv"
    best_hour, worst_hour = main(path_to_shifts, path_to_sales)
    print(best_hour, worst_hour)
+
+# Please write your name here: Tom Harley