# Setup

In [None]:
#@title Install stravalib
# NOTE(review): the version is not pinned, so a fresh runtime may install a
# stravalib release whose API differs from the one this notebook was written
# against — consider pinning a known-good version.
!pip install stravalib


In [None]:
#@title Imports

import warnings
warnings.filterwarnings("ignore")

import pandas as pd
import numpy as np
import stravalib.client
from matplotlib import pyplot as plt
import matplotlib as mpl
import seaborn as sns

sns.set()

In [None]:
#@title Unit testing
import unittest

def RunTest(test_case_class):
  """Runs all test methods of a `unittest.TestCase` subclass inline.

  The results are reported by a `unittest.TextTestRunner`.

  Args:
    test_case_class: the `unittest.TestCase` subclass whose tests to run.
  """
  # `unittest.makeSuite` is deprecated and removed in Python 3.13; the loader
  # API builds the same suite.
  suite = unittest.defaultTestLoader.loadTestsFromTestCase(test_case_class)
  runner = unittest.TextTestRunner(verbosity=1)
  runner.run(suite)

In [None]:
#@title Strava layer

class Strava(object):
  """Thin wrapper around `stravalib.client.Client` used by this notebook.

  Holds the API application credentials, performs the authorization-code
  exchange and exposes the activities as a pandas data frame.
  """

  def __init__(self, client_id, client_secret):
    """Stores the credentials and creates a stravalib client.

    Args:
      client_id: client id of the Strava API application.
      client_secret: client secret of the Strava API application.
    """
    self.client = stravalib.client.Client()
    self.client_id = client_id
    self.client_secret = client_secret

  @property
  def authorization_url(self):
    """URL the user has to visit to authorize the application.

    The redirect URI is `http://localhost`; the caller is expected to copy
    the authorization code from the URL the browser is redirected to.
    """
    return self.client.authorization_url(
        client_id=self.client_id,
        redirect_uri='http://localhost')

  def exchange_code_for_token(self, code):
    """Exchanges an authorization code for an access token.

    The value returned by the client is kept in `self.access_token`.

    Args:
      code: the authorization code obtained via `authorization_url`.
    """
    self.access_token = self.client.exchange_code_for_token(
        self.client_id, self.client_secret, code)

  def get_raw_activities(self, limit=1000000):
    """Returns the activities as delivered by the stravalib client.

    Args:
      limit: maximum number of activities to request. The default is large
        enough to mean "everything" in practice.
    """
    return self.client.get_activities(limit=limit)

  def get_raw_activities_as_dataframe(self):
    """Returns the raw activities as a data frame, one row per activity.

    Each activity is converted with its `to_dict()` method.
    """
    return pd.DataFrame.from_records(
        [activity.to_dict() for activity in self.get_raw_activities()])


# Extract

In [None]:
#@title Get Strava authorization code

# Credentials of the Strava API application, entered through the Colab form.
# Keep them empty in any copy of the notebook that is shared or committed:
# the client secret must not be published.
STRAVA_CLIENT_ID = "" #@param {type: "string"}
STRAVA_CLIENT_SECRET = "" #@param {type: "string"}

strava = Strava(client_id=STRAVA_CLIENT_ID,
                client_secret=STRAVA_CLIENT_SECRET)

# Show the authorization URL; the user copies the code from the URL the
# browser is redirected to.
print("""Go to

%s

Copy the code from the URL in the browser after having been
redirected from the Strava authorization page.
""" %
strava.authorization_url)

# Blocks until the user has pasted the authorization code.
CODE = input()

# Trade the authorization code for an access token.
strava.exchange_code_for_token(CODE)

In [None]:
#@title Download Strava activities
# Fetch the activities through the `strava` object created in the
# authorization cell and keep them, unmodified, as a data frame.
_raw_activities = strava.get_raw_activities_as_dataframe()

# Transform and load

## Functions

In [None]:
#@title add_activity_id

import hashlib

def add_activity_id():
  """Returns an operator that adds an `activity_id` column to a data frame.

  The id is the SHA-1 hex digest of the decimal string of the integer `value`
  of the activity's `start_date` timestamp. The operator returns a copy; the
  input frame is left untouched.
  """
  def _digest(timestamp):
    as_text = str(timestamp.value)
    return hashlib.sha1(as_text.encode('utf-8')).hexdigest()

  def op(df):
    start_dates = pd.to_datetime(df['start_date'])
    with_id = df.copy()
    with_id['activity_id'] = start_dates.apply(_digest)
    return with_id

  return op

In [None]:
#@title activities_table

def activities_table():
  """Returns an operator that turns raw activities into a tidy table.

  Only activities whose `type` is 'Run' are kept. Rows are sorted by
  `start_date` and the index is reset.

  column            type                description
  ----------------------------------------------------------------
  activity_id       string              identifies the activity
  start_date        pandas.Timestamp    start time of the activity
  name              string              name of the activity
  type              string              activity type (always 'Run')
  distance          float64             distance in kilometers
  moving_time       float64             time spent moving, in hours
  elapsed_time      float64             duration of activity, in hours
  elevation_gain    int64               elevation gain in meters (rounded)
  """

  def select(df):
    return df[[
        'activity_id',
        'start_date',
        'name',
        'type',
        'distance',
        'moving_time',
        'elapsed_time',
        'total_elevation_gain',
    ]]

  def rename(df):
    return df.rename({
        'total_elevation_gain': 'elevation_gain',
    }, axis=1)

  def to_hours(durations):
    # Vectorized equivalent of calling `total_seconds()` on every element.
    return pd.to_timedelta(durations).dt.total_seconds() / 3600.

  def convert(df):
    # `assign` returns a new frame, so the input is not modified.
    return df.assign(
        distance=df['distance'] / 1000.,
        elevation_gain=df['elevation_gain'].round().astype(int),
        start_date=pd.to_datetime(df['start_date']),
        moving_time=to_hours(df['moving_time']),
        elapsed_time=to_hours(df['elapsed_time']))

  def where(df):
    return df[df['type'] == 'Run']

  def sort(df):
    return df.sort_values('start_date', ascending=True)

  def build(df):
    return (df
            .pipe(add_activity_id())
            .pipe(select)
            .pipe(rename)
            .pipe(convert)
            .pipe(sort)
            .pipe(where)
            .reset_index(drop=True))

  return build

In [None]:
#@title align_to_period

def align_to_period(period_range : pd.PeriodIndex):
  """Returns an operator that aligns a data frame to a period range.

  The operator expects a `period` column. The result takes its periods from
  `period_range`: rows whose period is not in the range are dropped, and
  periods of the range without a row appear with missing values.

  Args:
    period_range: the periods the result should contain. It is not modified.
  """
  def op(df):
    # A column-less frame indexed by the range: only its index matters.
    target = period_range.to_frame()[[]]
    aligned = df.set_index('period').align(
        other=target,
        join='right',
        axis=0)[0]
    # `rename_axis` returns a new object. Renaming `aligned.index` in place
    # could rename the caller's `period_range` as well, because the
    # right-joined frame may share that very index object.
    return aligned.rename_axis('period').reset_index()
  return op


class align_to_period_test(unittest.TestCase):
  """Checks `align_to_period` on quarterly data."""

  @staticmethod
  def _frame(rows):
    """Builds a (period, value) frame from `(quarter, value)` pairs."""
    return pd.DataFrame.from_records(
        [(pd.Period(quarter), value) for quarter, value in rows],
        columns=['period', 'value'])

  @staticmethod
  def _quarters(first, last):
    """Quarterly period range from `first` to `last`, both inclusive."""
    return pd.period_range(
        start=pd.Period(first),
        end=pd.Period(last),
        freq='Q',
    )

  def test_fills_missing_periods_in_dataframe(self):
    # Arrange
    given = self._frame([('2019Q2', 2), ('2019Q4', 4)])
    wanted = self._frame([
        ('2019Q1', None),
        ('2019Q2', 2),
        ('2019Q3', None),
        ('2019Q4', 4),
    ])
    # Act
    got = given.pipe(align_to_period(self._quarters('2019Q1', '2019Q4')))
    # Assert
    assert got.equals(wanted)

  def test_drops_periods_in_dataframe(self):
    # Arrange
    given = self._frame([
        ('2019Q1', 1),
        ('2019Q2', 2),
        ('2019Q3', 3),
        ('2019Q4', 4),
    ])
    wanted = self._frame([('2019Q2', 2), ('2019Q3', 3)])
    # Act
    got = given.pipe(align_to_period(self._quarters('2019Q2', '2019Q3')))
    # Assert
    assert got.equals(wanted)

RunTest(align_to_period_test)

In [None]:
#@title densify

def densify():
  """Returns an operator that fills the gaps between a frame's periods.

  The frame is aligned to the period range that runs from the smallest to
  the largest value of its `period` column, at that column's frequency. An
  empty frame is returned as is.
  """
  def op(df):
    if not len(df):
      return df
    periods = df['period']
    full_range = pd.period_range(
        start=periods.min(),
        end=periods.max(),
        freq=pd.PeriodIndex(periods).freq,
        name='period')
    return align_to_period(full_range)(df)
  return op


class densify_test(unittest.TestCase):
  """Checks `densify` on a frame with a gap and on an empty frame."""

  COLUMNS = ['period', 'value']

  def _frame(self, rows):
    """Builds a (period, value) frame from `(quarter, value)` pairs."""
    return pd.DataFrame.from_records(
        [(pd.Period(quarter), value) for quarter, value in rows],
        columns=self.COLUMNS)

  def test_densify(self):
    # Arrange
    given = self._frame([('2019Q2', 2), ('2019Q4', 4)])
    wanted = self._frame([
        ('2019Q2', 2),
        ('2019Q3', None),
        ('2019Q4', 4),
    ])
    # Act
    got = given.pipe(densify())
    # Assert
    assert got.equals(wanted)

  def test_densify_empty(self):
    # Arrange
    empty = pd.DataFrame(columns=['period', 'value'])
    # Act
    got = empty.pipe(densify())
    # Assert
    assert got.equals(empty)

RunTest(densify_test)


In [None]:
#@title fillna

def fillna():
  """Fill all N/A values with the respective zero element.

  Returns an operator that replaces missing values in the `moving_time`,
  `elapsed_time`, `distance` and `elevation_gain` columns with 0. Other
  columns are left as they are, and the input frame is not modified. A
  missing metric column raises `KeyError`.
  """
  metric_columns = ('moving_time', 'elapsed_time', 'distance',
                    'elevation_gain')

  def operator(df):
    # Assign the filled columns back instead of calling
    # `df[col].fillna(0, inplace=True)`: that chained in-place form acts on a
    # temporary Series and leaves the frame unchanged under pandas
    # Copy-on-Write.
    return df.assign(**{
        column: df[column].fillna(0) for column in metric_columns})
  return operator


class fillna_test(unittest.TestCase):
  """Checks `fillna` on a frame with a missing row and on an empty frame."""

  def test_fillna(self):
    # Arrange
    inp = pd.DataFrame.from_records(
        [
         (pd.Period('2019Q2'), 24200., 232., 2., 3.,),
         (pd.Period('2019Q3'), None, None, None, None,),
         (pd.Period('2019Q4'), 15999., 843., 1., 2.5,),
        ],
        columns=['period', 'distance', 'elevation_gain', 'moving_time',
                 'elapsed_time'])
    expected = pd.DataFrame.from_records(
        [
         (pd.Period('2019Q2'), 24200., 232., 2., 3.,),
         (pd.Period('2019Q3'), 0., 0., 0., 0.,),
         (pd.Period('2019Q4'), 15999., 843., 1., 2.5,),
        ],
        columns=['period', 'distance', 'elevation_gain', 'moving_time',
                 'elapsed_time'])
    # Act
    actual = inp.pipe(fillna())
    # Assert
    assert actual.equals(expected)

  def test_fillna_empty(self):
    # Arrange
    expected = pd.DataFrame(columns=['period', 'distance', 'elevation_gain',
                                     'moving_time', 'elapsed_time'])
    # Act
    # This test used to call `densify()`, so `fillna` was never exercised on
    # an empty frame.
    actual = expected.pipe(fillna())
    # Assert
    assert actual.equals(expected)


RunTest(fillna_test)


In [None]:
#@title aggregate_over_periods

def aggregate_over_periods(freq):
  """Aggregates activities over periods at a given frequency.

  Returns an operator that expects the columns `start_date`, `distance`,
  `elevation_gain`, `moving_time` and `elapsed_time`. It returns one row per
  period, sorted by period, with the metrics summed. Periods without
  activities are present with all metrics set to 0.

  Args:
    freq: pandas frequency alias of the periods, e.g. 'M' or 'Y'.

  column            content
  ------------------------------------------------------
  period            pandas.Period
  distance          sum of the input `distance`
  moving_time       sum of the input `moving_time`
  elapsed_time      sum of the input `elapsed_time`
  elevation_gain    sum of the input `elevation_gain`

  The sums keep the units of the input columns.
  """
  metrics = ('distance', 'elevation_gain', 'moving_time', 'elapsed_time')

  def by_period(df):
    # The string 'sum' selects pandas' own reduction. Passing `np.sum` is
    # deprecated in recent pandas releases.
    # NOTE(review): `kind=` is itself deprecated in recent pandas — TODO
    # confirm against the pinned version and migrate.
    return (df
            .set_index('start_date')
            .resample(freq, kind='period')
            .agg({metric: 'sum' for metric in metrics})
            .rename_axis('period'))

  def sort(df):
    return df.sort_values('period', ascending=True)

  def aggregate(df):
    return (df
            .pipe(by_period)
            .reset_index()
            .pipe(densify())
            .pipe(fillna())
            .pipe(sort)
            .reset_index(drop=True))

  return aggregate
  


class aggregate_over_periods_test(unittest.TestCase):
  """Checks monthly aggregation of three activities in two months."""

  METRICS = ['distance', 'elevation_gain', 'moving_time', 'elapsed_time']

  def test_aggregate_over_periods(self):
    # Arrange
    activities = pd.DataFrame.from_records(
        [
         (pd.Timestamp('2019-01-03 13:01:00 UTC'), 24200.0, 232.0, 2., 3.,),
         (pd.Timestamp('2019-01-04 17:31:00 UTC'), 10000.0, 100.0, 1., 2.,),
         (pd.Timestamp('2019-02-12 09:22:00 UTC'),  5000.0,  20.0, 0.5, 0.75,),
        ],
        columns=['start_date'] + self.METRICS)
    wanted = pd.DataFrame.from_records(
        [
         (pd.Period('2019-01'), 34200.0, 332.0, 3., 5.,),
         (pd.Period('2019-02'),  5000.0,  20.0, 0.5, 0.75,),
        ],
        columns=['period'] + self.METRICS)
    # Act
    monthly = aggregate_over_periods('M')
    got = monthly(activities)
    # Assert
    assert got.equals(wanted)


RunTest(aggregate_over_periods_test)

## Run

In [None]:
#@title Build activities (tidy data) table

# Derive the tidy table from the downloaded raw activities. The raw frame is
# kept untouched, so this cell can be re-run without downloading again.
_activities = _raw_activities.pipe(activities_table())

# Show the last rows as a quick sanity check.
_activities.tail()

# Report

## Functions

In [None]:
#@title plot_totals_over_time

def _time_frame_bounds(time_frame, now, activity_periods):
  """Resolves a time-frame name to the `(start, end)` pair it covers.

  Args:
    time_frame: name of the time frame, e.g. "all data", "this quarter" or
      "last 30 days".
    now: `pandas.Timestamp` the relative time frames are anchored to.
    activity_periods: aggregated activities with a `period` column; only
      consulted for "all data".

  Returns:
    A `(start, end)` tuple suitable for `pandas.period_range`.

  Raises:
    ValueError: if `time_frame` is not a known name.
  """
  one_day = pd.DateOffset(days=1)
  year_start = pd.Timestamp(year=now.year, month=1, day=1)
  month_start = pd.Timestamp(year=now.year, month=now.month, day=1)
  # First month of the quarter that contains `now`: 1, 4, 7 or 10.
  quarter_start = pd.Timestamp(
      year=now.year,
      month=3 * ((now.month - 1) // 3) + 1,
      day=1)
  last_n_days = {
      "last 30 days": 30,
      "last 365 days": 365,
      "last 730 days": 730,
      "last 1095 days": 1095,
  }

  if time_frame == "all data":
    return (activity_periods['period'].min(),
            activity_periods['period'].max())
  if time_frame == "year-to-date":
    return year_start, now
  if time_frame == "month-to-date":
    return month_start, now
  if time_frame == "this year":
    return year_start, year_start + pd.DateOffset(years=1) - one_day
  if time_frame == "this quarter":
    return quarter_start, quarter_start + pd.DateOffset(months=3) - one_day
  if time_frame == "this month":
    return month_start, month_start + pd.DateOffset(months=1) - one_day
  if time_frame in last_n_days:
    return now - pd.Timedelta(days=last_n_days[time_frame]), now
  raise ValueError("time frame not implemented: %r" % (time_frame,))


def plot_totals_over_time(activities,
                          metric,
                          time_frame,
                          granularity):
  """Plots the totals of a metric per period as a bar chart.

  Args:
    activities: tidy activities table.
    metric: one of 'distance', 'elevation_gain', 'elapsed_time' and
      'moving_time'.
    time_frame: name of the time frame to plot, e.g. "all data",
      "year-to-date", "this quarter" or "last 30 days".
    granularity: one of 'years', 'quarters', 'months', 'weeks' and 'days'.

  Returns:
    The matplotlib axes the chart was drawn on.

  Raises:
    KeyError: if `granularity` or `metric` is unknown.
    ValueError: if `time_frame` is unknown.
  """
  # Convert the user selected granularity into something that pandas
  # understands.
  freq = {
      'years': 'Y',
      'quarters': 'Q',
      'months': 'M',
      'weeks': 'W',
      'days': 'D',
    }[granularity]

  # Determine the start and the end of the time frame based on the user
  # selection.
  activity_periods = activities.pipe(aggregate_over_periods(freq=freq))
  start, end = _time_frame_bounds(
      time_frame, pd.Timestamp.now(), activity_periods)

  ax = (activity_periods
        .pipe(align_to_period(
            pd.period_range(
                start=start,
                end=end,
                freq=freq)))
        .set_index('period')[metric]
        .plot(kind='bar'))

  ax.set_title({
      'distance': 'distance',
      'elevation_gain': 'elevation gain',
      'elapsed_time': 'elapsed time',
      'moving_time': 'moving time',
    }[metric])
  ax.set_ylabel({
      'distance': 'kilometers',
      'elevation_gain': 'meters',
      'elapsed_time': 'hours',
      'moving_time': 'hours',
    }[metric])
  ax.set_xlabel('')
  return ax



In [None]:
#@title plot_progress_towards_annual_goal

def plot_progress_towards_annual_goal(activities,
                                      metric,
                                      annual_target,
                                      figsize):
  """Plots this year's total of a metric against an annual target.

  The chart shows one horizontal bar with the current year's total, a red
  line at the annual target and an orange line at the share of the target
  that corresponds to the part of the year that has already elapsed.

  Args:
    activities: tidy activities table.
    metric: one of 'distance', 'elevation_gain', 'elapsed_time' and
      'moving_time'.
    annual_target: target value for the whole year, in the metric's unit.
    figsize: `(width, height)` of the figure.

  Returns:
    The matplotlib axes the chart was drawn on.
  """
  now = pd.Timestamp.now()
  year_start = pd.Timestamp(
      year=now.year,
      month=1,
      day=1)
  next_year_start = year_start + pd.DateOffset(years=1)
  year_end = next_year_start - pd.DateOffset(days=1)

  ax = (activities
        .pipe(aggregate_over_periods(freq='Y'))
        .pipe(align_to_period(
            pd.period_range(
                start=year_start,
                end=year_end,
                freq='Y')))[[metric]]
        .rename({0: "current"}, axis=0)
        .sort_index(ascending=False)
        .plot.barh(figsize=figsize, legend=False))

  # Measure the elapsed share against the full length of the year. Dividing
  # by `year_end - year_start` would leave out December 31st and push the
  # marker past the target on the last day of the year.
  elapsed_share = (now - year_start) / (next_year_start - year_start)
  # Draw on the axes that were just created rather than on whatever pyplot
  # currently considers the active axes.
  ax.axvline(x=elapsed_share * annual_target, color='orange')
  ax.axvline(x=annual_target, color='red')

  ax.set_title({
      'distance': 'distance',
      'elevation_gain': 'elevation gain',
      'elapsed_time': 'elapsed time',
      'moving_time': 'moving time',
    }[metric])
  ax.set_xlabel({
      'distance': 'kilometers',
      'elevation_gain': 'meters',
      'elapsed_time': 'hours',
      'moving_time': 'hours',
    }[metric])
  ax.set_ylabel('')
  return ax
  

## Totals over time

This report allows you to plot the totals for a particular **metric** over a particular **time frame** at a specified **granularity**.

In [None]:
# Report parameters, selected through the Colab form.
METRIC = "elevation_gain" #@param ["distance", "elevation_gain", "moving_time", "elapsed_time"]
TIME_FRAME = "all data" #@param ["all data", "year-to-date", "month-to-date", "this year", "this quarter", "this month", "last 30 days", "last 365 days", "last 730 days", "last 1095 days"]
GRANULARITY = "years" #@param ["years", "quarters", "months", "weeks", "days"]
WIDTH_SCALING_FACTOR = 1 #@param {type: "number"}

# Base figure width; the report cells further down reuse `_width` as well.
# NOTE(review): the constants are unexplained — this looks like a
# pixel-to-inch conversion for the notebook's output width; confirm.
_width = 700.0 / (936.0 / 13.0)
plt.figure(figsize=(WIDTH_SCALING_FACTOR * _width, 3));
plot_totals_over_time(_activities,
                      metric=METRIC,
                      time_frame=TIME_FRAME,
                      granularity=GRANULARITY);
plt.show();


## Progress towards goals

This report allows you to compare your progress so far this year against your annual goals.

In [None]:
#@title Annual goal: distance

# Goal and figure scaling, selected through the Colab form.
ANNUAL_DISTANCE_IN_KILOMETERS = 750 #@param {type: "number"}
WIDTH_SCALING_FACTOR = 1 #@param {type: "number"}

# `_width` is defined in the "Totals over time" cell, which has to be run
# before this one.
plot_progress_towards_annual_goal(
    activities=_activities,
    metric='distance',
    annual_target=ANNUAL_DISTANCE_IN_KILOMETERS,
    figsize=(WIDTH_SCALING_FACTOR * _width, 2));
plt.show();


In [None]:
#@title Annual goal: elevation gain

# Goal, selected through the Colab form.
ANNUAL_ELEVATION_GAIN_IN_METERS = 24000 #@param {type: "number"}

# `WIDTH_SCALING_FACTOR` and `_width` are not set in this cell: they come
# from the report cells above, which have to be run first.
plot_progress_towards_annual_goal(
    activities=_activities,
    metric='elevation_gain',
    annual_target=ANNUAL_ELEVATION_GAIN_IN_METERS,
    figsize=(WIDTH_SCALING_FACTOR * _width, 2));
plt.show();

In [None]:
#@title plot_sarima_forecast

from statsmodels.tsa.statespace.sarimax import SARIMAX
from random import random

def plot_sarima_forecast(activities,
                         metric,
                         months_to_forecast,
                         months_to_plot,
                         figsize,
                         horizontal):
  """Fits a SARIMA model to monthly totals and plots actuals and forecast.

  The model uses order (1, 1, 0) and seasonal order (1, 1, 0, 12) and is
  fitted to the monthly totals of `metric`. The chart shows the last
  `months_to_plot` rows of the actual values followed by the forecast.

  Args:
    activities: tidy activities table.
    metric: one of 'distance', 'elevation_gain', 'elapsed_time' and
      'moving_time'.
    months_to_forecast: number of months to predict past the last actual one.
    months_to_plot: number of trailing rows (actual and predicted) to show.
    figsize: `(width, height)` of the figure.
    horizontal: if true, the months run along the x axis (vertical bars);
      otherwise the months run along the y axis (horizontal bars) in
      reversed row order.
  """
  title = {
      'distance': 'distance',
      'elevation_gain': 'elevation gain',
      'elapsed_time': 'elapsed time',
      'moving_time': 'moving time',
    }[metric]
  unit = {
      'distance': 'kilometers',
      'elevation_gain': 'meters',
      'elapsed_time': 'hours',
      'moving_time': 'hours',
    }[metric]

  actual = (activities
                .pipe(aggregate_over_periods('M'))
                .set_index('period')[[metric]]
                .rename({metric: 'actual'}, axis=1))

  model = SARIMAX(
      actual['actual'],
      order=(1,1,0),
      seasonal_order=(1,1,0,12))

  model_fit = model.fit(disp=True)

  # Name the Series before turning it into a frame. Renaming column `0`
  # afterwards only works when the prediction is unnamed and is silently a
  # no-op when the Series already carries a name.
  predicted = (model_fit
          .predict(start=len(actual),
                   end=len(actual) + months_to_forecast - 1)
          .rename('predicted')
          .to_frame())

  shown = pd.concat([actual, predicted]).tail(months_to_plot)
  if horizontal:
    # Vertical bars: the values, and therefore the unit, are on the y axis.
    ax = shown.plot.bar(figsize=figsize)
    ax.set_ylabel(unit)
    ax.set_xlabel('')
  else:
    # Horizontal bars: the values, and therefore the unit, are on the x axis.
    ax = shown.iloc[::-1].plot.barh(figsize=figsize)
    ax.set_xlabel(unit)
    ax.set_ylabel('')

  ax.set_title(title)


In [None]:
#@title SARIMA forecast
# Report parameters, selected through the Colab form.
# NOTE(review): `METRIC` is also set by the "Totals over time" cell; running
# this cell overwrites that value.
METRIC = "distance" #@param ["distance", "elevation_gain", "moving_time", "elapsed_time"]
MONTHS_TO_FORECAST = 3 #@param {type: "number"}
MONTHS_TO_PLOT = 6 #@param {type: "number"}
SCALING_FACTOR = 1 #@param {type: "number"}
HORIZONTAL = True #@param {type: "boolean"}

# The figure is wide when HORIZONTAL is set and tall otherwise. `_width` is
# defined in the "Totals over time" cell, which has to be run first.
plot_sarima_forecast(activities=_activities,
                     metric=METRIC,
                     months_to_forecast=MONTHS_TO_FORECAST,
                     months_to_plot=MONTHS_TO_PLOT,
                     figsize=(SCALING_FACTOR * _width, 5)
                              if HORIZONTAL
                              else (5, SCALING_FACTOR * _width),
                     horizontal=HORIZONTAL)