Wednesday, 15 May 2019

Jupyter Notebook Example - Historical currency exchange rates



Exchange Rates

In [6]:
# Import simple warning to ignore
import warnings
warnings.simplefilter('ignore', FutureWarning)

# import pandas
from pandas import *

# Import the plotting modules and set default format
import matplotlib
matplotlib.rcParams['axes.grid'] = True # show gridlines by default

# Show plots in the notepad
%matplotlib inline

import matplotlib.pyplot as plt
from matplotlib.ticker import (MultipleLocator, FormatStrFormatter,
                               AutoMinorLocator)
import matplotlib.dates as mdates

# Import LXML to read HTML and extract data
from lxml import html
import requests

# Import collections (to obtain the OrderedDict)
import collections
#import csv

# Import date time to handle the dates in the data
from datetime import datetime
In [7]:
# Constants
URLUSSTERLING= "https://www.federalreserve.gov/releases/h10/hist/dat00_uk.htm"
URLUSEURO="https://www.federalreserve.gov/releases/h10/hist/dat00_eu.htm"
DATE="Date"
RATE="Rate"
In [8]:
# Function definitions

# Obtain exchange rate from US Federal Reserve website (via supplied URL)
# Note: the direction of the exchange differs for each currency - Euro and GBP are the same direction
def get_US_exchange_rates(aURL):
    exchangerates=collections.OrderedDict()
# Get the page from the URL
    # print("Get page")
    page = requests.get(aURL)
# Make an HTML tree from the text
    # print("Make HTML page")
    tree = html.fromstring(page.content)
# The data is in a table of class="pubtables"
    r=tree.xpath("//table[@class='pubtables']/tr")
# Iterate over the selected XPATH elements
    # print("Iterate over xpath")
    for x in r:
        if x[1].text_content().strip()!="ND":
            exchangerates[datetime.strptime(x[0].text_content().strip(),"%d-%b-%y")]=float(x[1].text_content().strip())   
            # Note: the values need to be "floated" so the scaling works
    # print("Done")
    return exchangerates

# Functions to obtain scaling for Y axes, rounding up to the next 0.1, down to the next 0.1
def round_up_to_tenth(value):
    return float(int((value*10)+1))/10
def round_down_to_tenth(value):
    return float(int((value*10)-1))/10

# Get the max of the two values rounded up to the next 0.1
def round_up_to_tenth_max_of_two(value1,value2):
    result=round_up_to_tenth(value1)
    roundup2=round_up_to_tenth(value2)
    if result<roundup2:
        result=roundup2
    return result

# Get the max of the two values rounded down to the next 0.1
def round_down_to_tenth_max_of_two(value1,value2):
    result=round_down_to_tenth(value1)
    roundup2=round_down_to_tenth(value2)
    if result>roundup2:
        result=roundup2
    return result
    
In [9]:
# Get US dollar to Sterling rate
c=get_US_exchange_rates(URLUSSTERLING)
print("US rates found")
#c
US rates found
In [10]:
# Make a dataframe from the exchange rates (renaming the columns)
dfus=pandas.DataFrame([x,c[x]] for x in c)
dfus.rename(columns={0: DATE, 1: RATE}, inplace=True)
# Display the last five rows from the data frame
dfus.tail()
Out[10]:
Date Rate
4853 2019-05-06 1.3093
4854 2019-05-07 1.3052
4855 2019-05-08 1.3014
4856 2019-05-09 1.3013
4857 2019-05-10 1.3036
In [11]:
# Get the US dollar to Euro exchange rates
c=get_US_exchange_rates(URLUSEURO)
print("EU rates found")
#c
EU rates found
In [12]:
# Make a dataframe from the exchange rates (renaming the columns)
dfeu=pandas.DataFrame([x,c[x]] for x in c)
dfeu.rename(columns={0: DATE, 1: RATE}, inplace=True)
# Display the last five rows from the data frame (just to sanity check the values)
dfeu.tail()
Out[12]:
Date Rate
4854 2019-05-06 1.1197
4855 2019-05-07 1.1178
4856 2019-05-08 1.1206
4857 2019-05-09 1.1228
4858 2019-05-10 1.1241
In [13]:
# Merge the two dataframes (inner merge so there will be both exchange rates for each date)
df=merge(dfus,dfeu,on=DATE)
df.rename(columns={RATE+"_x": RATE+"_US",RATE+"_y": RATE+"_EU"}, inplace=True)
# Convert US to Euro to Euro to Sterling 
df[RATE+"_EU"]=df[RATE+"_US"]/df[RATE+"_EU"]

# Only include data from the beginning of 2016
df=df[df.Date>"01-Jan-2016"]
df.tail()
Out[13]:
Date Rate_US Rate_EU
4853 2019-05-06 1.3093 1.169331
4854 2019-05-07 1.3052 1.167651
4855 2019-05-08 1.3014 1.161342
4856 2019-05-09 1.3013 1.158978
4857 2019-05-10 1.3036 1.159683
In [14]:
# Plot exchange rates

# Set plot size
fig = plt.figure(figsize=(18, 10))

# Set the X axis format
majorLocator = MultipleLocator(100)
majorFormatter = FormatStrFormatter('%d')
years = mdates.YearLocator()   # every year
months = mdates.MonthLocator()  # every month
yearsFmt = mdates.DateFormatter('%Y')

ax = fig.add_subplot(111)
ax.format_xdata = matplotlib.dates.DateFormatter('%Y-%m-%d')

ax.xaxis.set_major_locator(years)
ax.xaxis.set_major_formatter(yearsFmt)
ax.xaxis.set_minor_locator(months)

# Get maximum and minimum values for the two two plots 
yminimum=round_down_to_tenth_max_of_two(min(df[RATE+"_US"]),min(df[RATE+"_EU"]))
ymaximum=round_up_to_tenth_max_of_two(max(df[RATE+"_US"]),max(df[RATE+"_EU"]))

# Set Y limits and ticks
plt.ylim(ymin=yminimum,ymax=ymaximum)
plt.yticks(np.arange(yminimum, ymaximum,0.1))

# Plot the two sets of exchange rates
ax.plot(df[DATE],df[RATE+"_US"],color='b')
ax.plot(df[DATE],df[RATE+"_EU"],color='r')

# Add the legend and titles
plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
           ncol=2, mode="expand", borderaxespad=0.)
plt.title('Exchange Rates')
plt.xlabel('Date')
plt.ylabel('Rate', multialignment='center')
fig.autofmt_xdate()

Tuesday, 26 March 2019

Jupyter Notebooks


One of the first courses I studied on FutureLearn was "Learn to Code for Data Analysis". It used the Jupyter Notebook to allow the manipulate large amounts of data.

It uses a paradigm similar to spreadsheets with macros/scripting languages but inverts the emphasis. Normally with a spreadsheet, you start with the sheet of data and that is what you see. You then run some code against the data and return to the sheet.

With Jupyter Notebooks you do not look at the (raw) data but manipulate it in the background, you do not even need to look at the raw data (except during the development process).

The language used to manipulate the data is Python. As it is Python you can use the same code that you would use in a stand alone program in your notebook. If you have ever had to think which language you are using (or the version of the language you are using - such as Visual Basic, VB.Net or VBA), this is a great help.

I am not going to cover the installation, there is an installation guide here: 

You can export the results into various forms, including static HTML pages.

A simple demonstration of the power of Jupyter Notebooks

In an earlier posting, I showed a simple Python webscraping function to obtain historical currency exhange rates from the US Federal Reserve. This used a number of standard Python libraries.

Now, if you wanted to undertake analysis of exchange rates over time, you could build upon that Python code, but Jupyter Notebooks offers  a simpler way of manipulating the data. You can just use the Python code and the associated libraries.

I exported the results of the Jupyter Notebook as HTML and copied it into a post on this blog.

If you want to try it yourself, just copy and paste the cell contents into cells on your own Jupyter Notebook.

References




Historical Currency Exchange Rates from the US Federal Reserve (Python)

Sometimes you want historical exchange rates, either to track the values over time, or the value at a particular time.

The US Federal Reserve has information on exchange rates, for example Sterling to US Dollars: https://www.federalreserve.gov/releases/h10/hist/dat00_uk.htm

Note that the direction of the exchange is not constant for all currencies.

Examining the structure of the web page showed the table  containing the data is identified by it having a class of "pubtables".

LXML is used to take the table data and allow it to be searched using XPATH.

The program outputs the data as a CSV file.

# Function to extract exchange rates from US Federal Reserve
from lxml import html
import requests
import collections
import csv
from datetime import datetime

url= "https://www.federalreserve.gov/releases/h10/hist/dat00_uk.htm"

def get_US_exchange_rates(aURL):
    exchangerates=collections.OrderedDict()
# Get the page from the URL
    print("Get page")
    page = requests.get(aURL)
# Make an HTML tree from the text
    print("Make HTML page")
    tree = html.fromstring(page.content)
# The data is in a table of class="pubtables"
    r=tree.xpath("//table[@class='pubtables']/tr")
    print("Iterate over xpath")
    for x in r:
        if x[1].text_content().strip()!="ND":
            exchangerates[datetime.strptime(x[0].text_content().strip(),"%d-%b-%y")]=float(x[1].text_content().strip() )  
    print("Done")
    return exchangerates


# Testing code 
if __name__ == "__main__":
    print ('Exchange Rates:')
    if True:
        c=get_US_exchange_rates(url)
        with open('exchange.csv', 'w') as csvfile:
            exchwriter=csv.writer(csvfile,delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
            for x in c:
                print(x,c[x])
                exchwriter.writerow([x,c[x]])