{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Cases in London\n",
"\n",
"The graphs below use the data from . Daily cases per region is not available, but they do provide the raw data in . Note that the data are now shown by the date the specimen was taken from the person being tested. This gives a much more useful analysis of the progression of cases over time. It does mean that the latest days’ figures are always incomplete, and **only data from 5 days or more ago can be considered complete**. Because of that, in the graphs below, we only use data from 5 days ago or previous."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import datetime\n",
"import gzip\n",
"import io\n",
"import time\n",
"\n",
"import pandas as pd\n",
"import plotly.graph_objs as go\n",
"from plotly.subplots import make_subplots\n",
"import requests"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"# Parameters\n",
"secondary_axis = False"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"pd.options.plotting.backend = \"plotly\""
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"content = requests.get(\"https://coronavirus.data.gov.uk/downloads/csv/coronavirus-cases_latest.csv\").content\n",
"with gzip.open(f\"coronavirus-cases-{round(time.time())}.csv.gz\", \"wb\") as fp:\n",
" fp.write(content)\n",
"latest = io.StringIO(content.decode(\"utf-8\"))\n",
"df = pd.read_csv(latest)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"df[\"Specimen date\"] = pd.to_datetime(df[\"Specimen date\"])\n",
"most_recent_date = df[\"Specimen date\"].max()\n",
"#df = df[df[\"Specimen date\"] < (most_recent_date - datetime.timedelta(days=5))]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Lastest data available: 2020-08-25 00:00:00\n",
"Last update: 2020-08-25 19:32:56.693127\n"
]
}
],
"source": [
"print(\"Lastest data available:\", most_recent_date)\n",
"print(\"Last update:\", datetime.datetime.utcnow())"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [],
"source": [
"def create_region_df(df, region):\n",
" df = df[df[\"Area name\"] == region]\n",
" df = df.rename(columns={\"Daily lab-confirmed cases\": \"Daily\"})\n",
" df = df[[\"Specimen date\", \"Daily\"]]\n",
" df = df.groupby(\"Specimen date\").max()\n",
"\n",
" df[\"Cummulative\"] = df[\"Daily\"].cumsum()\n",
"\n",
" return df\n",
"\n",
"\n",
"def plot(df, regions):\n",
" fig = make_subplots(\n",
" cols=1,\n",
" rows=len(regions),\n",
" shared_xaxes=True,\n",
" specs=[[{\"secondary_y\": True}]] * len(regions) if secondary_axis else None,\n",
" subplot_titles=regions,\n",
" vertical_spacing=0.02\n",
" )\n",
" for i, region in enumerate(regions):\n",
" region_df = create_region_df(df, region)\n",
"\n",
" fig.add_trace(\n",
" go.Bar(\n",
" x=region_df.index,\n",
" y=region_df.Daily,\n",
" name=\"Daily\",\n",
" legendgroup=\"Daily\",\n",
" marker={\"color\": \"blue\"},\n",
" showlegend=True if i == 0 else False,\n",
" ),\n",
" row=i + 1,\n",
" col=1\n",
" )\n",
" daily_resample = region_df.resample(\"D\").sum()\n",
" rolling = daily_resample.Daily.rolling(20).sum()\n",
" fig.add_trace(\n",
" go.Scatter(\n",
" x=daily_resample.index,\n",
" y=rolling,\n",
" name=\"Rolling sum (20 days)\",\n",
" legendgroup=\"Rolling\",\n",
" mode=\"lines+markers\",\n",
" marker={\"color\": \"green\"},\n",
" showlegend=True if i == 0 else False,\n",
" ),\n",
" secondary_y=secondary_axis,\n",
" row=i + 1,\n",
" col=1\n",
" )\n",
"\n",
" first_good_date = most_recent_date - datetime.timedelta(days=5)\n",
" fig.update_layout(shapes=[\n",
" {\"type\": \"line\", \"yref\": \"paper\", \"y0\": 0, \"y1\": 1, \"xref\": \"x\", \"x0\": first_good_date, \"x1\": first_good_date, \"line\": {\"dash\": \"dot\", \"color\": \"DarkRed\"}}\n",
" ])\n",
" fig.update_yaxes(title_text=\"Daily Cases\" if secondary_axis else \"Daily Cases & Rolling sum\")\n",
" if secondary_axis:\n",
" fig.update_yaxes(title_text=\"Rolling sum\", secondary_y=True)\n",
" fig.update_layout(height=1800)\n",
" fig.show()\n",
"\n",
"\n",
"def plot_cum(df, regions):\n",
" fig = make_subplots(\n",
" cols=1,\n",
" rows=len(regions),\n",
" shared_xaxes=True,\n",
" subplot_titles=regions,\n",
" vertical_spacing=0.02,\n",
" )\n",
" for i, region in enumerate(regions):\n",
" region_df = create_region_df(df, region)\n",
"\n",
" fig.add_trace(\n",
" go.Scatter(\n",
" x=region_df.index,\n",
" y=region_df.Cummulative,\n",
" name=\"Cummulative\",\n",
" legendgroup=\"Cummulative\",\n",
" mode=\"lines+markers\",\n",
" marker={\"color\": \"red\"},\n",
" showlegend=True if i == 0 else False,\n",
" ),\n",
" row=i + 1,\n",
" col=1\n",
" )\n",
"\n",
" first_good_date = most_recent_date - datetime.timedelta(days=5)\n",
" fig.update_layout(shapes=[\n",
" {\"type\": \"line\", \"yref\": \"paper\", \"y0\": 0, \"y1\": 1, \"xref\": \"x\", \"x0\": first_good_date, \"x1\": first_good_date, \"line\": {\"dash\": \"dot\", \"color\": \"DarkRed\"}}\n",
" ])\n",
" fig.update_yaxes(title_text=\"Cummulative Cases\")\n",
" fig.update_layout(height=1800)\n",
" fig.show()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"