# %% [markdown]
# # Cases in London
#
# The graphs below use the data from the UK government coronavirus dashboard
# (https://coronavirus.data.gov.uk/). Daily cases per region are not available
# there, but the raw data is provided as a CSV download
# (`coronavirus-cases_latest.csv`). Note that the data are now shown by the date
# the specimen was taken from the person being tested. This gives a much more
# useful analysis of the progression of cases over time. It does mean that the
# latest days' figures are always incomplete, and **only data from 5 days or
# more ago can be considered complete**. Because of that, the graphs below mark
# the point 5 days before the latest specimen date with a dotted vertical line;
# figures to the right of that line are still incomplete.

# %%
import datetime
import gzip
import io
import time

import pandas as pd
import plotly.graph_objs as go
from plotly.subplots import make_subplots
import requests

# %% tags=["parameters"]
# Parameters
secondary_axis = False

# %%
pd.options.plotting.backend = "plotly"

# %%
CASES_CSV_URL = "https://coronavirus.data.gov.uk/downloads/csv/coronavirus-cases_latest.csv"
REQUEST_TIMEOUT_SECONDS = 60

# Fail loudly on a hung connection or an HTTP error instead of archiving and
# then trying to parse an error page as if it were the CSV.
response = requests.get(CASES_CSV_URL, timeout=REQUEST_TIMEOUT_SECONDS)
response.raise_for_status()
content = response.content

# Keep a timestamped, gzip-compressed copy of exactly what was downloaded.
with gzip.open(f"coronavirus-cases-{round(time.time())}.csv.gz", "wb") as fp:
    fp.write(content)

latest = io.StringIO(content.decode("utf-8"))
df = pd.read_csv(latest)

# %%
df["Specimen date"] = pd.to_datetime(df["Specimen date"])
most_recent_date = df["Specimen date"].max()
# The most recent (still incomplete) days are deliberately NOT dropped here:
# the plotting functions draw a marker line 5 days before `most_recent_date`.

# %%
print("Lastest data available:", most_recent_date)
# Naive UTC timestamp, same printed format as the deprecated
# datetime.datetime.utcnow().
print("Last update:", datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None))
# %%
INCOMPLETE_DAYS = 5  # trailing days marked as not yet complete
ROLLING_WINDOW_DAYS = 20  # length of the rolling-sum window
FIGURE_HEIGHT = 1800  # total figure height in pixels


def create_region_df(df, region):
    """Return the daily and cumulative case counts for one area.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with at least the columns "Area name", "Specimen date" and
        "Daily lab-confirmed cases".
    region : str
        Value of "Area name" to select.

    Returns
    -------
    pandas.DataFrame
        Indexed by "Specimen date", with the columns "Daily" and
        "Cummulative" (the running total of "Daily").
    """
    region_df = (
        df.loc[df["Area name"] == region, ["Specimen date", "Daily lab-confirmed cases"]]
        .rename(columns={"Daily lab-confirmed cases": "Daily"})
        # One row per date; when a date occurs more than once the largest
        # value is kept. NOTE(review): presumably the same area is listed
        # several times per date in the CSV - confirm against the data.
        .groupby("Specimen date")
        .max()
    )
    region_df["Cummulative"] = region_df["Daily"].cumsum()
    return region_df


def _rolling_daily_sum(region_df, window_days):
    """Return the trailing `window_days`-day sum of "Daily" on a daily index."""
    # Resampling to calendar days first fills missing dates with 0, so the
    # window always spans `window_days` calendar days.
    daily = region_df.resample("D").sum()
    return daily["Daily"].rolling(window_days).sum()


def _cutoff_line(latest_date, incomplete_days):
    """Build the layout shape for the dotted line at the completeness cut-off."""
    last_complete_date = latest_date - datetime.timedelta(days=incomplete_days)
    return {
        "type": "line",
        # "paper" coordinates: the line spans the full height of the figure.
        "yref": "paper",
        "y0": 0,
        "y1": 1,
        "xref": "x",
        "x0": last_complete_date,
        "x1": last_complete_date,
        "line": {"dash": "dot", "color": "DarkRed"},
    }


def plot(df, regions, rolling_days=ROLLING_WINDOW_DAYS, incomplete_days=INCOMPLETE_DAYS,
         height=FIGURE_HEIGHT):
    """Show one subplot per region with daily cases (bars) and their rolling sum.

    Reads two notebook-level globals: `secondary_axis` (put the rolling sum on
    a secondary y axis) and `most_recent_date` (anchor of the cut-off line).

    Parameters
    ----------
    df : pandas.DataFrame
        Cases frame as accepted by `create_region_df`.
    regions : iterable of str
        Area names, one subplot row each, in the given order.
    rolling_days : int, optional
        Window length of the rolling sum, in days.
    incomplete_days : int, optional
        The dotted marker line is drawn this many days before
        `most_recent_date`.
    height : int, optional
        Figure height in pixels.
    """
    regions = list(regions)
    fig = make_subplots(
        cols=1,
        rows=len(regions),
        shared_xaxes=True,
        specs=[[{"secondary_y": True}] for _ in regions] if secondary_axis else None,
        subplot_titles=regions,
        vertical_spacing=0.02,
    )
    for row, region in enumerate(regions, start=1):
        region_df = create_region_df(df, region)
        # Every row shares the same two legend groups; list them only once.
        in_legend = row == 1

        fig.add_trace(
            go.Bar(
                x=region_df.index,
                y=region_df["Daily"],
                name="Daily",
                legendgroup="Daily",
                marker={"color": "blue"},
                showlegend=in_legend,
            ),
            row=row,
            col=1,
        )

        rolling = _rolling_daily_sum(region_df, rolling_days)
        fig.add_trace(
            go.Scatter(
                x=rolling.index,
                y=rolling,
                name=f"Rolling sum ({rolling_days} days)",
                legendgroup="Rolling",
                mode="lines+markers",
                marker={"color": "green"},
                showlegend=in_legend,
            ),
            secondary_y=secondary_axis,
            row=row,
            col=1,
        )

    fig.update_layout(shapes=[_cutoff_line(most_recent_date, incomplete_days)])
    fig.update_yaxes(title_text="Daily Cases" if secondary_axis else "Daily Cases & Rolling sum")
    if secondary_axis:
        fig.update_yaxes(title_text="Rolling sum", secondary_y=True)
    fig.update_layout(height=height)
    fig.show()


def plot_cum(df, regions, incomplete_days=INCOMPLETE_DAYS, height=FIGURE_HEIGHT):
    """Show one subplot per region with the cumulative number of cases.

    Reads the notebook-level global `most_recent_date` (anchor of the cut-off
    line).

    Parameters
    ----------
    df : pandas.DataFrame
        Cases frame as accepted by `create_region_df`.
    regions : iterable of str
        Area names, one subplot row each, in the given order.
    incomplete_days : int, optional
        The dotted marker line is drawn this many days before
        `most_recent_date`.
    height : int, optional
        Figure height in pixels.
    """
    regions = list(regions)
    fig = make_subplots(
        cols=1,
        rows=len(regions),
        shared_xaxes=True,
        subplot_titles=regions,
        vertical_spacing=0.02,
    )
    for row, region in enumerate(regions, start=1):
        region_df = create_region_df(df, region)
        fig.add_trace(
            go.Scatter(
                x=region_df.index,
                y=region_df["Cummulative"],
                name="Cummulative",
                legendgroup="Cummulative",
                mode="lines+markers",
                marker={"color": "red"},
                showlegend=row == 1,
            ),
            row=row,
            col=1,
        )

    fig.update_layout(shapes=[_cutoff_line(most_recent_date, incomplete_days)])
    fig.update_yaxes(title_text="Cummulative Cases")
    fig.update_layout(height=height)
    fig.show()