{ "cells": [ { "cell_type": "markdown", "metadata": { "papermill": { "duration": 0.010913, "end_time": "2022-05-24T16:02:03.568550", "exception": false, "start_time": "2022-05-24T16:02:03.557637", "status": "completed" }, "tags": [] }, "source": [ "# Cases in London\n", "\n", "The graphs below use the data from . Daily cases per region is not available, but they do provide the raw data in . Note that the data are now shown by the date the specimen was taken from the person being tested. This gives a much more useful analysis of the progression of cases over time. It does mean that the latest days’ figures are always incomplete, and **only data from 5 days or more ago can be considered complete**. Because of that, in the graphs below, we only use data from 5 days ago or previous." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2022-05-24T16:02:03.590185Z", "iopub.status.busy": "2022-05-24T16:02:03.589375Z", "iopub.status.idle": "2022-05-24T16:02:03.964636Z", "shell.execute_reply": "2022-05-24T16:02:03.965549Z" }, "papermill": { "duration": 0.389362, "end_time": "2022-05-24T16:02:03.965847", "exception": false, "start_time": "2022-05-24T16:02:03.576485", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "import datetime\n", "import gzip\n", "import io\n", "import time\n", "\n", "import pandas as pd\n", "import plotly.graph_objs as go\n", "from plotly.subplots import make_subplots\n", "import requests" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2022-05-24T16:02:03.986939Z", "iopub.status.busy": "2022-05-24T16:02:03.986132Z", "iopub.status.idle": "2022-05-24T16:02:03.989601Z", "shell.execute_reply": "2022-05-24T16:02:03.990215Z" }, "papermill": { "duration": 0.016099, "end_time": "2022-05-24T16:02:03.990398", "exception": false, "start_time": "2022-05-24T16:02:03.974299", "status": "completed" }, "tags": [ "parameters" ] }, "outputs": [], 
"source": [ "# Parameters\n", "secondary_axis = False" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2022-05-24T16:02:04.008425Z", "iopub.status.busy": "2022-05-24T16:02:04.007682Z", "iopub.status.idle": "2022-05-24T16:02:04.011144Z", "shell.execute_reply": "2022-05-24T16:02:04.011767Z" }, "papermill": { "duration": 0.013992, "end_time": "2022-05-24T16:02:04.011941", "exception": false, "start_time": "2022-05-24T16:02:03.997949", "status": "completed" }, "tags": [ "injected-parameters" ] }, "outputs": [], "source": [ "# Parameters\n", "secondary_axis = True\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2022-05-24T16:02:04.030409Z", "iopub.status.busy": "2022-05-24T16:02:04.029642Z", "iopub.status.idle": "2022-05-24T16:02:04.041250Z", "shell.execute_reply": "2022-05-24T16:02:04.041827Z" }, "papermill": { "duration": 0.022333, "end_time": "2022-05-24T16:02:04.042002", "exception": false, "start_time": "2022-05-24T16:02:04.019669", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "pd.options.plotting.backend = \"plotly\"" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2022-05-24T16:02:04.060597Z", "iopub.status.busy": "2022-05-24T16:02:04.059822Z", "iopub.status.idle": "2022-05-24T16:02:05.039458Z", "shell.execute_reply": "2022-05-24T16:02:05.040067Z" }, "papermill": { "duration": 0.990499, "end_time": "2022-05-24T16:02:05.040359", "exception": false, "start_time": "2022-05-24T16:02:04.049860", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "content = requests.get(\"https://coronavirus.data.gov.uk/downloads/csv/coronavirus-cases_latest.csv\").content\n", "with gzip.open(f\"coronavirus-cases-{round(time.time())}.csv.gz\", \"wb\") as fp:\n", " fp.write(content)\n", "latest = io.StringIO(content.decode(\"utf-8\"))\n", "df = pd.read_csv(latest)" ] }, { "cell_type": 
"code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2022-05-24T16:02:05.060748Z", "iopub.status.busy": "2022-05-24T16:02:05.059976Z", "iopub.status.idle": "2022-05-24T16:02:05.074781Z", "shell.execute_reply": "2022-05-24T16:02:05.075423Z" }, "papermill": { "duration": 0.026406, "end_time": "2022-05-24T16:02:05.075630", "exception": false, "start_time": "2022-05-24T16:02:05.049224", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "df[\"Specimen date\"] = pd.to_datetime(df[\"Specimen date\"])\n", "most_recent_date = df[\"Specimen date\"].max()\n", "#df = df[df[\"Specimen date\"] < (most_recent_date - datetime.timedelta(days=5))]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2022-05-24T16:02:05.094859Z", "iopub.status.busy": "2022-05-24T16:02:05.094104Z", "iopub.status.idle": "2022-05-24T16:02:05.099645Z", "shell.execute_reply": "2022-05-24T16:02:05.100290Z" }, "papermill": { "duration": 0.016663, "end_time": "2022-05-24T16:02:05.100476", "exception": false, "start_time": "2022-05-24T16:02:05.083813", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Lastest data available: 2021-04-26 00:00:00\n", "Last update: 2022-05-24 16:02:05.095429\n" ] } ], "source": [ "print(\"Lastest data available:\", most_recent_date)\n", "print(\"Last update:\", datetime.datetime.utcnow())" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2022-05-24T16:02:05.121017Z", "iopub.status.busy": "2022-05-24T16:02:05.120223Z", "iopub.status.idle": "2022-05-24T16:02:05.183538Z", "shell.execute_reply": "2022-05-24T16:02:05.184142Z" }, "papermill": { "duration": 0.07529, "end_time": "2022-05-24T16:02:05.184373", "exception": false, "start_time": "2022-05-24T16:02:05.109083", "status": "completed" }, "pycharm": { "name": "#%%\n" }, "tags": [] }, "outputs": [], "source": [ "def 
def create_region_df(df, region):
    """Build the per-day case table for a single area.

    Parameters
    ----------
    df : pandas.DataFrame
        Case data containing at least the columns ``"Area name"``,
        ``"Specimen date"`` and ``"Daily lab-confirmed cases"``.
    region : str
        Value of ``"Area name"`` to keep.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``"Specimen date"`` with two columns: ``"Daily"`` (the
        maximum daily count reported for that date) and ``"Cummulative"``
        (the running total of ``"Daily"``). The input frame is not modified.
    """
    per_day = (
        df.loc[df["Area name"] == region]
        .rename(columns={"Daily lab-confirmed cases": "Daily"})
        .loc[:, ["Specimen date", "Daily"]]
        # Several rows can share a specimen date; keep the largest value.
        .groupby("Specimen date")
        .max()
    )
    return per_day.assign(Cummulative=per_day["Daily"].cumsum())


def plot(df, regions):
    """Show daily cases and a 20-day rolling sum, one subplot row per region.

    Reads two notebook-level globals: ``secondary_axis`` (when truthy the
    rolling sum is drawn against a secondary y-axis) and ``most_recent_date``
    (used to place the dotted marker five days before the newest data).

    Parameters
    ----------
    df : pandas.DataFrame
        Case data in the shape accepted by ``create_region_df``.
    regions : list of str
        Area names; each one gets its own subplot row and title.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    subplot_specs = None
    if secondary_axis:
        subplot_specs = [[{"secondary_y": True}]] * len(regions)

    fig = make_subplots(
        rows=len(regions),
        cols=1,
        shared_xaxes=True,
        specs=subplot_specs,
        subplot_titles=regions,
        vertical_spacing=0.02,
    )

    for row_number, region in enumerate(regions, start=1):
        # Only the first row contributes legend entries; later rows would
        # repeat the same two labels.
        in_legend = row_number == 1
        region_df = create_region_df(df, region)

        daily_bars = go.Bar(
            x=region_df.index,
            y=region_df.Daily,
            name="Daily",
            legendgroup="Daily",
            marker={"color": "blue"},
            showlegend=in_legend,
        )
        fig.add_trace(daily_bars, row=row_number, col=1)

        # Resample to calendar days before applying the 20-row window.
        daily_resample = region_df.resample("D").sum()
        rolling_line = go.Scatter(
            x=daily_resample.index,
            y=daily_resample.Daily.rolling(20).sum(),
            name="Rolling sum (20 days)",
            legendgroup="Rolling",
            mode="lines+markers",
            marker={"color": "green"},
            showlegend=in_legend,
        )
        fig.add_trace(
            rolling_line,
            secondary_y=secondary_axis,
            row=row_number,
            col=1,
        )

    # Dotted vertical marker five days before the most recent specimen date.
    first_good_date = most_recent_date - datetime.timedelta(days=5)
    cutoff_marker = {
        "type": "line",
        "yref": "paper",
        "y0": 0,
        "y1": 1,
        "xref": "x",
        "x0": first_good_date,
        "x1": first_good_date,
        "line": {"dash": "dot", "color": "DarkRed"},
    }
    fig.update_layout(shapes=[cutoff_marker], height=1800)

    if secondary_axis:
        fig.update_yaxes(title_text="Daily Cases")
        fig.update_yaxes(title_text="Rolling sum", secondary_y=True)
    else:
        fig.update_yaxes(title_text="Daily Cases & Rolling sum")

    fig.show()
def plot_cum(df, regions):
    """Show the cumulative case count, one subplot row per region.

    Reads the notebook-level global ``most_recent_date`` to place the dotted
    marker five days before the newest data.

    Parameters
    ----------
    df : pandas.DataFrame
        Case data in the shape accepted by ``create_region_df``.
    regions : list of str
        Area names; each one gets its own subplot row and title.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``.
    """
    fig = make_subplots(
        rows=len(regions),
        cols=1,
        shared_xaxes=True,
        subplot_titles=regions,
        vertical_spacing=0.02,
    )

    for row_number, region in enumerate(regions, start=1):
        region_df = create_region_df(df, region)
        cumulative_line = go.Scatter(
            x=region_df.index,
            y=region_df.Cummulative,
            name="Cummulative",
            legendgroup="Cummulative",
            mode="lines+markers",
            marker={"color": "red"},
            # Only the first row contributes a legend entry.
            showlegend=row_number == 1,
        )
        fig.add_trace(cumulative_line, row=row_number, col=1)

    # Dotted vertical marker five days before the most recent specimen date.
    first_good_date = most_recent_date - datetime.timedelta(days=5)
    cutoff_marker = {
        "type": "line",
        "yref": "paper",
        "y0": 0,
        "y1": 1,
        "xref": "x",
        "x0": first_good_date,
        "x1": first_good_date,
        "line": {"dash": "dot", "color": "DarkRed"},
    }
    fig.update_layout(shapes=[cutoff_marker], height=1800)
    fig.update_yaxes(title_text="Cummulative Cases")
    fig.show()