{ "cells": [ { "cell_type": "markdown", "metadata": { "papermill": { "duration": 0.007286, "end_time": "2023-03-28T23:02:02.559630", "exception": false, "start_time": "2023-03-28T23:02:02.552344", "status": "completed" }, "tags": [] }, "source": [ "# Cases in London\n", "\n", "The graphs below use the data from [coronavirus.data.gov.uk](https://coronavirus.data.gov.uk). Daily cases per region are not available, but they do provide the raw data as a [CSV download](https://coronavirus.data.gov.uk/downloads/csv/coronavirus-cases_latest.csv). Note that the data are now shown by the date the specimen was taken from the person being tested. This gives a much more useful analysis of the progression of cases over time. It does mean that the latest days’ figures are always incomplete, and **only data from 5 days or more ago can be considered complete**. Because of that, the graphs below mark the date 5 days before the latest specimen date with a dotted vertical line; figures after that line are incomplete." ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "execution": { "iopub.execute_input": "2023-03-28T23:02:02.577057Z", "iopub.status.busy": "2023-03-28T23:02:02.576332Z", "iopub.status.idle": "2023-03-28T23:02:02.814690Z", "shell.execute_reply": "2023-03-28T23:02:02.815659Z" }, "papermill": { "duration": 0.250758, "end_time": "2023-03-28T23:02:02.816124", "exception": false, "start_time": "2023-03-28T23:02:02.565366", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "import datetime\n", "import gzip\n", "import io\n", "import time\n", "\n", "import pandas as pd\n", "import plotly.graph_objs as go\n", "from plotly.subplots import make_subplots\n", "import requests" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "execution": { "iopub.execute_input": "2023-03-28T23:02:02.832856Z", "iopub.status.busy": "2023-03-28T23:02:02.832369Z", "iopub.status.idle": "2023-03-28T23:02:02.834754Z", "shell.execute_reply": "2023-03-28T23:02:02.835103Z" }, "papermill": { "duration": 0.011151, "end_time": "2023-03-28T23:02:02.835263", "exception": false, "start_time": "2023-03-28T23:02:02.824112", "status": "completed" }, "tags": [ "parameters" ] }, "outputs": [], 
"source": [ "# Parameters\n", "secondary_axis = False" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "execution": { "iopub.execute_input": "2023-03-28T23:02:02.847254Z", "iopub.status.busy": "2023-03-28T23:02:02.846796Z", "iopub.status.idle": "2023-03-28T23:02:02.848905Z", "shell.execute_reply": "2023-03-28T23:02:02.849672Z" }, "papermill": { "duration": 0.009455, "end_time": "2023-03-28T23:02:02.849776", "exception": false, "start_time": "2023-03-28T23:02:02.840321", "status": "completed" }, "tags": [ "injected-parameters" ] }, "outputs": [], "source": [ "# Parameters\n", "secondary_axis = True\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "execution": { "iopub.execute_input": "2023-03-28T23:02:02.866449Z", "iopub.status.busy": "2023-03-28T23:02:02.865974Z", "iopub.status.idle": "2023-03-28T23:02:02.872583Z", "shell.execute_reply": "2023-03-28T23:02:02.873102Z" }, "papermill": { "duration": 0.018453, "end_time": "2023-03-28T23:02:02.873215", "exception": false, "start_time": "2023-03-28T23:02:02.854762", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "pd.options.plotting.backend = \"plotly\"" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "execution": { "iopub.execute_input": "2023-03-28T23:02:02.886438Z", "iopub.status.busy": "2023-03-28T23:02:02.885953Z", "iopub.status.idle": "2023-03-28T23:02:03.954365Z", "shell.execute_reply": "2023-03-28T23:02:03.954763Z" }, "papermill": { "duration": 1.076035, "end_time": "2023-03-28T23:02:03.954950", "exception": false, "start_time": "2023-03-28T23:02:02.878915", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "content = requests.get(\"https://coronavirus.data.gov.uk/downloads/csv/coronavirus-cases_latest.csv\").content\n", "with gzip.open(f\"coronavirus-cases-{round(time.time())}.csv.gz\", \"wb\") as fp:\n", " fp.write(content)\n", "latest = io.StringIO(content.decode(\"utf-8\"))\n", "df = pd.read_csv(latest)" ] }, { "cell_type": 
"code", "execution_count": 6, "metadata": { "execution": { "iopub.execute_input": "2023-03-28T23:02:03.967729Z", "iopub.status.busy": "2023-03-28T23:02:03.967138Z", "iopub.status.idle": "2023-03-28T23:02:03.979184Z", "shell.execute_reply": "2023-03-28T23:02:03.979779Z" }, "papermill": { "duration": 0.019159, "end_time": "2023-03-28T23:02:03.979888", "exception": false, "start_time": "2023-03-28T23:02:03.960729", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "df[\"Specimen date\"] = pd.to_datetime(df[\"Specimen date\"])\n", "most_recent_date = df[\"Specimen date\"].max()\n", "#df = df[df[\"Specimen date\"] < (most_recent_date - datetime.timedelta(days=5))]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "execution": { "iopub.execute_input": "2023-03-28T23:02:03.992047Z", "iopub.status.busy": "2023-03-28T23:02:03.991614Z", "iopub.status.idle": "2023-03-28T23:02:03.995508Z", "shell.execute_reply": "2023-03-28T23:02:03.995897Z" }, "papermill": { "duration": 0.010994, "end_time": "2023-03-28T23:02:03.995996", "exception": false, "start_time": "2023-03-28T23:02:03.985002", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Lastest data available: 2021-04-26 00:00:00\n", "Last update: 2023-03-28 23:02:03.992771\n" ] } ], "source": [ "print(\"Lastest data available:\", most_recent_date)\n", "print(\"Last update:\", datetime.datetime.utcnow())" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "execution": { "iopub.execute_input": "2023-03-28T23:02:04.008826Z", "iopub.status.busy": "2023-03-28T23:02:04.008351Z", "iopub.status.idle": "2023-03-28T23:02:04.059335Z", "shell.execute_reply": "2023-03-28T23:02:04.059766Z" }, "papermill": { "duration": 0.05843, "end_time": "2023-03-28T23:02:04.059915", "exception": false, "start_time": "2023-03-28T23:02:04.001485", "status": "completed" }, "pycharm": { "name": "#%%\n" }, "tags": [] }, "outputs": [], "source": [ "def 
create_region_df(df, region):\n", "    \"\"\"Return per-date daily and cumulative case counts for one area.\n", "\n", "    Keeps the rows of ``df`` whose \"Area name\" equals ``region``, renames\n", "    \"Daily lab-confirmed cases\" to \"Daily\" and reduces the frame to one row per\n", "    \"Specimen date\" (which becomes the index). A running total of \"Daily\" is\n", "    added as the \"Cummulative\" column.\n", "    \"\"\"\n", "    df = df[df[\"Area name\"] == region]\n", "    df = df.rename(columns={\"Daily lab-confirmed cases\": \"Daily\"})\n", "    df = df[[\"Specimen date\", \"Daily\"]]\n", "    # If a specimen date occurs more than once, keep its largest \"Daily\" value.\n", "    df = df.groupby(\"Specimen date\").max()\n", "\n", "    # Column name (including its spelling) is read by plot_cum().\n", "    df[\"Cummulative\"] = df[\"Daily\"].cumsum()\n", "\n", "    return df\n", "\n", "\n", "def plot(df, regions, window=20):\n", "    \"\"\"Show one subplot per region with daily cases and their rolling sum.\n", "\n", "    Args:\n", "        df: Cases frame with \"Area name\", \"Specimen date\" and\n", "            \"Daily lab-confirmed cases\" columns (see create_region_df).\n", "        regions: Area names to plot, one subplot row each.\n", "        window: Number of days summed by the rolling total (default 20).\n", "\n", "    Reads the module-level ``secondary_axis`` flag (the rolling sum goes on a\n", "    secondary y-axis when it is true) and ``most_recent_date`` (a dotted line\n", "    is drawn 5 days before it). Displays the figure; returns nothing.\n", "    \"\"\"\n", "    fig = make_subplots(\n", "        cols=1,\n", "        rows=len(regions),\n", "        shared_xaxes=True,\n", "        # One independent spec list per row rather than n references to the same list.\n", "        specs=[[{\"secondary_y\": True}] for _ in regions] if secondary_axis else None,\n", "        subplot_titles=regions,\n", "        vertical_spacing=0.02\n", "    )\n", "    for row, region in enumerate(regions, start=1):\n", "        region_df = create_region_df(df, region)\n", "        # Only the first subplot adds legend entries; later rows reuse its legend groups.\n", "        first_row = row == 1\n", "\n", "        fig.add_trace(\n", "            go.Bar(\n", "                x=region_df.index,\n", "                y=region_df.Daily,\n", "                name=\"Daily\",\n", "                legendgroup=\"Daily\",\n", "                marker={\"color\": \"blue\"},\n", "                showlegend=first_row,\n", "            ),\n", "            row=row,\n", "            col=1\n", "        )\n", "        # Resample to calendar days so dates without rows count as 0 in the rolling window.\n", "        daily_resample = region_df.resample(\"D\").sum()\n", "        rolling = daily_resample.Daily.rolling(window).sum()\n", "        fig.add_trace(\n", "            go.Scatter(\n", "                x=daily_resample.index,\n", "                y=rolling,\n", "                name=f\"Rolling sum ({window} days)\",\n", "                legendgroup=\"Rolling\",\n", "                mode=\"lines+markers\",\n", "                marker={\"color\": \"green\"},\n", "                showlegend=first_row,\n", "            ),\n", "            secondary_y=secondary_axis,\n", "            row=row,\n", "            col=1\n", "        )\n", "\n", "    # Dotted vertical line over the full figure height, 5 days before the latest specimen date.\n", "    first_good_date = most_recent_date - datetime.timedelta(days=5)\n", "    fig.update_layout(shapes=[\n", "        {\"type\": \"line\", \"yref\": \"paper\", \"y0\": 0, \"y1\": 1, \"xref\": \"x\", \"x0\": first_good_date, \"x1\": first_good_date, \"line\": {\"dash\": \"dot\", \"color\": \"DarkRed\"}}\n", "    ])\n", "    fig.update_yaxes(title_text=\"Daily Cases\" if secondary_axis else \"Daily Cases & Rolling sum\")\n", "    if secondary_axis:\n", "        fig.update_yaxes(title_text=\"Rolling sum\", secondary_y=True)\n", "    fig.update_layout(height=1800)\n", "    fig.show()\n", "\n", "\n", "def 
plot_cum(df, regions):\n", " fig = make_subplots(\n", " cols=1,\n", " rows=len(regions),\n", " shared_xaxes=True,\n", " subplot_titles=regions,\n", " vertical_spacing=0.02,\n", " )\n", " for i, region in enumerate(regions):\n", " region_df = create_region_df(df, region)\n", "\n", " fig.add_trace(\n", " go.Scatter(\n", " x=region_df.index,\n", " y=region_df.Cummulative,\n", " name=\"Cummulative\",\n", " legendgroup=\"Cummulative\",\n", " mode=\"lines+markers\",\n", " marker={\"color\": \"red\"},\n", " showlegend=True if i == 0 else False,\n", " ),\n", " row=i + 1,\n", " col=1\n", " )\n", "\n", " first_good_date = most_recent_date - datetime.timedelta(days=5)\n", " fig.update_layout(shapes=[\n", " {\"type\": \"line\", \"yref\": \"paper\", \"y0\": 0, \"y1\": 1, \"xref\": \"x\", \"x0\": first_good_date, \"x1\": first_good_date, \"line\": {\"dash\": \"dot\", \"color\": \"DarkRed\"}}\n", " ])\n", " fig.update_yaxes(title_text=\"Cummulative Cases\")\n", " fig.update_layout(height=1800)\n", " fig.show()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "execution": { "iopub.execute_input": "2023-03-28T23:02:04.073719Z", "iopub.status.busy": "2023-03-28T23:02:04.073314Z", "iopub.status.idle": "2023-03-28T23:02:04.364534Z", "shell.execute_reply": "2023-03-28T23:02:04.363918Z" }, "papermill": { "duration": 0.298833, "end_time": "2023-03-28T23:02:04.364656", "exception": false, "start_time": "2023-03-28T23:02:04.065823", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "