{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"papermill": {
"duration": 0.010824,
"end_time": "2023-09-23T23:02:03.692444",
"exception": false,
"start_time": "2023-09-23T23:02:03.681620",
"status": "completed"
},
"tags": []
},
"source": [
"# Cases in London\n",
"\n",
"The graphs below use the data from the [UK government coronavirus dashboard](https://coronavirus.data.gov.uk/). Daily cases per region are not available there, but they do provide the raw data in [CSV format](https://coronavirus.data.gov.uk/downloads/csv/coronavirus-cases_latest.csv). Note that the data are now shown by the date the specimen was taken from the person being tested. This gives a much more useful analysis of the progression of cases over time. It does mean that the latest days’ figures are always incomplete, and **only data from 5 days or more ago can be considered complete**. Because of that, the graphs below mark that 5-day cut-off with a dotted vertical line: figures to the right of the line are still incomplete."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-23T23:02:03.705766Z",
"iopub.status.busy": "2023-09-23T23:02:03.705149Z",
"iopub.status.idle": "2023-09-23T23:02:04.372762Z",
"shell.execute_reply": "2023-09-23T23:02:04.374013Z"
},
"papermill": {
"duration": 0.676314,
"end_time": "2023-09-23T23:02:04.374270",
"exception": false,
"start_time": "2023-09-23T23:02:03.697956",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"import datetime\n",
"import gzip\n",
"import io\n",
"import time\n",
"\n",
"import pandas as pd\n",
"import plotly.graph_objs as go\n",
"from plotly.subplots import make_subplots\n",
"import requests"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-23T23:02:04.390646Z",
"iopub.status.busy": "2023-09-23T23:02:04.389986Z",
"iopub.status.idle": "2023-09-23T23:02:04.392661Z",
"shell.execute_reply": "2023-09-23T23:02:04.393027Z"
},
"papermill": {
"duration": 0.011883,
"end_time": "2023-09-23T23:02:04.393138",
"exception": false,
"start_time": "2023-09-23T23:02:04.381255",
"status": "completed"
},
"tags": [
"parameters"
]
},
"outputs": [],
"source": [
"# Parameters\n",
"secondary_axis = False"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-23T23:02:04.406228Z",
"iopub.status.busy": "2023-09-23T23:02:04.405371Z",
"iopub.status.idle": "2023-09-23T23:02:04.408322Z",
"shell.execute_reply": "2023-09-23T23:02:04.408763Z"
},
"papermill": {
"duration": 0.010535,
"end_time": "2023-09-23T23:02:04.409063",
"exception": false,
"start_time": "2023-09-23T23:02:04.398528",
"status": "completed"
},
"tags": [
"injected-parameters"
]
},
"outputs": [],
"source": [
"# Parameters\n",
"secondary_axis = True\n"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-23T23:02:04.421244Z",
"iopub.status.busy": "2023-09-23T23:02:04.420587Z",
"iopub.status.idle": "2023-09-23T23:02:04.429140Z",
"shell.execute_reply": "2023-09-23T23:02:04.429544Z"
},
"papermill": {
"duration": 0.015803,
"end_time": "2023-09-23T23:02:04.429652",
"exception": false,
"start_time": "2023-09-23T23:02:04.413849",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"pd.options.plotting.backend = \"plotly\""
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-23T23:02:04.443154Z",
"iopub.status.busy": "2023-09-23T23:02:04.442350Z",
"iopub.status.idle": "2023-09-23T23:02:05.351210Z",
"shell.execute_reply": "2023-09-23T23:02:05.351646Z"
},
"papermill": {
"duration": 0.91668,
"end_time": "2023-09-23T23:02:05.351866",
"exception": false,
"start_time": "2023-09-23T23:02:04.435186",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"# Download the latest cases CSV. The timeout stops a stalled connection from\n",
"# hanging the run, and raise_for_status() aborts on an HTTP error so that an\n",
"# error page is never archived or parsed as if it were data.\n",
"response = requests.get(\n",
"    \"https://coronavirus.data.gov.uk/downloads/csv/coronavirus-cases_latest.csv\",\n",
"    timeout=60,\n",
")\n",
"response.raise_for_status()\n",
"content = response.content\n",
"\n",
"# Keep a timestamped, gzip-compressed copy of exactly what was downloaded.\n",
"with gzip.open(f\"coronavirus-cases-{round(time.time())}.csv.gz\", \"wb\") as fp:\n",
"    fp.write(content)\n",
"\n",
"# Parse the same bytes that were archived into the working DataFrame.\n",
"latest = io.StringIO(content.decode(\"utf-8\"))\n",
"df = pd.read_csv(latest)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-23T23:02:05.364500Z",
"iopub.status.busy": "2023-09-23T23:02:05.364008Z",
"iopub.status.idle": "2023-09-23T23:02:05.373468Z",
"shell.execute_reply": "2023-09-23T23:02:05.373849Z"
},
"papermill": {
"duration": 0.01644,
"end_time": "2023-09-23T23:02:05.373963",
"exception": false,
"start_time": "2023-09-23T23:02:05.357523",
"status": "completed"
},
"tags": []
},
"outputs": [],
"source": [
"df[\"Specimen date\"] = pd.to_datetime(df[\"Specimen date\"])\n",
"most_recent_date = df[\"Specimen date\"].max()\n",
"#df = df[df[\"Specimen date\"] < (most_recent_date - datetime.timedelta(days=5))]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-23T23:02:05.387436Z",
"iopub.status.busy": "2023-09-23T23:02:05.386811Z",
"iopub.status.idle": "2023-09-23T23:02:05.390952Z",
"shell.execute_reply": "2023-09-23T23:02:05.391415Z"
},
"papermill": {
"duration": 0.011937,
"end_time": "2023-09-23T23:02:05.391524",
"exception": false,
"start_time": "2023-09-23T23:02:05.379587",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Lastest data available: 2021-04-26 00:00:00\n",
"Last update: 2023-09-23 23:02:05.387778\n"
]
}
],
"source": [
"# Report how fresh the dataset is and when this run happened. The run time is a\n",
"# timezone-aware UTC timestamp, so the printed value carries its own offset.\n",
"print(\"Latest data available:\", most_recent_date)\n",
"print(\"Last update:\", datetime.datetime.now(datetime.timezone.utc))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-23T23:02:05.404697Z",
"iopub.status.busy": "2023-09-23T23:02:05.404261Z",
"iopub.status.idle": "2023-09-23T23:02:05.443096Z",
"shell.execute_reply": "2023-09-23T23:02:05.443476Z"
},
"papermill": {
"duration": 0.046242,
"end_time": "2023-09-23T23:02:05.443590",
"exception": false,
"start_time": "2023-09-23T23:02:05.397348",
"status": "completed"
},
"pycharm": {
"name": "#%%\n"
},
"tags": []
},
"outputs": [],
"source": [
"def create_region_df(df, region):\n",
" df = df[df[\"Area name\"] == region]\n",
" df = df.rename(columns={\"Daily lab-confirmed cases\": \"Daily\"})\n",
" df = df[[\"Specimen date\", \"Daily\"]]\n",
" df = df.groupby(\"Specimen date\").max()\n",
"\n",
" df[\"Cummulative\"] = df[\"Daily\"].cumsum()\n",
"\n",
" return df\n",
"\n",
"\n",
"def plot(df, regions):\n",
"    \"\"\"Show daily cases and their 20-day rolling sum, one subplot row per region.\n",
"\n",
"    Relies on two notebook-level names: ``secondary_axis`` (when true, the\n",
"    rolling sum is drawn against its own secondary y-axis) and\n",
"    ``most_recent_date`` (a dotted vertical line is drawn five days before it).\n",
"    The figure is displayed, not returned.\n",
"    \"\"\"\n",
"    row_count = len(regions)\n",
"    subplot_specs = [[{\"secondary_y\": True}]] * row_count if secondary_axis else None\n",
"    fig = make_subplots(\n",
"        rows=row_count,\n",
"        cols=1,\n",
"        shared_xaxes=True,\n",
"        specs=subplot_specs,\n",
"        subplot_titles=regions,\n",
"        vertical_spacing=0.02,\n",
"    )\n",
"\n",
"    for row, region in enumerate(regions, start=1):\n",
"        # Only the first row adds legend entries; later rows share them via legendgroup.\n",
"        in_legend = row == 1\n",
"        region_df = create_region_df(df, region)\n",
"\n",
"        daily_bars = go.Bar(\n",
"            x=region_df.index,\n",
"            y=region_df.Daily,\n",
"            name=\"Daily\",\n",
"            legendgroup=\"Daily\",\n",
"            marker={\"color\": \"blue\"},\n",
"            showlegend=in_legend,\n",
"        )\n",
"        fig.add_trace(daily_bars, row=row, col=1)\n",
"\n",
"        # Resample to one row per calendar day before taking the 20-row window.\n",
"        per_day = region_df.resample(\"D\").sum()\n",
"        rolling_sum = per_day.Daily.rolling(20).sum()\n",
"        rolling_line = go.Scatter(\n",
"            x=per_day.index,\n",
"            y=rolling_sum,\n",
"            name=\"Rolling sum (20 days)\",\n",
"            legendgroup=\"Rolling\",\n",
"            mode=\"lines+markers\",\n",
"            marker={\"color\": \"green\"},\n",
"            showlegend=in_legend,\n",
"        )\n",
"        fig.add_trace(rolling_line, secondary_y=secondary_axis, row=row, col=1)\n",
"\n",
"    # Dotted full-height line five days before the newest specimen date.\n",
"    cutoff = most_recent_date - datetime.timedelta(days=5)\n",
"    cutoff_line = {\n",
"        \"type\": \"line\",\n",
"        \"yref\": \"paper\",\n",
"        \"y0\": 0,\n",
"        \"y1\": 1,\n",
"        \"xref\": \"x\",\n",
"        \"x0\": cutoff,\n",
"        \"x1\": cutoff,\n",
"        \"line\": {\"dash\": \"dot\", \"color\": \"DarkRed\"},\n",
"    }\n",
"    fig.update_layout(shapes=[cutoff_line])\n",
"\n",
"    if secondary_axis:\n",
"        fig.update_yaxes(title_text=\"Daily Cases\")\n",
"        fig.update_yaxes(title_text=\"Rolling sum\", secondary_y=True)\n",
"    else:\n",
"        fig.update_yaxes(title_text=\"Daily Cases & Rolling sum\")\n",
"\n",
"    fig.update_layout(height=1800)\n",
"    fig.show()\n",
"\n",
"\n",
"def plot_cum(df, regions):\n",
"    \"\"\"Show the cumulative case curve of every region, one subplot row each.\n",
"\n",
"    A dotted vertical line is drawn five days before the notebook-level\n",
"    ``most_recent_date``. The figure is displayed, not returned.\n",
"    \"\"\"\n",
"    fig = make_subplots(\n",
"        rows=len(regions),\n",
"        cols=1,\n",
"        shared_xaxes=True,\n",
"        subplot_titles=regions,\n",
"        vertical_spacing=0.02,\n",
"    )\n",
"\n",
"    for row, region in enumerate(regions, start=1):\n",
"        totals = create_region_df(df, region)\n",
"        curve = go.Scatter(\n",
"            x=totals.index,\n",
"            y=totals.Cummulative,\n",
"            name=\"Cummulative\",\n",
"            legendgroup=\"Cummulative\",\n",
"            mode=\"lines+markers\",\n",
"            marker={\"color\": \"red\"},\n",
"            # Only the first row adds a legend entry; the rest share it via legendgroup.\n",
"            showlegend=row == 1,\n",
"        )\n",
"        fig.add_trace(curve, row=row, col=1)\n",
"\n",
"    # Dotted full-height line five days before the newest specimen date.\n",
"    cutoff = most_recent_date - datetime.timedelta(days=5)\n",
"    cutoff_line = {\n",
"        \"type\": \"line\",\n",
"        \"yref\": \"paper\",\n",
"        \"y0\": 0,\n",
"        \"y1\": 1,\n",
"        \"xref\": \"x\",\n",
"        \"x0\": cutoff,\n",
"        \"x1\": cutoff,\n",
"        \"line\": {\"dash\": \"dot\", \"color\": \"DarkRed\"},\n",
"    }\n",
"    fig.update_layout(shapes=[cutoff_line])\n",
"    fig.update_yaxes(title_text=\"Cummulative Cases\")\n",
"    fig.update_layout(height=1800)\n",
"    fig.show()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"execution": {
"iopub.execute_input": "2023-09-23T23:02:05.457863Z",
"iopub.status.busy": "2023-09-23T23:02:05.457419Z",
"iopub.status.idle": "2023-09-23T23:02:05.837532Z",
"shell.execute_reply": "2023-09-23T23:02:05.837096Z"
},
"papermill": {
"duration": 0.388019,
"end_time": "2023-09-23T23:02:05.837646",
"exception": false,
"start_time": "2023-09-23T23:02:05.449627",
"status": "completed"
},
"tags": []
},
"outputs": [
{
"data": {
"text/html": [
"