"""Plot asthma incidence against peak PM10 readings and adult smoking rates.

Reads five CSV files from the working directory, combines them per local
government area (LGA, keyed by ``LGA_CODE_2011``) and writes two scatter
plots as PNG files into the working directory.
"""
import pandas
import matplotlib.pyplot as plt

# Retrieve the maximum daily average for 2016 PM10, then index by LGA code.
MAX_24HR = pandas.read_csv("PM10_MAX_24HR_READING.csv")
PM10_to_LGA = pandas.read_csv("PM10_to_LGA.csv")

# NOTE(review): the LGA code is attached by row label (default 0..n-1 index),
# so this presumably relies on both CSVs listing the same sites in the same
# order -- no shared key column is visible here; confirm against the data.
# `assign` builds a new frame, leaving the raw MAX_24HR table untouched.
data = MAX_24HR.assign(LGA_CODE_2011=PM10_to_LGA["LGA_CODE_2011"])
data = data.set_index("LGA_CODE_2011")

# Add the adult smoker rate; the assignment aligns on the LGA code index.
LGA_by_HEALTH = pandas.read_csv("LGA_by_HEALTH.csv")
LGA_by_HEALTH = LGA_by_HEALTH.set_index("LGA_CODE_2011")
data["smokers"] = LGA_by_HEALTH["smokers"]

# Average out all SA2 data for each LGA: map every SA2 row to its LGA code,
# discard the SA2 identifier, then take the per-LGA mean.
SA2_by_DISEASES = pandas.read_csv("SA2_by_DISEASES.csv")
SA2_to_LGA = pandas.read_csv("SA2_to_LGA.csv")
LGA_by_DISEASES = SA2_by_DISEASES.join(
    SA2_to_LGA.set_index("SA2_MAINCODE_2011"),
    on="SA2_MAINCODE_2011",
)
LGA_by_DISEASES = LGA_by_DISEASES.drop("SA2_MAINCODE_2011", axis=1)
# numeric_only=True: a mean is only defined for numeric columns. Without it,
# pandas >= 2.0 raises TypeError if either CSV carries a text column.
LGA_by_DISEASES = LGA_by_DISEASES.groupby("LGA_CODE_2011").mean(numeric_only=True)

# Add asthma data (aligned on the LGA code index), then order rows by it.
data["asthma"] = LGA_by_DISEASES["asthma"]
data = data.reset_index()
data = data.sort_values(["asthma"])

fig, ax = plt.subplots()

# Maximum PM10 24 hour average against asthma incidence per LGA.
ax.scatter(data["asthma"], data["value"], c="b")
ax.set_ylabel("Maximum PM10 24 hour reading (2016, μg/m³)", color="blue")
ax.set_xlabel("Asthma incidence (percentage)")
# Saved before the second axis is added, so this image shows PM10 only.
fig.savefig("ovi_asthma_vs_pm1024hr.png", bbox_inches="tight")

# Now add the smoker rate on a secondary y-axis sharing the same x-axis.
ax2 = ax.twinx()
ax2.scatter(data["asthma"], data["smokers"], c="r")
ax2.set_ylabel("Adult smokers rate (percentage)", color="red")
fig.savefig("ovi_asthma_vs_pm1024hr_smokers.png", bbox_inches="tight")