diff --git a/LGA_by_HEALTH.csv b/LGA_by_HEALTH.csv new file mode 100644 index 0000000000000000000000000000000000000000..ccb34704aac9c295733c1b9cb0f3728767a15b69 --- /dev/null +++ b/LGA_by_HEALTH.csv @@ -0,0 +1,9 @@ +LGA_CODE_2011,smokers,heart_disease,pre_obese,obese +21890,12.1,6.6,30.1,19.3 +22670,14.5,9.1,28.1,16.8 +22750,12.2,7.9,32.5,16.6 +23110,11.8,8.1,30.4,21.3 +23810,24.4,5.6,36.6,22.0 +24330,15.7,3.9,34.9,11.9 +27350,14.3,7.0,23.5,12.1 +27450,8.9,6.7,33.1,16.4 diff --git a/LGA_by_HEALTH.py b/LGA_by_HEALTH.py new file mode 100644 index 0000000000000000000000000000000000000000..69cef3f35b53a953a10b5a5668d637bc25b063d1 --- /dev/null +++ b/LGA_by_HEALTH.py @@ -0,0 +1,13 @@ +import pandas as pandas + +df = pandas.read_csv("datasets/2015_LGA_health_profiles.csv", dtype = "string") + +lgas = set(pandas.read_csv("PM10_to_LGA.csv", dtype = "string")["LGA_CODE_2011"].unique()) + +df = df[df[" lga_code"].isin(lgas)] +df = df.rename(columns = {" lga_code": "LGA_CODE_2011", "ppl_reporting_heart_disease_perc": "heart_disease", + " ppl_reporting_being_pre_obese_perc": "pre_obese", " ppl_reporting_being_obese_perc": "obese", " ppl_aged_over_18_who_are_current_smokers_perc": "smokers"}) +df = df.sort_values(["LGA_CODE_2011"]) +df = df.reindex(columns = ["LGA_CODE_2011", "smokers", "heart_disease", "pre_obese", "obese"]) + +df.to_csv(path_or_buf = "LGA_by_HEALTH.csv", index = False) \ No newline at end of file diff --git a/SA2_to_LGA.py b/SA2_to_LGA.py index d2ffa64877d069259b853559bd38915ec946fe20..1ca4df01c7d4d59fc21fdbc1df117bd62a108292 100644 --- a/SA2_to_LGA.py +++ b/SA2_to_LGA.py @@ -5,7 +5,7 @@ df = pandas.read_csv("datasets/2011_SA2_to_LGA_vic.csv", dtype = "string") # Keep only Victorian LGAs #df = df[df["LGA_CODE_2011"].str[0] == "2"] # Only ones in AQ_to_LGA.csv -lgas = set(pandas.read_csv("AQ_to_LGA.csv", dtype = "string")["LGA_CODE_2011"].unique()) +lgas = set(pandas.read_csv("PM10_to_LGA.csv", dtype = "string")["LGA_CODE_2011"].unique()) print(lgas) df = df[df["LGA_CODE_2011"].isin(lgas)]