"""Normalised mutual information between binned PM10 and asthma data.

Every probability used below is a hard-coded literal; none of the figures is
computed from the CSV file at run time.  Importing this module only evaluates
the arithmetic.  The CSV load and the printed report happen when the file is
run as a script.
"""
import math

import pandas

# Asthma data and PM10 data were binned based on domain knowledge.
# Asthma was binned into 2 bins: percentages under 10% and over 10%.
# PM10 was binned into 3 bins: readings under 40, between 40 and 80, and
# between 80 and 120.
# This was according to the air quality categories of good, fair and poor
# respectively provided by the EPA:
# https://www.epa.vic.gov.au/for-community/environmental-information/air-quality/pm10-particles-in-the-air


def entropy_bits(probabilities):
    """Return ``-sum(p * log2(p))`` over *probabilities*, in bits.

    For a complete probability distribution this is its Shannon entropy.
    The values are not required to sum to 1, so the terms of several
    conditional distributions that share one weight can be totalled in a
    single call.

    Args:
        probabilities: Iterable of probabilities.  A value of exactly 0 is
            skipped, following the convention ``0 * log(0) == 0``.

    Returns:
        The accumulated sum as a float; ``0.0`` for an empty iterable.

    Raises:
        ValueError: If a probability is negative (raised by ``math.log``).
    """
    total = 0.0
    # A plain left-to-right loop (not ``sum``) keeps the floating-point
    # evaluation order the same as a hand-written chain of terms.
    for p in probabilities:
        if p == 0:
            continue
        total -= p * math.log(p, 2)
    return total


# Marginal entropies of the two binned variables.
asthma_entropy = entropy_bits([0.5, 0.5])
pm10_entropy = entropy_bits([0.25, 0.625, 0.125])

# H(PM10 | asthma): each asthma bin has probability 0.5, so that common
# weight is factored out and the terms of the two conditional PM10
# distributions, (0.5, 0.5) and (0.25, 0.75), are accumulated in one pass.
pm10_given_asthma = 0.5 * entropy_bits([0.5, 0.5] + [0.25, 0.75])

# H(asthma | PM10): only the PM10 bin with probability 0.625 contributes a
# term, with asthma split (0.4, 0.6) inside it.
# NOTE(review): the other two PM10 bins are left out, presumably because each
# holds a single asthma category and so has zero entropy -- confirm against
# the data.
asthma_given_pm10 = 0.625 * entropy_bits([0.4, 0.6])

# I(PM10; asthma) = H(PM10) - H(PM10 | asthma), normalised by H(asthma).
mutual_info = pm10_entropy - pm10_given_asthma
normalised_mutual_info = mutual_info / asthma_entropy


def main():
    """Load the source CSV and print the entropy and mutual-information report."""
    # NOTE(review): the loaded DataFrame is never used by any calculation in
    # this script; the load is kept so a missing or unreadable file still
    # stops the script before anything is printed.  Presumably the
    # probabilities above were tallied by hand from this file -- confirm.
    pandas.read_csv("dataframe_for_MI.csv")

    print(f"Entropy value for asthma percentage data: {asthma_entropy}")
    print(f"Entropy value for PM10 data: {pm10_entropy}")
    print(
        "Conditional entropy for PM10 data given values for asthma: "
        f"{pm10_given_asthma}"
    )
    print(
        "Conditional entropy for asthma data given values for PM10: "
        f"{asthma_given_pm10}"
    )
    print(
        "Normalised mutual information between the PM10 and asthma data: "
        f"{normalised_mutual_info}"
    )


if __name__ == "__main__":
    main()