harvester.py
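    """Harvest COVID-19 related tweets for Victoria: one thread searches tweets by
    geo-location, another searches the timelines of previously seen users, and an
    optional (currently disabled) worker streams tweets for the VIC bounding box."""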
    import twitter_harvest.util.utilities as utils
    import shapefile
    import logging
    import threading
    import time
    import json
    import twitter_harvest.util.config as config
    
    
    def main():
    
        # Report how many of the already-harvested tweets carry location data.
        print(countLocations())
    
        log_format = "%(asctime)s: %(message)s"
        logging.basicConfig(filename='harvester.log', filemode='w', format=log_format,
                            level=logging.INFO, datefmt="%H:%M:%S")
    
        # API clients for the individual harvesters.
        api = utils.TwitterAPI(cred_rank=0)
        api2 = utils.TwitterAPI(cred_rank=1)
        streamVIC = utils.TwitterStream(cred_rank=1)

        covid_terms = ['covid19', 'coronavirus', 'covid-19']

        # Keep each worker's target and arguments so that a dead thread can be
        # recreated later: a finished threading.Thread object cannot be restarted.
        thread_specs = [
            # Searching for Tweets by GeoLocation
            (api.search_tweets, ('searchCovid', config.VIC_GeoLocation, covid_terms)),
            # Searching for Tweets by Users
            (searchTimelinesThread, ('searchCovid', api2, covid_terms)),
            # Streaming Tweets by GeoLocation (currently disabled)
            # (streamVIC.stream, ('tweetsVIC.txt', config.VIC_BBox, covid_terms)),
        ]

        threads = list()
        for index, (target, args) in enumerate(thread_specs):
            logging.info("Main    : create and start thread %d.", index)
            thread = threading.Thread(target=target, args=args)
            threads.append(thread)
            thread.start()

        # Monitor the harvesters: when a worker has stopped, wait 15 minutes and
        # start a fresh thread with the same target and arguments.
        while True:
            for index, thread in enumerate(threads):
                if not thread.is_alive():
                    logging.info("Main    : Restarting thread %d !!", index)
                    time.sleep(15 * 60)
                    target, args = thread_specs[index]
                    threads[index] = threading.Thread(target=target, args=args)
                    threads[index].start()
            # Poll roughly once a minute.
            time.sleep(60)
    
    
    def getTwitterUsers():
        # Collect the distinct screen names seen in the harvested search results.
        users_set = set()
        with open('searchCovid-2020-05-01.txt', 'r') as filetweetsVIC:
            for line in filetweetsVIC:
                try:
                    jline = json.loads(line)
                    users_set.add(jline['user']['screen_name'])
                except (ValueError, KeyError):
                    # Skip blank or malformed lines instead of aborting the scan.
                    continue

        return list(users_set)
    
    
    def searchTimelinesThread(nametag, api, filter_list=None):
        # Harvest the timeline of every user seen so far in the search results.
        if filter_list is None:
            filter_list = []
        # time.sleep(5 * 60)
        users = getTwitterUsers()
        for user in users:
            api.retrieve_timeline_tweets(nametag, search_user=user, filter_list=filter_list)
    
    
    def countLocations():
        # Count how many harvested tweets carry any location information
        # (coordinates, geo or place) and how many distinct users they come from.
        countall = 0
        countloc = 0
        users_set = set()
        with open('searchCovid-2020-05-01.txt', 'r') as filetweetsVIC:
            for line in filetweetsVIC:
                try:
                    jline = json.loads(line)
                except ValueError:
                    # Skip blank or malformed lines instead of aborting the scan.
                    continue
                countall += 1
                if jline['coordinates'] is not None or jline['geo'] is not None or jline['place'] is not None:
                    countloc += 1
                    users_set.add(jline['user']['screen_name'])

        return "Location available in {} records out of {} Total, for {} users.".format(countloc, countall, len(users_set))
    

    def mapSA4():
        # Disabled stub: would explore the SA4 2016 (AUST) boundary shapefile with pyshp.
        # sf = shapefile.Reader('SA4_2016_AUST.shx')
        # print(sf.shapeTypeName, sf.bbox)
        # fields = sf.fields
        # shapes = sf.shapes()
        # vicshapes = shapes[30:47]
        # print(shapes[30].bbox)
        return
    
    
    if __name__ == "__main__":
        main()