harvester.py
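# Twitter harvester for COVID-19 related tweets in Victoria. One thread searches
# tweets by geolocation, a second searches the timelines of users found in earlier
# search output, and a third (currently disabled) streams tweets from the VIC
# bounding box.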
import json
import logging
import re
import threading
import time

import shapefile
from tweepy import API

import twitter_harvest.util.config as config
import twitter_harvest.util.utilities as utils
def main():
    print(countLocations())

    log_format = "%(asctime)s: %(message)s"
    logging.basicConfig(filename='harvester.log', filemode='w', format=log_format,
                        level=logging.INFO, datefmt="%H:%M:%S")

    threads = list()
    # Parallel list of (target, args) pairs: a finished threading.Thread cannot be
    # start()-ed again, so stopped threads are recreated from these specs below.
    thread_specs = list()

    # Thread for searching for Tweets by GeoLocation
    logging.info("Main : create and start thread %d.", 0)
    api = utils.TwitterAPI(cred_rank=0)
    searchSpec = (api.search_tweets,
                  ('searchCovid', config.VIC_GeoLocation, ['covid19', 'coronavirus', 'covid-19']))
    threadSearchTweets = threading.Thread(target=searchSpec[0], args=searchSpec[1])
    threads.append(threadSearchTweets)
    thread_specs.append(searchSpec)
    threadSearchTweets.start()

    # Thread for searching for Tweets by Users
    logging.info("Main : create and start thread %d.", 1)
    api2 = utils.TwitterAPI(cred_rank=1)
    timelineSpec = (searchTimelinesThread,
                    ('searchCovid', api2, ['covid19', 'coronavirus', 'covid-19']))
    threadSearchUserTweets = threading.Thread(target=timelineSpec[0], args=timelineSpec[1])
    threads.append(threadSearchUserTweets)
    thread_specs.append(timelineSpec)
    threadSearchUserTweets.start()

    # Thread for Streaming Tweets by GeoLocation (created but currently disabled)
    logging.info("Main : create and start thread %d.", 2)
    streamVIC = utils.TwitterStream(cred_rank=1)
    threadStreamTweets = threading.Thread(target=streamVIC.stream,
                                          args=('tweetsVIC.txt', config.VIC_BBox,
                                                ['covid19', 'coronavirus', 'covid-19']))
    # threads.append(threadStreamTweets)
    # threadStreamTweets.start()

    # Wait for the running harvester threads to finish once.
    for index, thread in enumerate(threads):
        logging.info("Main : before joining thread %d.", index)
        thread.join()
        logging.info("Main : thread %d done", index)

    # Keep the harvesters alive: whenever a thread has stopped, back off for
    # 15 minutes (e.g. to let API rate limits reset), then start a fresh thread
    # built from the stored spec.
    while True:
        for index, thread in enumerate(threads):
            if not thread.is_alive():
                logging.info("Main : Restarting thread %d !!", index)
                time.sleep(15 * 60)
                target, args = thread_specs[index]
                threads[index] = threading.Thread(target=target, args=args)
                threads[index].start()
        # Poll at a modest interval to avoid a busy loop.
        time.sleep(60)
def getTwitterUsers():
    # Collect the distinct screen names seen in a previous day's search output.
    users_set = set()
    with open('searchCovid-2020-05-01.txt', 'r') as filetweetsVIC:
        for line in filetweetsVIC:
            try:
                jline = json.loads(line)
                users_set.add(jline['user']['screen_name'])
            except (ValueError, KeyError):
                # Skip blank or malformed lines instead of aborting the scan.
                continue
    return list(users_set)
def searchTimelinesThread(nametag, api, filter_list=None):
    # Pull timeline tweets for every user seen in the earlier search results.
    # time.sleep(5 * 60)
    filter_list = filter_list if filter_list is not None else []
    users = getTwitterUsers()
    for user in users:
        api.retrieve_timeline_tweets(nametag, search_user=user, filter_list=filter_list)
def countLocations():
    # Count how many harvested tweets carry any location information
    # (coordinates, geo, or place) and how many distinct users they cover.
    countall = 0
    countloc = 0
    users_set = set()
    with open('searchCovid-2020-05-01.txt', 'r') as filetweetsVIC:
        for line in filetweetsVIC:
            try:
                jline = json.loads(line)
            except ValueError:
                # Skip blank or malformed lines.
                continue
            countall += 1
            if jline['coordinates'] is not None or jline['geo'] is not None or jline['place'] is not None:
                countloc += 1
                users_set.add(jline['user']['screen_name'])
    return "Location available in {} records out of {} total, for {} users.".format(
        countloc, countall, len(users_set))
def mapSA4():
    # Exploratory stub: reads the SA4 2016 Australia shapefile and inspects the
    # Victorian shapes; kept commented out for reference.
    # sf = shapefile.Reader('SA4_2016_AUST.shx')
    # print(sf.shapeTypeName, sf.bbox)
    # fields = sf.fields
    # shapes = sf.shapes()
    # vicshapes = shapes[30:47]
    # print(shapes[30].bbox)
    return
if __name__ == "__main__":
    main()