Skip to content
Snippets Groups Projects
Commit 150f1d54 authored by Felipe Ramos's avatar Felipe Ramos
Browse files

fix to disk storage data for both search and stream

parent 83d2620b
No related branches found
No related tags found
No related merge requests found
Source diff could not be displayed: it is too large. Options to address this: view the blob.
This diff is collapsed.
...@@ -15,7 +15,7 @@ access_token = "1386612665682853892-Tp6J5KfT4Wr8gsIPYSr2G5W15axUlj" ...@@ -15,7 +15,7 @@ access_token = "1386612665682853892-Tp6J5KfT4Wr8gsIPYSr2G5W15axUlj"
access_secret = "hN59hNhidGKLJpsJ8nX8Dr2EEB4m9ny0w70K2Fd0dpY8Q" access_secret = "hN59hNhidGKLJpsJ8nX8Dr2EEB4m9ny0w70K2Fd0dpY8Q"
outfile = open('data.json', 'r+', encoding ='utf-8') outfile = open('twt_stream.json', 'r+', encoding ='utf-8')
class StreamListener(tweepy.StreamListener): class StreamListener(tweepy.StreamListener):
...@@ -48,8 +48,9 @@ class StreamListener(tweepy.StreamListener): ...@@ -48,8 +48,9 @@ class StreamListener(tweepy.StreamListener):
#db.save(doc) #db.save(doc)
#self.file.append(json.load(doc)) #self.file.append(json.load(doc))
serial = {doc['_id']:doc}
print(doc) print(doc)
outfile.write(str(doc)) outfile.write(str(serial) + '\n')
...@@ -65,7 +66,8 @@ if __name__ == "__main__": ...@@ -65,7 +66,8 @@ if __name__ == "__main__":
auth = tweepy.OAuthHandler(API_key, API_secret) auth = tweepy.OAuthHandler(API_key, API_secret)
auth.set_access_token(access_token, auth.set_access_token(access_token,
access_secret) access_secret)
api = tweepy.API(auth) api = tweepy.API(auth,wait_on_rate_limit=True, wait_on_rate_limit_notify=True)
#outfile = open('twt_stream.json', 'r+', encoding ='utf-8')
if collector_type == 'stream': if collector_type == 'stream':
...@@ -79,7 +81,7 @@ if __name__ == "__main__": ...@@ -79,7 +81,7 @@ if __name__ == "__main__":
box = [144.5,-38.2,145.49,-37.41] #Melb box box = [144.5,-38.2,145.49,-37.41] #Melb box
stream.filter(track=tags, locations= box) stream.filter(track=tags, locations= box, languages=["en"])
...@@ -89,19 +91,19 @@ if __name__ == "__main__": ...@@ -89,19 +91,19 @@ if __name__ == "__main__":
twts = {} twts = {}
# capture the most data # capture the most data
for tweet in api.search(q="AFL tennis footie swimming AustraliaOpen soccer", geocode='-37.7980882,144.9334346,60km', count=100, tweet_mode="extended"): for tweet in api.search(q="AFL OR tennis OR footie OR swimming OR AustraliaOpen OR soccer", geocode='-37.7980882,144.9334346,60km', count=100, tweet_mode="extended"):
tweet = tweet._json #tweet = tweet._json
print("") print("")
print(tweet.user.screen_name) print(tweet.user.screen_name)
print("") print("")
print(tweet.text) print(tweet.full_text )
tmp.append(tweet) tmp.append(tweet)
replies = [] replies = []
for v in tmp: for v in tmp:
twts[v.id] = v twts[v.id_str] = v
# Get retweets # Get retweets
#for tweet in api.retweets(v.id, count=100): #for tweet in api.retweets(v.id, count=100):
# twts[tweet.id] = tweet # twts[tweet.id] = tweet
...@@ -127,11 +129,15 @@ if __name__ == "__main__": ...@@ -127,11 +129,15 @@ if __name__ == "__main__":
#db.save(status._json) #db.save(status._json)
save results to disk #save results to disk
serializable = {k: v._json for k,v in twts.items()} serializable = {k: v._json for k,v in twts.items()}
with open("tweet_results-melb.json", "w") as outfile: for k, v in serializable:
json.dump(twts, outfile) v['region'] = 'Melbourne'
with open("twt_search.json", "w") as outfile:
json.dump(serializable, outfile)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment