# process.py
import io
import json
from datetime import datetime
import re
from textblob import TextBlob
# Path of the input file; the script reads it line by line.
file1 = 'tweetsWA.json'
# Destination for the extracted records.  It is opened at import time and
# stays open until the end of the script, which writes to it and closes it.
output_file = io.open('extractedWA.json', 'w', encoding='utf-8')
count = 1  # NOTE(review): not referenced anywhere in the visible script

# A "+<digits><whitespace>" token, e.g. the "+0000 " in
# "Wed Oct 10 20:19:24 +0000 2018".  Raw string: "\+", "\d" and "\s" are not
# valid escapes in a normal string literal and warn on recent Pythons.
_OFFSET_RE = re.compile(r"\+\d+\s")


def remove_ms(x):
    """Return *x* with every "+<digits><whitespace>" token removed.

    NOTE(review): despite the name, the pattern looks like it targets the
    UTC offset of the timestamp rather than milliseconds -- confirm.
    """
    return _OFFSET_RE.sub("", x)


def mk_dt(x):
    """Parse a timestamp such as "Wed Oct 10 20:19:24 +0000 2018".

    The "+<digits>" token is stripped first, so the result is a naive
    ``datetime`` carrying the wall-clock values exactly as written.

    Raises:
        ValueError: if the remaining text does not match
            "%a %b %d %H:%M:%S %Y".
    """
    return datetime.strptime(remove_ms(x), "%a %b %d %H:%M:%S %Y")


def my_date(x):
    """Return the date part of timestamp *x* formatted as "YYYY-MM-DD"."""
    return mk_dt(x).strftime("%Y-%m-%d")


def my_time(x):
    """Return the time part of timestamp *x* formatted as "HH:MM:SS"."""
    return mk_dt(x).strftime("%H:%M:%S")
# Read the input one line at a time, treating each line as a standalone JSON
# document, and append one reduced record (id, date, time, polarity, lang)
# per line to the output file.  Every record is followed by ",\n", so the
# output is a comma-terminated sequence of JSON objects, not a JSON array.
#
# `with output_file:` guarantees the output is flushed and closed even when
# the input file cannot be opened or an unexpected error escapes the loop.
with output_file:
    with io.open(file1, 'r', encoding="utf-8") as f:
        for line in f:
            try:
                # Parse the line once and reuse the result for every field.
                record = json.loads(line)
                create_time = record['created_at']
                tweet = record['text']
                my_dict = {
                    'id': record['id'],
                    'date': my_date(create_time),
                    'time': my_time(create_time),
                    'polarity': TextBlob(tweet).sentiment.polarity,
                    'lang': record['lang'],
                }
            except (ValueError, KeyError, TypeError):
                # Best-effort extraction: skip lines that are not valid JSON,
                # lack one of the required fields, have a timestamp in an
                # unexpected format, or hold a value of the wrong type.
                # Anything else (Ctrl-C, I/O failures) is allowed to surface.
                continue
            # Written outside the try block so a failing write is reported
            # instead of being silently dropped.
            output_file.write(json.dumps(my_dict) + ',\n')