Skip to content
Snippets Groups Projects
Commit 16344ac7 authored by evana's avatar evana
Browse files

extract tweet's information

parent 48131472
No related branches found
No related tags found
1 merge request!10Patch 2
import io
import json
from datetime import datetime
import re
from textblob import TextBlob
# Input file: one JSON-encoded tweet object per line.
file1 = 'tweetsWA.json'
# Output file: the extracted fields of every tweet that parsed successfully.
output_path = 'extractedWA.json'

# Matches a "+NNNN " token (a plus sign, digits, one whitespace character),
# which the strptime format used in mk_dt() does not account for.
_OFFSET_RE = re.compile(r"\+\d+\s")


def remove_ms(x):
    """Return *x* with every "+<digits><whitespace>" token removed."""
    return _OFFSET_RE.sub("", x)


def mk_dt(x):
    """Parse a ``created_at`` string into a naive ``datetime``.

    The "+NNNN " token is stripped first, so the remaining text must match
    ``"%a %b %d %H:%M:%S %Y"``.

    Raises:
        ValueError: if the stripped string does not match that format.
    """
    return datetime.strptime(remove_ms(x), "%a %b %d %H:%M:%S %Y")


def my_date(x):
    """Return the date part of a ``created_at`` string as ``YYYY-MM-DD``."""
    return "{:%Y-%m-%d}".format(mk_dt(x))


def my_time(x):
    """Return the time part of a ``created_at`` string as ``HH:MM:SS``."""
    return "{:%H:%M:%S}".format(mk_dt(x))


def extract_record(line):
    """Build the output dict for one input line.

    The line is decoded as JSON exactly once, and the timestamp is parsed
    exactly once. The returned dict has the keys ``id``, ``date``, ``time``,
    ``polarity`` and ``lang``, in that order.

    Raises:
        ValueError: the line is not valid JSON, or ``created_at`` has an
            unexpected format.
        KeyError: a required field is missing.
        TypeError: the decoded JSON value (or one of its fields) does not
            have the expected type.
    """
    tweet = json.loads(line)
    # Read every required field before the (comparatively expensive)
    # sentiment analysis so malformed records are rejected early.
    tweet_id = tweet['id']
    created = mk_dt(tweet['created_at'])
    text = tweet['text']
    lang = tweet['lang']
    return {
        'id': tweet_id,
        'date': "{:%Y-%m-%d}".format(created),
        'time': "{:%H:%M:%S}".format(created),
        'polarity': TextBlob(text).sentiment.polarity,
        'lang': lang,
    }


def main(in_path=file1, out_path=output_path):
    """Extract the selected fields of every tweet in *in_path* to *out_path*.

    Lines that cannot be decoded or that lack a required field are skipped,
    as in the original best-effort behaviour; any other error propagates
    instead of being silently swallowed.

    Records are written back-to-back with ``indent=4`` and no separator
    between them, so the output is a stream of concatenated JSON objects
    rather than a single JSON document.

    Returns:
        A ``(written, skipped)`` tuple of line counts.
    """
    written = 0
    skipped = 0
    with io.open(in_path, 'r', encoding="utf-8") as src, \
            io.open(out_path, 'w', encoding="utf-8") as dst:
        for line in src:
            try:
                record = extract_record(line)
            except (ValueError, KeyError, TypeError):
                # Malformed or incomplete record: skip it, but keep count.
                skipped += 1
                continue
            json.dump(record, dst, indent=4)
            written += 1
    return written, skipped


if __name__ == "__main__":
    n_written, n_skipped = main()
    print("wrote {} records, skipped {} lines".format(n_written, n_skipped))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment