Skip to content
Snippets Groups Projects
Select Git revision
  • 19e0f6959f9309162f788e8590601864356fded4
  • master default protected
2 results

Tile.py

Blame
  • process.py 882 B
    import io
    import json
    import re
    import sys
    from datetime import datetime

    from textblob import TextBlob
    
    
    # Input file; the loop further down reads it line by line and JSON-decodes
    # each line.
    file1 = 'tweetsWA.json'

    # Output file; opened here and written to / closed by the loop further down.
    output_file = io.open('extractedWA.json', 'w')

    count = 1  # NOTE(review): not referenced anywhere in the visible script

    # A "+<digits>" token followed by one whitespace character, e.g. the
    # "+0000 " offset in "Wed Aug 27 13:08:45 +0000 2008".  Raw string so the
    # backslashes reach the regex engine instead of being treated as (invalid)
    # string escapes; compiled once because it is used for every tweet.
    _OFFSET_TOKEN = re.compile(r"\+\d+\s")


    def remove_ms(x):
        """Return *x* with every "+<digits><whitespace>" token removed.

        Despite the name, this strips the "+0000 "-style offset field from a
        timestamp string so that ``mk_dt`` can parse the remainder.
        """
        return _OFFSET_TOKEN.sub("", x)


    def mk_dt(x):
        """Parse a timestamp string into a naive ``datetime``.

        The offset token is dropped first (see ``remove_ms``); what is left
        must match "%a %b %d %H:%M:%S %Y", otherwise ``ValueError`` is raised.
        """
        return datetime.strptime(remove_ms(x), "%a %b %d %H:%M:%S %Y")


    def my_date(x):
        """Return the date part of timestamp string *x* as "YYYY-MM-DD"."""
        return "{:%Y-%m-%d}".format(mk_dt(x))


    def my_time(x):
        """Return the time part of timestamp string *x* as "HH:MM:SS"."""
        return "{:%H:%M:%S}".format(mk_dt(x))
    
    # Read the input one line at a time.  Each line is decoded as JSON and must
    # provide the 'id', 'created_at', 'text' and 'lang' keys; lines that cannot
    # be decoded or lack a required field are skipped (best effort).
    #
    # Each extracted record is written as a JSON object followed by ",\n", so
    # the output is a sequence of comma-terminated lines rather than a single
    # valid JSON document.  That format is kept unchanged for whatever consumes
    # it downstream.
    skipped = 0
    try:
        with io.open(file1, 'r', encoding="utf-8") as f:
            for line in f:
                try:
                    # Decode once and reuse, instead of re-parsing the same
                    # line for every field.
                    tweet = json.loads(line)
                    create_time = tweet['created_at']
                    my_dict = {
                        'id': tweet['id'],
                        'date': my_date(create_time),
                        'time': my_time(create_time),
                        'polarity': TextBlob(tweet['text']).sentiment.polarity,
                        'lang': tweet['lang'],
                    }
                    record = json.dumps(my_dict)
                except (ValueError, KeyError, TypeError):
                    # Malformed JSON / unparseable timestamp (ValueError),
                    # missing field (KeyError) or wrong value type (TypeError).
                    # Anything else -- including Ctrl-C and programming
                    # errors -- is deliberately allowed to propagate.
                    skipped += 1
                    continue
                output_file.write(record + ',\n')
    finally:
        # Close the output even if reading the input fails part-way through.
        output_file.close()

    if skipped:
        sys.stderr.write("skipped {} unusable line(s)\n".format(skipped))