diff --git a/tweet_havester/process.py b/tweet_havester/process.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa334293163315a0f076c2f0ff06257bb968b66a
--- /dev/null
+++ b/tweet_havester/process.py
@@ -0,0 +1,58 @@
+"""Copy classifier-approved English tweets from target cities to a results DB.
+
+Reads every document in the ``raw_tweets`` CouchDB database, keeps tweets whose
+language is ``en`` and whose location mentions one of ``cities``, and saves those
+for which the pickled model predicts ``1`` into the ``tweet_results`` database.
+"""
+import json
+
+import couchdb
+
+try:
+    from sklearn.externals import joblib
+except ImportError:  # removed in scikit-learn 0.23; use the standalone package
+    import joblib
+
+# NOTE: joblib.load unpickles the file, so only load a model you trust.
+model = joblib.load("train_model.m")
+
+# NOTE(review): credentials are hard-coded here; consider external configuration.
+user = "admin"
+password = "password"
+dbserver = couchdb.Server("http://{}:{}@172.26.38.157:5984/".format(user, password))
+db = dbserver["raw_tweets"]
+r_db = dbserver["tweet_results"]
+
+# Lower-case city names searched for inside the free-text location field.
+cities = ['melbourne', 'sydney', 'adelaide', 'perth', 'brisbane']
+
+for doc_id in db:
+    tweet = db.get(doc_id)
+    # Skip documents that disappeared since listing or that are not tweets
+    # (e.g. design documents) instead of crashing on a missing field.
+    if tweet is None or 'text' not in tweet:
+        continue
+    # Only English tweets are ever classified and stored.
+    if tweet.get('lang') != 'en':
+        continue
+
+    # The location may be missing or null; treat that as "no target city".
+    place = (tweet.get('location') or '').lower()
+    # Generalize the location to the first target city it mentions.
+    city = next((name for name in cities if name in place), None)
+    if city is None:
+        continue
+    if model.predict([tweet['text']])[0] != 1:
+        continue
+    p_tweet = {
+        '_id': doc_id,
+        'user_id': tweet.get('user_id'),
+        'create_time': tweet.get('create_time'),
+        'location': city,
+        'lang': tweet['lang'],
+        'text': tweet['text'],
+    }
+    try:
+        r_db.save(p_tweet)
+    except couchdb.http.ResourceConflict:
+        pass  # already stored by an earlier run; keep the existing document
diff --git a/tweet_havester/requirement.txt b/tweet_havester/requirement.txt
new file mode 100644
index 0000000000000000000000000000000000000000..51c11cdf9dff5a63c2672e0144b202a022187496
--- /dev/null
+++ b/tweet_havester/requirement.txt
@@ -0,0 +1,3 @@
+CouchDB
+scikit-learn
+tweepy
\ No newline at end of file
diff --git a/tweet_havester/train_model.m b/tweet_havester/train_model.m
new file mode 100755
index 0000000000000000000000000000000000000000..15152e84b51b5407faea84fd3ab158159a429c63
Binary files /dev/null and b/tweet_havester/train_model.m differ
diff --git a/tweet_havester/tweepy_search.py b/tweet_havester/tweepy_search.py
index 94de0799feab5232cb550377a82726d3c85f0c09..eaedcb2688f26588f123067354da96cbaab0181f 100644
--- a/tweet_havester/tweepy_search.py
+++ b/tweet_havester/tweepy_search.py
@@ -1,16 +1,17 @@
 #  -*- coding: utf-8 -*-
 import json
-import csv
 import os
 import couchdb
 import tweepy
 from tweepy import OAuthHandler
+from sklearn.externals import joblib
 
 
 
 class TweetSearchHavester():
     def __init__(self,couch):
         self.couch = couch
+        self.model = joblib.load("./train_model.m")  # classifier later handed to date_process(); path is relative to the working directory
 
     def run(self, ids , city):
         dict = {}
@@ -39,6 +40,7 @@ class TweetSearchHavester():
     def get_all_tweets(self, user_id, api):
         new_tweets = api.user_timeline(user_id=user_id, count=50)
         db = self.couch['raw_tweets']
+        process_db = self.couch['tweet_results']
         for tweet in tweepy.Cursor(api.user_timeline,id = user_id ).items(50):
             # save most recent tweets
             dic = {}
@@ -53,6 +55,9 @@ class TweetSearchHavester():
                 dic['location'] = tweet.user.location
             # print(dic)
             try:
+                p_dic = date_process(dic,self.model)  # NOTE(review): date_process is not defined or imported in the visible hunks - confirm
+                if p_dic is not None:
+                    process_db.save(p_dic)
                 db.save(dic)
             except:
                 pass
diff --git a/tweet_havester/tweepy_stream.py b/tweet_havester/tweepy_stream.py
index 8d0d330aa1e33532eb315299a2176e42f6e9dbbf..9cb48840a371f2bf2433b2b11758209324968f6c 100644
--- a/tweet_havester/tweepy_stream.py
+++ b/tweet_havester/tweepy_stream.py
@@ -7,10 +7,12 @@ import threading
 from tweepy import OAuthHandler
 from tweepy import Stream
 from tweepy.streaming import StreamListener
+from sklearn.externals import joblib
 
 class listener(StreamListener):
     def __init__(self,path):
         self.couch = couchdb.Server(path)
+        self.model = joblib.load("./train_model.m")  # classifier later handed to date_process(); path is relative to the working directory
     def convertValue(self,origin):
         dic = {}
         dic['_id'] = origin["id_str"]
@@ -27,10 +29,14 @@ class listener(StreamListener):
         try:
             db = self.couch['raw_tweets']
             id_db = self.couch['user_id']
+            pc_db = self.couch['tweet_results']
             content = json.loads(data)
             dic = self.convertValue(content)
             id_doc = {"_id":str(dic["user_id"]),"user_name":content['user']['name'],"isSearched":False}
             # print(id_doc)
+            p_dic = date_process(dic,self.model)  # NOTE(review): date_process is not defined or imported in the visible hunks - confirm
+            if p_dic is not None:
+                pc_db.save(p_dic)  # pc_db is the tweet_results handle opened above
             id_db.save(id_doc)
             db.save(dic)
             # print("success")