diff --git a/.gitignore b/.gitignore index 0485ba8f6b07aff0aaa8ae8d20e6723bcb365a36..0dcb91398ab645ba5745e3f1904af17c845e4194 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ \.DS_Store + +*.pyc diff --git a/tweet_havester/__pycache__/general_process.cpython-37.pyc b/tweet_havester/__pycache__/general_process.cpython-37.pyc index 499ebe437a399bd4deb4396de8a70396355733ba..c8018862355bf6199d19eef0f2404937d361da5d 100644 Binary files a/tweet_havester/__pycache__/general_process.cpython-37.pyc and b/tweet_havester/__pycache__/general_process.cpython-37.pyc differ diff --git a/tweet_havester/__pycache__/tweepy_search.cpython-37.pyc b/tweet_havester/__pycache__/tweepy_search.cpython-37.pyc index 477cbf31596d50ad27e7ef9bd5185cabc4d57899..26a16ae86d26beeec6c17682d00b640f4ed31c2d 100644 Binary files a/tweet_havester/__pycache__/tweepy_search.cpython-37.pyc and b/tweet_havester/__pycache__/tweepy_search.cpython-37.pyc differ diff --git a/tweet_havester/__pycache__/tweepy_stream.cpython-37.pyc b/tweet_havester/__pycache__/tweepy_stream.cpython-37.pyc index 7c2944ac09c80c5722949a52ed3eb6e27d4e1422..c3c196dc734728bffd04af690dbb764be92b9924 100644 Binary files a/tweet_havester/__pycache__/tweepy_stream.cpython-37.pyc and b/tweet_havester/__pycache__/tweepy_stream.cpython-37.pyc differ diff --git a/tweet_havester/general_process.py b/tweet_havester/general_process.py index fdeac5d2db2e951c212cfd8f23abd33cd88ae624..5e093654770980ec40cb413d509230b63c316ffb 100644 --- a/tweet_havester/general_process.py +++ b/tweet_havester/general_process.py @@ -15,15 +15,15 @@ def data_process(tweet,model): return None location = tweet['location'] create_time = tweet['create_time'] - + flag = False for city in cities: #the location contains target city names if city in location.lower(): #generalize city name location=city - else: - return None - + flag = True + if(not flag): + return None p_tweet={ '_id':id, "create_time":create_time, diff --git a/tweet_havester/tweepy_stream.py b/tweet_havester/tweepy_stream.py index 7087200abd534ddfdb9befbec6f5625843931007..cfd8180d5d7efcff276c4ff110e37a7b7a1a6139 100644 --- a/tweet_havester/tweepy_stream.py +++ b/tweet_havester/tweepy_stream.py @@ -37,7 +37,7 @@ class listener(StreamListener): id_doc = {"_id":str(dic["user_id"]),"user_name":content['user']['name'],"isSearched":False} p_dic = gp.data_process(dic,self.model) if p_dic != None: - process_db.save(p_dic) + pc_db.save(p_dic) id_db.save(id_doc) db.save(dic) # print("success")