-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathFiltering.py
More file actions
73 lines (55 loc) · 2.03 KB
/
Filtering.py
File metadata and controls
73 lines (55 loc) · 2.03 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
__author__ = 'gatluri'
import twitter
import tweepy
from tweepy import OAuthHandler
import tweepy
import re
import time
# Filter the Twitter user ids listed in OhioUsers.txt, appending the ones that
# do not look like bots to valid_Ohio_users.txt.
#
# NOTE(security): the credentials below were committed in plain text and
# should be treated as compromised -- rotate them and supply the replacements
# through the environment variables read here. The literals are kept only as
# fallbacks so that existing invocations keep working unchanged.
import os
import sys

consumer_key = os.environ.get(
    'TWITTER_CONSUMER_KEY', 'q0FPYbzctolJ3tCwHc5PBd0IF')
consumer_secret = os.environ.get(
    'TWITTER_CONSUMER_SECRET',
    'c1zshTpZ0YnoafJueyIpo6StrOTybcKLSqQsRlLbzVrjF0a83j')
access_token = os.environ.get(
    'TWITTER_ACCESS_TOKEN',
    '604340177-SH2zgSFwY3hT9NeTxGBgkBeE1fealGTHwVhb4EAN')
access_secret = os.environ.get(
    'TWITTER_ACCESS_SECRET',
    'nq00oY0Bxwv6LhvOdUxf2HUMYL0VTwanPakuVt4sjPduy')

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)
api = tweepy.API(auth)
arr = []

# Built once instead of on every loop iteration. Every alternative contains
# "follow", so in practice this matches any tweet mentioning that word.
spamWords = ["follow", "rt and follow", "follow and rt"]
matchExact = re.compile(r'%s' % '|'.join(spamWords), flags=re.IGNORECASE)


def _bot_score(user, timeline):
    """Return a heuristic bot score for a user; higher means more bot-like.

    The checks are deliberately cumulative (plain ``if``, not ``elif``): a
    follower ratio below 0.001 also satisfies the 0.01 and 0.1 checks and so
    contributes 50 + 20 + 10 points.

    Args:
        user: object exposing ``followers_count``, ``friends_count`` and
            ``statuses_count``.
        timeline: iterable of tweets, each exposing a ``text`` attribute.

    Returns:
        The accumulated integer score.
    """
    score = 0

    # float() forces true division; under Python 2 the original integer
    # division truncated every ratio below 1 to 0, defeating the thresholds.
    # The +1 avoids dividing by zero for accounts that follow nobody.
    follower_ratio = float(user.followers_count) / (user.friends_count + 1)
    if follower_ratio < 0.001:
        score += 50
    if follower_ratio < 0.01:
        score += 20
    if follower_ratio < 0.1:
        score += 10

    if user.statuses_count < 50:
        score += 20
    if user.statuses_count < 100:
        score += 10

    for tweet in timeline:
        # More than three hashtags in one tweet.
        if len(re.findall(r"#\S+", tweet.text)) > 3:
            score += 10
        # At least one link.
        if re.findall(r"http\S+", tweet.text):
            score += 20
        # At least one spam phrase.
        if matchExact.findall(tweet.text):
            score += 25

    return score


count = 0
# Both files are opened once and closed by the context manager; the original
# never closed the input and re-opened the output for every accepted user.
with open('OhioUsers.txt', 'r') as f, open('valid_Ohio_users.txt', 'a') as ff:
    for count, line in enumerate(f, 1):
        # Pause for a minute after every 25 lines to stay under the API
        # rate limit.
        if count % 25 == 0:
            time.sleep(60)

        # Keep only the digits of the line. Joining the characters works on
        # both Python 2 and Python 3 (where filter() returns an iterator).
        digits = ''.join(ch for ch in line if ch.isdigit())
        if not digits:
            # Blank or malformed line: nothing to look up.
            continue
        idd = int(digits)

        try:
            User = api.get_user(idd)
            timeline = api.user_timeline(idd, count=5)
        except tweepy.error.TweepError as err:
            # Best effort: report the failure and move on to the next user
            # instead of silently discarding it.
            sys.stderr.write('Skipping user %s: %s\n' % (idd, err))
            continue

        botScore = _bot_score(User, timeline)
        if botScore < 125:
            record = line.rstrip('\r\n')
            print(record)
            # Exactly one newline per record; the original wrote the line's
            # own newline plus another, leaving blank lines in the output.
            ff.write(record + '\n')
            # The run is long (it sleeps between batches), so flush to keep
            # accepted users on disk if the script is interrupted.
            ff.flush()