Skip to content

Commit

Permalink
crawl_2
Browse files Browse the repository at this point in the history
  • Loading branch information
yatan committed Nov 19, 2017
1 parent 0da11c3 commit 1d2c272
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 7 deletions.
36 changes: 32 additions & 4 deletions .vscode/tags
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,43 @@ active_user ../crawler.py /^ active_user = ""$/;" kind:variable line:20
active_user ../crawler.py /^ active_user = sys.argv[1]$/;" kind:variable line:14
active_user ../crawler.py /^active_user = "twittersoz"$/;" kind:variable line:12
actual_crawl ../crawler.py /^actual_crawl = 0$/;" kind:variable line:8
check_existed_link_id ../get_data_from_image.py /^def check_existed_link_id(link_id):$/;" kind:function line:7
check_existed_user ../export_links_db.py /^def check_existed_user(username):$/;" kind:function line:17
concepts ../get_data_from_image.py /^ concepts = get_API(row[2])$/;" kind:variable line:40
crawler.py ../crawler.py 1;" kind:file line:1
driver ../get_data.py /^driver = webdriver.Firefox()$/;" kind:variable line:5
cursor ../export_links_db.py /^cursor = db.cursor()$/;" kind:variable line:27
cursor ../export_users_db.py /^ cursor = db.cursor()$/;" kind:variable line:11
cursor ../export_users_db.py /^cursor = db.cursor()$/;" kind:variable line:17
cursor ../get_data_from_image.py /^cursor = db.cursor()$/;" kind:variable line:28
cursor ../show_users.py /^cursor = db.cursor()$/;" kind:variable line:5
cursor2 ../get_data_from_image.py /^ cursor2 = db.cursor()$/;" kind:variable line:41
db ../export_links_db.py /^db = sqlite3.connect('web\/data\/crawler')$/;" kind:variable line:4
db ../export_users_db.py /^db = sqlite3.connect('web\/data\/crawler')$/;" kind:variable line:7
db ../get_data_from_image.py /^db = sqlite3.connect('.\/web\/data\/crawler')$/;" kind:variable line:5
db ../show_users.py /^db = sqlite3.connect('.\/web\/data\/crawler')$/;" kind:variable line:3
e ../crawler.py /^e = Crawler(active_user)$/;" kind:variable line:89
get_data.py ../get_data.py 1;" kind:file line:1
export_links_db.py ../export_links_db.py 1;" kind:file line:1
export_users_db.py ../export_users_db.py 1;" kind:file line:1
file_exist ../export_users_db.py /^file_exist = os.path.isfile('.\/web\/data\/crawler') $/;" kind:variable line:5
file_links ../get_images_from_profile.py /^ file_links = open("links_" + userbo + ".txt","w") $/;" kind:variable line:29
get_API ../get_data_from_image.py /^def get_API(photo_url):$/;" kind:function line:15
get_data_from_image.py ../get_data_from_image.py 1;" kind:file line:1
get_images_from_profile.py ../get_images_from_profile.py 1;" kind:file line:1
get_img_url ../get_images_from_profile.py /^def get_img_url(url):$/;" kind:function line:5
get_links ../export_links_db.py /^def get_links(username):$/;" kind:function line:7
join ../crawler.py /^ def join( self ):$/;" kind:member line:81
links ../get_images_from_profile.py /^ links = get_img_url(userbo)$/;" kind:variable line:30
llistat ../export_links_db.py /^ llistat = get_links(userbo)$/;" kind:variable line:34
max_crawl ../crawler.py /^max_crawl = 5$/;" kind:variable line:7
parse_users ../crawler.py /^ def parse_users(self, soup):$/;" kind:member line:38
photo_id ../get_data_from_image.py /^ photo_id = str(row[0])$/;" kind:variable line:32
run ../crawler.py /^ def run (self):$/;" kind:member line:27
scanned_users ../crawler.py /^ scanned_users = []$/;" kind:variable line:19
soup ../get_data.py /^soup = BeautifulSoup(driver.page_source)$/;" kind:variable line:8
url ../get_data.py /^url = 'http:\/\/instagram.com\/umnpics\/'$/;" kind:variable line:4
show_users.py ../show_users.py 1;" kind:file line:1
user ../export_links_db.py /^ user = filename.split("links_")[1]$/;" kind:variable line:31
user ../export_users_db.py /^ user = filename.split("users_")[1]$/;" kind:variable line:21
user ../get_images_from_profile.py /^ user = filename.split("users_")[1]$/;" kind:variable line:27
userbo ../export_links_db.py /^ userbo = user.split(".txt")[0]$/;" kind:variable line:32
userbo ../export_users_db.py /^ userbo = user.split(".txt")[0]$/;" kind:variable line:22
userbo ../get_images_from_profile.py /^ userbo = user.split(".txt")[0]$/;" kind:variable line:28
users ../crawler.py /^ users = []$/;" kind:variable line:18
16 changes: 13 additions & 3 deletions crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ def run (self):
# Parse users
self.parse_users(soup)

def check_exist(self, username):
    """Return True if *username* already appears in analized_users.txt.

    Used to avoid spawning a duplicate crawler subprocess for a user
    that was already analysed.  A missing log file (nothing analysed
    yet, e.g. on the very first crawl) is treated as "not found"
    instead of raising IOError.

    :param username: user name (any type; coerced with str()) to look up
    :return: True when some line of the log contains the username,
             False otherwise (including when the log file is absent)
    """
    try:
        # 'with' guarantees the handle is closed even if reading fails
        # (the original open/close pair leaked the handle on exception).
        with open("analized_users.txt", "r") as log_file:
            # Iterate lazily and stop at the first match instead of
            # reading the whole file and scanning every line.
            for line in log_file:
                if str(username) in line:
                    return True
    except IOError:
        # Log file not created yet -- no user has been analysed.
        return False
    return False

def parse_users(self, soup):

Expand All @@ -60,7 +69,7 @@ def parse_users(self, soup):
#print element
file_users.close()
#Add user to users analized
file_users_analized = open("analized_users.txt","w+")
file_users_analized = open("analized_users.txt","w")
file_users_analized.write(self.active_user + "\n")
file_users_analized.close()
actual_crawl -= 1
Expand All @@ -73,8 +82,9 @@ def parse_users(self, soup):
time.sleep(5)
else:
print "worker " + usuari
actual_crawl += 1
self.child = subprocess.Popen([sys.executable, './crawler.py', usuari])
if ( check_exist(usuari) == False):
actual_crawl += 1
self.child = subprocess.Popen([sys.executable, './crawler.py', usuari])



Expand Down

0 comments on commit 1d2c272

Please sign in to comment.