-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindeed.py
142 lines (116 loc) · 4.29 KB
/
indeed.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/usr/bin/python
# -*- coding: utf-8 -*-
import sys, re
from urllib import urlencode,urlopen
# Default value sent as the `fromage` query parameter (maximum posting age).
# NOTE(review): presumably a number of days, and the site may also accept
# 'any' -- confirm before relying on either.
AGE = 60
# Default value sent as the `radius` query parameter.
DIST = 15
# Default value sent as the `limit` query parameter (results per page).
LIMIT = 10
# Locations that are searched on indeed.ch instead of indeed.fr.
SWISS = ['Genève, GE', 'Genève']

# Locations of interest.
regions = [
    'Paris',
    'Rhône-Alpes',
    'Essonnes',
    'Essonne',
    'Lyon',
    'Grenoble',
    'Genève, GE',
    'Annemasse',
    'Chambéry',
    'Annecy',
]

# Search terms, grouped by theme: job titles, employers, domains, technologies.
postes = [
    'ingénieur logiciel',
    'software engineer',
    'développeur',
    'devops',
    'ingénieur de production',
]
entreprises = ['CERN', 'CEA', 'INRIA', 'CNRS']
domaines = ['Carte à puce', 'Smartcard', 'Laboratoire']
words = ['python', 'django', 'angular', 'bootstrap', 'responsive']

# All search terms in one flat list; an empty string sits between
# consecutive groups as a separator.
keywords = (
    postes
    + ['']
    + entreprises
    + ['']
    + domaines
    + ['']
    + words
)
def build_search_url(**args):
    """Build an Indeed advanced-search URL from keyword arguments.

    Every known query parameter that is missing from ``args`` is filled in
    with its default (``DIST`` for ``radius``, ``AGE`` for ``fromage``,
    ``LIMIT`` for ``limit``).  Any other keyword given by the caller is
    passed through and encoded as well.  The Swiss site is used when the
    location ``l`` is listed in ``SWISS``, the French site otherwise.

    Example of a generated URL:
    http://www.indeed.fr/emplois?as_and=&as_phr=&as_any=&as_not=&as_ttl=&as_cmp=&jt=all&st=&radius=50&l=Paris&fromage=15&limit=50&sort=date&psf=advsrch

    Returns the complete URL as a string.  If URL-encoding fails, the
    arguments are printed and the exception is re-raised.
    """
    # (parameter name, default) pairs, applied in this order.
    defaults = (
        ('as_and', ''),
        ('as_phr', ''),
        ('as_any', ''),
        ('as_not', ''),
        ('as_ttl', ''),
        ('as_cmp', ''),
        ('jt', 'all'),  # job type
        ('st', ''),
        ('radius', DIST),
        ('l', ''),
        ('fromage', AGE),
        ('limit', LIMIT),
        ('sort', 'date'),
        ('psf', 'advsrch'),
    )
    for name, fallback in defaults:
        args.setdefault(name, fallback)

    if args['l'] in SWISS:
        base = 'http://www.indeed.ch/Stellen?'
    else:
        base = 'http://www.indeed.fr/emplois?'

    try:
        encoded = urlencode(args)
    except:
        # Dump the offending arguments for debugging before re-raising.
        print(args)
        raise
    return base + encoded
def query(all='', any='', exact='', no='', l='Paris', r=DIST, t=AGE):
    """Fetch an Indeed search page and return the result count it reports.

    Parameters (each is forwarded to ``build_search_url``):
        all   -- sent as ``as_and``
        any   -- sent as ``as_any``
        exact -- sent as ``as_phr`` (previously accepted but never sent,
                 so it had no effect on the search)
        no    -- sent as ``as_not``
        l     -- location, sent as ``l``; also selects the Swiss or French
                 page layout via ``SWISS``
        r     -- sent as ``radius``
        t     -- sent as ``fromage``

    Returns:
        The integer ``0`` when the "searchCount" element is not found in
        the page.  Otherwise a string taken from that element, with
        non-breaking spaces removed: the third number (the overall total)
        when the second number equals ``LIMIT``, else the second number
        (the index of the last result shown).
    """
    url = build_search_url(as_and=all, as_phr=exact, as_any=any, as_not=no,
                           l=l, radius=r, fromage=t)

    # Always release the connection, even if reading fails.
    response = urlopen(url)
    try:
        page = response.read()
    finally:
        response.close()

    if l in SWISS:
        pattern = '<div id="searchCount">Stellen (.*) - (.*) von (.*)</div>'
    else:
        pattern = '<div id="searchCount">Emplois (.*) à (.*) sur (.*)</div>'

    m = re.search(pattern, page)
    if not m:
        return 0

    # The numbers may contain UTF-8 encoded non-breaking spaces; strip them.
    last_shown = m.group(2).replace('\xc2\xa0', '')
    total = m.group(3).replace('\xc2\xa0', '')
    # A page filled up to LIMIT means more results may exist: report the
    # overall total instead of the last index shown.
    if int(last_shown) == LIMIT:
        return total
    return last_shown
# print query("python",l="Genève")
# sys.exit(0)
# for r in regions:
# for r in ['Fontainebleau']:
# print '\n',r
# for k in keywords:
# print query(k, l=r) if k else '\''
# print query('ingénieur', any="python django", t=30)
# print query('ingénieur', any="django", no="python", t=30)
# print query('ingénieur', any="python django", no="django", t=30)
# print query('ingénieur', any="python", no="django", t=30)
# print query('', any="ingénieur python", no="django", t=30)
# print query('ingénieur python', t=30)
# print query('ingénieur django', t=30)
# print query('ingénieur python django', t=30)
# print query("django")
# print query("python")
# print query("python django")
# print query(any="python django")
# print query("django", no="python")
# print query("django")
# print query("python")
### DISTANCES as the crow flies (km) ###
# Annecy-Genève or Annemasse 33
# Annecy-Gex 47
# Annecy-Chambéry 40
# Chambéry-Aix-les-Bains 13
# Chambéry-Grenoble 46 (49 according to Indeed)
# Chambéry-Genève 74
# Chambéry-Gex 86
# Chambéry-Lyon 86
# Grenoble-Lyon 94
# Grenoble-Genève 119
# Lyon-Genève 112
# Lyon-Chalon 112
# Lyon-Valence 92
# Lyon-Clermont 135
# Versailles-Paris 17
# Palaiseau-Versailles 13
# Palaiseau-Evry 17
# Evry-Arpajon 14
# Evry-Melun 19
# Melun-Fontainebleau 16
### SEARCHES ###
# Very wide: Lyon 120 km
# Wide: Chambéry 75-80 km
# Targeted: Grenoble 25, Chambéry 25, Annemasse 30? or Annecy 20, Annemasse 20
#### Popularity of technologies:
# Bootstrap, angularjs, backbonejs, web components, rails, django, symfony2
# print query("python", l="Chambéry", t=60, r=49)
# print query("python", l="Chambéry", t=60, r=10)
# print query("python", l="Grenoble", t=60, r=10)
# print query("python", l="Aix-les-Bains", t=60, r=10)
# print query("python", l="Annecy", t=60, r=10)
# print query("python", l="Bourgoin-Jallieu", t=60, r=10)
# print query("python", l="Albertville", t=60, r=10)
# Script action: print the count reported for "scada" around Annemasse
# with fromage=60 (the radius stays at its default).
print(query("scada", l="Annemasse", t=60))