-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtransform.js
120 lines (112 loc) · 3.46 KB
/
transform.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
const fs = require('fs')
const path = require('path')
const ProgressBar = require('./progress-bar')
const { pdf2Txt } = require('./pdf2txt')
// Progress bar instance used by readResume below; the label '分析进度' means "analysis progress".
// NOTE(review): the second argument (50) is presumably the bar width — confirm in ./progress-bar.
const pb = new ProgressBar('分析进度', 50)
/**
 * List the regular files directly inside a directory (non-recursive).
 *
 * The directory is resolved relative to this module's own directory
 * (`__dirname`), not the process working directory. Entries that are not
 * regular files (e.g. sub-directories) are left out. `fs.stat` follows
 * symlinks, so a symlink pointing at a file is included.
 *
 * @param {string} resumeDir directory holding the resumes, relative to this file
 * @returns {Promise<string[]>} absolute paths of the files, in the order
 *   `fs.readdir` reported them; rejects with the underlying fs error when the
 *   directory cannot be read or an entry cannot be stat'ed
 */
const readDir = async function (resumeDir) {
  const baseDir = path.join(__dirname, resumeDir)
  const entries = await fs.promises.readdir(baseDir)
  const candidates = entries.map((entry) => path.join(baseDir, entry))
  // Promise.all keeps the results aligned with `candidates`, so the output
  // order matches the directory listing order.
  const stats = await Promise.all(
    candidates.map((candidate) => fs.promises.stat(candidate))
  )
  return candidates.filter((_, index) => stats[index].isFile())
}
/**
 * Extract the text of every resume and score it against the keywords.
 *
 * Files are processed one at a time, in input order. The shared progress bar
 * `pb` is rendered before each file and once more after the last one.
 *
 * @param {string[]} dirs paths of the resume files to read
 * @param {{key: string, weight: number}[]} keywords keywords to look for; a
 *   keyword found anywhere in the extracted text adds its weight to the score
 * @returns {Promise<Object[]>} one entry per file, in input order, with:
 *   `name` (file name without directory and `.pdf` extension), `path`,
 *   `weight` (sum of matched keyword weights), `match` (weight as a percentage
 *   of the total keyword weight, e.g. "75.00%"), `integrity` (percentage of
 *   information fields detected), `getContent()` (extracted text) and
 *   `getInformation()` (raw regex matches per field). Rejects with the
 *   `pdf2Txt` error if any file cannot be converted.
 */
const readResume = async function (dirs, keywords) {
  const toPercent = (ratio) => (ratio * 100).toFixed(2) + '%'
  const total = dirs.length
  const max = keywords.reduce((sum, item) => sum + item.weight, 0)
  const resumes = []

  for (let i = 0; i < total; i++) {
    pb.render({ completed: i, total })
    const filePath = dirs[i]
    // assumes pdf2Txt resolves to the document text as a string — confirm in ./pdf2txt
    const data = await pdf2Txt(filePath)

    // Sum the weights of the keywords that occur in the text.
    const weight = keywords.reduce(
      (sum, item) => (data.includes(item.key) ? sum + item.weight : sum),
      0
    )

    // Each field holds the array of matches, or null when nothing was found.
    // NOTE(review): some character classes look unintended ("A-z" in the email
    // pattern, "|" inside "[3|4|5|7|8]", "+" inside "[\d+]"); they are kept
    // as-is here so detection results do not change.
    const information = {
      email: data.match(/[\d\w]+\b@[a-zA-ZA-z0-9]+\.[a-z]+/g), // email address
      phone: data.match(
        /(1[3|4|5|7|8][\d]{9}|0[\d]{2,3}-[\d]{7,8}|400[-]?[\d]{3}[-]?[\d]{4})/g
      ), // phone number
      website: data.match(/(http:\/\/|https:\/\/)((\w|=|\?|\.|\/|&|-)+)/g), // personal website or github.com link
      project: data.match(/项目经历|项目经验/g), // "project experience" section
      work: data.match(/工作经历/g), // "work experience" section
      education: data.match(/教育经历/g), // "education" section
      age: data.match(/[\d+]{1,2}岁|出生年月/g), // age or date of birth
      years: data.match(/[\d+]{1,2}年/g), // years of work experience
    }

    const fields = Object.values(information)
    const detected = fields.filter((found) => !!found).length
    const integrity = toPercent(detected / fields.length)

    // With no keywords (or zero total weight) the ratio would be 0/0 = NaN;
    // report 0% instead of "NaN%".
    const match = toPercent(max === 0 ? 0 : weight / max)

    // Derive the display name from the file name itself so it works for any
    // directory name and path separator.
    const extension = path.extname(filePath)
    const name = path.basename(
      filePath,
      extension.toLowerCase() === '.pdf' ? extension : ''
    )

    resumes.push({
      name,
      path: filePath,
      weight,
      match,
      integrity,
      getContent: () => data,
      getInformation: () => information,
    })
  }

  // Final render so the bar shows 100% (also rendered for an empty list).
  pb.render({ completed: total, total })
  return resumes
}
/**
 * Collect the PDF resumes in a directory and extract/score their content.
 *
 * @param {string} resumeDir directory holding the resumes (relative path,
 *   passed through to readDir)
 * @param {Array} keywords screening keywords, passed through to readResume
 * @returns {Promise<Array>} the entries produced by readResume for every PDF found
 */
const main = async (resumeDir, keywords) => {
  const dirs = await readDir(resumeDir)
  // Check the file extension rather than searching the whole path for "pdf":
  // a substring test also accepts non-PDF files whenever a parent directory
  // or the file name merely contains "pdf".
  const pdfs = dirs.filter(
    (item) => path.extname(item).toLowerCase() === '.pdf'
  )
  console.log('发现简历:', pdfs.length, '件')
  const resumes = await readResume(pdfs, keywords)
  console.log('\n内容提取完成')
  return resumes
}
// Export `main` so that code requiring this module can call it.
exports.main = main