-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTs_request.py
87 lines (77 loc) · 2.77 KB
/
Ts_request.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 7 16:57:05 2021
@author: Shine'lon
"""
import re
import requests
from lxml import etree
from selenium import webdriver
# import time
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
# XPath of the inline <script> on the episode page that carries the player URL.
_PLAYER_SCRIPT_XPATH = '/html/body/div[1]/div/div[2]/div[2]/div[1]/script[1]/text()'
# XPath of the player <iframe> on the episode page.
_PLAYER_IFRAME_XPATH = '/html/body/div[1]/div/div[2]/div[2]/div[1]/iframe[1]'
# XPath of the <script> inside the player frame that carries the signed path.
_SIGN_SCRIPT_XPATH = '/html/body/script[5]'


def _fetch_text(url, timeout):
    """Return the body of ``url`` as text, raising on an HTTP error status."""
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.text


def _prefix_before_index(url):
    """Return the part of ``url`` that precedes its first ``index``."""
    prefix, marker, _ = url.partition('index')
    if not marker:
        raise ValueError(f'no "index" component in playlist URL: {url!r}')
    return prefix


def _render_player_frame(episode_url, render_timeout):
    """Return the rendered HTML of the player iframe, or None if the user gives up.

    Chrome is always shut down before this function returns or raises.
    """
    # Function-scope import: selenium is already a dependency of this file,
    # and this keeps the block self-contained.
    from selenium.common.exceptions import TimeoutException

    browser = webdriver.Chrome()
    try:
        browser.get(episode_url)
        # find_element(By.XPATH, ...) replaces find_element_by_xpath, which
        # was removed in Selenium 4.
        player_frame = browser.find_element(By.XPATH, _PLAYER_IFRAME_XPATH)
        browser.switch_to.frame(player_frame)
        wait = WebDriverWait(browser, render_timeout)
        while True:
            try:
                wait.until(
                    EC.presence_of_element_located((By.XPATH, _SIGN_SCRIPT_XPATH))
                )
            except TimeoutException as timeout_err:
                print(timeout_err,'请求子页面超时,请检查网络.')
                if input('是否继续等待(y/n):') != 'y':
                    print('退出')
                    return None
                print('loading...')
                continue
            return browser.page_source
    finally:
        # quit() (not close()) also terminates the chromedriver process.
        browser.quit()


def ts_request(urls_queue, episode_url, *, request_timeout=30, render_timeout=60):
    """Resolve the .ts segment URLs of one episode and push them onto a queue.

    The episode page is fetched twice: once with ``requests`` to read the
    player host from an inline script, and once in Chrome so the player
    iframe can render the script that holds the signed playlist path. The
    master playlist is then followed to the media playlist, and every
    segment listed there is queued as ``[index, url]``.

    Args:
        urls_queue: Object with a ``put`` method; receives one
            ``[index, segment_url]`` list per segment, in playlist order.
        episode_url: URL of the page for the specific episode.
        request_timeout: Seconds allowed for each plain HTTP request.
        render_timeout: Seconds to wait for the player frame's script before
            asking the user whether to keep waiting.

    Returns:
        The number of segments queued. 0 if the user chose to stop waiting
        for the player frame.

    Raises:
        ValueError: If an expected script, URL or playlist entry cannot be
            found in the fetched content.
        requests.RequestException: If an HTTP request fails or times out.
    """
    # Validate the episode page before paying the cost of launching Chrome.
    episode_html = _fetch_text(episode_url, request_timeout)
    player_scripts = etree.HTML(episode_html).xpath(_PLAYER_SCRIPT_XPATH)
    if not player_scripts:
        raise ValueError(f'player script not found on episode page: {episode_url!r}')
    host_match = re.search(r'url.*?(http.*?)/sha.*?', player_scripts[0])
    if host_match is None:
        raise ValueError('player URL not found in the episode page script')
    # Player host, e.g. http://iqiyi.cdn9-okzy.com (escaping backslashes removed).
    base_url = host_match.group(1).replace('\\', '')

    frame_html = _render_player_frame(episode_url, render_timeout)
    if frame_html is None:
        # The user declined to keep waiting; nothing can be queued.
        return 0
    print(frame_html)

    # Extract the signed playlist path from the player frame.
    sign_scripts = etree.HTML(frame_html).xpath(_SIGN_SCRIPT_XPATH + '/text()')
    if not sign_scripts:
        raise ValueError('sign script not found in the player frame')
    sign_match = re.search(r'main.*?"(/.*?sign=.*?)"', sign_scripts[0])
    if sign_match is None:
        raise ValueError('signed playlist path not found in the player frame')

    # Master playlist URL, e.g.
    # http://iqiyi.cdn9-okzy.com/20200628/11819_1c14aed2/index.m3u8?sign=...
    master_url = base_url + sign_match.group(1)

    # The line after the first "EXT-" tag is the relative path of the media playlist.
    master_text = _fetch_text(master_url, request_timeout)
    variant_match = re.search(r'EXT-.*?\n(.*)', master_text)
    if variant_match is None:
        raise ValueError(f'no playlist entry found in master playlist: {master_url!r}')
    # strip() drops a trailing "\r" when the playlist uses CRLF line endings.
    m3u8_url = _prefix_before_index(master_url) + variant_match.group(1).strip()
    print(m3u8_url)

    # Fetch the media playlist and collect the segment file names.
    m3u8_page = _fetch_text(m3u8_url, request_timeout)
    segment_names = re.findall(r'.*?\n(\d+.*?.ts).*?\n', m3u8_page)

    # Segment URLs share the media playlist's directory prefix.
    segment_prefix = _prefix_before_index(m3u8_url)
    print(segment_prefix)

    for index, segment_name in enumerate(segment_names):
        urls_queue.put([index, segment_prefix + segment_name])
    return len(segment_names)