# coding=utf-8 #!/usr/bin/python import sys sys.path.append('..') from base.spider import Spider import json import urllib.parse import re from lxml import etree from urllib.parse import urljoin class Spider(Spider): def getName(self): return "奇优影院" def init(self, extend): pass def homeContent(self, filter): result = {} cateManual = { "电影": "1", "电视剧": "2", "动漫": "3", "综艺": "4", "午夜": "6" } classes = [{'type_name': k, 'type_id': v} for k, v in cateManual.items()] result['class'] = classes filters = { "1": [{"key": "by", "name": "排序", "value": [{"n": "按时间", "v": "time"}, {"n": "按人气", "v": "hit"}]}], "2": [{"key": "by", "name": "排序", "value": [{"n": "按时间", "v": "time"}, {"n": "按人气", "v": "hit"}]}], "3": [{"key": "by", "name": "排序", "value": [{"n": "按时间", "v": "time"}, {"n": "按人气", "v": "hit"}]}], "4": [{"key": "by", "name": "排序", "value": [{"n": "按时间", "v": "time"}, {"n": "按人气", "v": "hit"}]}], "6": [{"key": "by", "name": "排序", "value": [{"n": "按时间", "v": "time"}, {"n": "按人气", "v": "hit"}]}] } result['filters'] = filters return result def homeVideoContent(self): try: rsp = self.fetch("http://qiyoudy5.com/") root = self.parse_html(rsp.content) if not root: return {'list': []} videos = [] # 轮播图 for a in root.xpath("//div[contains(@class,'carousel')]//a[contains(@class,'stui-vodlist__thumb')]"): try: name = a.xpath(".//span[@class='pic-text text-center']/text()")[0].strip() if a.xpath(".//span[@class='pic-text text-center']/text()") else a.xpath("./@title")[0] if a.xpath("./@title") else "未知" style = a.xpath("./@style")[0] if a.xpath("./@style") else "" pic = re.search(r"background:\s*url\((.*?)\)", style).group(1) if re.search(r"background:\s*url\((.*?)\)", style) else "" sid = a.xpath("./@href")[0] if a.xpath("./@href") else "" videos.append({"vod_id": sid, "vod_name": name, "vod_pic": pic, "vod_remarks": "推荐"}) except: continue # 视频列表 for a in root.xpath("//ul[contains(@class,'stui-vodlist')]//a[contains(@class,'stui-vodlist__thumb')]"): try: name = a.xpath("./@title")[0] if a.xpath("./@title") else "未知" pic = a.xpath("./@data-original")[0] if a.xpath("./@data-original") else "" sid = a.xpath("./@href")[0] if a.xpath("./@href") else "" remark = a.xpath(".//span[@class='pic-text text-right']/text()")[0] if a.xpath(".//span[@class='pic-text text-right']/text()") else "" videos.append({"vod_id": sid, "vod_name": name, "vod_pic": pic, "vod_remarks": remark}) except: continue return {'list': videos} except: return {'list': []} def categoryContent(self, tid, pg, filter, extend): result = {} try: order = extend.get('by', 'time') if extend else 'time' url = f'http://qiyoudy5.com/list/{tid}_{pg}.html?order={order}' rsp = self.fetch(url) root = self.parse_html(rsp.content) if not root: return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0} videos = [] for a in root.xpath("//a[contains(@class,'stui-vodlist__thumb')]"): try: name = a.xpath("./@title")[0] if a.xpath("./@title") else "未知" pic = a.xpath("./@data-original")[0] if a.xpath("./@data-original") else "" sid = a.xpath("./@href")[0] if a.xpath("./@href") else "" remark = a.xpath(".//span[@class='pic-text text-right']/text()")[0] if a.xpath(".//span[@class='pic-text text-right']/text()") else "" videos.append({"vod_id": sid, "vod_name": name, "vod_pic": pic, "vod_remarks": remark}) except: continue current_page = int(root.xpath("//ul[contains(@class,'stui-page')]//a[@class='active']/text()")[0]) if root.xpath("//ul[contains(@class,'stui-page')]//a[@class='active']/text()") else pg page_numbers = [] for link in root.xpath("//ul[contains(@class,'stui-page')]//a[contains(@href,'list')]/@href"): match = re.search(r'list/\d+_(\d+)\.html', link) if match: page_numbers.append(int(match.group(1))) total_page = max(page_numbers) if page_numbers else 1 return { 'list': videos, 'page': current_page, 'pagecount': total_page if total_page > 0 else 9999, 'limit': 90, 'total': 999999 } except: return {'list': [], 'page': pg, 'pagecount': 1, 'limit': 90, 'total': 0} def detailContent(self, array): try: tid = array[0] url = f'http://qiyoudy5.com{tid}' rsp = self.fetch(url) root = self.parse_html(rsp.content) if not root: return {'list': []} # 基本信息 detail_node = root.xpath("//div[contains(@class,'stui-content__detail')]") or root.xpath("//div[@class='stui-player__detail']") pic = title = area = director = actor = year = desc = "" if detail_node: detail_node = detail_node[0] pic = self.get_first(root.xpath("//meta[@property='og:image']/@content") or detail_node.xpath(".//img/@data-original")) title = self.get_first(detail_node.xpath(".//h1//text()")) if not title: page_title = self.get_first(root.xpath("//title/text()")) title = re.search(r"《(.*?)》", page_title).group(1) if page_title and re.search(r"《(.*?)》", page_title) else "" area = self.get_first(root.xpath("//meta[@property='og:video:area']/@content")) director = self.get_first(root.xpath("//meta[@property='og:video:director']/@content")) actor = self.get_first(root.xpath("//meta[@property='og:video:actor']/@content")) year_info = self.get_first(root.xpath("//p[@class='data']//text()[contains(.,'年份:')]")) year = re.search(r"年份:(\d{4})", year_info).group(1) if year_info and re.search(r"年份:(\d{4})", year_info) else "" desc = self.get_first(root.xpath("//meta[@property='og:description']/@content")) # 播放列表 playFrom, playUrl = [], [] for tab in root.xpath("//ul[contains(@class,'nav-tabs')]/li"): tab_name = self.get_first(tab.xpath(".//a/text()")) tab_id = self.get_first(tab.xpath(".//a/@href")).replace("#", "") if tab.xpath(".//a/@href") else "" if tab_name and tab_id: play_list = root.xpath(f"//div[@id='{tab_id}']//ul[contains(@class,'stui-content__playlist')]//a") if play_list: playFrom.append(tab_name) episodes = [] for episode in play_list: ep_name = self.get_first(episode.xpath("./text()")) or "播放" ep_url = self.get_first(episode.xpath("./@href")) if ep_url: episodes.append(f"{ep_name}${ep_url}") if episodes: playUrl.append("#".join(episodes)) vod = { "vod_id": tid, "vod_name": title, "vod_pic": pic, "vod_year": year, "vod_area": area, "vod_actor": actor, "vod_director": director, "vod_content": desc } if playFrom and playUrl: vod['vod_play_from'] = "$$$".join(playFrom) vod['vod_play_url'] = "$$$".join(playUrl) return {'list': [vod]} except: return {'list': []} def searchContent(self, key, quick, page='1'): try: url = "http://qiyoudy5.com/search.php" # 修复:使用正确的参数名和变量 post_data = { 'searchword': key, # 改为变量key,而不是字符串'key' } headers = { "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", "Referer": "http://qiyoudy5.com/", "Origin": "http://qiyoudy5.com" } # 修复:只发送一次POST请求,删除重复的请求 rsp = self.post(url, data=post_data, headers=headers) root = self.parse_html(rsp.content) if not root: return {'list': []} videos = [] # 多种选择器尝试获取搜索结果 selectors = [ "//ul[contains(@class,'stui-vodlist__media')]//li", "//ul[contains(@class,'stui-vodlist')]//li", "//a[contains(@class,'stui-vodlist__thumb')]" ] result_items = [] for selector in selectors: result_items = root.xpath(selector) if result_items: break for item in result_items: try: if item.tag == 'a': # 直接是a标签 href = self.get_first(item.xpath("./@href")) title = self.get_first(item.xpath("./@title")) pic = self.get_first(item.xpath("./@data-original")) remark = self.get_first(item.xpath(".//span[contains(@class,'pic-text')]/text()")) else: # li标签 link = item.xpath(".//a[contains(@class,'stui-vodlist__thumb')]") or item.xpath(".//a") if not link: continue link = link[0] href = self.get_first(link.xpath("./@href")) title = self.get_first(link.xpath("./@title")) pic = self.get_first(link.xpath("./@data-original")) if not pic: style = self.get_first(link.xpath("./@style")) if style and "background-image" in style: pic_match = re.search(r"background-image:\s*url\(['\"]?(.*?)['\"]?\)", style) if pic_match: pic = pic_match.group(1) remark = self.get_first(item.xpath(".//span[contains(@class,'pic-text')]/text()")) if href and title: videos.append({ "vod_id": href, "vod_name": title.strip(), "vod_pic": pic, "vod_remarks": remark or "" }) except Exception as e: continue # 备用解析方案 if not videos: for a in root.xpath("//a[contains(@href,'/vod/')]"): try: href = self.get_first(a.xpath("./@href")) title = self.get_first(a.xpath("./@title")) or self.get_first(a.xpath(".//text()")) pic = self.get_first(a.xpath("./@data-original")) remark = self.get_first(a.xpath(".//span[contains(@class,'pic-text')]/text()")) if href and title: videos.append({ "vod_id": href, "vod_name": title.strip(), "vod_pic": pic, "vod_remarks": remark or "" }) except: continue # 去重 seen = set() unique_videos = [] for video in videos: identifier = (video["vod_id"], video["vod_name"]) if identifier not in seen: seen.add(identifier) unique_videos.append(video) return {'list': unique_videos} except Exception as e: return {'list': []} def playerContent(self, flag, id, vipFlags): try: url = f"http://qiyoudy5.com{id}" rsp = self.fetch(url) _, html_content = self.parse_html(rsp.content, return_content=True) headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", "Referer": url, } # 多种方式查找播放地址 # 1. API链接 for pattern in [r"http://api\.yongfan99\.com:81/content\.php\?[^'\"]+", r"content\.php\?vid=[^&]+&type=[^'\"]+"]: match = re.search(pattern, html_content) if match: api_url = match.group(0) if not api_url.startswith('http'): api_url = "http://api.yongfan99.com:81/" + api_url try: api_rsp = self.fetch(api_url, headers=headers) m3u8_match = re.search(r'http[s]?://[^\s"\']+\.m3u8[^\s"\']*', api_rsp.text) if m3u8_match: return {"parse": 0, "playUrl": "", "url": m3u8_match.group(0), "header": headers} except: pass # 2. iframe中的播放器 for pattern in [r']*src=[\'"]([^\'"]+)[\'"][^>]*>', r'src\s*=\s*[\'"]((?:http[^\'"]*)?/play/[^\'"]*)[\'"]']: for iframe_src in re.findall(pattern, html_content): if not iframe_src.startswith('http'): iframe_src = urljoin(url, iframe_src) try: iframe_rsp = self.fetch(iframe_src, headers=headers) m3u8_match = re.search(r'http[s]?://[^\s"\']+\.m3u8[^\s"\']*', iframe_rsp.text) if m3u8_match: return {"parse": 0, "playUrl": "", "url": m3u8_match.group(0), "header": headers} except: continue # 3. 直接搜索m3u8链接 m3u8_match = re.search(r'http[s]?://[^\s"\']+\.m3u8[^\s"\']*', html_content) if m3u8_match: return {"parse": 0, "playUrl": "", "url": m3u8_match.group(0), "header": headers} # 4. 返回原始URL进行外部解析 return {"parse": 1, "playUrl": "", "url": url, "header": headers} except: return {"parse": 1, "playUrl": "", "url": f"http://qiyoudy5.com{id}", "header": { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36", "Referer": "http://qiyoudy5.com/", }} # 辅助函数 def parse_html(self, content, return_content=False): encodings = ['utf-8', 'gbk', 'gb2312', 'iso-8859-1'] html_content = None for encoding in encodings: try: html_content = content.decode(encoding) break except UnicodeDecodeError: continue if html_content is None: html_content = content.decode('utf-8', errors='replace') html_content = self.clean_html(html_content) root = etree.HTML(html_content) if return_content: return root, html_content return root def get_first(self, array, default=""): return array[0] if array else default def clean_html(self, html_content): html_content = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]', '', html_content) replacements = {' ': ' ', '&': '&', '<': '<', '>': '>', '"': '"'} for old, new in replacements.items(): html_content = html_content.replace(old, new) return html_content def isVideoFormat(self, url): return any(fmt in url for fmt in ['.m3u8', '.mp4', '.avi', '.mkv', '.flv', '.webm']) def manualVideoCheck(self): return True def localProxy(self, param): return {}