Scraping Douban Movies with tkinter and OpenCV (1)
Preface: an exercise in Python web-scraping.
Statement: this article draws on material from many sources; if anything infringes your rights, please contact me and I will correct it immediately.
——————————————————————————
I. What you need to know first
1. Regular expressions (see the short sketch after this list)
2. The tkinter library
3. The opencv library
4. File operations
5. Web-scraping basics
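The scraper below leans entirely on re.compile / re.findall to pull fields out of raw HTML, so item 1 matters most. Here is a minimal sketch of that pattern; the sample span and the v:itemreviewed regex mirror the ones compiled in the source code, and the sample title is only an illustration:

import re

# A movie title on a Douban detail page sits inside a span like this:
sample_html = '<span property="v:itemreviewed">肖申克的救赎 The Shawshank Redemption</span>'

# the same style of pattern that Clutch.__init__ compiles below
find_title = re.compile(r'<span property="v:itemreviewed">(.*)</span>')

print(re.findall(find_title, sample_html))   # ['肖申克的救赎 The Shawshank Redemption']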
II. How to use the code
Create two folders on your D drive named poster and video (the code saves the posters and trailers there), and change the User-Agent in the Ask_url method to the one your own browser sends.
Before running, install the third-party libraries; they can be installed from cmd with pip. If you can't get it to work or the download is too slow, say so in the comments and I'll show you how to install from the Tsinghua mirror ^ ^. Once that basic setup is done, you can copy the code below into your own editor and run it.
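If it helps, here is a minimal setup sketch. It assumes the same D:/poster and D:/video paths the code below writes to; the package names in the comments are the standard PyPI names for the imports used:

import os

# create the folders the program saves posters and trailers into
for folder in ("D:/poster", "D:/video"):
    os.makedirs(folder, exist_ok=True)

# Third-party packages used by the script (tkinter ships with Python):
#   pip install requests beautifulsoup4 opencv-python pillow
# If downloads are slow, add the Tsinghua mirror:
#   pip install -i https://pypi.tuna.tsinghua.edu.cn/simple requests beautifulsoup4 opencv-python pillow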
III. Source code
from tkinter import *
import tkinter as tk
from tkinter import ttk
import cv2
import os
import requests
from bs4 import BeautifulSoup
import re
import urllib.request, urllib.error
import PIL.Image, PIL.ImageTk
from PIL import Image, ImageTk


class Douban:
    """tkinter GUI that shows the scraped movie list, posters and trailers."""

    def __init__(self):
        self.camera = None                      # OpenCV capture object (poster or trailer)
        self.root = Tk()
        self.root.title('douban.movie')
        self.root.geometry('1000x800')
        self.Douban_info()
        mainloop()

    def Douban_info(self):
        # Build the main window: poster frame, trailer frame, info text box and buttons
        self.frame1 = Frame(self.root)
        self.Image_info1 = Label(self.frame1)
        self.Image_info1.pack(padx=5, pady=5)
        self.frame2 = Frame(self.root)
        self.Image_info2 = Label(self.frame2, width=500, height=500)
        self.Image_info2.pack(padx=5, pady=5)
        self.text = Text(self.root, width=25, height=15)
        self.text.place(x=5, y=350)
        Label(self.root, text='预告片信息列表', fg='green', bg='yellow',
              font=('宋体', 15)).place(x=23, y=320)      # "trailer info list"
        Label(self.root, text='海 报 图 片', fg='purple', bg='white',
              font=('宋体', 18)).place(x=780, y=320)     # "poster image"
        self.Movie_list()
        self.Basic_info()
        self.Poster_info()
        # "previous film" / "next film" / "trailer" buttons
        self.button1 = Button(self.root, width=8, height=2, text="上一部", bg="orange",
                              font=("楷", 12), command=self.Prior_film).place(x=38, y=600)
        self.button2 = Button(self.root, width=8, height=2, text="下一部", bg="orange",
                              font=("楷", 12), command=self.Next_film).place(x=38, y=650)
        self.button3 = Button(self.root, width=18, height=2, text="预告片", bg="blue", fg="white",
                              font=("宋", 12), command=self.Film_info).place(x=0, y=700)

    def Next_film(self):
        global i
        if i >= len(Data_info) - 1:             # already at the last movie
            return
        i += 1
        self.Basic_info()
        self.Image_info1.pack_forget()
        self.frame1.pack_forget()
        self.Image_info2.pack_forget()
        self.frame2.pack_forget()
        self.Poster_info()

    def Prior_film(self):
        global i
        if i <= 0:                              # already at the first movie
            return
        i -= 1
        self.Basic_info()
        self.Image_info1.pack_forget()
        self.frame1.pack_forget()
        self.Image_info2.pack_forget()
        self.frame2.pack_forget()
        self.Poster_info()

    def Basic_info(self):
        # Data_info[i] layout: [actors, trailer_url, poster_url, page_url, title, director]
        self.text.delete('1.0', 'end')
        self.text.tag_config("tag_1", background="yellow", foreground="red")
        self.text.tag_config("tag_2", background="white", foreground="blue")
        self.text.insert(END, "电影名: ", "tag_1")                       # title
        self.text.insert(END, str(Data_info[i][4]) + "\n" + "\n", "tag_2")
        self.text.insert(END, "导演: ", "tag_1")                         # director
        self.text.insert(END, str(Data_info[i][5]) + "\n" + "\n", "tag_2")
        self.text.insert(END, "主演: ", "tag_1")                         # starring
        actors = Data_info[i][0]
        for actor in actors:
            self.text.insert(END, str(actor) + " ", "tag_2")
        self.text.insert(END, "\n" + "\n")
        self.text.insert(END, "预告片链接:", "tag_1")                     # movie page link
        self.text.insert(END, str(Data_info[i][3]) + "\n" + "\n", "tag_2")
        self.text.insert(END, "海报链接:", "tag_1")                       # poster link
        self.text.insert(END, str(Data_info[i][2]) + "\n" + "\n", "tag_2")

    def Save_poster(self, URL):
        # Download the poster to D:/poster/<title>.jpg (skip if it already exists)
        response = requests.get(URL)
        Poster = os.path.join("D:/", "poster")
        self.File_path = '{0}/{1}.{2}'.format(Poster, str(Data_info[i][4]), 'jpg')
        if not os.path.exists(self.File_path):
            with open(self.File_path, 'wb') as f:
                f.write(response.content)

    def Save_video(self, URL):
        # Download the trailer to D:/video/<title>.mp4 (skip if it already exists)
        response = requests.get(URL)
        Video = os.path.join("D:/", "video")
        self.File_path = '{0}/{1}.{2}'.format(Video, str(Data_info[i][4]), 'mp4')
        if not os.path.exists(self.File_path):
            with open(self.File_path, 'wb') as f:
                f.write(response.content)

    def Poster_info(self):
        # Show the current movie's poster on the right-hand side
        self.url1 = "".join(Data_info[i][2])
        self.Save_poster(self.url1)
        self.camera = cv2.VideoCapture(self.url1)
        self.frame1.place(x=700, y=350)
        self.Image_info1 = Label(self.frame1)
        self.Image_info1.pack(padx=5, pady=5)
        self.Loop_poster_film(self.Image_info1)

    def Film_info(self):
        # Play the current movie's trailer in the middle of the window
        self.Image_info2.pack_forget()
        self.frame2.pack_forget()
        self.url = "".join(Data_info[i][1])
        self.Save_video(self.url)
        self.camera = cv2.VideoCapture(self.url)
        self.frame2.place(x=180, y=300)
        self.Image_info2 = Label(self.frame2, width=510, height=500)
        self.Image_info2.pack(padx=5, pady=5)
        self.Loop_poster_film(self.Image_info2)

    def Loop_poster_film(self, Loop):
        # Read one frame from self.camera, show it in the given Label,
        # then schedule the next frame with root.after()
        Success, img = self.camera.read()
        if Success:
            cv2image = cv2.cvtColor(img, cv2.COLOR_BGR2RGBA)
            Current_image = Image.fromarray(cv2image)
            imgtk = ImageTk.PhotoImage(image=Current_image)
            Loop.imgtk = imgtk                  # keep a reference so the image is not garbage-collected
            Loop.config(image=imgtk)
            self.root.after(1, lambda: self.Loop_poster_film(Loop))

    def Movie_list(self):
        # Treeview listing all scraped movie titles, with alternating row colours
        Label(self.root, text='豆 瓣 电 影', fg='red', bg='yellow',
              font=('宋体', 30)).place(x=380, y=30)      # "Douban movies"
        self.checkDate = ttk.Treeview(self.root, columns=('name',))
        self.checkDate.heading('#0', text='电影序号')     # movie index
        self.checkDate.heading('name', text='电影名称')   # movie title
        self.checkDate.column('name', width=800, anchor='center')
        c = list(range(1, len(Data_info) + 1))
        d = []
        for index in range(0, len(Data_info)):
            d.append(Data_info[index][4])
        dict1 = dict(zip(c, d))
        rowCount = 1
        self.checkDate.tag_configure("evenColor", background="LightBlue")
        for index in dict1.keys():
            if rowCount % 2 == 0:
                self.checkDate.insert("", 'end', text=' ' + str(index), values=dict1[index])
            else:
                self.checkDate.insert("", 'end', text=' ' + str(index), values=dict1[index],
                                      tags=("evenColor",))
            rowCount += 1
        yscrollbar = Scrollbar(self.root, orient=VERTICAL, command=self.checkDate.yview)
        self.checkDate.configure(yscrollcommand=yscrollbar.set)
        yscrollbar.pack(side=RIGHT, fill=Y)
        self.checkDate.place(x=0, y=80)


class Clutch:
    """Scraper: collects actors, trailer, poster, page URL, title and director for each movie."""

    def __init__(self):
        # Regular expressions applied to the raw HTML
        self.findLink = re.compile(r'<a class="ticket-btn" data-psource="poster" href="(.*?)" target="_blank">')
        self.findImag = re.compile(r'<img.*?src="(.*?)".*?>', re.S)
        self.findTitle = re.compile(r'<span property="v:itemreviewed">(.*)</span>')
        self.finddirector = re.compile(r'<a href=".*?" rel="v:directedBy">(.*)</a>')
        self.findActor = re.compile(r'<a\b href="[^"]*"[^>]*>([\s\S]*?)</a>', re.S)
        self.findVideo = re.compile(r'<a\b[^>]+\bhref="([^"]*)"[^>]*>[\s\S]*?</a>', re.S)
        self.findRealVideo = re.compile(r'<source src="(.*?)".*?>')
        Douban_url = "https://movie.douban.com/cinema/nowplaying/beijing/"
        List_url = self.Get_url(Douban_url)
        self.Crawing(List_url)                  # fills the global Data_info list

    def Get_url(self, Douban_url):
        # Collect the detail-page URL of every movie on the "now playing" page
        List_url = []
        html = self.Ask_url(Douban_url)
        soup = BeautifulSoup(html, "html.parser")
        for item in soup.find_all("li", class_="poster"):
            item = str(item)
            link = re.findall(self.findLink, item)
            List_url.append(link)
        return List_url

    def Crawing(self, List_url):
        # Visit each detail page and extract the data the GUI displays
        for index in range(min(39, len(List_url))):     # at most 39 movies, never past the end of the list
            url = "".join(List_url[index])
            html = self.Ask_url(url)
            soup = BeautifulSoup(html, "html.parser")
            for item in soup.find_all('div', id="wrapper"):
                print("爬虫中……")                        # "crawling…"
                data = []
                actors = []
                for item1 in item.find_all('a', rel="v:starring"):
                    item1 = str(item1)
                    actor = re.findall(self.findActor, item1)
                    actors.append("".join(actor))
                data.append(actors)
                for item2 in item.find_all('li', class_="label-trailer"):
                    item2 = str(item2)
                    video = re.findall(self.findVideo, item2)
                    urlvideo = "".join(video)
                    htmlvideo = self.Ask_url(urlvideo)
                    soup_video = BeautifulSoup(htmlvideo, "html.parser")
                    for V in soup_video.find_all('div', class_="cont"):
                        V = str(V)
                        real_video = re.findall(self.findRealVideo, V)
                        data.append("".join(real_video))
                for item3 in item.find_all('div', class_="subject clearfix"):
                    item3 = str(item3)
                    img = re.findall(self.findImag, item3)
                    data.append("".join(img))
                item = str(item)
                data.append(url)
                titles = re.findall(self.findTitle, item)
                data.append("".join(titles))
                director = re.findall(self.finddirector, item)
                data.append("".join(director))
                Data_info.append(data)
        return Data_info

    def Ask_url(self, url):
        # Fetch one page; replace the User-Agent below with the one your own browser sends
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.67 Safari/537.36 Edg/87.0.664.47"}
        request = urllib.request.Request(url, headers=headers)
        html = " "
        try:
            response = urllib.request.urlopen(request)
            html = response.read().decode("utf-8")
        except urllib.error.URLError as e:
            if hasattr(e, "code"):
                print(e.code)
            if hasattr(e, "reason"):
                print(e.reason)
        return html


if __name__ == '__main__':
    i = 0                   # index of the movie currently shown in the GUI
    Data_info = []          # filled by Clutch.Crawing()
    clutch = Clutch()       # run the scraper first
    douban = Douban()       # then start the GUI
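To make the positional indexing in Basic_info, Poster_info and Film_info easier to follow, this is roughly what one Data_info entry looks like after Clutch finishes; the names and URLs here are hypothetical placeholders, not real scraped values:

# hypothetical illustration of one Data_info entry (indices 0..5)
example_entry = [
    ["Actor A", "Actor B"],                         # [0] list of actors
    "https://vt1.doubanio.com/.../trailer.mp4",     # [1] trailer URL (played by Film_info)
    "https://img1.doubanio.com/.../poster.jpg",     # [2] poster URL (shown by Poster_info)
    "https://movie.douban.com/subject/XXXXXXX/",    # [3] detail-page URL
    "Some Movie",                                   # [4] title
    "Some Director",                                # [5] director
]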
Running result:
Try it and see for yourself ^ ^
Finally:
If anything is unclear, feel free to leave a comment!