Five Minutes of Python (Part 1): Getting Started with Scraping Images, Text, Video, and Audio

Reader submission 832 2025-03-31

Scraping the weather forecast and storing it in a database

#!/usr/bin/python
# -*- coding: utf-8 -*-
import pymysql
import requests
from bs4 import BeautifulSoup

# Open the MySQL connection that the scraper writes into
db = pymysql.connect(
    host='localhost',
    port=3306,
    user='root',
    password='root',
    database='mysql',
    use_unicode=True,
    charset='utf8'
)
cursor = db.cursor()

def downdata(url):
    hd = {
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"}
    req = requests.get(url, headers=hd)
    # req.encoding = 'utf-8'
    soup = BeautifulSoup(req.text, 'html.parser')
    # Each <li class="ndays-item png-fix cf"> holds one day's forecast
    da_new = soup.find_all('li', class_='ndays-item png-fix cf')
    for da in da_new:
        day = da.find('div', class_='td td2').find('p', class_='p1')
        week = da.find('div', class_='td td2').find('p', class_='p2')
        wd = da.find('div', class_='td td5').find('p', class_='p1')
        fl = da.find('div', class_='td td5').find('p', class_='p2')
        f2 = da.find('div', class_='td td3').find('div')['title']
        print('Date ' + day.text + ', weekday ' + week.text + ', temperature ' + wd.text +
              ', wind ' + fl.text + ', weather ' + f2)
        sql = "INSERT INTO tianiq(day1, week1, wd, fl, air) VALUES ('%s','%s','%s','%s','%s')" % \
              (day.text, week.text, wd.text, fl.text, f2)
        print(sql)
        cursor.execute(sql)
        db.commit()

downdata('http://tianqi.sogou.com/shenyang/15/')
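The INSERT above splices the scraped strings into the SQL with %-formatting, so a stray quote in the page text breaks the statement and the query is open to SQL injection. A minimal safer sketch using pymysql's parameterized execute (same tianiq table and loop variables as above):

# Let pymysql escape the values instead of formatting them into the string
sql = "INSERT INTO tianiq(day1, week1, wd, fl, air) VALUES (%s, %s, %s, %s, %s)"
cursor.execute(sql, (day.text, week.text, wd.text, fl.text, f2))
db.commit()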

Scraping comics

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import re
import urllib.request

def gethtml(url):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0'}
    req = urllib.request.Request(url=url, headers=headers)
    html = urllib.request.urlopen(req).read()
    return html

def getimg(html):
    # Match every src="....jpg" attribute on the page
    reg = r'src="(.*?\.jpg)"'
    img = re.compile(reg)
    html = html.decode('utf-8')  # Python 3: decode the bytes before matching
    imglist = re.findall(img, html)
    x = 0
    for imgurl in imglist:
        # Save each image as D:0.jpg, D:1.jpg, ...
        urllib.request.urlretrieve(imgurl, 'D:%s.jpg' % x)
        x = x + 1

html = gethtml("http://www.tuku.cc/")
getimg(html)
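One caveat: the regex captures exactly what sits inside src="...", and on many pages that is a relative path, which urlretrieve cannot fetch on its own. A minimal sketch of resolving a match against the page URL with urllib.parse.urljoin (the relative path below is a made-up example):

from urllib.parse import urljoin

base = 'http://www.tuku.cc/'
imgurl = urljoin(base, '/upload/cover/1.jpg')  # hypothetical relative src value
print(imgurl)  # http://www.tuku.cc/upload/cover/1.jpg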

Using the database

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import pymysql

# Open the database connection
db = pymysql.connect(host="localhost", user="root", password="root", database="mysql")
# Obtain a cursor with cursor()
cursor = db.cursor()
# SQL INSERT statement
sql = "INSERT INTO tianiq(day1, \
       week1, wd, fl, air) \
       VALUES ('Mac', 'Mohan', 'M', 'M', 'M')"
try:
    # Execute the SQL statement
    cursor.execute(sql)
    # Commit the transaction
    db.commit()
    print("insert ok")
except:
    # Roll back on error
    db.rollback()
# Close the database connection
db.close()
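To confirm that the inserts actually landed, the same cursor API can read the rows back. A minimal sketch, assuming the tianiq table used above:

import pymysql

db = pymysql.connect(host="localhost", user="root", password="root", database="mysql")
cursor = db.cursor()
cursor.execute("SELECT day1, week1, wd, fl, air FROM tianiq")
for row in cursor.fetchall():  # each row comes back as a tuple of column values
    print(row)
db.close()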

Scraping video

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import requests

def download(url):
    dz = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'}
    # .content is the raw bytes of the response body
    req = requests.get(url, headers=dz).content
    with open('qq.mp4', 'wb') as fp:
        fp.write(req)

download('http://video.study.163.com/edu-video/nos/mp4/2017/04/01/1006064693_cc2842f7dc8b410c96018ec618f37ef6_sd.mp4?ak=d2e3a054a6a144f3d98805f49b4f04439064ce920ba6837d89a32d0b0294ad3c1729b01fa6a0b5a3442ba46f5001b48b1ee2fb6240fc719e1b3940ed872a11f180acad2d0d7744336d03591c3586614af455d97e99102a49b825836de913910ef0837682774232610f0d4e39d8436cb9a153bdeea4a2bfbae357803dfb6768a742fe395e87eba0c3e30b7b64ef1be06585111bf60ea26d5dad1f891edd9e94a8e167e0b04144490499ffe31e0d97a0a1babcbd7d2e007d850cc3bf7aa697e8ff')
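requests.get(url, headers=dz).content pulls the whole video into memory before anything is written to disk. For large files, a streamed download is gentler; a minimal sketch with the same requests API (the file name and chunk size are arbitrary choices):

import requests

def download_stream(url, filename='qq.mp4'):
    hd = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36'}
    # stream=True defers the body; iter_content yields it chunk by chunk
    with requests.get(url, headers=hd, stream=True) as r:
        with open(filename, 'wb') as fp:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                fp.write(chunk)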

Scraping audio

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import json
import requests

def download(url):
    hd = {
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"}
    req = requests.get(url, headers=hd)
    reps = req.text
    # The album endpoint returns JSON; the track list sits under data.tracksAudioPlay
    result = json.loads(reps)
    datap = result['data']['tracksAudioPlay']
    for index in datap:
        title = index['trackName']
        print(index['src'])
        data = requests.get(index['src'], headers=hd).content
        try:
            with open('%s.mp3' % title, 'wb') as f:
                f.write(data)
        except BaseException:
            print('failed to save %s' % title)

download('http://www.ximalaya.com/revision/play/album?albumId=7371372&pageNum=1&sort=-1&pageSize=30')
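The try/except above guards the file write; one common failure is a track name containing characters that are not legal in a file name (for example '/' or '?'). A minimal sketch that sanitizes the title before saving (the underscore replacement is an arbitrary choice):

import re

def safe_filename(title):
    # Replace characters that Windows and most filesystems reject in file names
    return re.sub(r'[\\/:*?"<>|]', '_', title)

# usage inside the loop above:
# with open('%s.mp3' % safe_filename(title), 'wb') as f:
#     f.write(data)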

Scraping text

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import requests
from bs4 import BeautifulSoup

def get_h(url):
    response = requests.get(url)
    response.encoding = 'utf-8'
    return response.text

def get_c(html):
    soup = BeautifulSoup(html, 'html.parser')
    # Take the text of the first <div class="content"> on the page
    joke_content = soup.select('div.content')[0].getText()
    return joke_content

url_joke = "https://www.qiushibaike.com"
html = get_h(url_joke)
joke_content = get_c(html)
print(joke_content)
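soup.select('div.content')[0] only returns the first joke on the page. A minimal sketch that collects every match instead (same BeautifulSoup usage, hypothetical helper name):

def get_all(html):
    soup = BeautifulSoup(html, 'html.parser')
    # One entry per <div class="content"> found on the page
    return [div.getText().strip() for div in soup.select('div.content')]

# jokes = get_all(get_h(url_joke))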

Scraping images

#!/usr/bin/python
# -*- coding: UTF-8 -*-
import requests
from bs4 import BeautifulSoup

headers = {
    'User-Agent': "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1"}
url = 'http://www.ivsky.com/'
start_html = requests.get(url, headers=headers)
Soup = BeautifulSoup(start_html.text, 'html.parser')
# Each <div class="syl_pic"> on the front page links to an album
all_div = Soup.find_all('div', class_='syl_pic')
for lsd in all_div:
    lsds = 'http://www.ivsky.com' + lsd.find('a')['href']
    title = lsd.find('a').get_text()
    print(lsds)
    html = requests.get(lsds, headers=headers)
    Soup_new = BeautifulSoup(html.text, 'html.parser')
    # Each <div class="il_img"> in the album links to a single picture page
    app = Soup_new.find_all('div', class_='il_img')
    for app_new in app:
        apptwo = 'http://www.ivsky.com' + app_new.find('a')['href']
        htmlthree = requests.get(apptwo, headers=headers)
        Soupthree = BeautifulSoup(htmlthree.text, 'html.parser')
        appthree = Soupthree.find('div', class_='pic')
        appf = appthree.find('img')['src']
        name = appf[-9:-4]
        img = requests.get(appf, headers=headers)
        f = open(name + '.jpg', 'ab')  # media files must be opened in binary mode
        f.write(img.content)           # use .content (bytes) for media files
        f.close()
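name = appf[-9:-4] slices five characters out of the image URL to use as the file name, so different images can easily collide. A minimal sketch that uses the URL's basename instead (same appf and headers variables as above):

import os

file_name = os.path.basename(appf)  # e.g. '.../img/xxxx.jpg' -> 'xxxx.jpg'
with open(file_name, 'wb') as f:    # binary mode for image bytes
    f.write(requests.get(appf, headers=headers).content)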

Scraping a novel

#!/usr/bin/python
# -*- coding: UTF-8 -*-
from urllib import request
from bs4 import BeautifulSoup
import sys

if __name__ == "__main__":
    # Create the output txt file
    file = open('一念永恒.txt', 'w', encoding='utf-8')
    # Table-of-contents URL of the novel
    target_url = 'http://www.biqukan.com/1_1094/'
    # User-Agent
    head = {}
    head['User-Agent'] = 'Mozilla/5.0 (Linux; Android 4.1.1; Nexus 7 Build/JRO03D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Safari/535.19'
    target_req = request.Request(url=target_url, headers=head)
    target_response = request.urlopen(target_req)
    target_html = target_response.read().decode('gbk', 'ignore')
    # Create a BeautifulSoup object
    listmain_soup = BeautifulSoup(target_html, 'html.parser')
    # Search the document tree for every div whose class is "listmain"
    chapters = listmain_soup.find_all('div', class_='listmain')
    # Build another BeautifulSoup object from the result and keep parsing
    download_soup = BeautifulSoup(str(chapters), 'html.parser')
    # Count the chapters
    numbers = (len(download_soup.dl.contents) - 1) / 2 - 8
    index = 1
    # Flag marking where the main-text links start; the "latest chapters" list is skipped
    begin_flag = False
    # Iterate over all children of the dl tag
    for child in download_soup.dl.children:
        # Skip newlines
        if child != '\n':
            # When the main-text section heading is reached, enable the flag
            if child.string == u"《一念永恒》正文卷":
                begin_flag = True
            # Collect the chapter links and download their contents
            if begin_flag == True and child.a != None:
                download_url = "http://www.biqukan.com" + child.a.get('href')
                download_req = request.Request(url=download_url, headers=head)
                download_response = request.urlopen(download_req)
                download_html = download_response.read().decode('gbk', 'ignore')
                download_name = child.string
                soup_texts = BeautifulSoup(download_html, 'html.parser')
                texts = soup_texts.find_all(id='content', class_='showtxt')
                soup_text = BeautifulSoup(str(texts), 'html.parser')
                write_flag = True
                file.write(download_name + '\n\n')
                # Write the scraped chapter to the file, character by character
                for each in soup_text.div.text.replace('\xa0', ''):
                    # Stop writing at the first 'h' (used to cut off the trailing link text)
                    if each == 'h':
                        write_flag = False
                    if write_flag == True and each != ' ':
                        file.write(each)
                    if write_flag == True and each == '\r':
                        file.write('\n')
                file.write('\n\n')
                # Print download progress
                sys.stdout.write("Downloaded: %.3f%%" % (index / numbers * 100) + '\r')
                sys.stdout.flush()
                index += 1
    file.close()
