Python
date
Jan 27, 2022
slug
python
status
Published
tags
Python
summary
常见的一些东西
type
Post
目录:基础教程 · 包管理 · pip更改镜像源 · pip批量安装和导出 · 三方模块 · 安装ChromeDriver · redis 使用 · postgres 使用 · 压缩文件 · 写入csv · url 编码 · url获取参数 · selenium4 使用 · 懒加载模式,不等待页面加载完毕 · 等待js渲染完成 · 解析本地html · 使用 ffmpeg 下载视频 · mac调用Downie下载视频 · Aria2下载文件
基于python3.9操作
基础教程
包管理
pip更改镜像源
临时使用:
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple some-package
配置型:
pip install pip -U
pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple
如果您到 pip 默认源的网络连接较差,临时使用本镜像站来升级 pip:
pip install -i https://pypi.tuna.tsinghua.edu.cn/simple pip -U
pip批量安装和导出
安装导出的各种安装包, -r 表示逐行读取安装
pip install -r requirements.txt
查看Python已安装的模块
pip list
打包已安装的依赖包
生成已安装包清单
pip freeze > requirements.txt
如本地保留了之前下载的各依赖包,直接将各whl/tar/zip包保存到某个文件夹下,如./packages
如本地未保留之前下载的各依赖包whl/tar/zip包,则需要用下面的命令从网络下载到./packages
指定参数下载
# Download only prebuilt binary packages (wheel or egg) for a different
# target platform: 64-bit Linux, CPython 2.7, ABI cp27mu.
# (Comments must not follow a trailing backslash, so they live up here.)
pip download \
    --only-binary=:all: \
    --platform linux_x86_64 \
    --python-version 27 \
    --implementation cp \
    --abi cp27mu \
    -r requirements.txt -d ./packages
离线批量安装依赖包
将 packages 文件夹和 requirements.txt拷贝至离线机器上某个目录下,
packages文件夹和requirements.txt放在同一目录下,在命令行窗口进入该目录,输入以下命令,依赖包即可批量安装完成
离线安装依赖包
pip install --no-index --find-links=./packages -r requirements.txt
参数
pip download -h # 查看帮助信息
Usage:
pip download [options] <requirement specifier> [package-index-options] ...
pip download [options] -r <requirements file> [package-index-options] ...
pip download [options] [-e] <vcs project url> ...
pip download [options] [-e] <local project path> ...
pip download [options] <archive url/path> ...
三方模块
安装ChromeDriver
下载页面
版本对照
下载并解压后,执行以下命令将其移动到 PATH 目录中,再运行 chromedriver 验证安装:
mv chromedriver /usr/local/bin
chromedriver
redis 使用
pip install redis
settings.py
# settings.py — central Redis connection settings.
REDIS_CONF = {
    "HOST": '127.0.0.1',
    "PORT": 6379,
    "PASSWORD": '123456',
    "DATABASE": 0,
}

import redis  # was missing from the original snippet

from myscrapy.settings import REDIS_CONF

# A connection pool lets multiple Redis clients share sockets.
# db now comes from REDIS_CONF instead of the hard-coded db=9, which
# contradicted the DATABASE value declared in settings above.
pool = redis.ConnectionPool(host=REDIS_CONF['HOST'],
                            port=REDIS_CONF['PORT'],
                            db=REDIS_CONF['DATABASE'],
                            password=REDIS_CONF['PASSWORD'],
                            decode_responses=True)  # return str, not bytes
conn = redis.Redis(connection_pool=pool)
print(conn.get('key'))
postgres 使用
pip install psycopg2-binary
import psycopg2

# Open a connection to a local PostgreSQL server using the default
# database/user, then fetch a single sample row.
conn = psycopg2.connect(database='postgres', user='postgres',
                        password='postgres', host='127.0.0.1', port='5432')
print(conn)

cursor = conn.cursor()
cursor.execute("select * from t_images limit 1")
first_row = cursor.fetchone()
print(first_row)

conn.close()
压缩文件
import os
import zipfile
def zipDir(dirpath, outFullName):
    """
    Compress the contents of a directory into a zip archive.

    :param dirpath: directory whose files are compressed
    :param outFullName: destination path of the archive, e.g. .../xxxx.zip
    :return: None
    """
    # Context manager guarantees the archive is closed even on error
    # (the original never closed it on exceptions, and named it `zip`,
    # shadowing the builtin).
    with zipfile.ZipFile(outFullName, "w", zipfile.ZIP_DEFLATED) as archive:
        for path, dirnames, filenames in os.walk(dirpath):
            for filename in filenames:
                abs_path = os.path.join(path, filename)
                # Store entries relative to dirpath.  The original used
                # path.replace(dirpath, ''), which produced arcnames with a
                # leading '/' for subdirectories; relpath yields clean
                # 'sub/file' names.
                arcname = os.path.relpath(abs_path, dirpath)
                archive.write(abs_path, arcname)
if __name__ == "__main__":
    # Example driver: zip /Volumes/Data/test into /Volumes/Data/test.zip.
    # NOTE(review): macOS-specific absolute paths — adjust for your machine.
    input_path = "/Volumes/Data/test"
    output_path = "/Volumes/Data/test.zip"
    zipDir(input_path, output_path)
写入csv
import csv
class DomainUtils:
    """Write and read a CSV file of domain records."""

    def __init__(self):
        # Column headers written as the first CSV row.
        self.headers = ['domain_name', 'domain_url', 'domain_describe', 'domain_no', 'domain_date']
        # Target file.  The original methods ignored this attribute and
        # hard-coded 'domain.csv'; they now honor it.
        self.file_path = 'domain.csv'

    def writer_csv(self, rows):
        """Write the header row followed by *rows* (iterables of 5 fields).

        newline='' is required by the csv module to avoid blank lines
        between records on Windows.
        """
        with open(self.file_path, 'w', newline='') as f:
            f_csv = csv.writer(f)
            f_csv.writerow(self.headers)
            f_csv.writerows(rows)

    def read_csv(self):
        """Return the CSV contents as a list of dicts keyed by the header row.

        The original returned a csv.DictReader bound to a file that the
        `with` block had already closed, so iterating it raised ValueError;
        materializing the rows inside the `with` fixes that while staying
        iterable for callers.
        """
        with open(self.file_path, newline='') as f:
            return list(csv.DictReader(f))
url 编码
from urllib.parse import quote

# Percent-encode the URL.  quote() keeps '/' unescaped by default (safe='/'),
# so characters such as ':' and '?' get encoded.
url = "https://www.baidu.com/?name=wasd"
quote(url)  # NOTE(review): result is discarded — presumably meant to be captured or printed
url获取参数
url = "https://www.baidu.com/?name=wasd"
query_list = urllib.parse.parse_qsl(urlparse(url).query)
# [('name','wasd')]
name = query_list[0][0]
selenium4 使用
懒加载模式,不等待页面加载完毕
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.webdriver.common.by import By
import time
from selenium.common.exceptions import TimeoutException

op = Options()
op.page_load_strategy = 'none'  # lazy mode: get() returns without waiting for the full page load
driver = webdriver.Chrome(options=op)
driver.get('https://www.baidu.com/')
# Because get() returns immediately in 'none' mode, give the page time to
# render before querying the DOM, otherwise the lookup below fails.
time.sleep(5)
try:
    # Selenium 4 removed find_element_by_xpath(); the replacement is
    # find_element(By.XPATH, ...).
    video_url = driver.find_element(By.XPATH, '//*[@id="myPlayer"]/source').get_attribute('src')
except TimeoutException:
    driver.quit()
    print("timeout")
等待js渲染完成
解析本地html
from lxml import etree

# Read the saved page as bytes and decode explicitly, so the result does not
# depend on the platform default encoding.  The `with` block closes the file
# (the original opened it and never closed it).
with open("1.html", "rb") as f:
    content = f.read().decode('utf-8')

tree = etree.HTML(content)
print(tree)
# Extract the video source URL from the parsed local DOM.
print(tree.xpath('//*[@id="myPlayer"]/source/@src')[0])
使用 ffmpeg 下载视频
import subprocess

# Hand the stream URL to ffmpeg and save it as "<name>.mp4".
# NOTE(review): `tkb` is defined elsewhere (not in this snippet); it is
# expected to carry .video_url and .name attributes — confirm at the call site.
subprocess.run(['ffmpeg', '-i', tkb.video_url, '{0}.mp4'.format(tkb.name)])
mac调用Downie下载视频
import os

# Build the Downie URL-scheme command; `downie_url` must be defined by the caller.
downie_shell = "open 'downie://XUOpenLink?url={0}'".format(downie_url)
# NOTE(review): os.system interpolates the URL into a shell string — a URL
# containing a single quote would break out of the quoting (shell injection).
# Prefer subprocess.run(['open', ...], shell=False) with a list.
os.system(downie_shell)
Aria2下载文件
import aria2p

# Trying out aria2 via the aria2p JSON-RPC client.
# pip3 install aria2p
if __name__ == '__main__':
    # NOTE(review): host is empty here — aria2p normally expects something
    # like "http://localhost"; confirm before use.
    aria2 = aria2p.Client(
        host="",
        port=6800,
        secret=""
    )
    uris = ["xxxx.png", "yyyy.png"]
    # Issues the aria2.addUri RPC call.  NOTE(review): addUri treats multiple
    # URIs as mirrors of ONE file — verify these are not meant to be two
    # separate downloads.
    aria2.add_uri(uris=uris)