先来个多线程的示例。循环的时候可以用 time.sleep(1) 休息一下,不要给网站太大压力,避免被封。
import requests,threading
def main():
    """Crawl listing pages 1..71 with at most 5 downloader threads alive at once.

    Relies on module-level helpers defined elsewhere in the original post:
    create_dir(path) and execute(url).
    """
    create_dir('pic')
    # Page numbers used to build the listing URLs.
    queue = [i for i in range(1, 72)]
    threads = []
    while len(queue) > 0:
        # Reap finished threads. Iterate over a copy: removing from the
        # list while iterating it directly can skip elements.
        for thread in threads[:]:
            if not thread.is_alive():
                threads.remove(thread)
        # Top up the pool until the 5-worker cap is reached.
        while len(threads) < 5 and len(queue) > 0:
            cur_page = queue.pop(0)
            url = 'http://meizitu.com/a/more_{}.html'.format(cur_page)
            thread = threading.Thread(target=execute, args=(url,))
            # setDaemon() is deprecated since Python 3.10; assign the attribute.
            thread.daemon = True
            thread.start()
            print('{}正在下载{}页'.format(threading.current_thread().name, cur_page))
            threads.append(thread)


if __name__ == '__main__':
    main()
注意:threadpool 是第三方库,需要先安装(pip install threadpool)再 import threadpool。
def xianurl(url, count=10):
    """Fetch pages url+"1" .. url+str(count-1) concurrently with a 10-worker pool.

    Relies on the third-party ``threadpool`` package and a module-level
    worker function ``geturl(page_url)`` defined elsewhere in the post.
    Note: range(1, count) yields count-1 pages, matching the original code.
    """
    count = int(count)
    page_urls = [url + str(code) for code in range(1, count)]
    pool = threadpool.ThreadPool(10)
    # geturl is the worker; each element of page_urls is passed to it
    # as a single argument, one per task.
    tasks = threadpool.makeRequests(geturl, page_urls)
    for task in tasks:
        pool.putRequest(task)
    pool.wait()


if __name__ == '__main__':
    # text("ruyangwang", "第n页url: " + url + "1")
    # Example: xianurl('http://example.com/page/', 10)
    pass
下面是多进程的写法(使用 multiprocessing 模块):
from multiprocessing import Process
#t=Process(target=geturl,args=(url+str(i),))
#t.start()#多进程
#t2=threading.Thread(target=geturl,args=(url+str(i),))
#t2.start()
#t2.join()
转载请注明:稻香的博客 » python多进程和多线程的总结