-
Notifications
You must be signed in to change notification settings - Fork 14
Expand file tree
/
Copy pathspider_console.py
More file actions
49 lines (37 loc) · 1.09 KB
/
spider_console.py
File metadata and controls
49 lines (37 loc) · 1.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
#!/usr/bin/env python
"""
Created by howie.hu at 06/04/2018.
"""
import asyncio
import os
import sys
import time
sys.path.append('../../')
from importlib import import_module
from hproxy.config import CONFIG
from hproxy.utils import logger
def file_name(file_dir=os.path.join(CONFIG.BASE_DIR, 'spider/proxy_spider')):
    """
    Collect the module names of the spider scripts found in a directory.

    A directory entry counts as a spider when its name ends with
    ``_spider.py``; the returned name is the entry name with that trailing
    ``.py`` extension removed.

    :param file_dir: directory to scan; defaults to ``spider/proxy_spider``
        under ``CONFIG.BASE_DIR``
    :return: list of module names (without extension), in ``os.listdir`` order
    """
    extension = '.py'
    # Slice off only the trailing extension: str.replace() would also delete
    # any earlier '.py' inside the name and produce a wrong module name.
    return [
        entry[:-len(extension)]
        for entry in os.listdir(file_dir)
        if entry.endswith('_spider.py')
    ]
async def spider_console():
    """
    Run every discovered proxy spider concurrently and log the elapsed time.

    Each name returned by ``file_name()`` is imported as
    ``hproxy.spider.proxy_spider.<name>``, the awaitable returned by the
    module's ``start()`` is scheduled on the event loop, and the function
    waits until all scheduled tasks are done.
    """
    start = time.time()
    tasks = []
    for spider in file_name():
        spider_module = import_module(
            "hproxy.spider.proxy_spider.{}".format(spider))
        tasks.append(asyncio.ensure_future(spider_module.start()))
    # asyncio.wait() raises ValueError when given an empty collection, so
    # skip the wait if no spider module was found.
    # NOTE: asyncio.wait() does not re-raise exceptions from the tasks, so a
    # failing spider does not abort this function.
    if tasks:
        await asyncio.wait(tasks)
    logger.info(type="Spider finished!", message="Time costs: {0}".format(time.time() - start))
def crawl_proxy():
    """
    Synchronous entry point for the crawl.

    Obtains the event loop via ``asyncio.get_event_loop()`` and blocks until
    ``spider_console()`` has run to completion on it.
    """
    loop = asyncio.get_event_loop()
    loop.run_until_complete(spider_console())
# Start a crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    crawl_proxy()