Studio KimHippo :D
#2. Kim_crawl_MK.2 (김_크롤_2호) 본문
필요 패키지
# -*- coding: utf-8 -*-
from Kim_crawl_class import crawl as kcc
import os, sys, time
import winsound as ws
크롤링 클래스
class kim_crawl_class_mk2:
    """Interactive console front end for the Kim Crawl crawler.

    The class holds no state. ``main`` shows the top-level menu,
    ``text_crawl`` asks for the crawl parameters and appends the crawled
    text to a ``.txt`` file, and ``is_continue`` keeps offering another
    round until the user confirms quitting.

    Every method leaves the process working directory where it found it,
    so repeated runs always write below the same ``data`` directory.
    """

    def __init__(self):
        pass

    def none(self, in_obj):
        """Translate the typed word ``'None'`` into the ``None`` object.

        Args:
            in_obj: Value typed by the user.

        Returns:
            ``None`` when ``in_obj`` equals the string ``'None'``;
            otherwise ``in_obj`` unchanged.
        """
        if in_obj == 'None':
            return None
        return in_obj

    def _ask(self, prompt):
        """Read one line from the user, then pause for half a second."""
        answer = input(prompt)
        time.sleep(0.5)
        return answer

    def _confirm_quit(self):
        """Ask for confirmation and exit the program on ``'1'``.

        Returns normally when the user declines (``'2'``) or types anything
        else; in the latter case a hint is printed first.

        Raises:
            SystemExit: When the user confirms with ``'1'``.
        """
        print('\n')
        are = input('[1].Yes \t\t\t [2]. No \n'
                    '[?]. Are you sure? : ')
        if are == '1':
            print('Good bye! \n')
            sys.exit()
        if are != '2':
            print('You must select another option')

    def main(self):
        """Show the main menu until a crawl has been run or the user quits.

        Option ``1`` creates and enters ``data`` and opens the data-kind
        sub-menu; option ``2`` is not implemented yet; ``!`` asks whether
        to quit. Invalid choices show the menu again. The method returns
        after one crawl function has run.

        A ``NameError`` raised anywhere in the menu flow is reported on the
        console and signalled with beeps instead of propagating.

        Raises:
            SystemExit: When the user confirms quitting.
        """
        start_dir = os.getcwd()
        try:
            # A loop replaces the former self-recursion, so repeated invalid
            # input can no longer grow the call stack.
            while True:
                print('\n')
                print('================================= [Hello, My name is Kim Crawl] =================================')
                print('================================= [ May I help you? ] =================================')
                print('\n')
                num = input('[1]. Only one page Crawling \t\t\t [2]. Some pages Crawling \n'
                            '[!]. Quit Program \t\t\t \n'
                            '[?]. Select your function : ')

                if num == '1':
                    os.makedirs('data', exist_ok=True)
                    os.chdir('data')
                    print('\n')
                    print('==============================================================================================')
                    print('\n')
                    num_2 = input('[1]. Text data \t\t\t [2]. Image data \n'
                                  '[!]. Back \n'
                                  '[?]. What do you need kind of data? : ')
                    if num_2 == '1':
                        self.text_crawl()
                        return
                    if num_2 == '2':
                        self.img_crawl()
                        return
                    if num_2 != '!':
                        print('You must select another option')
                    # Leave 'data' again before re-showing the menu; otherwise
                    # the next pass would create a nested data/data directory.
                    os.chdir(start_dir)
                elif num == '2':
                    print('This function not ready yet... :(')
                elif num == '!':
                    self._confirm_quit()
                else:
                    print('You must select another option. ')
        except NameError as e:
            print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! [ Alert! Error Occurred! ]!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ')
            print('Error Name : ', e)
            for _ in range(1, 6):
                ws.Beep(1500, 2000)
            # NOTE(review): the published listing lost its indentation; the
            # pause is assumed to follow the beep loop rather than sit inside
            # it - confirm against the original script.
            time.sleep(5)
        finally:
            # Always hand the caller back the directory it started in.
            os.chdir(start_dir)

    def text_crawl(self):
        """Prompt for crawl parameters and append the crawled text to a file.

        The output goes to ``text_data/<file name>.txt`` below the current
        working directory, opened in append mode. Typing ``None`` for the
        class or the function passes the ``None`` object to the crawler.

        With a function given, the crawler result is iterated and the
        ``.text`` of every item is written on its own line; without one,
        the ``.text`` of the single result is written as is.
        """
        file_name = self._ask('Input your File name : ')
        i_url = self._ask('Input your url : ')
        i_tag = self._ask('Input your html tag : ')
        i_val = self._ask('Input your html value : ')
        i_class = self._ask('Input your class (None or class_name): ')
        i_func = self._ask('Input your function (None or All or all) : ')

        craw = kcc(i_url)
        i_class = self.none(i_class)
        i_func = self.none(i_func)

        start_dir = os.getcwd()
        os.makedirs('text_data', exist_ok=True)
        os.chdir('text_data')
        try:
            # presumably get_obj returns an iterable of elements when a
            # function is given and a single element otherwise - verify
            # against Kim_crawl_class.
            found = craw.get_obj(i_tag, i_val, i_class, i_func)
            if i_func is not None:
                content = ''.join(item.text + '\n' for item in found)
            else:
                content = found.text
            # Crawled pages can hold characters outside the locale code page,
            # so the encoding is fixed instead of left to the platform default.
            with open(file_name + '.txt', 'a', encoding='utf-8') as text:
                text.write(content)
        finally:
            # Undo the chdir so a second crawl does not nest text_data folders.
            os.chdir(start_dir)

        print('=====================================[ Crawling Complete! ]=====================================')
        print('=====================================[ File Saved! ]=====================================')

    def img_crawl(self):
        """Announce that image crawling is not implemented yet.

        Still creates the ``img_data`` directory below the current working
        directory; the working directory itself is left untouched.
        """
        print('This function not ready yet... :(')
        os.makedirs('img_data', exist_ok=True)

    def is_continue(self):
        """Keep offering another crawl until the user confirms quitting.

        ``1`` starts the main menu again. ``!`` asks for confirmation and
        exits on yes. A declined quit or any invalid answer also returns to
        the main menu, so this method only ends through ``sys.exit()``.

        Raises:
            SystemExit: When the user confirms quitting.
        """
        while True:
            print('\n\n')
            i_continue = input('[1]. Continue \t\t\t [!].Quit \n'
                               '[?]. Are you continue crawling ? : ')
            if i_continue == '!':
                self._confirm_quit()
            elif i_continue != '1':
                print('You must select another option. ')
            # Every path that has not exited goes back to the main menu.
            self.main()
# Script entry: run the menu once, then keep offering further rounds.
# Remember the directory the script was launched from.
pwd = os.getcwd()
craw = kim_crawl_class_mk2()
# Show the main menu for the first round.
craw.main()
# Pause for two seconds before the follow-up prompt appears.
time.sleep(2)
# Return to the launch directory before prompting again.
os.chdir(pwd)
# Loops until the user confirms quitting.
craw.is_continue()
[1] 이후에는 이 코드를 그대로 실행하지 않고, pyinstaller를 이용해 하나의 실행 프로그램으로 만들어서 사용했다.
[2] 코드가 굉장히 지저분하고 복잡해서 개량할 필요성을 느껴 Kim_crawl_MK.2.1을 개발했다.
'Data Science > Crawler' 카테고리의 다른 글
#4. Kim_crawl_MK.1.1 (김_크롤_1.1호) (0) | 2019.07.09 |
---|---|
#3. Kim_crawl_MK.2.1 (김_크롤_2.1호) (0) | 2019.06.28 |
#1. Kim_crawl_MK.1 (김_크롤_1호) (0) | 2019.06.27 |
Comments