Studio KimHippo :D

#2. Kim_crawl_MK.2 (김_크롤_2호) 본문

Data Science/Crawler

#2. Kim_crawl_MK.2 (김_크롤_2호)

김작은하마 2019. 6. 28. 01:56

필요 패키지

# -*- coding: utf-8 -*-

from Kim_crawl_class import crawl as kcc
import os, sys, time
import winsound as ws

크롤링 클래스

class kim_crawl_class_mk2:
    """Console menu that drives a text crawl through the ``kcc`` crawler.

    Output is written below ``data/`` in the directory that is current when
    a crawl is started.  No method leaves the process in a different working
    directory than it started in, so the menu can be shown any number of
    times without nesting ``data/`` folders inside each other.
    """

    def __init__(self):
        """Create the menu object; it holds no state."""

    def none(self, in_obj):
        """Map the literal text ``'None'`` to ``None``.

        Args:
            in_obj: Value typed by the user.

        Returns:
            ``None`` when ``in_obj == 'None'``, otherwise ``in_obj`` unchanged.
        """
        return None if in_obj == 'None' else in_obj

    def main(self):
        """Show the top-level menu until one crawl has run or the user quits.

        Returns after a single crawl action has completed.  Leaves the
        process through ``sys.exit()`` when the user confirms quitting.

        A ``NameError`` raised while the menu is active is reported on the
        console, followed by five beeps and a five second pause, and is then
        swallowed.
        NOTE(review): only ``NameError`` is intercepted; every other
        exception propagates to the caller -- confirm that this is intended.
        """
        try:
            # A loop instead of self-recursion: invalid input and "Back" no
            # longer add a stack frame per menu redraw.
            while True:
                print('\n')
                print('================================= [Hello, My name is Kim Crawl] =================================')
                print('================================= [       May I help you?     ] =================================')
                print('\n')
                num = input('[1]. Only one page Crawling \t\t\t [2]. Some pages Crawling \n'
                            '[!]. Quit Program \t\t\t \n'
                            '[?]. Select your function : ')

                if num == '1':
                    if self._single_page_menu():
                        return
                elif num == '2':
                    print('This function not ready yet... :(')
                elif num == '!':
                    self._confirm_quit()
                else:
                    print('You must select another option. ')

        except NameError as e:
            print('!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! [ Alert! Error Occurred! ]!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ')
            print('Error Name : ', e)

            for _ in range(5):
                ws.Beep(1500, 2000)
            time.sleep(5)

    def _single_page_menu(self):
        """Ask which kind of data to crawl; return True once a crawl has run."""
        print('\n')
        print('==============================================================================================')
        print('\n')

        num_2 = input('[1]. Text data \t\t\t [2]. Image data \n'
                      '[!]. Back \n'
                      '[?]. What do you need kind of data? : ')
        if num_2 == '1':
            self._run_in_data_dir(self.text_crawl)
            return True
        if num_2 == '2':
            self._run_in_data_dir(self.img_crawl)
            return True
        if num_2 != '!':
            print('You must select another option')
        return False

    def _run_in_data_dir(self, action):
        """Call ``action`` with ``data/`` as working directory, then go back."""
        origin = os.getcwd()
        os.makedirs('data', exist_ok=True)
        os.chdir('data')
        try:
            action()
        finally:
            # Always return, even on error or sys.exit(), so the next menu
            # round starts from the same place.
            os.chdir(origin)

    def _confirm_quit(self):
        """Ask for confirmation; exit the process on '1', otherwise return."""
        print('\n')
        are = input('[1].Yes \t\t\t [2]. No \n'
                    '[?]. Are you sure? : ')
        if are == '1':
            print('Good bye! \n')
            sys.exit()
        if are != '2':
            print('You must select another option')

    def _ask(self, message):
        """Read one line from the user, then pause for half a second."""
        answer = input(message)
        time.sleep(0.5)
        return answer

    def text_crawl(self):
        """Prompt for crawl parameters and append the crawled text to a file.

        The text is appended to ``text_data/<file name>.txt`` below the
        current working directory; the directory is created when missing
        and the working directory itself is left untouched.  The file is
        written as UTF-8 so the result does not depend on the locale's
        default encoding.

        Answering ``None`` for the class or the function passes ``None`` to
        ``get_obj`` for that argument.
        """
        file_name = self._ask('Input your File name : ')
        i_url = self._ask('Input your url : ')
        i_tag = self._ask('Input your html tag : ')
        i_val = self._ask('Input your html value : ')
        i_class = self.none(self._ask('Input your class (None or class_name): '))
        i_func = self.none(self._ask('Input your function (None or All or all) : '))

        craw = kcc(i_url)

        os.makedirs('text_data', exist_ok=True)
        out_path = os.path.join('text_data', file_name + '.txt')

        found = craw.get_obj(i_tag, i_val, i_class, i_func)
        if i_func is not None:
            # With a function given, the result is iterated and every item's
            # ``.text`` goes on its own line.
            content = ''.join(item.text + '\n' for item in found)
        else:
            # Without a function the result is used as one object with ``.text``.
            content = found.text

        with open(out_path, 'a', encoding='utf-8') as text:
            text.write(content)

        print('=====================================[ Crawling Complete! ]=====================================')
        print('=====================================[     File Saved!    ]=====================================')

    def img_crawl(self):
        """Placeholder for image crawling.

        Prints a notice and makes sure ``img_data/`` exists below the
        current working directory, without changing into it.
        """
        print('This function not ready yet... :(')
        os.makedirs('img_data', exist_ok=True)

    def is_continue(self):
        """Keep offering another crawl until the user confirms quitting.

        Every answer except a confirmed quit leads back to :meth:`main`.
        This method does not return normally; it ends through
        ``sys.exit()`` or an exception propagating out of :meth:`main`.
        """
        while True:
            print('\n\n')
            i_continue = input('[1]. Continue \t\t\t [!].Quit \n'
                               '[?]. Are you continue crawling ? : ')

            if i_continue == '!':
                self._confirm_quit()
            elif i_continue != '1':
                print('You must select another option. ')

            self.main()

# Start the interactive session only when this file is executed as a script,
# so that importing the module does not block on console input.
if __name__ == '__main__':
    pwd = os.getcwd()
    craw = kim_crawl_class_mk2()
    craw.main()
    time.sleep(2)
    # Return to the starting directory before offering another run, in case
    # main() changed it.
    os.chdir(pwd)
    craw.is_continue()

[1] 이후 이대로 사용하지 않고, pyinstaller를 이용해서 하나의 프로그램으로 만들어서 사용했다.

[2] 코드가 굉장히 지저분하고, 복잡해서 개량할 필요성을 느껴 Kim_crawl_mk2.1을 개발했다.

 

'Data Science > Crawler' 카테고리의 다른 글

#4. Kim_crawl_MK.1.1 (김_크롤_1.1호)  (0) 2019.07.09
#3. Kim_crawl_MK.2.1 (김_크롤_2.1호)  (0) 2019.06.28
#1. Kim_crawl_MK.1 (김_크롤_1호)  (0) 2019.06.27
Comments