pascallijuan/python_.idea_.name

## python_.idea_.name
python

## python_.idea_encodings.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
</project>


## python_.idea_misc.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.3 (C:/Python27/python.exe)" project-jdk-type="Python SDK" />
</project>


## python_.idea_modules.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/python.iml" filepath="$PROJECT_DIR$/.idea/python.iml" />
    </modules>
  </component>
</project>


## python_.idea_other.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="PyDocumentationSettings">
    <option name="myDocStringFormat" value="Plain" />
  </component>
</project>


## python_.idea_python.iml
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>


## python_.idea_scopes_scope_settings.xml
<component name="DependencyValidationManager">
  <state>
    <option name="SKIP_IMPORT_STATEMENTS" value="false" />
  </state>
</component>

## python_.idea_testrunner.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="TestRunnerService">
    <option name="projectConfiguration" value="Unittests" />
    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
  </component>
</project>


## python_.idea_vcs.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="" vcs="" />
  </component>
</project>


## python_newspider_.idea_.name
newspider

## python_newspider_.idea_encodings.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
</project>


## python_newspider_.idea_inspectionProfiles_profiles_settings.xml
<component name="InspectionProjectProfileManager">
  <settings>
    <option name="PROJECT_PROFILE" />
    <option name="USE_PROJECT_PROFILE" value="false" />
    <version value="1.0" />
  </settings>
</component>

## python_newspider_.idea_misc.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.3 (C:/Python27/python.exe)" project-jdk-type="Python SDK" />
</project>


## python_newspider_.idea_modules.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ProjectModuleManager">
    <modules>
      <module fileurl="file://$PROJECT_DIR$/.idea/newspider.iml" filepath="$PROJECT_DIR$/.idea/newspider.iml" />
    </modules>
  </component>
</project>


## python_newspider_.idea_newspider.iml
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
  <component name="NewModuleRootManager">
    <content url="file://$MODULE_DIR$" />
    <orderEntry type="inheritedJdk" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
</module>


## python_newspider_.idea_other.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="PyDocumentationSettings">
    <option name="myDocStringFormat" value="Plain" />
  </component>
</project>


## python_newspider_.idea_scopes_scope_settings.xml
<component name="DependencyValidationManager">
  <state>
    <option name="SKIP_IMPORT_STATEMENTS" value="false" />
  </state>
</component>

## python_newspider_.idea_testrunner.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="TestRunnerService">
    <option name="projectConfiguration" value="Unittests" />
    <option name="PROJECT_TEST_RUNNER" value="Unittests" />
  </component>
</project>


## python_newspider_.idea_vcs.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="VcsDirectoryMappings">
    <mapping directory="" vcs="" />
  </component>
</project>


## python_newspider_.idea_workspace.xml
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
  <component name="ChangeListManager">
    <list default="true" id="7dc9904d-98fe-4bed-8b89-bf5f25178704" name="Default" comment="" />
    <ignored path="newspider.iws" />
    <ignored path=".idea/workspace.xml" />
    <option name="TRACKING_ENABLED" value="true" />
    <option name="SHOW_DIALOG" value="false" />
    <option name="HIGHLIGHT_CONFLICTS" value="true" />
    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
    <option name="LAST_RESOLUTION" value="IGNORE" />
  </component>
  <component name="ChangesViewManager" flattened_view="true" show_ignored="false" />
  <component name="CreatePatchCommitExecutor">
    <option name="PATCH_PATH" value="" />
  </component>
  <component name="DaemonCodeAnalyzer">
    <disable_hints />
  </component>
  <component name="ExecutionTargetManager" SELECTED_TARGET="default_target" />
  <component name="ProjectFrameBounds">
    <option name="x" value="-8" />
    <option name="y" value="-8" />
    <option name="width" value="1936" />
    <option name="height" value="1066" />
  </component>
  <component name="ProjectLevelVcsManager" settingsEditedManually="false">
    <OptionsSetting value="true" id="Add" />
    <OptionsSetting value="true" id="Remove" />
    <OptionsSetting value="true" id="Checkout" />
    <OptionsSetting value="true" id="Update" />
    <OptionsSetting value="true" id="Status" />
    <OptionsSetting value="true" id="Edit" />
    <ConfirmationsSetting value="0" id="Add" />
    <ConfirmationsSetting value="0" id="Remove" />
  </component>
  <component name="ProjectReloadState">
    <option name="STATE" value="0" />
  </component>
  <component name="ProjectView">
    <navigator currentView="ProjectPane" proportions="" version="1" splitterProportion="0.5">
      <flattenPackages />
      <showMembers />
      <showModules />
      <showLibraryContents />
      <hideEmptyPackages />
      <abbreviatePackageNames />
      <autoscrollToSource />
      <autoscrollFromSource />
      <sortByType />
    </navigator>
    <panes>
      <pane id="ProjectPane">
        <subPane>
          <PATH>
            <PATH_ELEMENT>
              <option name="myItemId" value="newspider" />
              <option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
            </PATH_ELEMENT>
          </PATH>
        </subPane>
      </pane>
      <pane id="Scope" />
    </panes>
  </component>
  <component name="PropertiesComponent">
    <property name="WebServerToolWindowFactoryState" value="false" />
    <property name="last_opened_file_path" value="$PROJECT_DIR$/.." />
  </component>
  <component name="RunManager">
    <list size="0" />
  </component>
  <component name="ShelveChangesManager" show_recycled="false" />
  <component name="TaskManager">
    <task active="true" id="Default" summary="Default task">
      <created>1366584457012</created>
      <updated>1366584457012</updated>
    </task>
    <servers />
  </component>
  <component name="ToolWindowManager">
    <frame x="-8" y="-8" width="1936" height="1066" extended-state="0" />
    <editor active="false" />
    <layout>
      <window_info id="Changes" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
      <window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
      <window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
      <window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="true" content_ui="tabs" />
      <window_info id="Project" active="true" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.2497332" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
      <window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
      <window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
      <window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
      <window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
      <window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
      <window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
      <window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
      <window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
      <window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
      <window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
      <window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
      <window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
    </layout>
  </component>
  <component name="VcsContentAnnotationSettings">
    <option name="myLimit" value="2678400000" />
  </component>
  <component name="VcsManagerConfiguration">
    <option name="OFFER_MOVE_TO_ANOTHER_CHANGELIST_ON_PARTIAL_COMMIT" value="true" />
    <option name="CHECK_CODE_SMELLS_BEFORE_PROJECT_COMMIT" value="false" />
    <option name="CHECK_NEW_TODO" value="true" />
    <option name="myTodoPanelSettings">
      <value>
        <are-packages-shown value="false" />
        <are-modules-shown value="false" />
        <flatten-packages value="false" />
        <is-autoscroll-to-source value="false" />
      </value>
    </option>
    <option name="PERFORM_UPDATE_IN_BACKGROUND" value="true" />
    <option name="PERFORM_COMMIT_IN_BACKGROUND" value="true" />
    <option name="PERFORM_EDIT_IN_BACKGROUND" value="true" />
    <option name="PERFORM_CHECKOUT_IN_BACKGROUND" value="true" />
    <option name="PERFORM_ADD_REMOVE_IN_BACKGROUND" value="true" />
    <option name="PERFORM_ROLLBACK_IN_BACKGROUND" value="false" />
    <option name="CHECK_LOCALLY_CHANGED_CONFLICTS_IN_BACKGROUND" value="false" />
    <option name="CHANGED_ON_SERVER_INTERVAL" value="60" />
    <option name="SHOW_ONLY_CHANGED_IN_SELECTION_DIFF" value="true" />
    <option name="CHECK_COMMIT_MESSAGE_SPELLING" value="true" />
    <option name="DEFAULT_PATCH_EXTENSION" value="patch" />
    <option name="SHORT_DIFF_HORISONTALLY" value="true" />
    <option name="SHORT_DIFF_EXTRA_LINES" value="2" />
    <option name="SOFT_WRAPS_IN_SHORT_DIFF" value="true" />
    <option name="INCLUDE_TEXT_INTO_PATCH" value="false" />
    <option name="INCLUDE_TEXT_INTO_SHELF" value="false" />
    <option name="SHOW_FILE_HISTORY_DETAILS" value="true" />
    <option name="SHOW_VCS_ERROR_NOTIFICATIONS" value="true" />
    <option name="FORCE_NON_EMPTY_COMMENT" value="false" />
    <option name="CLEAR_INITIAL_COMMIT_MESSAGE" value="false" />
    <option name="LAST_COMMIT_MESSAGE" />
    <option name="MAKE_NEW_CHANGELIST_ACTIVE" value="false" />
    <option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false" />
    <option name="CHECK_FILES_UP_TO_DATE_BEFORE_COMMIT" value="false" />
    <option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false" />
    <option name="REFORMAT_BEFORE_FILE_COMMIT" value="false" />
    <option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8" />
    <option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5" />
    <option name="ACTIVE_VCS_NAME" />
    <option name="UPDATE_GROUP_BY_PACKAGES" value="false" />
    <option name="UPDATE_GROUP_BY_CHANGELIST" value="false" />
    <option name="SHOW_FILE_HISTORY_AS_TREE" value="false" />
    <option name="FILE_HISTORY_SPLITTER_PROPORTION" value="0.6" />
  </component>
  <component name="XDebuggerManager">
    <breakpoint-manager />
  </component>
</project>


## python_newspider_cate.xml
<?xml version="1.0" encoding="utf-8"?>
<root>
  <site siteName="ffdy" readMode="normal" url="http://www.ffdy.cc/s" daily="0.3" charset="utf-8">
      <linkRules>
          <rule type="reg" value="(type/movie|movie)" />
      </linkRules>
      <targets>
          <target name="info">
              <urlRules>
                  <rule type="reg" value=".*/movie/(\d+).html" />
              </urlRules>
              <model dataType="array">
					<field name="title">
						<parsers>
							<parser type="text" xpath="//h1/text()" />
						</parsers>
					</field>
					<field name="url">
						<parsers>
							<parser type="pageurl" xpath="//h1/text()" />
						</parsers>
					</field>
					<field name="detail_pic">
						<parsers>
							<parser type="text" xpath="//div[@class='detail_pic']/span/img/@src" />
						</parsers>
					</field>
                  <field name="director">
                      <parsers>
                          <parser type="array" xpath="//div[@class='detail_intro']/table/tr/td[text()='导演：']/../td[last()]/a/text()" code="u" />
                      </parsers>
                  </field>
                  <field name="leading">
                      <parsers>
                          <parser type="array" xpath="//div[@class='detail_intro']/table/tr/td[text()='主演：']/../td[last()]/a/text()" code="u" />
                      </parsers>
                  </field>
				  <field name="type">
                      <parsers>
                          <parser type="array" xpath="//div[@class='detail_intro']/table/tr/td[text()='类型：']/../td[last()]/a/text()" code="u" />
                      </parsers>
                  </field>
				  <field name="area">
                      <parsers>
                          <parser type="text" xpath="//div[@class='detail_intro']/table/tr/td[text()='国家/地区：']/../td[last()]/a/text()" code="u" />
                      </parsers>
                  </field>
				  <field name="show_day">
                      <parsers>
                          <parser type="text" xpath="//div[@class='detail_intro']/table/tr/td[text()='上映日期：']/../td[last()]/text()" code="u" />
                      </parsers>
                  </field>
				  <field name="comment">
                      <parsers>
                          <parser type="html" xpath="//div[@class='filmcontents']" reg="u" />
                      </parsers>
                  </field>
              </model>
          </target>
      </targets>
  </site>
</root>

## python_newspider_index.py
# -*- coding: utf-8 -*-
'''
爬虫引导入口 启用多线程处理该事物
'''
__author__ = 'ShengYue'
import threading
import time
from lxml import etree
import os
from model.log import *
from model.db import *
from model.curl import *


'''
爬虫调度接口
'''
class newsspider:
    def __init__(self, xpath_file):
        self.xpath_file = xpath_file
        logging.info(u'-----------------------------------------------------------------------------')
        logging.info(u'创建newsspider对象: '+xpath_file)
        try:
            if os.path.exists(xpath_file):
                self.config_tree = etree.ElementTree(file=xpath_file)
                sites = self.config_tree.xpath('//site')
                if sites == []:
                    logging.error(u'网站配置文件格式不对'+xpath_file)
                    exit(0)

                site = sites[0]

                # 网址获取
                self.site_url = site.get('url')
                if self.site_url == None:
                    logging.error(u'网站配置文件网址获取失败'+xpath_file)
                    exit(0)

                # 网站名
                self.site_name = site.get('siteName')
                self.daily = float(site.get('daily'))

                # 网站链接读取规则
                self.readMode = site.get('readMode')
                print self.readMode

                self.linkRule = self.config_tree.xpath('//linkRules/rule')
                self.infoUrlRule = self.config_tree.xpath('//urlRules/rule')
                self.infoRule = self.config_tree.xpath('//targets/target/model/field')
                self.linkdb = linkdb(self.site_name)

                self.run(self.site_url)
            else:
                logging.error(u'配置文件不存在: '+xpath_file)
                exit(0)
        except Exception, e:
            logging.error(u'读取配置文件失败: '+xpath_file+', --'+e.message)
            exit(0)

    def run(self, url):
        logging.info(u'开始执行配置文件: '+self.xpath_file)
        if self.readMode == 'normal':
            self.autoRead(url)

        elif self.readMode == 'match':
            print u'没有找到匹配模式'
        else:
            logging.error(u'没有找到读取规则'+self.xpath_file)
            exit(0)

    def autoRead(self, url):

        time.sleep(self.daily)
        try:
            # 初次
            if url != None:
                url = self.site_url

            else:
                urlData = self.linkdb.get_url(self.site_name)
                if urlData == None:
                    logging.info(self.url+u'读取成功')
                    exit(0)
                url = urlData[1]

                #更新
                self.linkdb.update_url(urlData[0])

            html = curl().read(url)

            if html == None:
                logging.error(u'获取HTML失败: '+url)

            '''获取链接'''


        except Exception, e:
            logging.error(u'执行失败'+self.xpath_file+', --')

    def close(self):
        logging.info(u'执行完毕: ')
        self.linkdb.close()


'''
创建线程实例
'''
class timer(threading.Thread): #The timer class is derived from the class threading.Thread
    def __init__(self, num, interval):
        threading.Thread.__init__(self)
        self.thread_num = num
        self.i = 0
        self.interval = interval
        self.thread_stop = False

    def run(self): #Overwrite run() method, put what you want the thread do here
        while not self.thread_stop:
            self.i = self.i+1
            print '%d Thread Object(%d), Time:%s\n' %(self.i, self.thread_num, time.ctime())
            time.sleep(self.interval)
    def stop(self):
        self.thread_stop = True
def test():
    thread1 = timer(1, 0.01)
    thread2 = timer(2, 0.2)
    thread1.start()
    thread2.start()
    newsspider()
    time.sleep(5)
    thread1.stop()
    thread2.stop()
    return

if __name__ == '__main__':
    #test()
    ns = newsspider('yousheng.xml')
    #ns.run()
    ns.close()


## python_newspider_model___init__.py
__author__ = 'ShengYue'

## python_newspider_model_curl.py
# -*- coding: utf-8 -*-
__author__ = 'ShengYue'
import urllib2
import time
import random
import os.path
import urllib
from log import *
class curl:
    # 链接表
    urlList = {}

    req = None

    #字符编码处理
    def mdcode(self, data, url=''):
       # code = chardet.detect(data)
        #return data.decode(code['encoding'])
        for c in ('utf-8', 'gbk', 'gb2312'):
            try:
                return data.decode(c)
            except Exception, e:
                logging.error(u'编码出错: '+url+', --'+e.message)

    def read(self,url, config={}):
        try:
            url = urllib.unquote(url)

            header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; rv:19.0) Gecko/20100101 Firefox/19.0'}
            self.req = urllib2.Request(url,headers=header)

            # 添加头信息
            for key in config:
                self.req.add_header(key, config[key])

            res = urllib2.urlopen(self.req)
            html = res.read()
            res.close()

            return self.mdcode(html, url)

        except Exception, e:
            logging.error(u'获取HTML失败:'+url+'--'+e.message)


    def getFileName(self):
        return time.strftime('%y%m%d%H%I',time.localtime(time.time()))+'-'+ str(random.randint(10,99))+'-'+str(random.randint(10,99))

    def down(self,url, path=''):

        ext = os.path.splitext(url)[-1]
        socket = urllib2.urlopen(url)
        data = socket.read()
        fileName =self.getFileName()+ext
        with open( './images/'+fileName, "wb") as jpg:
            jpg.write(data)
        socket.close()

        return '/uploads/images/'+fileName

## python_newspider_model_db.py
# -*- coding: utf-8 -*-
__author__ = 'ShengYue'
import hashlib
import sqlite3
from log import *

'''
链接管理
'''
class linkdb:
    def __init__(self, dbname):
        try:
            self.conn = sqlite3.connect(':memory:')
            # 创建临时表
            self.cur = self.conn.cursor()

            # 创建内存表
            self.cur.execute('''CREATE TABLE IF NOT EXISTS `links` (
              `id` INTEGER  PRIMARY KEY AUTOINCREMENT,
              `link` varchar(300) NOT NULL,
              `web_name` varchar(16) NOT NULL,
              `md5` varchar(32) NOT NULL,
              `status` tinyint(1) NOT NULL DEFAULT '0'
            );''')

            self.cur.execute('''CREATE INDEX IF NOT EXISTS status on links (status);''')
            self.cur.execute('''CREATE INDEX IF NOT EXISTS md5 on links (md5);''')
            self.cur.execute('''CREATE INDEX IF NOT EXISTS web_name on links (web_name);''')
            #self.conn.commit()

        except Exception, e:
            logging.error(u'数据库初始化失败:'+e.message)
            exit(0)

    def get_url(self, web_name):
        try:
            self.cur.execute("SELECT * FROM links WHERE web_name = %s AND status=0", web_name)
            return self.cur.fetchone()
        except Exception, e:
            logging.error(u'获取链接失败:'+web_name+':'+e.message)
            exit(0)

    def check_url(self, url):
        try:
            md5 = hashlib.md5(url).hexdigest()
            return self.cur.execute("SELECT * FROM links WHERE `md5`=%s", md5)
        except Exception, e:
            logging.error(u'链接验证失败'+url+':'+e.message)

    def add_url(self, url, web_name):
        try:
            md5 = hashlib.md5(url).hexdigest()
            self.cur.execute("INSERT INTO links(`link`, `web_name`, `md5`)VALUES(%s, %s, %s)", [url, web_name, md5])
            #self.conn.commit()

        except Exception, e:
            logging.error(u'链接添加失败'+url+', '+web_name+':'+e.message)

    def update_url(self, id):
        try:
            self.cur.execute("UPDATE links SET status = 1 WHERE id=%s", id);
            #self.conn.commit()
            return True
        except Exception,e:
            logging.error(u'链接更新失败'+id+':'+e.message)

    def close(self):
        try:
            self.cur.close()
            self.conn.close()
        except Exception, e:
            logging.error(u'数据库关闭失败'+id+':'+e.message)

## python_newspider_model_log.py
# -*- coding: utf-8 -*-
__author__ = 'ShengYue'
import logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',datefmt='%Y-%m-%d %H:%M:%S', filename='system.log',filemode='a+')

## python_newspider_model_match.py
# -*- coding: utf-8 -*-
__author__ = 'ShengYue'
import lxml
import lxml.etree
from lxml.html.clean import Cleaner
import re
from log import *

class match:
    '''
    修复HTML
    创建XPATH对象
    '''
    def __init__(self, html, url):
        cleaner = Cleaner(style=True, scripts=True,page_structure=False, safe_attrs_only=False)
        html = cleaner.clean_html(html)
        del cleaner

        self.etree = lxml.html.fromstring(html)
        self.etree.make_links_absolute( base_url=url, resolve_base_href=True)

    '''
    获取所有可以匹配链接
    '''
    def get_all_links(self, link_match, url):
        links = []
        all_links = self.etree.xpath('//a')

        for match in link_match:
            regLink = re.compile(url + match.get('value'))
            for a in all_links:
                href = a.get('href')
                if href == None:
                    continue

                if regLink.match(href) != None:
                    links.append(href)

        del all_links
        return links

    '''
    验证是否是详细页链接
    '''
    def check_info_link(self, link_match, url):
        for match in link_match:
            regLink = re.compile(match.get('value'))

            if regLink.match(url) != None:
                return True
            else:
                return False

    def match_info(self, match):
        try:
            retrun_data = {}
            for param in match:
                # 匹配当前配置项
                name = param.get('name')
                if name == None:
                    logging.error(u'无法获取字段名')
                    return
                param_tree = lxml.html.fromstring(lxml.etree.tostring(param))

                # 匹配单独项所有规则
                for node in param_tree.xpath('//parsers/parser'):
                    xpath = node.get('xpath')
                    type = node.get('type')
                    info_tree = self.etree.xpath(xpath)

                    try:
                        if type == None:
                            logging.error('字段数据类型获取不到'+name)

                        # 纯文本字段
                        elif type == 'text':
                            retrun_data[name] = ''
                            for val in info_tree:
                                retrun_data[name] = val.strip()

                        elif type == 'text_array':
                            arr = []
                            retrun_data[name] = arr
                            for val in info_tree:
                                if val.strip() == '':
                                    continue
                                arr.append(val.strip())
                            retrun_data[name] = arr

                        elif type == 'html':
                            retrun_data[name] = ''
                            for val in info_tree:
                                infohtml = lxml.etree.tostring(val,encoding="utf-8",method="html")
                                infohtml = infohtml.strip()
                                reg = re.compile(r'<[!/]?\b(?!(\bpre\b|\bli\b|\bp\b|\bbr\b|\bspan\b|\bimg\b))+\b\s*[^>]*>|[\s\r\n\t]+')
                                infohtml = reg.sub(' ',infohtml).strip()
                                retrun_data[name] = infohtml

                    except Exception, e:
                        print e.message;
        except Exception, e:
            logging.error(u'获取详细信息失败: '+e.message())


    def close(self):
        del self.etree


## python_newspider_system.log
2013-05-09 07:22:21 index.py[line:21] INFO -----------------------------------------------------------------------------
2013-05-09 07:22:21 index.py[line:22] INFO 创建newsspider对象: yousheng.xml
2013-05-09 07:22:21 index.py[line:61] INFO 开始执行配置文件: yousheng.xml
2013-05-09 07:22:22 curl.py[line:23] ERROR 编码出错: http://www.tingchina.com/, --
2013-05-09 07:22:22 index.py[line:101] INFO 执行完毕:

## python_newspider_yousheng.xml
<?xml version="1.0" encoding="utf-8"?>
<root>
  <site siteName="ffdy" readMode="normal" url="http://www.tingchina.com/" daily="0.3" charset="utf-8">
      <linkRules>
          <rule type="reg" value="(yousheng)" />
      </linkRules>
      <targets>
          <target name="info">
              <urlRules>
                  <rule type="reg" value=".*/yousheng/disp_(\d+).htm" />
              </urlRules>
              <model dataType="array">
					<field name="title">
						<parsers>
							<parser type="text" xpath="//h1/text()" />
						</parsers>
					</field>
					<field name="url">
						<parsers>
							<parser type="pageurl" xpath="//h1/text()" />
						</parsers>
					</field>
					<field name="detail_pic">
						<parsers>
							<parser type="text" xpath="//div[@class='detail_pic']/span/img/@src" />
						</parsers>
					</field>
                  <field name="director">
                      <parsers>
                          <parser type="array" xpath="//div[@class='detail_intro']/table/tr/td[text()='导演：']/../td[last()]/a/text()" code="u" />
                      </parsers>
                  </field>
                  <field name="leading">
                      <parsers>
                          <parser type="array" xpath="//div[@class='detail_intro']/table/tr/td[text()='主演：']/../td[last()]/a/text()" code="u" />
                      </parsers>
                  </field>
				  <field name="type">
                      <parsers>
                          <parser type="array" xpath="//div[@class='detail_intro']/table/tr/td[text()='类型：']/../td[last()]/a/text()" code="u" />
                      </parsers>
                  </field>
				  <field name="area">
                      <parsers>
                          <parser type="text" xpath="//div[@class='detail_intro']/table/tr/td[text()='国家/地区：']/../td[last()]/a/text()" code="u" />
                      </parsers>
                  </field>
				  <field name="show_day">
                      <parsers>
                          <parser type="text" xpath="//div[@class='detail_intro']/table/tr/td[text()='上映日期：']/../td[last()]/text()" code="u" />
                      </parsers>
                  </field>
				  <field name="comment">
                      <parsers>
                          <parser type="html" xpath="//div[@class='filmcontents']" reg="u" />
                      </parsers>
                  </field>
              </model>
          </target>
      </targets>
  </site>
</root>

## python_spiderling_.gitignore
/.idea
*backup
*.bak
*tpl.php
/.*
*.zip
*.pyc

## python_spiderling_327id.txt
{"comment": ["\u4eba\u5230\u4e2d\u5e74\u7684\u7b80\u5357\u4fca\u662f\u4e2a\u4e0d\u592a\u6210\u529f\u7684\u751f\u610f\u4eba\uff0c\u62e5\u6709\u4e00\u4e2a\u5178\u578b\u7684\u4e2d\u4ea7\u9636\u7ea7\u5bb6\u5ead\uff0c\u4ed6\u4e0e\u59bb\u5b50\u548c\u4e24\u4e2a\u5b69\u5b50\uff0c\u4ee5\u53ca\u5cb3\u6bcd\u4f4f\u5728\u53f0\u5317\u4e00\u95f4\u666e\u901a\u516c\u5bd3\u623f\u5b50\u91cc\u3002\u59bb\u5b50\u662f\u4e00\u4e2a\u8106\u5f31\u7684\u5973\u4eba\uff0c\u56e0\u4e3a\u6bcd\u4eb2\u7684\u75c5\u800c\u5fc3\u529b\u4ea4\u7601\u3002\u5c0f\u513f\u5b50\u513f\u5b50\u53ea\u670910\u5c81\u5374\u975e\u5e38\u65e9\u719f\uff0c\u559c\u6b22\u62cd\u6444\u4eba\u7684\u80cc\u5f71\u548c\u63d0\u95ee\u54f2\u5b66\u95ee\u9898\u3002\u5927\u5973\u513f\u662f\u4e00\u4e2a\u97f3\u4e50\u5b66\u751f\uff0c\u56e0\u9677\u5165\u4e86\u9519\u8bef\u7684\u7231\u60c5\u800c\u5f00\u59cb\u5c1d\u5230\u4eba\u751f\u7684\u82e6\u6da9\u3002\u4e00\u6b21\u5728\u5c0f\u8205\u5b50\u7684\u5a5a\u793c\u4e0a\uff0c\u7b80\u5357\u4fca\u9047\u5230\u4e86\u5e74\u8f7b\u65f6\u7684\u5973\u53cb\uff0c\u91cd\u65b0\u71c3\u8d77\u4e86\u4e45\u8fdd\u7684\u7231\u60c5\u2026\u2026", "2000\u5e74\uff0c\u662f\u4e9a\u6d32\u7535\u5f71\u5927\u4e30\u6536\u7684\u4e00\u5e74\uff0c5\u6708\u4e3e\u884c\u7684\u621b\u7eb3\u7535\u5f71\u8282\u51e0\u4e4e\u6210\u4e86\u201c\u4e9a\u6d32\u7535\u5f71\u7684\u8282\u65e5\u201d\uff0c\u5728\u8fd9\u6b21\u7535\u5f71\u8282\u4e0a\uff0c\u300a\u82b1\u6837\u5e74\u534e\u300b\u83b7\u5f97\u4e86\u6700\u4f73\u5f71\u7247\u3001\u6700\u4f73\u7537\u4e3b\u89d2\u4e24\u9879\u5927\u5956\uff0c\u5bfc\u6f14\u738b\u5bb6\u536b\u5927\u51fa\u98ce\u5934\uff0c\u800c\u300a\u4e00\u4e00\u300b\u7684\u5bfc\u6f14\u6768\u5fb7\u660c\u751a\u81f3\u6bd4\u738b\u5bb6\u536b\u66f4\u52a0\u5f15\u4eba\u6ce8\u76ee\uff0c\u56e0\u4e3a\u4ed6\u83b7\u5f97\u4e86\u5c5e\u4e8e\u5bfc\u6f14\u7684\u6700\u9ad8\u8363\u8a89\u2014\u2014\u6700\u4f73\u5bfc\u6f14\u5956\u3002\u4f17\u591a\u7684\u89c2\u4f17\u4e3a\u4ed6\u7684\u8fd9\u90e8\u590d\u6742\u3001\u7ec6\u81f4\u800c\u4f18\u96c5\u7684\u5f71\u7247\u800c\u503e\u5012\uff0c\u5e76\u5bf9\u534e\u8bed\u7535\u5f71\u4ea7\u751f\u4e86\u6781\u5927\u5174\u8da3\u3002\u300a\u4e00\u4e00\u300b\u4e5f\u6210\u529f\u5730\u8fdb\u5165\u4e86\u7f8e\u56fd\u5e02\u573a\uff0c\u6210\u4e3a\u88ab\u7f8e\u56fd\u666e\u901a\u89c2\u4f17\u6240\u770b\u5230\u7684\u7b2c\u4e00\u90e8\u6768\u5fb7\u660c\u5bfc\u6f14\u7684\u5f71\u7247\uff0c\u6210\u4e3a\u4ed6\u7684\u7535\u5f71\u5927\u6b65\u8fc8\u8fdb\u66f4\u5e7f\u9614\u7684\u56fd\u9645\u5e02\u573a\u7684\u7b2c\u4e00\u6b65\u3002"], "title": "\u4e00\u4e00", "url": "http://www.ffdy.cc/movie/10450.html", "leading": ["\u5434\u5ff5\u771f", "\u91d1\u71d5\u73b2", "Issei Ogata", "Kelly Lee (II)", "Jonathan Chang", "Hsi-Sheng Chen", "Su-Yun Ko", "Michael Tao", "\u8427\u6dd1\u614e", "Adrian Lin", "Pang Chang Yu", "Ru-Yun Tang", "Shu-Yuan Hsu", "Hsin-Yi Tseng", "\u9648\u4ee5\u6587", "Tang Congsheng"], "area": "\u4e2d\u56fd\u53f0\u6e7e", "detail_pic": "http://img.kankanba.com/cs/250X350/2/cbe3d833e70d0a44b26ff5cf639fdcc2.jpg", "director": ["\u6768\u5fb7\u660c"], "show_day": "2000-05-14 \u6cd5\u56fd", "type": ["\u5267\u60c5"]}

## python_spiderling___init__.py
__author__ = 'ShengYue'

## python_spiderling_cate.xml
<?xml version="1.0" encoding="utf-8"?>
<root>
  <site siteName="ffdy" url="http://www.ffdy.cc/" daily="0.3" log="ffdy.log" error="ffdy_error.log" charset="utf-8">
      <linkRules>
          <rule type="reg" value="(type/movie|movie)" />
      </linkRules>
      <targets>
          <target name="info">
              <urlRules>
                  <rule type="reg" value=".*/movie/(\d+).html" />
              </urlRules>
              <model dataType="array">
					<field name="title">
						<parsers>
							<parser type="text" xpath="//h1/text()" />
						</parsers>
					</field>
					<field name="url">
						<parsers>
							<parser type="pageurl" xpath="//h1/text()" />
						</parsers>
					</field>
					<field name="detail_pic">
						<parsers>
							<parser type="text" xpath="//div[@class='detail_pic']/span/img/@src" />
						</parsers>
					</field>
                  <field name="director">
                      <parsers>
                          <parser type="array" xpath="//div[@class='detail_intro']/table/tr/td[text()='导演：']/../td[last()]/a/text()" code="u" />
                      </parsers>
                  </field>
                  <field name="leading">
                      <parsers>
                          <parser type="array" xpath="//div[@class='detail_intro']/table/tr/td[text()='主演：']/../td[last()]/a/text()" code="u" />
                      </parsers>
                  </field>
				  <field name="type">
                      <parsers>
                          <parser type="array" xpath="//div[@class='detail_intro']/table/tr/td[text()='类型：']/../td[last()]/a/text()" code="u" />
                      </parsers>
                  </field>
				  <field name="area">
                      <parsers>
                          <parser type="text" xpath="//div[@class='detail_intro']/table/tr/td[text()='国家/地区：']/../td[last()]/a/text()" code="u" />
                      </parsers>
                  </field>
				  <field name="show_day">
                      <parsers>
                          <parser type="text" xpath="//div[@class='detail_intro']/table/tr/td[text()='上映日期：']/../td[last()]/text()" code="u" />
                      </parsers>
                  </field>
				  <field name="comment">
                      <parsers>
                          <parser type="html" xpath="//div[@class='filmcontents']" reg="u" />
                      </parsers>
                  </field>
              </model>
          </target>
      </targets>
  </site>
</root>

## python_spiderling_ffdy.log
Wed, 10 Apr 2013 20:10:52 log.py[line:21] INFO 网站读取完成

## python_spiderling_frame.py
# -*- coding: utf-8 -*-
__author__ = 'ShengYue'
import wx
from lxml import etree
from index import main


class DemoFrame(wx.Frame):
    def __init__(self):
        self.cateList = []
        wx.Frame.__init__(self, None, -1, u"load goods",size=(400,200))


        self.draw()

    def draw(self):
        self.panel = wx.Panel(self, -1)
        wx.StaticText(self.panel, -1, u"输入网址:", (15, 15))
        wx.StaticText(self.panel, -1, u"选择分类:", (15, 50))
        sampleList = self.getCate()
        self.getCateList(sampleList)

        self.cate = wx.ComboBox(self.panel, -1, self.cateList[0], (80, 50), wx.DefaultSize, self.cateList)

        self.text = wx.TextCtrl(self.panel,-1,value='',pos=(80,15),size=(300,24))

        self.button = wx.Button(self.panel, -1, u"抓取", pos=(15, 90))
        self.Bind(wx.EVT_BUTTON, self.OnClick, self.button)

    def OnClick(self, event):
        self.button.SetLabel(u'抓取中...')
        self.button.Enable(False)

        index = main()
        bool = index.init(self.text.GetValue(),self.cate.GetValue())
        if bool:
            self.button.SetLabel(u'抓取')
            self.button.Enable(True)

    def getCateList(self, cate):
        for s in cate:
            if type(s) == type([]):
                self.getCateList(s)
            else:
                self.cateList.append(s)


    def getCate(self, xpath=None, p=''):
        ret=[]
        if xpath == None:
            xtree = etree.parse(open('cate.xml'))
            cates = xtree.xpath('/root/cate')
            for cate in cates:
                row = cate.getchildren()
                if row :
                    ret.append(p+cate.get('name'))
                    ret.append(self.getCate(row, (p+cate.get('name')+'->')))
                else:
                    ret.append(p+cate.get('name'))
        else:
            for cate in xpath:
                row = cate.getchildren()
                if row:
                    ret.append(p+cate.get('name'))
                    ret.append(self.getCate(row,(p+cate.get('name')+'->')))
                else:
                    ret.append(p+cate.get('name'))
        return ret


app = wx.PySimpleApp()
frame = DemoFrame()
frame.Show()
app.MainLoop()

## python_spiderling_index.py
# -*- coding: utf-8 -*-
__author__ = 'ShengYue'
from lxml import etree
from os.path import join, getsize
from model.curl import curl
import csv
import re
import string
import re

header = ("*:通用商品类型","bn:商品货号","ibn:规格货号","col:分类","col:品牌","col:市场价","col:成本价","col:销售价","col:商品名称",
    "col:上架","col:规格","price:普通会员","price:高级会员","price:VIP会员","col:缩略图","col:图片文件","col:商品简介",
    "col:详细介绍","col:重量","col:单位","col:库存","col:货位","col:大图片","col:小图片" )

class main:
    def init(self, url, cate):
        self.curl = curl()

        html = self.curl.read(url)
        #fop = open('./html.html')
        #print getsize('./html.html')
        #fop.write(html)
        #try:
        #    html = fop.read(getsize('./html.html'))
        #    #html =  self.curl.mdcode(html)
        #finally:
        #    fop.close()
        #print html
        data = {}

        xtree = etree.HTML(html)

        # 标题
        title = xtree.xpath('//h1')
        data['name'] = string.strip(title[0].text)

        #价格
        price = xtree.xpath('//span[@id="ECS_SHOPPRICE"]')
        data['price'] = string.strip(price[0].text)

        #原价
        oldprice = xtree.xpath('//span[@class="xline"]')
        oldprice = re.findall(re.compile('[\d.]*'), string.strip(oldprice[0].text))
        data['oldprice'] = oldprice[1]


        #品牌
        #brand = xtree.xpath('//*[@id="ECS_FORMBUY"]/div/div[3]/span[2]/a')
        #data['brand'] = string.strip(brand[0].text)

        #货号
        huohao = xtree.xpath('//*[@id="ECS_FORMBUY"]/div/p/span[2]')
        data['ibn'] = string.strip(huohao[0].text)

        #大图片
        bimg = xtree.xpath('//*[@id="thumg"]')

        imgurl = string.strip(bimg[0].get('src'))
        data['bimg'] = self.curl.down(imgurl)

        #大图片
        data['simg'] = data['bimg']

        #详细
        dest = xtree.xpath('//div[@class="deszone"]/div[@class="zones"]')
        des = etree.tostring(dest[0], encoding='utf-8')
        #data['des'] = des
        reg = re.compile('\s',re.I)
        s = reg.subn(' ', des)
        data['des'] = s[0]
        data['des'] = data['des'].replace( 'src2','src')

        #print data['des']
        #下载所有图片
        #ireg = re.compile("<img\b[^<>]*?\bsrc[2\s\t\r\n]*=[\s\t\r\n]*['\"]?[\s\t\r\n]*(\?<imgUrl>[^\s\t\r\n'\"<>]*)[^<>]*?/?[\s\t\r\n]*>")
        imgreg = re.compile(r"<img\b[^<>]*?\bsrc[2\s\t\r\n]*=[\s\t\r\n]*['\"]?[\s\t\r\n]*([^\s\t\r\n'\"<>]*)[^<>]*?/?[\s\t\r\n]*>")

        ilist = imgreg.findall(data['des'])
        for img in ilist:
            try:
                print u'下载'+img
                new = self.curl.down(img)
                data['des'] = data['des'].replace( img,new)
            except:
                print u'下载失败'+img

        header = ("*:通用商品类型","bn:商品货号","ibn:规格货号","col:分类","col:品牌","col:市场价","col:成本价","col:销售价","col:商品名称",
                  "col:上架","col:规格","price:普通会员","price:高级会员","price:VIP会员","col:缩略图","col:图片文件","col:商品简介",
                  "col:详细介绍","col:重量","col:单位","col:库存","col:货位","col:大图片","col:小图片")
        #print cate
        #cate = ''
        # 拼字段
        row = (self.curl.mdcode('通用商品类型'), self.curl.mdcode(data['ibn']),'',self.curl.mdcode(cate),'',self.curl.mdcode(data['oldprice']),self.curl.mdcode(data['price']),self.curl.mdcode(data['price']),self.curl.mdcode(data['name']),'Y','',self.curl.mdcode(data['price']),self.curl.mdcode(data['price']),self.curl.mdcode(data['price']),self.curl.mdcode(data['simg']),self.curl.mdcode(data['bimg']),'',self.curl.mdcode(data['des']),'0.000','','','',self.curl.mdcode(data['bimg']),self.curl.mdcode(data['simg']))
        fop = open('tmp.csv','w+')
        writer = csv.writer(fop)
        writer.writerow(header)
        writer.writerow(row)
        print u'完成'
        fop.close()
        return True

#mai = main()
#mai.init()
#curls = curl()
#curls.down('http://www.msex.com/static/upload/1303121657296625.jpg',{})

## python_spiderling_model___init__.py
__author__ = 'ShengYue'

## python_spiderling_model_curl.py
# -*- coding: utf-8 -*-
__author__ = 'ShengYue'
import urllib2
import time
import random
import os.path
import urllib
from log import log
class curl:
    # 链接表
    urlList = {}

    req = None

    #字符编码处理
    def mdcode(self, data):
       # code = chardet.detect(data)
        #return data.decode(code['encoding'])
        for c in ('utf-8', 'gbk', 'gb2312'):
            try:
                return data.decode(c)
            except:
                pass
#
#        for c in ('utf-8', 'gbk', 'gb2312'):
#            try:
#                return data.encode( 'utf-8' )
#            except:
#                pass
#
#        return data
    #
    def getBaseUrl(self, base_url, link):
        print ''

    def read(self,url, config={}):
        try:
            url = urllib.unquote(url)


            header = {'User-Agent':'Mozilla/5.0 (Windows NT 6.1; rv:19.0) Gecko/20100101 Firefox/19.0'}
            self.req = urllib2.Request(url,headers=header)

            # 添加头信息
            for key in config:
                self.req.add_header(key, config[key])

            res = urllib2.urlopen(self.req)
            html = res.read()

            res.close()

            # code = chardet.detect(html)
            return self.mdcode(html)

        except:
            print u'获取HTML失败'
            return ''

    def getFileName(self):
        return time.strftime('%y%m%d%H%I',time.localtime(time.time()))+'-'+ str(random.randint(10,99))+'-'+str(random.randint(10,99))

    def down(self,url):

        ext = os.path.splitext(url)[-1]
        socket = urllib2.urlopen(url)
        data = socket.read()
        fileName =self.getFileName()+ext
        with open( './images/'+fileName, "wb") as jpg:
            jpg.write(data)
        socket.close()

        return '/uploads/images/'+fileName

## python_spiderling_model_db.py
# -*- coding: utf-8 -*-
__author__ = 'ShengYue'
import MySQLdb
import hashlib

db_host = '127.0.0.1'
db_name = 'root'
db_passwd = 'LEsc2008'
db_dbname = 'python'
db_port = 3306

class db:
    #self.conn = None
    def __init__(self):
        try:
            self.conn = MySQLdb.connect(host=db_host,user=db_name,passwd=db_passwd,port=db_port,use_unicode=True, charset='utf8')
            self.cur = self.conn.cursor()

            #print self.cur
            '''创建数据库 如果数据库不存在'''
            #count = self.cur.execute("create database if not exists %s", db_dbname)
            #print count
            self.conn.select_db(db_dbname)
            #self.cur.execute("SET NAMES utf8")

        except MySQLdb.Error,e:
            print "Mysql Error %d: %s" % (e.args[0], e.args[1])

    '''
    获取网站下连接
    '''
    def get_url(self, web_name):
        self.cur.execute("SELECT * FROM links WHERE web_name = %s AND status=0", web_name)
        return self.cur.fetchone()

    '''
    持久化连接
    '''
    def add_url(self, link, web_name):
        md5 = hashlib.md5(link).hexdigest()
        print link
        self.cur.execute("INSERT INTO links(`link`, `web_name`, `md5`)VALUES(%s, %s, %s)", [link, web_name, md5])
        self.conn.commit()

    '''
    检测连接是否存在
    '''
    def check_url(self, link):
        md5 = mdb5 = hashlib.md5(link).hexdigest()
        return self.cur.execute("SELECT * FROM links WHERE `md5`=%s", md5)
        #return self.cur.fetchone()

    def update_url(self, id):
        self.cur.execute("UPDATE links SET status = 1 WHERE id=%s", id);
        self.conn.commit()
        return True

    def add_star(self, director):
        #print director
        count = self.cur.execute("SELECT id FROM star WHERE name=%s", director)
        if count == 0:
            self.cur.execute("INSERT INTO star(name)VALUES(%s)",director)

            id = self.conn.insert_id()
            self.conn.commit()
            return str(id)
        else:
            star = self.cur.fetchone()
            return str(star[0])

    def addData(self,data):
        #print data
        ### 增加导演
        director = ''
        try:
            for daoyan in data['director']:
                director += ','+self.add_star(daoyan)
            director = director.strip(',')
        except:
            director = ''

        ### 增加主演
        leading = ''
        try:
            for lead in data['leading']:
                leading += ','+self.add_star(lead)
            leading = leading.strip(',')
        except:
            leading = ''

        ### 简介
        comment = ''
        try:
            for comm in data['comment']:
                comment += comm
        except:
            comment = '';
        #标题， 图片, 链接
        inserData = [data['title'], data['detail_pic'],data['url'],director,leading,data['area'],data['show_day'],comment]
        self.add_movie(inserData)

    def add_movie(self,insertData):
        self.cur.execute("INSERT INTO movie(`title`,`img`,`url`,`director`,`leading`,`area`,`show_day`,`comment`)VALUES(%s, %s, %s, %s, %s, %s, %s, %s)", insertData)
        self.conn.commit()

    '''
    关闭数据库
    '''
    def close(self):
        try:
            self.cur.close()
            self.conn.close()
        except:
            pass

## python_spiderling_model_log.py
# -*- coding: utf-8 -*-
__author__ = 'ShengYue'
import logging


class log:
    @staticmethod
    def read(file):
        try:
            fopen = open(file, 'r')
            data = fopen.read()
            fopen.close()
            return data
        except:
            pass

    @staticmethod
    def write(file, logs):
        try:
            logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',datefmt='%a, %d %b %Y %H:%M:%S', filename=file,filemode='w')
            logging.info(logs)
        except Exception, e:
            print '-----------', Exception, e
            pass

## python_spiderling_model_match.py
# -*- coding: utf-8 -*-
__author__ = 'ShengYue'
import lxml
import lxml.etree
from lxml.html.clean import Cleaner
import re
class match:
    '''
    修复HTML
    创建XPATH对象
    '''
    def __init__(self, html, url):
        cleaner = Cleaner(style=True, scripts=True,page_structure=False, safe_attrs_only=False)
        html = cleaner.clean_html(html)
        del cleaner

        self.etree = lxml.html.fromstring(html)
        self.etree.make_links_absolute( base_url=url, resolve_base_href=True)

    '''
    获取所有可以匹配链接
    '''
    def get_all_links(self, link_match, url):
        links = []
        all_links = self.etree.xpath('//a')

        for match in link_match:
            regLink = re.compile(url+match.get('value'))
            for a in all_links:
                try:
                    href = a.get('href')
                except:
                    continue;
                if regLink.match(href) != None:
                    links.append(href)
                #else:
                    #print '失败', a.get('href')
        del all_links
        return links

    '''
    获取所有需要查询的信息
    '''
    def get_match_info(self, match, url=None):

        try:
            data = {}
            for param in match:
                name = param.get('name')
                ntree =  lxml.html.fromstring(lxml.etree.tostring(param))
                #
                node = ntree.xpath('//parsers/parser')[0]
                xpath = node.get('xpath')

                infoxpath = self.etree.xpath(xpath)
                try:
                    nodetype = node.get('type')

                    if nodetype == 'text':
                        data[name] = infoxpath[0].strip()

                    elif nodetype == 'array':
                        arr = []
                        for item in infoxpath:
                            if item.strip() == '':
                                continue;
                            arr.append(item.strip())
                        data[name] = arr

                    elif nodetype == 'pageurl':
                        data[name] = url

                    elif nodetype == 'html':
                        infohtml = lxml.etree.tostring(infoxpath[0],encoding="utf-8",method="html")
                        infohtml = infohtml.strip()
                        reg = re.compile(r'<[!/]?\b(?!(\bpre\b|\bli\b|\bp\b|\bbr\b|\bspan\b|\bimg\b))+\b\s*[^>]*>|[\s\r\n\t]+')
                        infohtml = reg.sub(' ',infohtml).strip()
                        data[name] = infohtml

                except:
                    data[name] = ''
                    print name,u'读取不出来'
                    continue
        except:
            print xpath,u'读取不出来'

        return data

    def match_tiantang(self, match, url):
        try:
            data = {}
            for param in match:
                name = param.get('name')
                ntree =  lxml.html.fromstring(lxml.etree.tostring(param))
                #
                node = ntree.xpath('//parsers/parser')[0]
                xpath = node.get('xpath')

                infoxpath = self.etree.xpath(xpath)

                try:
                    nodetype = node.get('type')
                    if nodetype == 'text':
                        data[name] = infoxpath[0].strip()
                    elif nodetype == 'array':
                        arr = []
                        for item in infoxpath:
                            if item.strip() == '':
                                continue;
                            arr.append(item.strip())
                        data[name] = arr

                    elif nodetype == 'pageurl':
                        data[name] = url

                    elif nodetype == 'html':
                        infohtml = lxml.etree.tostring(infoxpath[0],encoding="utf-8",method="html")
                        infohtml = infohtml.strip()
                        reg = re.compile(r'<[!/]?\b(?!(\bpre\b|\bli\b|\bp\b|\bbr\b))+\b\s*[^>]*>|[\s\r\n\t]+')
                        infohtml = reg.sub(' ',infohtml).strip()
                        data[name] = infohtml
                except:
                    data[name] = ''
                    print name,u'读取不出来'
                    continue
        except:
            #log.write('system.log',xpath+u'读取不出来')
            print xpath,u'读取不出来'

    '''
   获取所有需要查询的信息
   '''
    def get_match_info_test(self, match, url=None):

        try:
            data = {}
            for param in match:
                name = param.get('name')
                ntree =  lxml.html.fromstring(lxml.etree.tostring(param))
                #
                node = ntree.xpath('//parsers/parser')[0]
                xpath = node.get('xpath')

                infoxpath = self.etree.xpath(xpath)
                try:
                    nodetype = node.get('type')
                    if nodetype == 'text':
                        if infoxpath != []:
                            data[name] = infoxpath[0].strip()
                        else:
                            data[name] = ''
                    elif nodetype == 'array':
                        arr = []
                        if infoxpath == []:
                            data[name] = arr
                        else:
                            for item in infoxpath:
                                if item.strip() == '':
                                    continue;
                                arr.append(item.strip())
                            data[name] = arr

                    elif nodetype == 'pageurl':
                        data[name] = url

                    elif nodetype == 'html':
                        if infoxpath == []:
                            data[name] = ''
                        else:
                            infohtml = lxml.etree.tostring(infoxpath[0],encoding="utf-8",method="html")
                            infohtml = infohtml.strip()
                            reg = re.compile(r'<[!/]?\b(?!(\bpre\b|\bli\b|\bp\b|\bbr\b))+\b\s*[^>]*>|[\s\r\n\t]+')
                            infohtml = reg.sub(' ',infohtml).strip()
                            data[name] = infohtml
                except:
                    data[name] = ''
                    print name,u'读取不出来'
                    continue
        except:
            #log.write('system.log',xpath+u'读取不出来')
            print xpath,u'读取不出来'

        return data

#        print self.etree.xpath('//h1/text()')[0]
#        print self.etree.xpath('//h1/em/text()')[0]
#        com = self.etree.xpath("//div[@class='filmcontents']/node()/text()|//div[@class='filmcontents']/text()")
#        s = ''
#        for c in com:
#            s = s+ c
#        print s
#        #规则学习
#        d = self.etree.xpath(u"//div[@class='detail_intro']/table/tr/td[text()='上映日期：']/../td[last()]/text()")
#        print d[0]
    def close(self):
        del self.etree


## python_spiderling_myapp.log
Mon, 08 Apr 2013 22:17:20 log.py[line:25] INFO 网站读取完成

## python_spiderling_setup.py
# mysetup.py
from distutils.core import setup
import py2exe
setup(options = {"py2exe":{"dll_excludes":["MSVCP90.dll",'lxml.dll'], }},windows=[{"script": "frame.py"}])

## python_spiderling_spiderling.py
# -*- coding: utf-8 -*-
__author__ = 'ShengYue'
from lxml import etree
from model.db import db
from model.curl import curl
from model.match import match
from model.log import log
import re
import time
i = 1
'''
爬虫
'''
class spiderling:

    def __init__(self, config):
        self.i = 0
        try:
            configtree = etree.ElementTree(file=config)

            # 获取网站属性
            sites = configtree.xpath('//site')
            site = sites[0]
            self.url = site.get('url')


            self.site_name = site.get('siteName')
            self.daily = float(site.get('daily'))
            self.log = site.get('log')
            self.errlog = site.get('error')

            self.linkRule = configtree.xpath('//linkRules/rule')
            self.infoUrlRule = configtree.xpath('//urlRules/rule')
            self.infoRule = configtree.xpath('//targets/target/model/field')

        except:
            log.write('error.log', u'配置文件读取错误')

        self.db = db()

    def run(self, url):
        #休息时间
        time.sleep(self.daily)

        if url == None:
            info = self.db.get_url(self.site_name)

            if info == None:
                log.write(self.log, u'网站读取完成')
                return 0;

            self.db.update_url(info[0])
            url = info[1]

        gurl = curl()
        html = gurl.read(url)
        try:
            if html.strip() == '':
                s = None;
                self.run(s)

        except Exception, e:
            log.write(self.log, url+u' html 获取失败'+Exception+e)
            s = None;
            self.run(s)

        #print html
        self.xtree = match(html, url)
        links = self.xtree.get_all_links(self.linkRule, self.url)

        '''把获取到的连接持久化'''
        for link in links:

            if self.db.check_url(link) == 0:
                self.db.add_url(link, self.site_name)

        '''如果当前连接是详细页则正则所需内容'''
        #for infoxpath in self.infoRule:
        #self.xtree.get_match_info(self.infoRule)

        regInfoLink = re.compile(self.infoUrlRule[0].get('value'))

        if regInfoLink.match(url) <> None:
            self.i = self.i+1
            data = self.xtree.get_match_info(self.infoRule, url)

            self.db.addData(data)
#
#            file_object = open(str(self.i)+'id.txt', 'w')
#            file_object.write(json.dumps(data))
#            file_object.close()
#
            #print json.dumps(data)
        else:
            print u'不是详细也不需要解析'
        s = None
        self.run(s)

    def close(self):
        try:
            self.xtree.close()
        except:
            pass
        try:
            self.db.close()
        except:
            pass


sp = spiderling('cate.xml')

#return
sp.run(sp.url)
#sp.run('http://www.ffdy.cc/movie/35622.html')
sp.close()

#import sqlite3 #导入模块
#cx = sqlite3.connect("d:\\test.db")
#
#cu=cx.cursor()
##cu.execute("""create table catalog ( id integer primary key, pid integer, name varchar(10) UNIQUE )""")
##
##cu.execute(u"insert into catalog values(2, 0, '哈哈')")
##cu.execute(u"insert into catalog values(3, 0, '我是中国')")
##cx.commit()
#
#cu.execute("select * from catalog")
#d =  cu.fetchall()
#for s in d:
#    print s[2]
#cu.close()
#cx.close()

## python_spiderling_test.py
# -*- coding: utf-8 -*-
__author__ = 'ShengYue'
from lxml import etree
from model.db import db
from model.curl import curl
from model.match import match
import re
import time
import lxml
i = 1
'''
爬虫
'''
class spiderling:

    def __init__(self, config):
        self.i = 0
        configtree = etree.ElementTree(file=config)

        site = configtree.xpath('//site')
        self.url = site[0].get('url')
        self.site_name = site[0].get('siteName')

        self.linkRule = configtree.xpath('//linkRules/rule')
        self.infoUrlRule = configtree.xpath('//urlRules/rule')
        self.infoRule = configtree.xpath('//targets/target/model/field')


        #print self.linkRule[0].get('value')
        self.db = db()

    def run(self, url):

        time.sleep(0.3)
        if url == None:
            info = self.db.get_url(self.site_name)

            if info == None:
                print u'爬虫完成'
                return 0;

            self.db.update_url(info[0])
            url = info[1]

        gurl = curl()
        html = gurl.read(url)

        try:
            if html.strip() == '':
                s = None;
                self.run(s)

        except:
            s = None;
            self.run(s)


        #print html
        self.xtree = match(html, url)
        d = self.xtree.etree.xpath("//div[@class='filmcontents']")
        sd = etree.tostring(d[0],encoding="utf-8",method="html")
        sd = sd.strip()
        print sd
        print '================================='
        reg = re.compile(r'<[!/]?\b(?!(\bpre\b|\bli\b|\bp\b|\bbr\b))+\b\s*[^>]*>|[\s\r\n\t]+')
        ds = reg.sub(' ',sd).strip()
        print ds
        return
        links = self.xtree.get_all_links(self.linkRule, self.url)

        '''把获取到的连接持久化'''
        for link in links:

            if self.db.check_url(link) == 0:
                self.db.add_url(link, self.site_name)

        '''如果当前连接是详细页则正则所需内容'''
        #for infoxpath in self.infoRule:
        #self.xtree.get_match_info(self.infoRule)

        regInfoLink = re.compile(self.infoUrlRule[0].get('value'))

        if regInfoLink.match(url) <> None:
            self.i = self.i+1

            print u'是详细页需要解析', str(self.i)
            data = self.xtree.get_match_info_test(self.infoRule, url)
            print u'插入数据', url
            self.db.addData(data)
        #
        #            file_object = open(str(self.i)+'id.txt', 'w')
        #            file_object.write(json.dumps(data))
        #            file_object.close()
        #
        #print json.dumps(data)
        else:
            print u'不是详细也不需要解析'
        s = None
        self.run(s)

    def close(self):
        self.xtree.close()
        self.db.close()


#sp = spiderling('cate.xml')
#sp.run(sp.url)
#sp.run('http://www.ffdy.cc/movie/35622.html')
#sp.close()

url = 'http://www.dytt8.net/html/gndy/dyzz/20130407/41866.html'
curls = curl()
html = curls.read(url,{})
xtree = match(html, url)
content = xtree.etree.xpath('//div[@id="Zoom"]')

infohtml = lxml.etree.tostring(content[0],encoding="utf-8",method="html")
infohtml = infohtml.strip()
reg = re.compile(r'<[!/]?\b(?!(\bpre\b|\bli\b|\bp\b|\bbr\b|\bspan\b|\bimg\b|\ba\b))+\b\s*[^>]*>')
infohtml = reg.sub(' ',infohtml).strip()

pattern = re.compile(r'◎年　　代　([^<]*)')
ds= pattern.search(html)
print
if(ds==[]):
    print u'找不到'
else:
    print ds[0]

print infohtml

## python_spiderling_tiantang.xml
<?xml version="1.0" encoding="utf-8"?>
<root>
  <site siteName="ffdy" url="http://www.ffdy.cc/" daily="0.3" log="ffdy.log" error="ffdy_error.log" charset="utf-8">
      <linkRules>
          <rule type="reg" value="(type/movie|movie)" />
      </linkRules>
      <targets>
          <target name="info">
              <urlRules>
                  <rule type="reg" value=".*/movie/(\d+).html" />
              </urlRules>
              <model dataType="array">
					<field name="title">
						<parsers>
							<parser type="text" xpath="//h1/text()" />
						</parsers>
					</field>
					<field name="url">
						<parsers>
							<parser type="pageurl" xpath="//h1/text()" />
						</parsers>
					</field>
					<field name="detail_pic">
						<parsers>
							<parser type="text" xpath="//div[@class='detail_pic']/span/img/@src" />
						</parsers>
					</field>
                  <field name="director">
                      <parsers>
                          <parser type="array" xpath="//div[@class='detail_intro']/table/tr/td[text()='导演：']/../td[last()]/a/text()" code="u" />
                      </parsers>
                  </field>
                  <field name="leading">
                      <parsers>
                          <parser type="array" xpath="//div[@class='detail_intro']/table/tr/td[text()='主演：']/../td[last()]/a/text()" code="u" />
                      </parsers>
                  </field>
				  <field name="type">
                      <parsers>
                          <parser type="array" xpath="//div[@class='detail_intro']/table/tr/td[text()='类型：']/../td[last()]/a/text()" code="u" />
                      </parsers>
                  </field>
				  <field name="area">
                      <parsers>
                          <parser type="text" xpath="//div[@class='detail_intro']/table/tr/td[text()='国家/地区：']/../td[last()]/a/text()" code="u" />
                      </parsers>
                  </field>
				  <field name="show_day">
                      <parsers>
                          <parser type="text" xpath="//div[@class='detail_intro']/table/tr/td[text()='上映日期：']/../td[last()]/text()" code="u" />
                      </parsers>
                  </field>
				  <field name="comment">
                      <parsers>
                          <parser type="html" xpath="//div[@class='filmcontents']" reg="u" />
                      </parsers>
                  </field>
              </model>
          </target>
      </targets>
  </site>
</root>
	<?xml version="1.0" encoding="UTF-8"?>
	<project version="4">
	<component name="Encoding" useUTFGuessing="true" native2AsciiForPropertiesFiles="false" />
	</project>
	<?xml version="1.0" encoding="UTF-8"?>
	<project version="4">
	<component name="ProjectRootManager" version="2" project-jdk-name="Python 2.7.3 (C:/Python27/python.exe)" project-jdk-type="Python SDK" />
	</project>
	<?xml version="1.0" encoding="UTF-8"?>
	<project version="4">
	<component name="ProjectModuleManager">
	<modules>
	<module fileurl="file://$PROJECT_DIR$/.idea/python.iml" filepath="$PROJECT_DIR$/.idea/python.iml" />
	</modules>
	</component>
	</project>
	<?xml version="1.0" encoding="UTF-8"?>
	<project version="4">
	<component name="PyDocumentationSettings">
	<option name="myDocStringFormat" value="Plain" />
	</component>
	</project>
	<?xml version="1.0" encoding="UTF-8"?>
	<module type="PYTHON_MODULE" version="4">
	<component name="NewModuleRootManager">
	<content url="file://$MODULE_DIR$" />
	<orderEntry type="inheritedJdk" />
	<orderEntry type="sourceFolder" forTests="false" />
	</component>
	</module>
	<component name="DependencyValidationManager">
	<state>
	<option name="SKIP_IMPORT_STATEMENTS" value="false" />
	</state>
	</component>
	<?xml version="1.0" encoding="UTF-8"?>
	<project version="4">
	<component name="TestRunnerService">
	<option name="projectConfiguration" value="Unittests" />
	<option name="PROJECT_TEST_RUNNER" value="Unittests" />
	</component>
	</project>
	<?xml version="1.0" encoding="UTF-8"?>
	<project version="4">
	<component name="VcsDirectoryMappings">
	<mapping directory="" vcs="" />
	</component>
	</project>
	<component name="InspectionProjectProfileManager">
	<settings>
	<option name="PROJECT_PROFILE" />
	<option name="USE_PROJECT_PROFILE" value="false" />
	<version value="1.0" />
	</settings>
	</component>
	<?xml version="1.0" encoding="UTF-8"?>
	<project version="4">
	<component name="ChangeListManager">
	<list default="true" id="7dc9904d-98fe-4bed-8b89-bf5f25178704" name="Default" comment="" />
	<ignored path="newspider.iws" />
	<ignored path=".idea/workspace.xml" />
	<option name="TRACKING_ENABLED" value="true" />
	<option name="SHOW_DIALOG" value="false" />
	<option name="HIGHLIGHT_CONFLICTS" value="true" />
	<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
	<option name="LAST_RESOLUTION" value="IGNORE" />
	</component>
	<component name="ChangesViewManager" flattened_view="true" show_ignored="false" />
	<component name="CreatePatchCommitExecutor">
	<option name="PATCH_PATH" value="" />
	</component>
	<component name="DaemonCodeAnalyzer">
	<disable_hints />
	</component>
	<component name="ExecutionTargetManager" SELECTED_TARGET="default_target" />
	<component name="ProjectFrameBounds">
	<option name="x" value="-8" />
	<option name="y" value="-8" />
	<option name="width" value="1936" />
	<option name="height" value="1066" />
	</component>
	<component name="ProjectLevelVcsManager" settingsEditedManually="false">
	<OptionsSetting value="true" id="Add" />
	<OptionsSetting value="true" id="Remove" />
	<OptionsSetting value="true" id="Checkout" />
	<OptionsSetting value="true" id="Update" />
	<OptionsSetting value="true" id="Status" />
	<OptionsSetting value="true" id="Edit" />
	<ConfirmationsSetting value="0" id="Add" />
	<ConfirmationsSetting value="0" id="Remove" />
	</component>
	<component name="ProjectReloadState">
	<option name="STATE" value="0" />
	</component>
	<component name="ProjectView">
	<navigator currentView="ProjectPane" proportions="" version="1" splitterProportion="0.5">
	<flattenPackages />
	<showMembers />
	<showModules />
	<showLibraryContents />
	<hideEmptyPackages />
	<abbreviatePackageNames />
	<autoscrollToSource />
	<autoscrollFromSource />
	<sortByType />
	</navigator>
	<panes>
	<pane id="ProjectPane">
	<subPane>
	<PATH>
	<PATH_ELEMENT>
	<option name="myItemId" value="newspider" />
	<option name="myItemType" value="com.intellij.ide.projectView.impl.nodes.ProjectViewProjectNode" />
	</PATH_ELEMENT>
	</PATH>
	</subPane>
	</pane>
	<pane id="Scope" />
	</panes>
	</component>
	<component name="PropertiesComponent">
	<property name="WebServerToolWindowFactoryState" value="false" />
	<property name="last_opened_file_path" value="$PROJECT_DIR$/.." />
	</component>
	<component name="RunManager">
	<list size="0" />
	</component>
	<component name="ShelveChangesManager" show_recycled="false" />
	<component name="TaskManager">
	<task active="true" id="Default" summary="Default task">
	<created>1366584457012</created>
	<updated>1366584457012</updated>
	</task>
	<servers />
	</component>
	<component name="ToolWindowManager">
	<frame x="-8" y="-8" width="1936" height="1066" extended-state="0" />
	<editor active="false" />
	<layout>
	<window_info id="Changes" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
	<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
	<window_info id="Database" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
	<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="true" content_ui="tabs" />
	<window_info id="Project" active="true" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" weight="0.2497332" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
	<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
	<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
	<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
	<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
	<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
	<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
	<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
	<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
	<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
	<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
	<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
	<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
	</layout>
	</component>
	<component name="VcsContentAnnotationSettings">
	<option name="myLimit" value="2678400000" />
	</component>
	<component name="VcsManagerConfiguration">
	<option name="OFFER_MOVE_TO_ANOTHER_CHANGELIST_ON_PARTIAL_COMMIT" value="true" />
	<option name="CHECK_CODE_SMELLS_BEFORE_PROJECT_COMMIT" value="false" />
	<option name="CHECK_NEW_TODO" value="true" />
	<option name="myTodoPanelSettings">
	<value>
	<are-packages-shown value="false" />
	<are-modules-shown value="false" />
	<flatten-packages value="false" />
	<is-autoscroll-to-source value="false" />
	</value>
	</option>
	<option name="PERFORM_UPDATE_IN_BACKGROUND" value="true" />
	<option name="PERFORM_COMMIT_IN_BACKGROUND" value="true" />
	<option name="PERFORM_EDIT_IN_BACKGROUND" value="true" />
	<option name="PERFORM_CHECKOUT_IN_BACKGROUND" value="true" />
	<option name="PERFORM_ADD_REMOVE_IN_BACKGROUND" value="true" />
	<option name="PERFORM_ROLLBACK_IN_BACKGROUND" value="false" />
	<option name="CHECK_LOCALLY_CHANGED_CONFLICTS_IN_BACKGROUND" value="false" />
	<option name="CHANGED_ON_SERVER_INTERVAL" value="60" />
	<option name="SHOW_ONLY_CHANGED_IN_SELECTION_DIFF" value="true" />
	<option name="CHECK_COMMIT_MESSAGE_SPELLING" value="true" />
	<option name="DEFAULT_PATCH_EXTENSION" value="patch" />
	<option name="SHORT_DIFF_HORISONTALLY" value="true" />
	<option name="SHORT_DIFF_EXTRA_LINES" value="2" />
	<option name="SOFT_WRAPS_IN_SHORT_DIFF" value="true" />
	<option name="INCLUDE_TEXT_INTO_PATCH" value="false" />
	<option name="INCLUDE_TEXT_INTO_SHELF" value="false" />
	<option name="SHOW_FILE_HISTORY_DETAILS" value="true" />
	<option name="SHOW_VCS_ERROR_NOTIFICATIONS" value="true" />
	<option name="FORCE_NON_EMPTY_COMMENT" value="false" />
	<option name="CLEAR_INITIAL_COMMIT_MESSAGE" value="false" />
	<option name="LAST_COMMIT_MESSAGE" />
	<option name="MAKE_NEW_CHANGELIST_ACTIVE" value="false" />
	<option name="OPTIMIZE_IMPORTS_BEFORE_PROJECT_COMMIT" value="false" />
	<option name="CHECK_FILES_UP_TO_DATE_BEFORE_COMMIT" value="false" />
	<option name="REFORMAT_BEFORE_PROJECT_COMMIT" value="false" />
	<option name="REFORMAT_BEFORE_FILE_COMMIT" value="false" />
	<option name="FILE_HISTORY_DIALOG_COMMENTS_SPLITTER_PROPORTION" value="0.8" />
	<option name="FILE_HISTORY_DIALOG_SPLITTER_PROPORTION" value="0.5" />
	<option name="ACTIVE_VCS_NAME" />
	<option name="UPDATE_GROUP_BY_PACKAGES" value="false" />
	<option name="UPDATE_GROUP_BY_CHANGELIST" value="false" />
	<option name="SHOW_FILE_HISTORY_AS_TREE" value="false" />
	<option name="FILE_HISTORY_SPLITTER_PROPORTION" value="0.6" />
	</component>
	<component name="XDebuggerManager">
	<breakpoint-manager />
	</component>
	</project>