Skip to content

Instantly share code, notes, and snippets.

import re
import itertools
from bs4 import BeautifulSoup
import requests
import scrapy
def xpath_soup(element):
"""
Generate xpath from BeautifulSoup4 element
:param element: BeautifulSoup4 element.
from PyQt5 import QtWidgets, QtCore
import clientui
import requests
from datetime import datetime
class ChatWindow(QtWidgets.QMainWindow, clientui.Ui_MainWindow):
def __init__(self):
super().__init__()
self.setupUi(self)
self.pushButton.pressed.connect(self.sendMessage)
@ra2003
ra2003 / xml2dict.py
Created May 20, 2020 14:06 — forked from Saberko/xml2dict.py
XML To Dict
# from: http://stackoverflow.com/questions/2148119/how-to-convert-an-xml-string-to-a-dictionary-in-python
class XmlListConfig(list):
def __init__(self, aList):
for element in aList:
if element:
# treat like dict
if len(element) == 1 or element[0].tag != element[1].tag:
self.append(XmlDictConfig(element))
# treat like list
elif element[0].tag == element[1].tag:
@ra2003
ra2003 / xpath_soup.py
Created May 18, 2020 13:40 — forked from ergoithz/xpath_soup.py
Generate unique XPATH for BeautifulSoup element
#!/usr/bin/python
# -*- coding: utf-8 -*-
def xpath_soup(element):
# type: (typing.Union[bs4.element.Tag, bs4.element.NavigableString]) -> str
"""
Generate xpath from BeautifulSoup4 element.
:param element: BeautifulSoup4 element.
:type element: bs4.element.Tag or bs4.element.NavigableString
@ra2003
ra2003 / print_dir_tree.py
Created May 18, 2020 12:34 — forked from evandrix/print_dir_tree.py
Print Directory Tree Structure (with/without files)
#! /usr/bin/env python
# tree.py
#
# Written by Doug Dahms
#
# Prints the tree structure for the path specified on the command line
from os import listdir, sep
from os.path import abspath, basename, isdir
#! /usr/bin/env python
# tree.py
#
# Written by Doug Dahms
#
# Prints the tree structure for the path specified on the command line
from os import listdir, sep
from os.path import abspath, basename, isdir
class Node(object):
"""
Tree node: left and right child + data which can be any object
"""
def __init__(self, data):
"""
Node Constructor
@param data node data object
"""
self.left = None
@ra2003
ra2003 / lxml example
Created May 15, 2020 14:49 — forked from Artem-Mamchych/lxml example
Пример использования lxml
# coding: utf8
xml = '''<?xml version="1.0" encoding="UTF-8"?>
<soft>
<os>
<item name="linux" dist="ubuntu">
This text about linux
</item>
<item name="mac os">
Apple company
@ra2003
ra2003 / search_templates
Created May 12, 2020 15:14 — forked from bobpoekert/search_templates
A machine readable list of search engines. Each line is a query template, where all occurrences of {searchTerms} should be replaced with the search query. These were scraped off mycroft.mozdev.org, and tested that they at least respond with a page that contains the input query (to remove pages that lie behind login walls).
http://0-www.sciencedirect.com.www.consuls.org/science?_ob=QuickSearchURL&_method=submitForm&_acct=C000050221&md5=0c4b6db32507e4a332b2aa6dd47a65f4&qs_all={searchTerms}&qs_author=&qs_title=&qs_vol=&qs_issue=&qs_pages=&x=34&y=15
http://0-dictionary.oed.com.library.utulsa.edu/cgi/findword?query_type=word&queryword={searchTerms}
http://100.daum.net/search/search.do?query={searchTerms}
http://1000corks.com/search?st={searchTerms}&src=myc
http://11870.com/konsulto/{searchTerms}
http://1000memories.com/search?q={searchTerms}
http://130.219.35.129/search?q={searchTerms}&btnG=Google+Search&entqr=0&output=xml_no_dtd&sort=date%3AD%3AL%3Ad1&client=default_frontend&ud=1&oe=UTF-8&ie=UTF-8&proxystylesheet=default_frontend&site=default_collection
http://1337x.org/search/{searchTerms}/0/
http://11888.ote.gr/web/guest/white-pages/search?who={searchTerms}&where=
http://140.111.34.46/cgi-bin/newDict/dict.sh?idx=dict.idx&cond={searchTerms}&pieceLen=50&fld=1&cat=&imgFont=1
@ra2003
ra2003 / gist:1dac9f2a991f1b6138ce32899eaa3ecb
Created April 26, 2020 20:41 — forked from doloopwhile/gist:8c6ec7dd4703e8a44e559411cb2ea221
Why I decided to discontinue PyExecJS and Python-CoffeeScript

What are PyExecJS and Python-CoffeeScript?

They are Python libraries that I developed in 2011.

  • PyExecJS: Automatically picks the best runtime available to evaluate your JavaScript program.
  • Python-CoffeeScript: A bridge to the JS CoffeeScript compiler.

They are ports of the Ruby gems with the same names (execjs and ruby-coffee-script).

Their aim was to compile CoffeeScript code on Windows XP.