Rami Al-Rfou (aboSamoor), GitHub gists
@aboSamoor
aboSamoor / WIKI_NS_Exatractor
Created May 20, 2013 00:21
A script to extract namespace names (tags) for different Wikipedias. Run the script from the root directory of https://gerrit.wikimedia.org/r/p/mediawiki/core.git and replace MESSAGES.php before use.
#!/usr/bin/php
<?php
# A script to extract namespace names (tags) for different Wikipedias.
#
# The script should run in the root directory of
# https://gerrit.wikimedia.org/r/p/mediawiki/core.git
#
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""template.py: Description of what the module does."""
from argparse import ArgumentParser
import logging
import sys
from io import open
from os import path
@aboSamoor
aboSamoor / results.csv
Created February 5, 2012 23:30
Results of the benchmarks; sizes are in bytes. Numbers like 1e7 give the size of the corpus.
Bench 1
Document0 = [token(w, t1, t2, t3, t4), token(...]
Document1 = [w_1, w_2, ...], [t1_1, t1_2, ...], [t2_1, t2_2, ...], ...
Document2 = [(w, t1, t2, t3, t4), ...]
Document3 = [{'w':w, 't1': t1, 't2': t2, 't3':t3, 't4':t4},...]
Heavy 1e7
Python
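The four layouts listed in the preview can be sketched in Python as follows; this is an illustrative reconstruction (the Token class and the w, t1..t4 field names come from the preview, everything else is assumed and is not the gist's actual code):
# Hypothetical constructors for the four document layouts benchmarked above.
from collections import namedtuple

Token = namedtuple('Token', ['w', 't1', 't2', 't3', 't4'])

def doc0(rows):
    # Document0: a list of token objects.
    return [Token(*r) for r in rows]

def doc1(rows):
    # Document1: parallel lists, one list per field.
    return [list(col) for col in zip(*rows)]

def doc2(rows):
    # Document2: a list of plain tuples.
    return [tuple(r) for r in rows]

def doc3(rows):
    # Document3: a list of dicts keyed by field name.
    return [dict(zip(('w', 't1', 't2', 't3', 't4'), r)) for r in rows]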
@aboSamoor
aboSamoor / bench.py
Created February 5, 2012 23:28
Experiments to model text processing using Python
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""template.py: Description of what the module does."""
from optparse import OptionParser
import logging
import os
from random import randint
from time import time
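The preview stops at the imports; a minimal sketch of how such representations could be timed is shown below (the corpus generator and the sizes are assumptions, not the gist's actual benchmark):
# Hypothetical timing harness for comparing document representations.
from random import randint
from time import time

FIELDS = ('w', 't1', 't2', 't3', 't4')

def random_rows(n):
    # Assumed corpus: n rows of one word id plus four tag ids.
    return [tuple(randint(0, 50000) for _ in FIELDS) for _ in range(n)]

def bench(build, rows):
    # Time how long it takes to build one representation of the corpus.
    start = time()
    build(rows)
    return time() - start

rows = random_rows(int(1e5))
print('tuples', bench(lambda rs: [tuple(r) for r in rs], rows))
print('dicts ', bench(lambda rs: [dict(zip(FIELDS, r)) for r in rs], rows))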
@aboSamoor
aboSamoor / Erros of the current WordTreebank Tokenizer
Created February 5, 2012 22:14
Errors of the current WordTreebankTokenizer. The first line gives the sentence number and the sizes of the two outputs; when the sizes are equal, the index of the first character at which they differ is printed instead. The second line is the reference tokenization; the third line is the tokenizer output.
58 ('14', '16')
['It', 'employs', '2,700', 'people', 'and', 'has', 'annual', 'revenue', 'of', 'about', '$', '370', 'million', '.']
['It', 'employs', '2', ',', '700', 'people', 'and', 'has', 'annual', 'revenue', 'of', 'about', '$', '370', 'million', '.']
101 ('12', '14')
['A', 'full', ',', 'four-color', 'page', 'in', 'Newsweek', 'will', 'cost', '$', '100,980', '.']
['A', 'full', ',', 'four-color', 'page', 'in', 'Newsweek', 'will', 'cost', '$', '100', ',', '980', '.']
102 ('49', '51')
['In', 'mid-October', ',', 'Time', 'magazine', 'lowered', 'its', 'guaranteed', 'circulation', 'rate', 'base', 'for', '1990', 'while', 'not', 'increasing', 'ad', 'page', 'rates', ';', 'with', 'a', 'lower', 'circulation', 'base', ',', 'Time', "'s", 'ad', 'rate', 'will', 'be', 'effectively', '7.5', '%', 'higher', 'per', 'subscriber', ';', 'a', 'full', 'page', 'in', 'Time', 'costs', 'about', '$', '120,000', '.']
['In', 'mid-October', ',', 'Time', 'magazine', 'lowered', 'its', 'guaranteed', 'circulation', 'rate', 'base', 'for', '1990',
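The preview above is truncated by GitHub. A sketch of how such a comparison report could be produced, assuming the tokenizer under test is NLTK's TreebankWordTokenizer (the function below is illustrative, not the gist's actual code):
from nltk.tokenize import TreebankWordTokenizer

tokenizer = TreebankWordTokenizer()

def report(index, reference, sentence):
    # Compare the tokenizer output against a reference tokenization and
    # print the three-line record described above.
    output = tokenizer.tokenize(sentence)
    if len(output) != len(reference):
        header = (str(len(reference)), str(len(output)))
    else:
        diffs = [i for i, (a, b) in enumerate(zip(reference, output)) if a != b]
        header = diffs[0] if diffs else 'identical'
    print(index, header)
    print(reference)
    print(output)

# Recent NLTK versions keep '2,700' intact, so this example may report 'identical'.
report(58, ['It', 'employs', '2,700', 'people', '.'], 'It employs 2,700 people.')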
@aboSamoor
aboSamoor / gist:1238807
Created September 24, 2011 01:11
POS tagger
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""ne.py: Extract named entities from text using nltk."""
from optparse import OptionParser
import nltk
import logging
__author__ = "Rami Al-Rfou"
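The preview cuts off at the imports; a minimal sketch of named-entity extraction with NLTK (illustrative, not the gist's implementation):
import nltk

# Requires NLTK's tokenizer, tagger, and NE-chunker data packages (nltk.download()).
def named_entities(text):
    # Tokenize, POS-tag, then chunk; keep subtrees that carry an entity label.
    tokens = nltk.word_tokenize(text)
    tagged = nltk.pos_tag(tokens)
    tree = nltk.ne_chunk(tagged)
    return [(subtree.label(), ' '.join(word for word, tag in subtree.leaves()))
            for subtree in tree.subtrees() if subtree.label() != 'S']

print(named_entities('Barack Obama was born in Hawaii.'))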
@aboSamoor
aboSamoor / template.py
Created September 12, 2011 16:13
Skeleton template for Python modules
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""template.py: Description of what the module does."""
__author__ = "Rami Al-Rfou"
__email__ = "rmyeid@gmail.com"
def main():
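The preview cuts off at main(); a filled-in version of such a skeleton might look like this (the command-line options and logging setup are illustrative, not the gist's actual code):
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""template.py: Description of what the module does."""

from argparse import ArgumentParser
import logging

__author__ = "Rami Al-Rfou"
__email__ = "rmyeid@gmail.com"

def main(args):
    # Placeholder for the module's real work.
    logging.info('processing %s', args.input)

if __name__ == '__main__':
    parser = ArgumentParser()
    parser.add_argument('-i', '--input', help='input file (illustrative option)')
    parser.add_argument('-l', '--log', default='INFO', help='logging level')
    args = parser.parse_args()
    logging.basicConfig(level=getattr(logging, args.log.upper()))
    main(args)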
@aboSamoor
aboSamoor / .vimrc
Created September 12, 2011 16:08
minimum vimrc
set hlsearch
set ruler
syntax on
set number
" set spell
" Tabs are not easy to handle; it is better
" to stick to a simple setup.
" ts = tabstop, sts = softtabstop, sw = shiftwidth
@aboSamoor
aboSamoor / gist:1140942
Created August 11, 2011 22:14
Pydot example
import pydot
open('twopi2.gv.txt').read()
'digraph G {\n ranksep=3;\n ratio=auto;\n"1" [ label="01",shape="hexagon",style="filled",color="green" ];\n"2" [ label="02",shape="hexagon",style="filled",color="green" ];\n"3" [ label="02",shape="hexagon",style="filled",color="green" ];\n"1" -> "2" [ label=" ",color="blue",arrowhead="dot" ];\n"1" -> "3" [label="",color="blue",arrowhead="dot" ];\n}\n'
graph = pydot.graph_from_dot_data(open('twopi2.gv.txt').read())
svg = graph.create_svg()
svg
# OUT: '<?xml version="1.0" encoding="UTF-8" standalone="no"?>\n<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN"\n "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [\n <!ATTLIST svg xmlns:xlink CDATA #FIXED "http://www.w3.org/1999/xlink">\n]>\n<!-- Generated by Graphviz version 2.20.2 (Tue Mar &#160;2 19:03:41 UTC 2010)\n For user: (rmyeid) Rami Al&#45;rfou&#39; -->\n<!-- Title: G Pages: 1 -->\n<svg width="134pt" height="314pt"\n viewBox="0.00 0.00 134.00 314.00" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http:
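As a usage note: in recent pydot releases graph_from_dot_data returns a list of graphs, so the same transcript would read roughly as follows (a sketch, not the original gist):
import pydot

# graph_from_dot_data returns a list of graphs in pydot 1.2 and later.
graphs = pydot.graph_from_dot_data(open('twopi2.gv.txt').read())
graph = graphs[0]
graph.write_svg('twopi2.svg')  # or: svg_bytes = graph.create_svg()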
Try this:
http://www.reddit.com/r/linux/comments/iqbt7/share_your_bash_command_line_history_among_all/
PROMPT_COMMAND="history -a; history -n"
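For context (not part of the original snippet): PROMPT_COMMAND runs before each Bash prompt; history -a appends the current session's new commands to the history file, and history -n reads commands that other sessions have appended, so every open shell shares one history.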