Devi Prasad Khatua wolframalpha

## data_extraction.py
# this should work - confirm it !!
def get_lists(filename='evaluation3_without\n.txt'):
    """Return the evaluated contents of every second line of *filename*.

    The file is read in full; each even-numbered line (2nd, 4th, ...,
    counting from 1) has its newline characters stripped and is passed to
    ``eval``. Odd-numbered lines are ignored.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list holding one evaluated object per even-numbered line, in
        file order (empty when the file has fewer than two lines).
    """
    # NOTE(review): the default path contains a real newline character
    # (the '\n' escape in a non-raw literal) -- confirm this is intended.
    with open(filename, 'r') as infile:
        lines = infile.readlines()
    # SECURITY: eval() executes arbitrary expressions. Only use this on
    # trusted files; consider ast.literal_eval if lines are plain literals.
    return [eval(line.strip('\n')) for line in lines[1::2]]

## doc_to_pdf.py
import os
import subprocess
import shlex
import time
def convert_doc_to_pdf(ipfile_path, opfile_path):
    """Start a headless LibreOffice process that converts a document to PDF.

    The process is launched with ``subprocess.Popen`` and is not waited on,
    so this function returns ``None`` before the conversion has finished.

    Args:
        ipfile_path: Path of the document to convert.
        opfile_path: Value passed to LibreOffice's ``--outdir`` option.
    """
    # Build argv directly instead of formatting a string and re-splitting it
    # with shlex: paths containing quotes or backslashes were split wrongly.
    command = [
        'libreoffice',
        '--headless',
        '--convert-to', 'pdf', str(ipfile_path),
        '--outdir', str(opfile_path),
    ]
    # The child inherits this process's stdout. The original passed
    # stdout=True, which only worked because True is treated as fd 1.
    subprocess.Popen(command)

## ExperienceTagger.py
# coding: utf-8

import pickle
import re
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
import numpy as np
import pandas as pd
from nltk.tag import pos_tag, pos_tag_sents
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer

## README.md

      
              1 file
            
          
              0 forks
            
          
              1 comment
            
          
              0 stars
            
          
                wolframalpha
                / README.md
            
            
              Created
              March 31, 2017 20:48
            
          
    dateextractor

A basic example is given in the DateFinder.py file itself.
from DateExtractor import DateFinder

df = DateFinder()

# `find_dates(text)` takes a string/buffer as its parameter and returns a list of datetime objects
date_list = df.find_dates('<foobar> (oct 2013 to december 2014) </foobar>')


## links.txt
http://stackoverflow.com/questions/23322674/how-to-improve-speed-with-stanford-nlp-tagger-and-nltk

## drawmap.html
{% load staticfiles %}<!--
<script type="text/javascript" src="http://google-maps-utility-library-v3.googlecode.com/svn/trunk/markerwithlabel/src/markerwithlabel.js"></script> -->
<script type="text/javascript">

function inherits(childCtor, parentCtor) {
  /** @constructor */
  function tempCtor() {};
  tempCtor.prototype = parentCtor.prototype;
  childCtor.superClass_ = parentCtor.prototype;
  childCtor.prototype = new tempCtor();

## finding.txt
> Official Google news API - deprecated
Alternative:
  - https://newsapi.org/
    - Free to use while not clearly mentioned when the limit is breached
    - Multiple, large news sources - can explicitly select one of them: https://newsapi.org/sources
    - Key/Pair auth
    - response type: JSON

> Google finance API - deprecated / Yahoo finance API - deprecated
Alternative:

## generate_result.py
import itertools
import pandas as pd
from pyspark.ml.linalg import Vectors
from pyspark.ml.feature import VectorAssembler
from pyspark.ml import Pipeline
from pyspark.ml.regression import LinearRegression

# Module-level list, empty at start-up.
results = []
# START ----- VARIABLES
# Set holding the single column name 'Log_Price'
# (presumably columns every run must include -- confirm against usage).
mandatory_columns = {'Log_Price'}

## udschema.py
from pyspark.sql.functions import udf
from pyspark.sql.types import *

# Struct type with two non-nullable float fields, "foo" and "bar".
schema = (
    StructType()
    .add("foo", FloatType(), False)
    .add("bar", FloatType(), False)
)
# NOTE(review): `function` is not defined in this snippet, and `function()`
# hands udf() the call's result rather than the callable itself -- this
# looks like a placeholder; confirm before use.
udf(function(), schema)

## createCluster1434.sh
# Create the Dataproc cluster "meow-1434" with 3 workers in project affine-dev.
gcloud dataproc clusters create meow-1434 \
  --bucket shopko-bucket \
  --subnet default \
  --zone asia-east1-b \
  --master-machine-type n1-standard-4 \
  --master-boot-disk-size 100 \
  --num-workers 3 \
  --worker-machine-type n1-standard-4 \
  --worker-boot-disk-size 50 \
  --scopes 'https://www.googleapis.com/auth/cloud-platform' \
  --project affine-dev \
  --initialization-actions 'gs://shopko-bucket/initialisation_script.sh'
	# this should work - confirm it !!
	def get_lists(filename='evaluation3_without\n.txt'):
		"""Return the evaluated contents of every second line of *filename*.

		The file is read in full; each even-numbered line (2nd, 4th, ...,
		counting from 1) has its newline characters stripped and is passed
		to ``eval``. Odd-numbered lines are ignored.

		Args:
			filename: Path of the text file to read.

		Returns:
			A list holding one evaluated object per even-numbered line, in
			file order (empty when the file has fewer than two lines).
		"""
		# NOTE(review): the default path contains a real newline character
		# (the '\n' escape in a non-raw literal) -- confirm this is intended.
		with open(filename, 'r') as infile:
			lines = infile.readlines()
		# SECURITY: eval() executes arbitrary expressions. Only use this on
		# trusted files; consider ast.literal_eval if lines are plain literals.
		return [eval(line.strip('\n')) for line in lines[1::2]]
	import os
	import subprocess
	import shlex
	import time
	def convert_doc_to_pdf(ipfile_path, opfile_path):
		"""Start a headless LibreOffice process that converts a document to PDF.

		The process is launched with ``subprocess.Popen`` and is not waited
		on, so this function returns ``None`` before the conversion has
		finished.

		Args:
			ipfile_path: Path of the document to convert.
			opfile_path: Value passed to LibreOffice's ``--outdir`` option.
		"""
		# Build argv directly instead of formatting a string and re-splitting
		# it with shlex: paths containing quotes or backslashes were split
		# wrongly.
		command = [
			'libreoffice',
			'--headless',
			'--convert-to', 'pdf', str(ipfile_path),
			'--outdir', str(opfile_path),
		]
		# The child inherits this process's stdout. The original passed
		# stdout=True, which only worked because True is treated as fd 1.
		subprocess.Popen(command)
	# coding: utf-8

	import pickle
	import re
	from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
	import numpy as np
	import pandas as pd
	from nltk.tag import pos_tag, pos_tag_sents
	from nltk.tokenize import word_tokenize
	from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
	{% load staticfiles %}<!--
	<script type="text/javascript" src="http://google-maps-utility-library-v3.googlecode.com/svn/trunk/markerwithlabel/src/markerwithlabel.js"></script> -->
	<script type="text/javascript">

	function inherits(childCtor, parentCtor) {
	/** @constructor */
	function tempCtor() {};
	tempCtor.prototype = parentCtor.prototype;
	childCtor.superClass_ = parentCtor.prototype;
	childCtor.prototype = new tempCtor();
	> Official Google news API - deprecated
	Alternative:
	- https://newsapi.org/
	- Free to use while not clearly mentioned when the limit is breached
	- Multiple, large news sources - can explicitly select one of them: https://newsapi.org/sources
	- Key/Pair auth
	- response type: JSON

	> Google finance API - deprecated / Yahoo finance API - deprecated
	Alternative:
	import itertools
	import pandas as pd
	from pyspark.ml.linalg import Vectors
	from pyspark.ml.feature import VectorAssembler
	from pyspark.ml import Pipeline
	from pyspark.ml.regression import LinearRegression

	# Module-level list, empty at start-up.
	results = []
	# START ----- VARIABLES
	# Set holding the single column name 'Log_Price'
	# (presumably columns every run must include -- confirm against usage).
	mandatory_columns = {'Log_Price'}
	from pyspark.sql.functions import udf
	from pyspark.sql.types import *

	# Struct type with two non-nullable float fields, "foo" and "bar".
	schema = (
		StructType()
		.add("foo", FloatType(), False)
		.add("bar", FloatType(), False)
	)
	# NOTE(review): `function` is not defined in this snippet, and `function()`
	# hands udf() the call's result rather than the callable itself -- this
	# looks like a placeholder; confirm before use.
	udf(function(), schema)