JamesG jamespaultg

## SPSS_to_CSV.R
library(foreign)

# Convert a single SPSS (.sav) file to a semicolon-separated CSV file.
# Replace the input and output paths below with your own locations.
data <- read.spss(
  "File_path_SPSS.sav",
  reencode = "utf-8",        # character value: encoding to assume for the file
  use.value.labels = FALSE,  # do not convert labelled variables to factors
  to.data.frame = TRUE       # return a data frame
)

# Quick look at the first rows
head(data)

# View() opens a data viewer window, so only call it when someone is
# actually sitting at the session (skipped under Rscript / batch runs).
if (interactive()) {
  View(data)
}

# write.csv2() writes ";" as the field separator and "," as the decimal mark
write.csv2(data, "C:/Users/LYCJPG1/Documents/survey_results.csv")

## csv_diff.py
# Install the dependency from a shell before running this snippet
# (a bare `pip install` line is not valid Python):
#   pip install csv-diff
from csv_diff import load_csv, compare

# Compare two CSV files, matching rows on the "id" column.
# The files are opened in a `with` block so both handles are closed
# as soon as their contents have been loaded.
with open("one.csv") as first_csv, open("two.csv") as second_csv:
    diff = compare(
        load_csv(first_csv, key="id"),
        load_csv(second_csv, key="id"),
    )

## iterrows.py
# Quick reference: walk a table row by row with iterrows().
# NOTE(review): `document` is not defined in this snippet; presumably a
# pandas DataFrame with these three columns - confirm against the caller.
for idx, record in document.iterrows():
    section = record['section name']
    text_length = len(record['tekst'])
    sentence_count = len(record['sentence_list'])
    print(idx, section, text_length, sentence_count)

## enumerate.py
# Quick reference: enumerate() pairs every list element with its position
temp_list = ['a', 'b', 'c']
for position, letter in enumerate(temp_list):
    print(position, letter)

## readDBF.py
from dbfread import DBF
import pandas as pd

# Open the dBase file and turn its records into a pandas DataFrame
dbf = DBF('your_dbf_file.dbf')
records = iter(dbf)
frame = pd.DataFrame(records)
frame  # bare expression: shows the frame when run in a notebook or REPL

## getworddoc.py
# Install the dependency before running this snippet. In a notebook cell:
#   !pip3 install python-docx -q
# (from a shell, drop the leading "!"; the line is not valid plain Python)
import docx2txt
import docx  # provided by python-docx; needed for docx.Document below

# replace following line with location of your .docx file
wordfile = "your word document.docx"


def getDocxContent(filename):
    """Return the text of a Word document as a single string.

    Args:
        filename: path to the .docx file to read.

    Returns:
        The text of every paragraph in the document body, joined with
        newlines. An empty string if the document has no paragraphs.
    """
    doc = docx.Document(filename)
    # Collect the paragraph texts; the original stopped after initialising
    # fullText and never filled or returned it.
    fullText = "\n".join(paragraph.text for paragraph in doc.paragraphs)
    return fullText

## formattedprint.py
# Formatted-output quick reference (f-string format specifications):
# https://docs.python.org/3/tutorial/inputoutput.html
# https://www.python-course.eu/python3_formatted_output.php
# `:70` pads the file name to a minimum width of 70 characters and `:10d`
# prints the length as an integer in a field at least 10 characters wide.
# NOTE(review): file_names, i and section_text are not defined in this
# snippet; they must be supplied by the surrounding code.
print(f'section {file_names[i]:70} has length {len(section_text):10d}')

## regex_match.py
import re
# Find every lowercase ASCII letter and report its span and text
p = re.compile("[a-z]")
for hit in p.finditer('a1b2c3d4'):
    begin, finish = hit.span()
    print(begin, finish, hit.group())

## gettweets.py
# Thanks to Kaggle user yassinehamdaoui1
# https://www.kaggle.com/c/nlp-getting-started/discussion/132762
import pandas as pd
import tweepy as tw

# Twitter API credentials - placeholders only, replace with your own values.
# Application (consumer) key pair:
consumer_key = "put here you consumer_key"
consumer_secret = "put here your consumer_secret"
# User access token pair:
access_token = "your access"
access_token_secret = "your access token"

## accessS3.R
# NOTE(review): devtools does not appear to be used in this snippet
# (install.packages() comes from utils). Kept so the attached packages
# match the original script - confirm whether it can be dropped.
require("devtools")

# Install aws.s3 from the cloudyr repository only when it is missing, so
# re-running the script does not reinstall the package every time.
# The repository is fetched over HTTPS rather than plain HTTP.
if (!requireNamespace("aws.s3", quietly = TRUE)) {
  install.packages(
    "aws.s3",
    repos = c("cloudyr" = "https://cloudyr.github.io/drat")
  )
}

library("aws.s3")

# Set the AWS credentials and region as environment variables for this
# session. Replace the placeholders; do not commit real keys.
Sys.setenv(
  "AWS_ACCESS_KEY_ID" = "Key_id",
  "AWS_SECRET_ACCESS_KEY" = "secret_access_key",
  "AWS_DEFAULT_REGION" = "region"
)
	library(foreign)

	# Convert a single SPSS (.sav) file to a semicolon-separated CSV file.
	# Replace the input and output paths below with your own locations.
	data <- read.spss(
	  "File_path_SPSS.sav",
	  reencode = "utf-8",        # character value: encoding to assume for the file
	  use.value.labels = FALSE,  # do not convert labelled variables to factors
	  to.data.frame = TRUE       # return a data frame
	)

	# Quick look at the first rows
	head(data)

	# View() opens a data viewer window, so only call it when someone is
	# actually sitting at the session (skipped under Rscript / batch runs).
	if (interactive()) {
	  View(data)
	}

	# write.csv2() writes ";" as the field separator and "," as the decimal mark
	write.csv2(data, "C:/Users/LYCJPG1/Documents/survey_results.csv")
	# Install the dependency from a shell before running this snippet
	# (a bare `pip install` line is not valid Python):
	#   pip install csv-diff
	from csv_diff import load_csv, compare

	# Compare two CSV files, matching rows on the "id" column.
	# The files are opened in a `with` block so both handles are closed
	# as soon as their contents have been loaded.
	with open("one.csv") as first_csv, open("two.csv") as second_csv:
	    diff = compare(
	        load_csv(first_csv, key="id"),
	        load_csv(second_csv, key="id"),
	    )
	# Quick reference: walk a table row by row with iterrows().
	# The print call is indented under the loop; in the flattened original it
	# sat at the same level as the `for`, which Python rejects.
	# NOTE(review): `document` is not defined in this snippet; presumably a
	# pandas DataFrame with these three columns - confirm against the caller.
	for index, row in document.iterrows():
	    print(index, row['section name'], len(row['tekst']), len(row['sentence_list']))
	# Quick reference: enumerate() pairs every list element with its position.
	# The loop body is indented one level below the `for`; the flattened
	# original had both at the same level, which Python rejects.
	temp_list = ['a', 'b', 'c']
	for i, val in enumerate(temp_list):
	    print(i, val)
	from dbfread import DBF
	import pandas as pd

	# Open the dBase file and turn its records into a pandas DataFrame
	dbf = DBF('your_dbf_file.dbf')
	records = iter(dbf)
	frame = pd.DataFrame(records)
	frame  # bare expression: shows the frame when run in a notebook or REPL
	# Install the dependency before running this snippet. In a notebook cell:
	#   !pip3 install python-docx -q
	# (from a shell, drop the leading "!"; the line is not valid plain Python)
	import docx2txt
	import docx  # provided by python-docx; needed for docx.Document below

	# replace following line with location of your .docx file
	wordfile = "your word document.docx"


	def getDocxContent(filename):
	    """Return the text of a Word document as a single string.

	    Args:
	        filename: path to the .docx file to read.

	    Returns:
	        The text of every paragraph in the document body, joined with
	        newlines. An empty string if the document has no paragraphs.
	    """
	    doc = docx.Document(filename)
	    # Collect the paragraph texts; the original stopped after initialising
	    # fullText and never filled or returned it.
	    fullText = "\n".join(paragraph.text for paragraph in doc.paragraphs)
	    return fullText
	# Formatted-output quick reference (f-string format specifications):
	# https://docs.python.org/3/tutorial/inputoutput.html
	# https://www.python-course.eu/python3_formatted_output.php
	# `:70` pads the file name to a minimum width of 70 characters and `:10d`
	# prints the length as an integer in a field at least 10 characters wide.
	# NOTE(review): file_names, i and section_text are not defined in this
	# snippet; they must be supplied by the surrounding code.
	print(f'section {file_names[i]:70} has length {len(section_text):10d}')
	import re
	# Find every lowercase ASCII letter and report its start, end and text.
	# The print call is indented under the loop; the flattened original had
	# it at the same level as the `for`, which Python rejects.
	p = re.compile("[a-z]")
	for m in p.finditer('a1b2c3d4'):
	    print(m.start(), m.end(), m.group())
	# Thanks to Kaggle user yassinehamdaoui1
	# https://www.kaggle.com/c/nlp-getting-started/discussion/132762
	import pandas as pd
	import tweepy as tw

	# Twitter API credentials - placeholders only, replace with your own values.
	# Application (consumer) key pair:
	consumer_key = "put here you consumer_key"
	consumer_secret = "put here your consumer_secret"
	# User access token pair:
	access_token = "your access"
	access_token_secret = "your access token"
	# NOTE(review): devtools does not appear to be used in this snippet
	# (install.packages() comes from utils). Kept so the attached packages
	# match the original script - confirm whether it can be dropped.
	require("devtools")

	# Install aws.s3 from the cloudyr repository only when it is missing, so
	# re-running the script does not reinstall the package every time.
	# The repository is fetched over HTTPS rather than plain HTTP.
	if (!requireNamespace("aws.s3", quietly = TRUE)) {
	  install.packages(
	    "aws.s3",
	    repos = c("cloudyr" = "https://cloudyr.github.io/drat")
	  )
	}

	library("aws.s3")

	# Set the AWS credentials and region as environment variables for this
	# session. Replace the placeholders; do not commit real keys.
	Sys.setenv(
	  "AWS_ACCESS_KEY_ID" = "Key_id",
	  "AWS_SECRET_ACCESS_KEY" = "secret_access_key",
	  "AWS_DEFAULT_REGION" = "region"
	)