
Brandon Locke brandontlocke

brandontlocke / working with gale directories
Last active July 13, 2017 18:05
process to take the FLH corrected txt files out of the messy structure and create plain txt files
#delete all of the stray images
find . -type f -name '*.jpg' -delete
#delete all of the *images* folders (-iname matches both 'images' and 'Images' in one pass)
find . -type d -iname images -exec rm -rf {} \;
#rename each subdirectory's txt file after the subdirectory (quoted so names with spaces survive)
for subdir in *; do mv "$subdir"/*.txt "$subdir.txt"; done
#remove empty directories
find . -type d -empty -delete
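The same cleanup can be sketched in Python with pathlib; the function name `flatten_gale` and the exact matching details are my assumptions, not part of the original gist.

```python
# A rough Python equivalent of the shell steps above (a sketch; names
# and details here are assumptions, not from the gist).
import shutil
from pathlib import Path

def flatten_gale(root):
    root = Path(root)
    # delete all of the stray images
    for jpg in list(root.rglob('*.jpg')):
        jpg.unlink()
    # delete the images/Images folders
    for d in list(root.rglob('*')):
        if d.is_dir() and d.name.lower() == 'images':
            shutil.rmtree(d)
    # move each subdirectory's txt file up, named after the subdirectory
    for subdir in [p for p in root.iterdir() if p.is_dir()]:
        for txt in subdir.glob('*.txt'):
            txt.rename(root / (subdir.name + '.txt'))
    # remove empty directories, deepest first
    for d in sorted((p for p in root.rglob('*') if p.is_dir()), reverse=True):
        if not any(d.iterdir()):
            d.rmdir()
```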

Keybase proof

I hereby claim:

  • I am brandontlocke on github.
  • I am brandontlocke (https://keybase.io/brandontlocke) on keybase.
  • I have a public key ASC7AocCJ5yFidT21Vyme-OzMnJBez8WfNsTdWH_EVpnOwo

To claim this, I am signing this object:

#!/bin/bash
printf "file '%s'\n" *.mov > mylist.txt
ffmpeg -f concat -i mylist.txt -c copy video.mov
rm mylist.txt

#!/bin/bash
#convert each AVCHD .MTS stream to mp4; IFS is reset so filenames with spaces survive the glob
IFS=$(echo -en "\n\b"); for i in AVCHD/BDMV/STREAM/*.MTS; do ffmpeg -i "$i" -vcodec mpeg4 -b:v 3000k -b:a 192k "$i.mp4"; done
#IFS=$(echo -en "\n\b"); for i in AVCHD/BDMV/STREAM/*.MTS; do ffmpeg -i "$i" -b:v 400k -preset veryfast -crf 29 -vcodec copy "$i.mp4"; done
#ffmpeg -i 00005.mts -s 480x320 -vcodec mpeg4 -b:v 3000k -b:a 192k test.mp4
printf "file '%s'\n" AVCHD/BDMV/STREAM/*.mp4 > mylist.txt
ffmpeg -f concat -i mylist.txt -c copy concat.mp4
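One caveat with the `printf`-generated list: a filename containing a single quote breaks ffmpeg's concat demuxer. A small Python sketch (the helper name is invented here) that writes the list with the required escaping:

```python
# Write an ffmpeg concat-demuxer list file; a sketch, the function
# name and parameters are assumptions, not from the gists above.
from pathlib import Path

def write_concat_list(folder, pattern, listfile='mylist.txt'):
    paths = sorted(Path(folder).glob(pattern))
    with open(listfile, 'w') as f:
        for p in paths:
            # the concat demuxer requires single quotes escaped as '\''
            f.write("file '%s'\n" % str(p).replace("'", "'\\''"))
    return paths
```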
brandontlocke / jsonsplit.py
Last active July 12, 2018 18:19
split chronicling america api results into individual text files
import json
with open('path/to/file.json') as json_file:
    data = json.load(json_file)
for p in data['items']:
    file = open(p['date']+p['title']+"pg"+p['page']+".txt", "w")
    file.write(p['ocr_eng'])
    file.close()
#!/usr/bin/env python
import json
with open('fordlaborunion.json') as json_file:
    data = json.load(json_file)
for p in data['items']:
    file = open(p['date']+p['title']+"pg"+p['page']+".txt", "w")
    file.write(p['ocr_eng'])
    file.close()
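One hazard with the filenames built above: newspaper titles from the Chronicling America API can contain characters such as '/' that are illegal in file paths. A hedged variant (the function name and sanitizing rule are my own, not from the gist) that scrubs the name before writing:

```python
# Defensive version of the splitter: replace path-unsafe characters in
# the generated filename. A sketch; names here are assumptions.
import re

def split_items(items):
    written = []
    for p in items:
        raw = p['date'] + p['title'] + 'pg' + p['page']
        # collapse anything outside word chars, dots, hyphens into '_'
        name = re.sub(r'[^\w.-]+', '_', raw) + '.txt'
        with open(name, 'w') as f:
            f.write(p['ocr_eng'])
        written.append(name)
    return written
```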
brandontlocke / batchner-to-chunked-network.py
Last active October 25, 2018 17:49
transforms the batchner output into a nodes and edges file, then chunks into smaller files for Gephi projection. this is pretty specific to the FLH dataset
import pandas as pd
#import file & rename column headers
edges = pd.read_csv('https://raw.githubusercontent.com/FannieLouHamerPapers/NamedEntities/master/flh_ner_all.csv')
edges.columns = ['source', 'target', 'entityType', 'weight']
#add column to make network undirected
edges['type'] = 'undirected'
#chunk out into multiple edges files by selecting one of the numbers in the filename
#one file includes most of the rows, so these are divided weirdly
import networkx as nx
from networkx.algorithms import bipartite
import pandas as pd
#create empty multigraph - multigraph is an undirected graph with parallel edges
G = nx.MultiGraph()
#import file & create nodes
flhfull=pd.read_csv('https://raw.githubusercontent.com/FannieLouHamerPapers/NamedEntities/master/flh_ner_all.csv')
nodes=flhfull['name'].drop_duplicates()
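The description mentions chunking the edge list into smaller files for Gephi, but the chunking step itself isn't shown in the snippet. A minimal sketch, assuming a fixed number of rows per file (the chunk size and filename pattern are invented here):

```python
# Split an edges dataframe into fixed-size CSV chunks; a sketch, the
# function name, chunk size, and filename pattern are assumptions.
import pandas as pd

def chunk_edges(edges, rows_per_chunk=50000, prefix='edges_'):
    files = []
    for i, start in enumerate(range(0, len(edges), rows_per_chunk)):
        name = '%s%d.csv' % (prefix, i)
        edges.iloc[start:start + rows_per_chunk].to_csv(name, index=False)
        files.append(name)
    return files
```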
brandontlocke / batchner-to-network.py
Last active November 5, 2018 01:26
takes batchner output, creates projected network and various entity edge lists from it
import networkx as nx
from networkx.algorithms import bipartite
import pandas as pd
##########################################
##### BE SURE TO SET THESE VARIABLES #####
##########################################
#import batchner results into a dataframe—learn more about batchner: https://github.com/brandontlocke/batchner
batchner=pd.read_csv('PATH/TO/FILE', low_memory=False)
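As a sketch of the projection step the description refers to: each batchner row pairs a document with an entity, so the graph is bipartite and networkx can project it onto the entity side, weighting edges by shared documents. The column names ('doc', 'name') are assumptions based on the other snippets here, not confirmed by this gist:

```python
# Bipartite doc-entity graph projected onto entities; a sketch under
# the assumption that the dataframe has 'doc' and 'name' columns.
import networkx as nx
import pandas as pd
from networkx.algorithms import bipartite

def project_entities(df, doc_col='doc', ent_col='name'):
    G = nx.Graph()
    G.add_nodes_from(df[doc_col].unique(), bipartite=0)
    G.add_nodes_from(df[ent_col].unique(), bipartite=1)
    G.add_edges_from(df[[doc_col, ent_col]].itertuples(index=False))
    # entity-to-entity edges weighted by the number of shared documents
    return bipartite.weighted_projected_graph(G, df[ent_col].unique())
```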
brandontlocke / flh-metadatamerge.py
Created November 1, 2018 02:23
not totally happy with this, but it does the job
import pandas as pd
#read in data
entities = pd.read_csv('https://raw.githubusercontent.com/FannieLouHamerPapers/NamedEntities/master/flh_ner_all.csv')
metadata = pd.read_csv('flhmetadata.csv')
#cut '.txt' from the doc names
entities.doc = entities.doc.str[:16]
#join dataframes; select only some
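The script breaks off at the join step. As a hedged sketch of how that merge might look, assuming both frames share a 'doc' column to join on (the function name is invented here):

```python
# Left-join metadata onto the entity rows; a sketch, not the script's
# actual join, whose column selection was cut off above.
import pandas as pd

def merge_metadata(entities, metadata, key='doc'):
    # 'left' keeps every entity row even when no metadata row matches
    return entities.merge(metadata, on=key, how='left')
```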