Skip to content

Instantly share code, notes, and snippets.

View mizvol's full-sized avatar
🦒
Focusing

Volodymyr Miz mizvol

🦒
Focusing
View GitHub Profile
def getFollowers(userId, nextCursor):
    """Request a user's followers from the Instagram v1 API.

    Sends a GET to ``/v1/users/<userId>/followed-by`` with ``nextCursor``
    as the ``cursor`` query parameter and the module-level
    ``INSTAGRAM_ACCESS_TOKEN`` as ``access_token``.

    Args:
        userId: Instagram user id. It is concatenated into the URL, so it
            must already be a string.
        nextCursor: Value sent as the ``cursor`` query parameter.
    """
    params = {
        'cursor': nextCursor,
        'access_token': INSTAGRAM_ACCESS_TOKEN,
    }
    # The context manager closes the session (and its connection pool)
    # on exit instead of leaking one pool per call.
    with Session() as session:
        # Let the HTTPS adapter retry failed connection attempts up to 50 times.
        session.mount("https://", adapters.HTTPAdapter(max_retries=50))
        response = session.get(
            "https://api.instagram.com/v1/users/" + userId + "/followed-by",
            params=params,
            verify=True,
        )
    # NOTE(review): the visible snippet ends here without returning or
    # otherwise using `response` -- confirm what callers expect back.
// Build the "users" collection: one document per distinct user id found in allPosts.
db.allPosts.aggregate([
// Group posts by the nested user.id field; each output document carries only that _id.
{$group: {_id: "$user.id"}},
// Write the grouped documents to the "users" collection ($out replaces its previous contents).
{$out: "users"}
]);
from requests import get, Session, adapters
def getInstaPosts(latitude, longitude, distance, minTimestamp, maxTimestamp, count):
# NOTE(review): this snippet is cut off inside the params dict; only the
# visible part is documented here.
# Query parameters assembled from the arguments; timestamps are sent as strings.
params = {
'lat': latitude,
'lng': longitude,
'distance': distance, # radius of requested area
'min_timestamp': str(minTimestamp), #start date
'max_timestamp': str(maxTimestamp), #end date
# NOTE(review): this reads the name COUNT, not the `count` parameter, and
# `count` is unused in the visible code -- confirm whether `count` was intended.
'count': COUNT, # number of posts(100 max)
# Turn the LDA model's topics into a flat (term, probability, topicId) DataFrame.
# Ask for at most 5 terms per topic; each entry is indexed below as
# topic[0] (term indices) and topic[1] (the matching weights).
topicIndices = ldaModel.describeTopics(maxTermsPerTopic=5)
# Vocabulary of the fitted vectorizer; it is indexed by term index below.
vocablist = vectorizer.vocabulary
topicsRDD = sc.parallelize(topicIndices)
# Replace each term index by its vocabulary entry and pair it with its weight.
# NOTE(review): `itemgetter` is not imported in the visible code -- presumably
# `from operator import itemgetter`; confirm.
termsRDD = topicsRDD.map(lambda topic: (zip(itemgetter(*topic[0])(vocablist), topic[1])))
# Attach a running index to each topic's term list; it becomes the topic id.
indexedTermsRDD = termsRDD.zipWithIndex()
# Flatten to one (term, weight, topic index) tuple per term.
termsRDD = indexedTermsRDD.flatMap(lambda term: [(t[0], t[1], term[1]) for t in term[0]])
termDF = termsRDD.toDF(['term', 'probability', 'topicId'])
from pyspark.mllib.clustering import LDA, LDAModel
from pyspark.mllib.feature import IDF
from pyspark.ml.feature import CountVectorizer
# Vectorize the tags array for each user: fit a vocabulary over the "tokens"
# column, then encode every row as a term-count vector in "features".
vectorizer = CountVectorizer(inputCol="tokens", outputCol="features").fit(tagsListDF)
countVectors = vectorizer.transform(tagsListDF).select("id", "features")
# Input for the TF-IDF step (the IDF computation itself is not part of the
# visible snippet): keep only the "features" vector, i.e. the second selected
# column, of each row.
# Go through `.rdd` explicitly: DataFrame.map was removed in Spark 2.0, while
# `.rdd.map` behaves the same on both 1.x and 2.x.
frequencyVectors = countVectors.rdd.map(lambda vector: vector[1])
import pymongo as pm
import unicodedata
# Connect with pymongo's default MongoClient settings and open the
# "instagram" database and its "tags" collection.
client = pm.MongoClient()
db = client.instagram
tagsDB = db.tags
# Collects one entry per document of the tags collection, starting with the
# document's _id converted to a string.
tagsList = []
for tag in tagsDB.find():
# Each tag is NFKD-normalized and then encoded to ASCII, dropping characters
# that cannot be represented.
# NOTE(review): the snippet is cut off mid-expression on the next line.
tagsList.append((str(tag['_id']), [unicodedata.normalize('NFKD', t).encode('ascii','ignore')
// Build the "tags" collection: one document per user id with the set of
// distinct tags across that user's posts.
// NOTE(review): the double $unwind assumes each post's `tags` field is an array -- confirm.
db.allPosts.aggregate([
// Per user id, collect the distinct `tags` values of that user's posts.
{$group: {_id: "$user.id", tags: {$addToSet: "$tags"}}},
// First unwind: one document per collected `tags` value.
{$unwind: "$tags"},
// Second unwind: one document per individual tag inside each value.
{$unwind: "$tags"},
// Regroup by user id, de-duplicating the individual tags.
{$group: {_id: "$_id", tags: {$addToSet: "$tags"}}},
// Write the result to the "tags" collection ($out replaces its previous contents).
{$out: "tags"}
])
@mizvol
mizvol / build.sbt
Last active October 26, 2018 15:29
// sbt build definition for the SparkScalaTest project.
name := "SparkScalaTest"
version := "1.0"
// `%%` in the dependencies below appends this Scala binary version to the artifact name.
scalaVersion := "2.11.12"
// Spark core and Spark SQL, both pinned to 2.3.2.
libraryDependencies ++= Seq(
"org.apache.spark" %% "spark-core" % "2.3.2",
"org.apache.spark" %% "spark-sql" % "2.3.2"
)
import org.apache.spark.sql.SparkSession
// Program entry point: extending App makes the object body run as main.
object SparkWordCount extends App {
// Get or create a Spark session with master "local[*]" and the app name "Spark Word Count".
val spark = SparkSession.builder
.master("local[*]")
.appName("Spark Word Count")
.getOrCreate()
// NOTE(review): the snippet is cut off inside this parallelize(...) call.
val lines = spark.sparkContext.parallelize(
@mizvol
mizvol / Tags LDA topic analysis.ipynb
Created January 18, 2017 10:03
LDA topic analysis of Instagram hashtags for clustering. Analysis + Visualization in D3JS
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.