Aditya Singh Aditya1001001

## clone_brat.sh
# Clone the brat repository from GitHub; git creates a ./brat directory in the current working directory.
git clone https://github.com/nlplab/brat.git

## RSS_feed_aggregator_output.py
{
    "title": "DeFi Portal 1inch Launches Wallet App on Android",
    "link": "https://www.coindesk.com/business/2022/03/31/defi-portal-1inch-launches-wallet-on-android/?utm_medium=referral&utm_source=rss&utm_campaign=headlines",
    "pub_date": "31 Mar 2022 15:00:00 ",
    "summary": "The move comes nearly one year after the app became available on Apple's iPhone.",
    "image": "https://www.coindesk.com/resizer/QOu3JOV1i6UgnbwUc4nD1hXvaeo=/800x600/cloudfront-us-east-1.images.arcpublishing.com/coindesk/GXWHA5WEUJEFJJGAL44U5IXVKQ.png"
}
{
    "title": "The state of Web3: Community talks about opportunities around the world",
    "link": "https://cointelegraph.com/news/the-state-of-web3-community-talks-about-opportunities-around-the-world",

## clean_dict.py
# Replace every attribute's wrapper dict with the value it stores under "S".
# NOTE(review): the {"S": ...} shape looks like a typed-attribute export
# (e.g. DynamoDB string attributes) -- confirm; an attribute without an "S"
# key raises KeyError here.
for record in data:
    for field in record:
        record[field] = record[field]["S"]

# Print the first two unwrapped records as a quick sanity check.
print(data[:2])

## clean_data.py
# Convert the exported annotation records in data['examples'] into training
# examples of the form {'text': <content>, 'entities': [(start, end, LABEL), ...]}.
training_data = {'classes': ['MEDICINE', "MEDICALCONDITION", "PATHOGEN"], 'annotations': []}
for example in data['examples']:
    temp_dict = {}
    temp_dict['text'] = example['content']
    # Labels are upper-cased so they match the spelling used in 'classes'.
    temp_dict['entities'] = [
        (annotation['start'], annotation['end'], annotation['tag_name'].upper())
        for annotation in example['annotations']
    ]
    # Keep the converted example; without this append the per-example dict is
    # thrown away and training_data['annotations'] stays empty.
    training_data['annotations'].append(temp_dict)

## add_names_and_tickers.py
# Turn every company name into a Doc and register the whole batch with the
# matcher under the "COMPANY" key.
# NOTE(review): matcher is presumably a spaCy PhraseMatcher -- confirm.
patterns = list(map(nlp.make_doc, names))
matcher.add("COMPANY", patterns)

# Same treatment for the ticker symbols in data['Symbol'], registered under "SYMBOL".
patterns = list(map(nlp.make_doc, data['Symbol']))
matcher.add("SYMBOL", patterns)

## newscatcher_website_details.py
from newscatcher import describe_url

# Domains whose newscatcher metadata should be displayed.
websites = [
    'nytimes.com',
    'cronachediordinariorazzismo.org',
    'libertaegiustizia.it',
]

# Print whatever describe_url reports for each domain, one result per line.
for domain in websites:
    details = describe_url(domain)
    print(details)

## build_train_doc2vec.py
def tagged_document(list_of_list_of_words):
    """Lazily wrap each word list in a ``TaggedDocument`` tagged with its position.

    Args:
        list_of_list_of_words: Iterable of word lists, one per document.

    Yields:
        ``gensim.models.doc2vec.TaggedDocument(words, [index])`` where
        ``index`` is the zero-based position of the word list in the input.
    """
    yield from (
        gensim.models.doc2vec.TaggedDocument(words, [index])
        for index, words in enumerate(list_of_list_of_words)
    )

# Materialise the tagged corpus up front: it is iterated once by
# build_vocab and again by train.
training_data = [*tagged_document(data)]
model = gensim.models.doc2vec.Doc2Vec(
    vector_size=40,
    min_count=2,
    epochs=30,
)

# Build the vocabulary first, then train using the document count and epoch
# setting stored on the model itself.
model.build_vocab(training_data)
model.train(
    training_data,
    total_examples=model.corpus_count,
    epochs=model.epochs,
)

## consolidated_pipeline.py
def dividend_info(article):
    """Print the headline of an article whose title mentions a date.

    The title is parsed with ``nlp``; nothing happens unless one of its
    tokens is the word "date" (case-insensitive) and ``get_date`` returns a
    truthy value for the parsed headline.

    Args:
        article: Mapping with at least ``'title'`` and ``'summary'`` entries.

    Returns:
        None. The only visible output is the printed ``HEADLINE: ...`` line.
    """
    headline = nlp(article['title'])

    # Guard 1: the headline must contain the token "date".
    if not any(token.text.lower() == 'date' for token in headline):
        return

    # Guard 2: a date must actually be extractable from the headline.
    date = get_date(headline)
    if not date:
        return

    # NOTE(review): the values below are computed but not used in the code
    # visible here -- confirm whether the rest of the function is missing.
    org = get_org(headline)
    ticker = get_ticker(headline)
    amount = get_amount_summary(nlp(article['summary']))
    pay_date = get_pay_date(nlp(article['summary']))
    print("HEADLINE: " + article['title'])

## index.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                Aditya1001001
                / index.ipynb
            
            
              Created
              September 15, 2021 11:48
                — forked from georgeblck/index.ipynb
            
              
                Comparison of openCV Interpolation methods by Anthony Tanbakuchi (Backup because his site is down)
              
          
        Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## rename_image.py
import os

# Renumber the regular, non-hidden files in the current working directory to
# "<index><original extension>" (0.jpg, 1.png, ...).
#
# Differences from a naive single-pass rename:
#   * files are first moved to unique temporary names, so a target such as
#     "1.jpg" can never replace a not-yet-processed file that already carries
#     that name (os.rename silently replaces existing files on POSIX);
#   * the extension comes from os.path.splitext, so a name without a dot is
#     no longer reused as its own extension;
#   * directories and dot-files are left untouched, and the listing is sorted
#     so the numbering is reproducible between runs.
names = sorted(
    name for name in os.listdir()
    if os.path.isfile(name) and not name.startswith(".")
)

# Choose a temporary prefix that no existing directory entry starts with.
staging_prefix = ".renaming-"
while any(entry.startswith(staging_prefix) for entry in os.listdir()):
    staging_prefix += "_"

# Phase 1: move every file out of the way, remembering its extension.
staged = []
for position, name in enumerate(names):
    extension = os.path.splitext(name)[1]  # includes the leading dot, or ""
    temporary = staging_prefix + str(position) + extension
    os.rename(name, temporary)
    staged.append((temporary, extension))

# Phase 2: hand out the final sequential names.
count = 0
for temporary, extension in staged:
    os.rename(temporary, str(count) + extension)
    count += 1
	{
	"title": "DeFi Portal 1inch Launches Wallet App on Android",
	"link": "https://www.coindesk.com/business/2022/03/31/defi-portal-1inch-launches-wallet-on-android/?utm_medium=referral&utm_source=rss&utm_campaign=headlines",
	"pub_date": "31 Mar 2022 15:00:00 ",
	"summary": "The move comes nearly one year after the app became available on Apple's iPhone.",
	"image": "https://www.coindesk.com/resizer/QOu3JOV1i6UgnbwUc4nD1hXvaeo=/800x600/cloudfront-us-east-1.images.arcpublishing.com/coindesk/GXWHA5WEUJEFJJGAL44U5IXVKQ.png"
	}
	{
	"title": "The state of Web3: Community talks about opportunities around the world",
	"link": "https://cointelegraph.com/news/the-state-of-web3-community-talks-about-opportunities-around-the-world",
	# Replace every attribute's wrapper dict with the value it stores under "S".
	# The block structure (nested loops) is restored here; with all lines at
	# the same indent the loops had no bodies and the code could not be parsed.
	# NOTE(review): the {"S": ...} shape looks like a typed-attribute export
	# (e.g. DynamoDB string attributes) -- confirm.
	for record in data:
		for field in record:
			record[field] = record[field]["S"]

	# Print the first two unwrapped records as a quick sanity check.
	print(data[:2])
	# Convert the exported annotation records in data['examples'] into training
	# examples of the form {'text': <content>, 'entities': [(start, end, LABEL), ...]}.
	# Loop nesting is restored here; the flattened form could not be parsed.
	training_data = {'classes': ['MEDICINE', "MEDICALCONDITION", "PATHOGEN"], 'annotations': []}
	for example in data['examples']:
		temp_dict = {}
		temp_dict['text'] = example['content']
		# Labels are upper-cased so they match the spelling used in 'classes'.
		temp_dict['entities'] = [
			(annotation['start'], annotation['end'], annotation['tag_name'].upper())
			for annotation in example['annotations']
		]
		# Keep the converted example; without this append the per-example
		# dict is thrown away and training_data['annotations'] stays empty.
		training_data['annotations'].append(temp_dict)
	# Turn every company name into a Doc and register the whole batch with the
	# matcher under the "COMPANY" key.
	# NOTE(review): matcher is presumably a spaCy PhraseMatcher -- confirm.
	patterns = list(map(nlp.make_doc, names))
	matcher.add("COMPANY", patterns)

	# Same treatment for the ticker symbols in data['Symbol'], registered under "SYMBOL".
	patterns = list(map(nlp.make_doc, data['Symbol']))
	matcher.add("SYMBOL", patterns)
	from newscatcher import describe_url

	# Domains whose newscatcher metadata should be displayed.
	websites = ['nytimes.com', 'cronachediordinariorazzismo.org', 'libertaegiustizia.it']

	# Print whatever describe_url reports for each domain, one result per line.
	# The loop body is indented again; flattened, the loop had no body.
	for website in websites:
		print(describe_url(website))
	def tagged_document(list_of_list_of_words):
		"""Lazily wrap each word list in a ``TaggedDocument`` tagged with its position.

		Args:
			list_of_list_of_words: Iterable of word lists, one per document.

		Yields:
			``gensim.models.doc2vec.TaggedDocument(words, [index])`` where
			``index`` is the zero-based position of the word list in the input.
		"""
		# Function and loop bodies are indented again; the flattened form
		# left the def without a body.
		for index, words in enumerate(list_of_list_of_words):
			yield gensim.models.doc2vec.TaggedDocument(words, [index])

	# Materialise the tagged corpus up front: it is iterated once by
	# build_vocab and again by train.
	training_data = list(tagged_document(data))
	model = gensim.models.doc2vec.Doc2Vec(vector_size=40, min_count=2, epochs=30)

	# Build the vocabulary first, then train using the document count and
	# epoch setting stored on the model itself.
	model.build_vocab(training_data)
	model.train(training_data, total_examples=model.corpus_count, epochs=model.epochs)
	def dividend_info(article):
		"""Print the headline of an article whose title mentions a date.

		The title is parsed with ``nlp``; nothing happens unless one of its
		tokens is the word "date" (case-insensitive) and ``get_date`` returns
		a truthy value for the parsed headline.

		Args:
			article: Mapping with at least ``'title'`` and ``'summary'`` entries.

		Returns:
			None. The only visible output is the printed ``HEADLINE: ...`` line.
		"""
		# The nesting below is restored; the flattened form left the def and
		# both ifs without bodies.
		headline = nlp(article['title'])
		if 'date' in [token.text.lower() for token in headline]:
			date = get_date(headline)
			if date:
				# NOTE(review): these values are computed but not used in the
				# code visible here -- confirm whether more of the function
				# is missing.
				org = get_org(headline)
				ticker = get_ticker(headline)
				amount = get_amount_summary(nlp(article['summary']))
				pay_date = get_pay_date(nlp(article['summary']))
				print("HEADLINE: " + article['title'])
	import os

	# Renumber the regular, non-hidden files in the current working directory
	# to "<index><original extension>" (0.jpg, 1.png, ...).
	#
	# Loop bodies are indented again (flattened, the loop had no body), and the
	# rename is made safe:
	#   * files are first moved to unique temporary names, so a target such as
	#     "1.jpg" can never replace a not-yet-processed file that already
	#     carries that name (os.rename silently replaces files on POSIX);
	#   * the extension comes from os.path.splitext, so a name without a dot
	#     is no longer reused as its own extension;
	#   * directories and dot-files are left untouched, and the listing is
	#     sorted so the numbering is reproducible between runs.
	names = sorted(
		name for name in os.listdir()
		if os.path.isfile(name) and not name.startswith(".")
	)

	# Choose a temporary prefix that no existing directory entry starts with.
	staging_prefix = ".renaming-"
	while any(entry.startswith(staging_prefix) for entry in os.listdir()):
		staging_prefix += "_"

	# Phase 1: move every file out of the way, remembering its extension.
	staged = []
	for position, name in enumerate(names):
		extension = os.path.splitext(name)[1]  # includes the leading dot, or ""
		temporary = staging_prefix + str(position) + extension
		os.rename(name, temporary)
		staged.append((temporary, extension))

	# Phase 2: hand out the final sequential names.
	count = 0
	for temporary, extension in staged:
		os.rename(temporary, str(count) + extension)
		count += 1