Anton Alekseev alexeyev

## simple_text_classification_distilbert.py
import evaluate
import numpy as np
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
from transformers import AutoTokenizer
from transformers import DataCollatorWithPadding

# Two-way lookup between integer class ids and their label names.
# The reverse table is derived from the forward one so the two cannot drift.
id2label = dict(enumerate(("NEGATIVE", "POSITIVE")))
label2id = {name: index for index, name in id2label.items()}

## apertium_tokenizer.py
# coding: utf-8
"""
    Tokenization as it is done in Apertium; may not be blazing fast,
    since a full-scale morphological analysis is carried out
"""

import apertium
import re
from typing import List, Tuple
from streamparser import LexicalUnit, reading_to_string

## LICENSE
MIT License

Copyright (c) 2023 Anton Alekseev

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

## sequence_to_sentences.py
# coding: utf-8

sentences = []

# Read the file inside a context manager so the handle is closed
# deterministically instead of being left to the garbage collector.
with open("test.txt", "r", encoding="utf-8") as source:
    lines = source.readlines()

# The first line of the file is skipped.
for line in lines[1:]:

    seq = line.strip().split(" ")

    # A line that yields a single field (an empty line yields [""]) opens a
    # new, initially empty sentence.
    if len(seq) == 1:
        sentences.append([])

## onnx2pytorch.py
import onnx
import struct
import torch
import torch.nn as nn
import torchvision as tv
import warnings


# enum DataType {
#     UNDEFINED = 0;

## raspberry_pi_camera_telegram_bot.py
# coding: utf-8

import configparser
import logging
import telebot
from time import sleep
from picamera import PiCamera

# Module-level logger registered under the name "counting-crops".
# Its threshold is DEBUG, the lowest standard level, so the logger itself
# drops no records; handler configuration is not visible in this excerpt.
logger = logging.getLogger("counting-crops")
logger.setLevel(logging.DEBUG)

## hogweed_photos_collector_bot.py
# coding: utf-8

import configparser
import logging

import telebot

# Module-level logger registered under the name "hogweed-ground-level".
# Its threshold is DEBUG, the lowest standard level, so the logger itself
# drops no records; handler configuration is not visible in this excerpt.
logger = logging.getLogger("hogweed-ground-level")
logger.setLevel(logging.DEBUG)

## gsdmm_example.py
from functools import lru_cache

from nltk import TweetTokenizer, WordNetLemmatizer
from tqdm import tqdm
from gsdmm import MovieGroupProcess
from sklearn.datasets import fetch_20newsgroups
from nltk.corpus import stopwords

import pickle
import nltk

## texts_similarity_difflib.py
# coding: utf-8

from difflib import SequenceMatcher

# Load both texts as single lines: surrounding whitespace is stripped,
# newlines become spaces, and double spaces are collapsed in one pass.
# The files are only read, so they are opened in plain "r" mode ("r+" would
# fail on read-only files) and closed as soon as their content is loaded.
with open("text0.txt", "r") as first_file:
    t0 = first_file.read().strip().replace("\n", " ").replace("  ", " ")
with open("text1.txt", "r") as second_file:
    t1 = second_file.read().strip().replace("\n", " ").replace("  ", " ")

# Character-level comparison of the two normalized texts.
matcher = SequenceMatcher(a=t0, b=t1)
# Similarity score in the range [0, 1].
ratio = matcher.ratio()
# Matching (a, b, size) blocks; difflib ends the list with a zero-size dummy.
mbs = matcher.get_matching_blocks()

## gtranslate_selenium.py
#!/usr/bin/env python3
"""
    We do not recommend using this script for any purpose other than learning to use Selenium;
    for batched machine translation via Google Translate, the 'document' translation feature
    is arguably the most suitable option. For regular translations one should use the Cloud API.
"""

import time

from selenium.common.exceptions import TimeoutException
	import evaluate
	import numpy as np
	from datasets import load_dataset
	from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
	from transformers import AutoTokenizer
	from transformers import DataCollatorWithPadding

	id2label = {0: "NEGATIVE", 1: "POSITIVE"}
	label2id = {"NEGATIVE": 0, "POSITIVE": 1}
	# coding: utf-8
	"""
	Tokenization as it is done in Apertium; may not be blazing fast,
	since a full-scale morphological analysis is carried out
	"""

	import apertium
	import re
	from typing import List, Tuple
	from streamparser import LexicalUnit, reading_to_string
	MIT License

	Copyright (c) 2023 Anton Alekseev

	Permission is hereby granted, free of charge, to any person obtaining a copy
	of this software and associated documentation files (the "Software"), to deal
	in the Software without restriction, including without limitation the rights
	to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
	copies of the Software, and to permit persons to whom the Software is
	furnished to do so, subject to the following conditions:
	# coding: utf-8

	sentences = []

	# Read the file inside a context manager so the handle is closed
	# deterministically instead of being left to the garbage collector.
	with open("test.txt", "r", encoding="utf-8") as source:
		lines = source.readlines()

	# The first line of the file is skipped.
	for line in lines[1:]:

		seq = line.strip().split(" ")

		# A line that yields a single field (an empty line yields [""])
		# opens a new, initially empty sentence.
		if len(seq) == 1:
			sentences.append([])
	import onnx
	import struct
	import torch
	import torch.nn as nn
	import torchvision as tv
	import warnings


	# enum DataType {
	# UNDEFINED = 0;
	# coding: utf-8

	import configparser
	import logging
	import telebot
	from time import sleep
	from picamera import PiCamera

	logger = logging.getLogger("counting-crops")
	logger.setLevel(logging.DEBUG)
	from functools import lru_cache

	from nltk import TweetTokenizer, WordNetLemmatizer
	from tqdm import tqdm
	from gsdmm import MovieGroupProcess
	from sklearn.datasets import fetch_20newsgroups
	from nltk.corpus import stopwords

	import pickle
	import nltk
	# coding: utf-8

	from difflib import SequenceMatcher

	# Load both texts as single lines: surrounding whitespace is stripped,
	# newlines become spaces, and double spaces are collapsed in one pass.
	# The second replace() must target a double space; replacing a single
	# space with a single space does nothing.
	# The files are only read, so plain "r" mode is used and each handle is
	# closed as soon as its content is loaded.
	with open("text0.txt", "r") as first_file:
		t0 = first_file.read().strip().replace("\n", " ").replace("  ", " ")
	with open("text1.txt", "r") as second_file:
		t1 = second_file.read().strip().replace("\n", " ").replace("  ", " ")

	# Character-level comparison of the two normalized texts.
	matcher = SequenceMatcher(a=t0, b=t1)
	# Similarity score in the range [0, 1].
	ratio = matcher.ratio()
	# Matching (a, b, size) blocks; difflib ends the list with a zero-size dummy.
	mbs = matcher.get_matching_blocks()
	#!/usr/bin/env python3
	"""
	We do not recommend using this script for any purposes other than learning to use Selenium;
	for batched machine translation via Google Translate using 'document' translation feature
	is arguably the most suitable. For regular translations one should use the Cloud API.
	"""

	import time

	from selenium.common.exceptions import TimeoutException