Skip to content

Instantly share code, notes, and snippets.

import json
import multiprocessing as mp
import re
from collections import defaultdict
from typing import List, Optional, Set
from datasets import load_dataset
from datasketch import MinHash, MinHashLSH, minhash
from dpu_utils.utils.iterators import ThreadedIterator
from tqdm import tqdm
@mallamanis
mallamanis / repos.json
Last active December 30, 2022 20:01
MSR 2021 "Fast and Memory-Efficient Neural Code Completion" Dataset
[
"https://github.com/minimaxir/big-list-of-naughty-strings.git",
"https://github.com/shadowsocks/shadowsocks.git",
"https://github.com/littlecodersh/ItChat.git",
"https://github.com/google-research/bert.git",
"https://github.com/0voice/interview_internal_reference.git",
"https://github.com/keon/algorithms.git",
"https://github.com/satwikkansal/wtfpython.git",
"https://github.com/drduh/macOS-Security-and-Privacy-Guide.git",
"https://github.com/google/python-fire.git",
@mallamanis
mallamanis / useWithCxManager.ql
Created May 1, 2020 08:07
useCxManagerForOpen.ql
/**
* @name Use with open() as
* @description Consider using a context manager
* @kind problem
* @tags maintainability
* @problem.severity recommendation
* @sub-severity low
* @precision medium
* @id py/use-context-manager
*/
@mallamanis
mallamanis / usePathJoin.ql
Last active May 6, 2020 13:21
Use platform-independent path joining
/**
* @name Join paths correctly
* @description use os.path.join or an alternative to correctly join paths.
* @kind path-problem
* @tags maintainability
* @problem.severity recommendation
* @sub-severity low
* @precision medium
* @tags speed
* @sub-severity low
@mallamanis
mallamanis / useItemsToIterateDict.ql
Created April 30, 2020 19:44
Use dict.items() instead of explicitly accessing the values within the loop.
/**
* @name Iterate over the items of a dictionary using `.items()`.
* @description instead of iterating over the keys of a dictionary and then indexing the dictionary,
use `.items()` to retrieve the key-value pairs.
* @kind problem
* @tags maintainability
* @problem.severity recommendation
* @sub-severity low
* @precision medium
* @tags speed
@mallamanis
mallamanis / preferISlice.ql
Created April 30, 2020 18:19
Prefer islice instead of list(...)[slice]
/**
* @name Use islice() to slice an iterable.
* @description instead of converting an iterable to a list and then slicing it
use `islice` for efficiency.
* @kind problem
* @tags maintainability
* @problem.severity recommendation
* @sub-severity low
* @precision high
* @tags speed
@mallamanis
mallamanis / useHash.ql
Created April 30, 2020 18:11
Prefer invocation to hash()
/**
* @name Use hash()
* @description use hash() instead of __hash__
* @kind problem
* @tags maintainability
* @problem.severity recommendation
* @sub-severity low
* @precision high
* @tags style
* @sub-severity low
@mallamanis
mallamanis / preferIGlob.ql
Created April 30, 2020 18:01
Prefer iglob instead of glob
/**
* @name Prefer iglob
* @description Use iglob instead of glob
* @kind problem
* @tags speed
* @problem.severity recommendation
* @sub-severity low
* @precision high
* @id py/use-iglob
*/
@mallamanis
mallamanis / NoModuloForLeapYear.ql
Created April 30, 2020 17:42
Do not check for leap year with modulo.
/**
* @name Do not check leap year using modulo
* @description Use the system functions to check for leap years instead of modulo. Very low precision.
* @kind problem
* @tags reliability
* maintainability
* @problem.severity recommendation
* @sub-severity low
* @precision low
* @id py/no-leap-check-with-modulo
@mallamanis
mallamanis / noParseFloatInfOrNan.ql
Created April 30, 2020 17:14
No string parsing for inf/nan
/**
* @name Use math.inf or math.nan
* @description Prefer using math.inf instead of parsing infinity
from a string
* @kind problem
* @tags maintainability
* @problem.severity recommendation
* @sub-severity low
* @precision high
* @id py/no-float-inf