Skip to content

Instantly share code, notes, and snippets.

@pfmiles
Created June 26, 2018 07:40
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pfmiles/3bc3a66793a5ea5bb0ebbe4a4b6699d8 to your computer and use it in GitHub Desktop.
Save pfmiles/3bc3a66793a5ea5bb0ebbe4a4b6699d8 to your computer and use it in GitHub Desktop.
可配置的随机字符串产生器,可满足任意大窗口区间内生成值绝不重复的要求
package test;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;
/**
* 可自定义的随机字符串产生器;
* 通过指定组成最终随机值的各“段(segment)”的定义,来自动产生出"uniqness()"个数窗口内循环不重复的随机值;
* 若从randSeq被创建、执行后产生的第一个值算起,往后数uniqness()个值,能保证100%不重复;
* 若从randSeq被创建、执行后产生的第任意一个值算起,往前、或往后数uniqness()个值,能保证99.7%的概率不重复;
* 可通过控制一些参数来控制最终产生出的随机值看起来的“随机程度”
* 注意:各segment最好不要有重叠值,否则破坏结果的唯一性
*
* 目前该工具主要用于数据安全脱敏“假名替换”场景
*
* @author pf-miles Feb 28, 2018 4:27:22 PM
*/
public class RandSeq implements Iterator<String> {
/*
* 决定了产生的值序列看上去的“随机程度”,也因为该值决定了内存会缓存多少待输出数据,因此该值也直接影响运行时内存占用量
* 运行时内存占用量约为:randness的segRanges.length次方个待输出String片段
*/
private int randness;
private SegRange[] segRanges;
private transient RuntimeSeq rs = null;
private Random rand = new Random();
/**
* 取下一个随机值,该随机值保证与前后"uniqness()"个连续的随机取值不重复的概率大于99.7%
* @return 下一个随机值
*/
@Override
public synchronized String next() {
// 1.若还没有运行时状态或状态已用完,则先准备运行时状态
if (rs == null || !rs.hasNext()) {
rs = new RuntimeSeq();
}
try {
// 2.直接取下一个随机值
return rs.next();
} catch (RuntimeSeqRefreshException e) {
// 重建runtimeSeq,全新轮回
rs = new RuntimeSeq();
return rs.next();
}
}
private static final class RuntimeSeqRefreshException extends RuntimeException {
private static final long serialVersionUID = 179475654213413913L;
@Override
public synchronized Throwable fillInStackTrace() {
return this;
}
}
// 运行时seq,封装了randness随机逻辑
private class RuntimeSeq implements Iterator<String> {
private List<CloneableIterator<String>> loopIters;// 各层候选值模板, 包括顶层候选值
private SegNode root; // 当前动态输出值, root的cur值是null, 第二层值才是顶层值
public RuntimeSeq() {
loopIters = new ArrayList<CloneableIterator<String>>(segRanges.length);
for (SegRange sr : segRanges)
loopIters.add(sr.iterator());
}
@Override
public boolean hasNext() {
return loopIters.get(0).hasNext() || (root != null && !root.isFinished());
}
@Override
public String next() {
// 1.如果root未初始化,则初始化
if (root == null)
root = buildRandnessTree(null, 0);
// 2.递归下降取随机段,组合成最终值
List<String> retList = new ArrayList<String>();
try {
constructOne(retList, root, 0);
} catch (ChildDelException e) {
// root代表的整颗树已被耗尽,重新生成
if (this.hasNext()) {
root = buildRandnessTree(null, 0);
try {
constructOne(retList, root, 0);
} catch (ChildDelException ex) {
throw new RuntimeException("Impossible.");// 这不可能发生,除非程序写错
}
} else {
// 整个runtimeSeq需要被重建, 新的轮回开始
throw new RuntimeSeqRefreshException();
}
}
StringBuilder ret = new StringBuilder();
for (String s : retList)
ret.append(s);
return ret.toString();
}
/*
* 在sb中构造一条完整的随机值
*
* @param retList 构造好的结果列表
* @param thisNode 当前层级node节点
* @param curLevel 当前层级,0级为root节点,1级才是顶层值
*
* @throws ChildDelException 当children动态节点输出耗尽时抛出,表示应该删除当前children动态输出节点
*/
private void constructOne(List<String> retList, SegNode thisNode,
int curLevel) throws ChildDelException {
// 1.加入本层值, 若不是root层的话(root层本层值为null)
if (curLevel != 0)
retList.add(thisNode.cur);
// 2.若chilren耗尽则抛出childDelException,若还没耗尽,则向下(children)递归
while (true) {// :child
if (thisNode.children != null && !thisNode.children.isEmpty()) {
// 2.1.动态输出节点children还有,直接取
if (_fetchNextChild(retList, thisNode, curLevel))
continue;
return;
} else if (thisNode.childIter != null && thisNode.childIter.hasNext()) {
// 2.2.动态输出节点children没有,从候选值iter补充后取
List<SegNode> children = new ArrayList<SegNode>();
for (String val : drawVals(thisNode.childIter))
children.add(buildRandnessTree(val, curLevel + 1));
thisNode.children.addAll(children);
// 然后从children里取
if (_fetchNextChild(retList, thisNode, curLevel))
continue;
return;
} else {
// 2.3.children和iter都没有(可能是叶子节点,也可能是分支节点children耗尽),通知上层删除本节点
// 是否由于碰到叶子节点而抛出本exception
if (curLevel == loopIters.size()) {
// 遇到叶子节点,是一次成功的执行
throw new ChildDelException(true);
} else {
// 由于分支节点耗尽而抛出本exception,需要将已经加入的本层值吐出来
if (curLevel != 0)
retList.remove(retList.size() - 1);
throw new ChildDelException(false);
}
}
}
}
// 取下一个children输出节点,返回true/false指示是否需要continue
private boolean _fetchNextChild(List<String> retList, SegNode thisNode, int curLevel) {
int randIdx = (int) (rand.nextDouble() * thisNode.children.size());
try {
constructOne(retList, thisNode.children.get(randIdx), curLevel + 1);
return false;
} catch (ChildDelException e) {
thisNode.children.remove(randIdx);
if (e.leaf) {
// 碰到叶子节点,说明成功构造了retList,返回
return false;
} else {
// 分支节点耗尽,继续循环子节点
return true;// 指示外层循环continue, goto :child
}
}
}
private SegNode buildRandnessTree(String cur, int childIterIdx) {
SegNode ret = new SegNode();
ret.cur = cur;
// build children iter & children
if (childIterIdx < loopIters.size()) {
ret.childIter = childIterIdx == 0 ? loopIters.get(childIterIdx) // 顶层iter用本体,其它层用clone
: loopIters.get(childIterIdx).clone();
List<SegNode> children = new ArrayList<SegNode>();
for (String val : drawVals(ret.childIter))
children.add(buildRandnessTree(val, childIterIdx + 1));
ret.children = children;
}
return ret;
}
// 从iter中连续获取randness个元素,以list形式返回,如果iter中元素不够,则返回的元素个数可以少于randness个;若iter中完全没有任何元素,则整体返回null
private List<String> drawVals(Iterator<String> iter) {
if (iter == null || !iter.hasNext())
return null;
List<String> ret = new ArrayList<String>();
for (int i = 0; i < randness && iter.hasNext(); i++)
ret.add(iter.next());
return ret;
}
}
// 指示上层节点应当删除该下层节点的纯控制流类型exception
private static final class ChildDelException extends Exception {
private static final long serialVersionUID = -1698558066692055092L;
private boolean leaf; // 指示是否由于碰到叶子节点而导致抛出本exception而不是分支节点children耗尽
public ChildDelException(boolean leaf) {
this.leaf = leaf;
}
@Override
public synchronized Throwable fillInStackTrace() {
return this;
}
}
// 构造树状随机结构的节点
private static class SegNode {
private String cur; // 当前节点值,rootNode的该值为null,rootNode的children才是顶层值
private List<SegNode> children; // 下一层已选动态输出节点
private Iterator<String> childIter;// 下一层可选值iter
public boolean isFinished() {
return children.isEmpty();
}
}
public RandSeq(int randness, SegRange... segRanges) {
if (randness < 1 || segRanges == null || segRanges.length == 0)
throw new RuntimeException("Illegal constructor arguments!");
this.randness = randness;
this.segRanges = segRanges;
}
/**
* 返回本randSeq产生的随机序列的“唯一性程度”,即该seq能保证连续产生多少个随机值不重复(或大概率不重复,>99.7%)
* @return uniqness
*/
public long uniqness() {
long ret = 0l;
for (SegRange sr : this.segRanges)
if (ret == 0l)
ret = sr.size();
else
ret *= sr.size();
return ret;
}
public interface CloneableIterator<T> extends Iterator<T>, Cloneable {
CloneableIterator<T> clone();
}
/**
* 段定义
*
* @author pf-miles Feb 28, 2018 4:58:14 PM
*/
public static interface SegRange extends Iterable<String> {
/**
* 获取本range所含段值个数
* @return
*/
long size();
@Override
CloneableIterator<String> iterator();
}
/**
* 固定枚举可选值列表组成的随机段
*
* @author pf-miles Feb 28, 2018 5:41:19 PM
*/
public static class EnumRange implements SegRange {
private Set<String> enums = new LinkedHashSet<String>();
public EnumRange(String... enums) {
if (enums == null || enums.length == 0)
throw new RuntimeException("Illegal constructor arguments.");
for (String e : enums)
this.enums.add(e);
}
private final class EnumRangeIter implements CloneableIterator<String> {
private Set<String> enums;
private Iterator<String> iter;
public EnumRangeIter(Set<String> enums) {
this.enums = enums;
this.iter = this.enums.iterator();
}
@Override
public boolean hasNext() {
return this.iter.hasNext();
}
@Override
public String next() {
return this.iter.next();
}
@Override
public CloneableIterator<String> clone() {
return new EnumRangeIter(this.enums);
}
}
@Override
public CloneableIterator<String> iterator() {
return new EnumRangeIter(enums);
}
@Override
public long size() {
return this.enums.size();
}
}
/**
* 连续数字组成的随机段
*
* @author pf-miles Feb 28, 2018 5:41:56 PM
*/
public static class IntRange implements SegRange {
private long start;
private long end;
/**
* 范围起止,从start(inclusive)到end(inclusive)的所有数字作为范围
* @param start inclusive
* @param end inclusive
*/
public IntRange(long start, long end) {
if (!(start < end))
throw new RuntimeException("Illegal constructor arguments.");
this.start = start;
this.end = end;
}
private final class IntRangeIter implements CloneableIterator<String> {
private long cur;
private long end;
@Override
public boolean hasNext() {
return cur <= end;
}
@Override
public String next() {
String ret = String.valueOf(cur);
cur++;
return ret;
}
@Override
public CloneableIterator<String> clone() {
IntRangeIter ret = new IntRangeIter();
ret.cur = this.cur;
ret.end = this.end;
return ret;
}
}
@Override
public CloneableIterator<String> iterator() {
IntRangeIter ret = new IntRangeIter();
ret.cur = start;
ret.end = IntRange.this.end;
return ret;
}
@Override
public long size() {
return end - start + 1;
}
}
@Override
public boolean hasNext() {
return true;// 循环往复,永远有下一个值
}
}
package test;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import org.junit.Assert;
import org.junit.Test;
import test.RandSeq.EnumRange;
import test.RandSeq.IntRange;
/**
* @author pf-miles Mar 1, 2018 9:05:36 PM
*/
public class RandSeqTest {
@Test
public void test() {
// randness等于range
System.out.println("randness equals to range length: ");
RandSeq seq = new RandSeq(2, new EnumRange("apple", "pear"), new IntRange(1, 3));
Assert.assertTrue(seq.uniqness() == 6l);
for (int i = 0; i < seq.uniqness() + 1; i++)
System.out.println(seq.next());
}
// 测试只有1个候选值的情况
@Test
public void testOneCandidate() {
RandSeq seq = new RandSeq(1, new EnumRange("test"));
Assert.assertTrue(seq.uniqness() == 1l);
for (int i = 0; i < seq.uniqness() + 1; i++)
System.out.println(seq.next());
}
// 测试randness值各种边界情况
@Test
public void test32() {
// randness小于range
System.out.println("randness less than range length: ");
RandSeq seq = new RandSeq(1, new EnumRange("test1", "test2"), new IntRange(1, 2),
new IntRange(3, 4));
Assert.assertTrue(seq.uniqness() == 8l);
for (int i = 0; i < seq.uniqness() + 1; i++)
System.out.println(seq.next());
// randness大于range
System.out.println("randness bigger than range length: ");
seq = new RandSeq(3, new EnumRange("test1", "test2"), new IntRange(1, 2),
new IntRange(3, 4));
Assert.assertTrue(seq.uniqness() == 8l);
for (int i = 0; i < seq.uniqness() + 1; i++)
System.out.println(seq.next());
}
// 测试产生序列严格在任意位置开始、往后数uniqness个值里有至少99.7%的值不重复
@Test
public void testUniqness() {
RandSeq seq = new RandSeq(20, new IntRange(1, 100), new IntRange(101, 200),
new IntRange(201, 300));
Assert.assertTrue(seq.uniqness() == 1000000l);
List<String> vals = new ArrayList<String>(1000000);
// 从第50多w个值开始
for (int i = 0; i < 500000; i++)
seq.next();
// 累积uniqness个值
for (int i = 0; i < seq.uniqness(); i++)
vals.add(seq.next());
System.out.println(vals.size());
System.out.println(new HashSet<String>(vals).size());
Assert.assertTrue((new HashSet<String>(vals).size() / (double) seq.uniqness()) > 0.997);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment