Created
July 23, 2014 09:49
-
-
Save 88250/2e71dd8aff0540ac8139 to your computer and use it in GitHub Desktop.
标签图生成 - Java 部分
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package tags; | |
import java.io.InputStream; | |
import java.util.ArrayList; | |
import java.util.HashSet; | |
import java.util.List; | |
import java.util.Set; | |
import org.apache.commons.io.IOUtils; | |
import org.json.JSONArray; | |
public class Main { | |
private static final int WEIGHT = 12; | |
private static final float SCALE = 10; | |
public static void main(final String[] args) throws Exception { | |
final InputStream inputStream = Main.class.getResourceAsStream("data.json"); | |
final JSONArray tagsArray = new JSONArray(IOUtils.toString(inputStream, "UTF-8")); | |
final Set<String> tagSet = new HashSet<>(); | |
for (int i = 0; i < tagsArray.length(); i++) { | |
final JSONArray tagArray = tagsArray.getJSONArray(i); | |
for (int j = 0; j < tagArray.length(); j++) { | |
final String tag = tagArray.getString(j); | |
tagSet.add(tag); | |
} | |
} | |
final List<String> tagList = new ArrayList<>(tagSet); | |
final Set<String> relations = new HashSet<>(); | |
for (int i = 0; i < tagList.size(); i++) { | |
final String tag1 = tagList.get(i); | |
for (int j = i + 1; j < tagList.size(); j++) { | |
final String tag2 = tagList.get(j); | |
int originalWeight = getWeight(tag1, tag2, tagsArray); | |
if (originalWeight > WEIGHT) { | |
int weight = (int) Math.floor(originalWeight / SCALE); | |
if (weight > 1) { | |
// 生成 tag1 和 tag2 带权重的边 | |
relations.add("G.add_weighted_edges_from([['" + tag1 + "', '" + tag2 + "', " + weight + "]]);"); | |
} | |
} | |
} | |
} | |
System.out.println(tagSet.toString()); | |
System.out.println(tagSet.size()); | |
for (final String string : relations) { | |
System.out.println(string); | |
} | |
} | |
private static int getWeight(final String tag1, final String tag2, final JSONArray tagsArray) { | |
int ret = 0; | |
for (int i = 0; i < tagsArray.length(); i++) { | |
// 一篇文章的标签数组 | |
final JSONArray tagArray = tagsArray.getJSONArray(i); | |
// 转成集合 | |
final Set<String> tagSet = new HashSet<>(); | |
for (int j = 0; j < tagArray.length(); j++) { | |
final String tag = tagArray.getString(j); | |
tagSet.add(tag); | |
} | |
// 如果这篇文章同时出现了 tag1 和 tag2,则权重加 1 | |
if (tagSet.contains(tag1) && tagSet.contains(tag2)) { | |
ret++; | |
} | |
} | |
return ret; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment