Skip to content

Instantly share code, notes, and snippets.

@kryvoboker
Created August 19, 2020 21:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kryvoboker/695975430620f53c0f30898d6a83332a to your computer and use it in GitHub Desktop.
Save kryvoboker/695975430620f53c0f30898d6a83332a to your computer and use it in GitHub Desktop.
HomeWork9(Работа с сетью)(3)(Вариант с регулярными выражениями)
import java.io.BufferedReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Downloads an HTML page, extracts every URL found in its markup with a
 * regular expression and writes the URLs, one per line, to {@code text.txt}.
 */
public class Main {

    /**
     * Matches a URL that begins with {@code http://}, {@code https://},
     * {@code ftp://}, {@code ftps://} or {@code www.} and that sits either at
     * the start of a line or directly after a non-word character.
     *
     * <p>The leading delimiter is consumed by the non-capturing prefix, so it
     * is part of the overall match but not of the URL. Capturing group 1 is
     * the scheme (or {@code www.}) prefix, hence {@code start(1)} is the
     * offset at which the URL itself begins.
     */
    private static final Pattern URL_PATTERN = Pattern.compile(
            "(?:^|[\\W])((ht|f)tp(s?):\\/\\/|www\\.)"
                    + "(([\\w\\-]+\\.){1,}?([\\w\\-.~]+\\/?)*"
                    + "[\\p{Alnum}.,%_=?&#\\-+()\\[\\]\\*$~@!:/{};']*)",
            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);

    /**
     * Program entry point: fetches the forum page, collects the URLs it
     * contains and stores them in {@code text.txt} in the working directory.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        String urlAdress = "https://prog.kiev.ua/forum/index.php/board,2.0.html";
        String text = getStringFromUrl(urlAdress);

        StringBuilder urlOut = new StringBuilder();
        for (String link : extractUrls(text)) {
            urlOut.append(link).append(System.lineSeparator());
        }

        // try-with-resources closes (and therefore flushes) the writer.
        try (FileWriter writer = new FileWriter("text.txt")) {
            writer.write(urlOut.toString());
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Extracts all URLs recognised by {@link #URL_PATTERN} from the given text.
     *
     * @param text the text (typically HTML markup) to scan; may be {@code null}
     * @return the URLs in order of appearance; an empty list when {@code text}
     *         is {@code null}, empty or contains no URL
     */
    static List<String> extractUrls(String text) {
        List<String> urls = new ArrayList<>();
        if (text == null || text.isEmpty()) {
            return urls;
        }
        Matcher matcher = URL_PATTERN.matcher(text);
        while (matcher.find()) {
            // Start at group 1 rather than at the whole match: the whole match
            // also contains the delimiter character that precedes the URL
            // (a quote, '>', '(', a space, ...), which is not part of the URL.
            String url = text.substring(matcher.start(1), matcher.end());
            // The pattern's trailing character class admits an apostrophe, so a
            // URL taken from a single-quoted attribute ends with one; drop it.
            urls.add(url.replace("'", ""));
        }
        return urls;
    }

    /**
     * Downloads the content of the page at the given address as text.
     *
     * <p>The response body is decoded as UTF-8. Line terminators of the
     * response are replaced by the platform line separator. I/O failures are
     * reported on standard error and do not propagate to the caller.
     *
     * @param urlAdress address of the page to download; must use the HTTP or
     *                  HTTPS protocol because the connection is cast to
     *                  {@link HttpURLConnection}
     * @return the text read from the response; empty if the address is
     *         malformed or the connection fails, and possibly incomplete if
     *         reading fails part-way through
     */
    public static String getStringFromUrl(String urlAdress) {
        StringBuilder sb = new StringBuilder();
        HttpURLConnection con = null;
        try {
            URL url = new URL(urlAdress);
            con = (HttpURLConnection) url.openConnection();
            // An explicit charset keeps the decoded text independent of the
            // platform default encoding.
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(con.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = br.readLine()) != null) {
                    sb.append(line).append(System.lineSeparator());
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            if (con != null) {
                con.disconnect();
            }
        }
        return sb.toString();
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment