Created
August 19, 2020 21:33
-
-
Save kryvoboker/695975430620f53c0f30898d6a83332a to your computer and use it in GitHub Desktop.
HomeWork9(Работа с сетью)(3)(Вариант с регулярными выражениями)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import java.io.BufferedReader; | |
import java.io.FileWriter; | |
import java.io.IOException; | |
import java.io.InputStreamReader; | |
import java.net.HttpURLConnection; | |
import java.net.URL; | |
import java.util.regex.Matcher; | |
import java.util.regex.Pattern; | |
/**
 * Downloads a web page and writes every URL found in its markup to a text file,
 * one URL per line. URLs are located with a regular expression.
 */
public class Main {

    /** Page whose markup is scanned for links. */
    private static final String SOURCE_URL = "https://prog.kiev.ua/forum/index.php/board,2.0.html";

    /** File the extracted URLs are written to (relative to the working directory). */
    private static final String OUTPUT_FILE = "text.txt";

    /**
     * Matches http/https/ftp URLs and bare {@code www.} addresses.
     * The leading non-capturing group consumes the delimiter in front of the URL
     * (start of line or any non-word character); the URL itself starts at group 1.
     */
    private static final Pattern URL_PATTERN = Pattern.compile(
            "(?:^|[\\W])((ht|f)tp(s?):\\/\\/|www\\.)"
                    + "(([\\w\\-]+\\.){1,}?([\\w\\-.~]+\\/?)*"
                    + "[\\p{Alnum}.,%_=?&#\\-+()\\[\\]\\*$~@!:/{};']*)",
            Pattern.CASE_INSENSITIVE | Pattern.MULTILINE | Pattern.DOTALL);

    /**
     * Fetches {@link #SOURCE_URL}, extracts all URLs from it and stores them in
     * {@link #OUTPUT_FILE}. I/O failures are reported on stderr and do not abort the program.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String text = getStringFromUrl(SOURCE_URL);
        String urls = extractUrls(text);

        // try-with-resources flushes and closes the writer, no explicit flush needed.
        try (FileWriter writer = new FileWriter(OUTPUT_FILE)) {
            writer.write(urls);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Finds every URL in the given text.
     *
     * <p>Package-private so it can be exercised without network access.
     *
     * @param text text to scan; {@code null} or empty yields an empty result
     * @return the URLs in order of appearance, each followed by the platform line separator
     */
    static String extractUrls(String text) {
        if (text == null || text.isEmpty()) {
            return "";
        }
        StringBuilder urlOut = new StringBuilder();
        Matcher matcher = URL_PATTERN.matcher(text);
        while (matcher.find()) {
            // Start at group 1, not group 0: group 0 also contains the delimiter
            // character (space, '>', '"', newline, ...) that precedes the URL.
            String url = text.substring(matcher.start(1), matcher.end());
            // The pattern accepts apostrophes, so a closing quote of href='...' can be
            // captured; drop them. Double quotes can only appear as the leading
            // delimiter, which is already excluded above.
            url = url.replace("'", "");
            urlOut.append(url).append(System.lineSeparator());
        }
        return urlOut.toString();
    }

    /**
     * Retrieves the HTML markup of a page.
     *
     * <p>The response is decoded as UTF-8; pass a different charset to
     * {@code InputStreamReader} if the server sends another encoding and the text
     * comes out unreadable.
     *
     * @param urlAdress address of the page to download
     * @return the page content with lines joined by the platform line separator;
     *         whatever was read before a failure (possibly empty) if an I/O error occurs
     */
    public static String getStringFromUrl(String urlAdress) {
        StringBuilder sb = new StringBuilder();
        HttpURLConnection con = null;
        try {
            URL url = new URL(urlAdress);
            con = (HttpURLConnection) url.openConnection();
            try (BufferedReader br = new BufferedReader(new InputStreamReader(
                    con.getInputStream(), java.nio.charset.StandardCharsets.UTF_8))) {
                String line;
                while ((line = br.readLine()) != null) {
                    sb.append(line).append(System.lineSeparator());
                }
            }
        } catch (IOException e) {
            // Best effort: report the problem and return what has been read so far.
            e.printStackTrace();
        } finally {
            if (con != null) {
                con.disconnect();
            }
        }
        return sb.toString();
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment