Created
January 7, 2021 15:09
-
-
Save oonid/f9dc160e29afda8ae599730b0c423cf0 to your computer and use it in GitHub Desktop.
top-id-programming-telegram-group.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "top-id-programming-telegram-group.ipynb", | |
"provenance": [], | |
"collapsed_sections": [], | |
"authorship_tag": "ABX9TyPCRkBWav5G1+4ddkDZLQS6", | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/oonid/f9dc160e29afda8ae599730b0c423cf0/top-id-programming-telegram-group.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "EvW7mRWqzWYA" | |
}, | |
"source": [ | |
"import requests\n", | |
"from lxml import html\n", | |
"from time import sleep\n", | |
"from datetime import datetime\n", | |
"import tqdm" | |
], | |
"execution_count": 1, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "MVTEb5Ky0rqu" | |
}, | |
"source": [ | |
"def get_tg_links(md_url): # get url of many tgg in markdown format\n", | |
" links = []\n", | |
" response = requests.get(md_url)\n", | |
" if response.ok:\n", | |
" for b_line in response.iter_lines(): # iterate response by line\n", | |
" s_line = b_line.decode('ascii') # do we need utf-8?\n", | |
" # link on markdown format: []()\n", | |
" if '(' in s_line and ')' in s_line and 't.me' in s_line:\n", | |
" i1 = s_line.index('[')\n", | |
" i2 = s_line.index(']')\n", | |
" tg_name = s_line[i1+1:i2]\n", | |
" i1 = s_line.rindex('(') # right index\n", | |
" i2 = s_line.rindex(')') # right index\n", | |
" tg_link = s_line[i1+1:i2]\n", | |
" links.append((tg_name, tg_link)) # list of tuple\n", | |
" return links\n", | |
"\n", | |
"\n", | |
"def get_div_members(div_line): # get total members from html div tag\n", | |
" total_members = 0\n", | |
" if div_line is not None and len(div_line) > 0:\n", | |
" tree = html.fromstring(div_line) # string to html tree\n", | |
" for node in tree.xpath('//div'): # query html with xpath\n", | |
" # only if the div text declare the members\n", | |
" if node.text is not None and ' members' in node.text:\n", | |
" ss = node.text.split(' members')\n", | |
" total = ss[0].replace(' ', '') # remove space as thousand sep\n", | |
" if total.isdigit(): # make sure the total is digit\n", | |
" total_members = int(total) # return as int type \n", | |
" return total_members\n", | |
"\n", | |
"def get_group_members(group_url): # get tgg members by its link\n", | |
" group_members = 0\n", | |
" response = requests.get(link)\n", | |
" if response.ok:\n", | |
" for b_line in response.iter_lines(): # iterate response by line\n", | |
" s_line = b_line.decode('utf-8') # emojis, and so on\n", | |
" # total members on css class: tgme_page_extra\n", | |
" if 'tgme_page_extra' in s_line:\n", | |
" group_members = get_div_members(s_line)\n", | |
" return group_members" | |
], | |
"execution_count": 2, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "cogN9-sV9P7O", | |
"outputId": "6d9945f5-63b5-43ec-c596-fccfc3da3727" | |
}, | |
"source": [ | |
"# main program\n", | |
"\n", | |
"md_url = 'https://github.com/hendisantika/List-All-Programming-Telegram-Group/raw/master/README.md'\n", | |
"top = 20 # get top 20 groups\n", | |
"delay = 0.42 # around 2 iterations per sec\n", | |
"id_groups = []\n", | |
"tg_groups = get_tg_links(md_url)\n", | |
"with tqdm.tqdm(total=len(tg_groups)) as t:\n", | |
" for name, link in tg_groups:\n", | |
" members = get_group_members(link)\n", | |
" id_groups.append({'name': name, 'link': link, 'members': members})\n", | |
" sleep(delay) # no need to rush\n", | |
" t.update(1) # update tqdm progress\n", | |
"\n", | |
"# sort descending list of telegram group by its element (dict) with key members\n", | |
"top_groups = sorted(id_groups, key=lambda k: k['members'], reverse=True)\n", | |
"\n", | |
"print('\\n\\ntop {} telegram groups by members ({}):'\n", | |
" .format(top, datetime.today().strftime('%Y-%m-%d'))) # display!\n", | |
"for group in top_groups[:top]:\n", | |
" print('{:>6} {} {}'.format(group['members'], group['name'], group['link']))\n" | |
], | |
"execution_count": 3, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"100%|██████████| 210/210 [01:46<00:00, 1.97it/s]" | |
], | |
"name": "stderr" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n", | |
"\n", | |
"top 20 telegram groups by members (2021-01-07):\n", | |
" 43244 Loker Jakarta https://t.me/loker_jakarta\n", | |
" 32485 Telegram beta https://t.me/tgbeta\n", | |
" 32447 Lowongan Kerja IT https://t.me/LowonganKerjaIT\n", | |
" 22539 Freelancer - Indonesia https://t.me/freelancerID\n", | |
" 21675 Python ID https://t.me/pythonID\n", | |
" 16602 PHP Indonesia for Student https://t.me/PHPIDforStudent\n", | |
" 15401 Telegram Bot PHP - Indonesia https://t.me/botphp\n", | |
" 14855 Laravel Indonesia https://t.me/laravelindonesia\n", | |
" 14299 LOKER DEVELOPER/PROGRAMMER https://t.me/LokerDeveloper\n", | |
" 14118 ADN (Android Developer Nasional) https://t.me/androiddevelopernasional\n", | |
" 12975 Flutter Indonesia https://t.me/flutter_id\n", | |
" 12277 Belajar HTML https://t.me/belajarhtmlcss\n", | |
" 12150 CodeIgniter Indonesia https://t.me/codeigniterindonesia\n", | |
" 11323 Windows 10 Community ID https://t.me/WinTenGroup\n", | |
" 10671 Javascript Indonesia https://t.me/js_id\n", | |
" 9194 React Native Indonesia https://t.me/reactnative_id\n", | |
" 9117 Machine Learning Indonesia https://t.me/machinelearningid\n", | |
" 8785 Nodejs Indonesia https://t.me/nodejsid\n", | |
" 8071 Kotlin Indonesia https://t.me/KotlinID\n", | |
" 7941 MySQL Indonesia https://t.me/mysqlid\n" | |
], | |
"name": "stdout" | |
}, | |
{ | |
"output_type": "stream", | |
"text": [ | |
"\n" | |
], | |
"name": "stderr" | |
} | |
] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment