Skip to content

Instantly share code, notes, and snippets.

@oonid
Created January 7, 2021 15:09
Show Gist options
  • Save oonid/f9dc160e29afda8ae599730b0c423cf0 to your computer and use it in GitHub Desktop.
Save oonid/f9dc160e29afda8ae599730b0c423cf0 to your computer and use it in GitHub Desktop.
top-id-programming-telegram-group.ipynb
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "top-id-programming-telegram-group.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyPCRkBWav5G1+4ddkDZLQS6",
"include_colab_link": true
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/oonid/f9dc160e29afda8ae599730b0c423cf0/top-id-programming-telegram-group.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"metadata": {
"id": "EvW7mRWqzWYA"
},
"source": [
"import requests\n",
"from lxml import html\n",
"from time import sleep\n",
"from datetime import datetime\n",
"import tqdm"
],
"execution_count": 1,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "MVTEb5Ky0rqu"
},
"source": [
"def get_tg_links(md_url): # get url of many tgg in markdown format\n",
" links = []\n",
" response = requests.get(md_url)\n",
" if response.ok:\n",
" for b_line in response.iter_lines(): # iterate response by line\n",
" s_line = b_line.decode('ascii') # do we need utf-8?\n",
" # link on markdown format: []()\n",
" if '(' in s_line and ')' in s_line and 't.me' in s_line:\n",
" i1 = s_line.index('[')\n",
" i2 = s_line.index(']')\n",
" tg_name = s_line[i1+1:i2]\n",
" i1 = s_line.rindex('(') # right index\n",
" i2 = s_line.rindex(')') # right index\n",
" tg_link = s_line[i1+1:i2]\n",
" links.append((tg_name, tg_link)) # list of tuple\n",
" return links\n",
"\n",
"\n",
"def get_div_members(div_line): # get total members from html div tag\n",
" total_members = 0\n",
" if div_line is not None and len(div_line) > 0:\n",
" tree = html.fromstring(div_line) # string to html tree\n",
" for node in tree.xpath('//div'): # query html with xpath\n",
" # only if the div text declare the members\n",
" if node.text is not None and ' members' in node.text:\n",
" ss = node.text.split(' members')\n",
" total = ss[0].replace(' ', '') # remove space as thousand sep\n",
" if total.isdigit(): # make sure the total is digit\n",
" total_members = int(total) # return as int type \n",
" return total_members\n",
"\n",
"def get_group_members(group_url): # get tgg members by its link\n",
" group_members = 0\n",
" response = requests.get(link)\n",
" if response.ok:\n",
" for b_line in response.iter_lines(): # iterate response by line\n",
" s_line = b_line.decode('utf-8') # emojis, and so on\n",
" # total members on css class: tgme_page_extra\n",
" if 'tgme_page_extra' in s_line:\n",
" group_members = get_div_members(s_line)\n",
" return group_members"
],
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "cogN9-sV9P7O",
"outputId": "6d9945f5-63b5-43ec-c596-fccfc3da3727"
},
"source": [
"# main program\n",
"\n",
"md_url = 'https://github.com/hendisantika/List-All-Programming-Telegram-Group/raw/master/README.md'\n",
"top = 20 # get top 20 groups\n",
"delay = 0.42 # around 2 iterations per sec\n",
"id_groups = []\n",
"tg_groups = get_tg_links(md_url)\n",
"with tqdm.tqdm(total=len(tg_groups)) as t:\n",
" for name, link in tg_groups:\n",
" members = get_group_members(link)\n",
" id_groups.append({'name': name, 'link': link, 'members': members})\n",
" sleep(delay) # no need to rush\n",
" t.update(1) # update tqdm progress\n",
"\n",
"# sort descending list of telegram group by its element (dict) with key members\n",
"top_groups = sorted(id_groups, key=lambda k: k['members'], reverse=True)\n",
"\n",
"print('\\n\\ntop {} telegram groups by members ({}):'\n",
" .format(top, datetime.today().strftime('%Y-%m-%d'))) # display!\n",
"for group in top_groups[:top]:\n",
" print('{:>6} {} {}'.format(group['members'], group['name'], group['link']))\n"
],
"execution_count": 3,
"outputs": [
{
"output_type": "stream",
"text": [
"100%|██████████| 210/210 [01:46<00:00, 1.97it/s]"
],
"name": "stderr"
},
{
"output_type": "stream",
"text": [
"\n",
"\n",
"top 20 telegram groups by members (2021-01-07):\n",
" 43244 Loker Jakarta https://t.me/loker_jakarta\n",
" 32485 Telegram beta https://t.me/tgbeta\n",
" 32447 Lowongan Kerja IT https://t.me/LowonganKerjaIT\n",
" 22539 Freelancer - Indonesia https://t.me/freelancerID\n",
" 21675 Python ID https://t.me/pythonID\n",
" 16602 PHP Indonesia for Student https://t.me/PHPIDforStudent\n",
" 15401 Telegram Bot PHP - Indonesia https://t.me/botphp\n",
" 14855 Laravel Indonesia https://t.me/laravelindonesia\n",
" 14299 LOKER DEVELOPER/PROGRAMMER https://t.me/LokerDeveloper\n",
" 14118 ADN (Android Developer Nasional) https://t.me/androiddevelopernasional\n",
" 12975 Flutter Indonesia https://t.me/flutter_id\n",
" 12277 Belajar HTML https://t.me/belajarhtmlcss\n",
" 12150 CodeIgniter Indonesia https://t.me/codeigniterindonesia\n",
" 11323 Windows 10 Community ID https://t.me/WinTenGroup\n",
" 10671 Javascript Indonesia https://t.me/js_id\n",
" 9194 React Native Indonesia https://t.me/reactnative_id\n",
" 9117 Machine Learning Indonesia https://t.me/machinelearningid\n",
" 8785 Nodejs Indonesia https://t.me/nodejsid\n",
" 8071 Kotlin Indonesia https://t.me/KotlinID\n",
" 7941 MySQL Indonesia https://t.me/mysqlid\n"
],
"name": "stdout"
},
{
"output_type": "stream",
"text": [
"\n"
],
"name": "stderr"
}
]
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment