Skip to content

Instantly share code, notes, and snippets.

@chandruscm
Created April 10, 2020 12:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save chandruscm/381abe465002226a25f13a603a645fea to your computer and use it in GitHub Desktop.
Save chandruscm/381abe465002226a25f13a603a645fea to your computer and use it in GitHub Desktop.
/*
* NRLM Scraper by chandruscm
* --------------------------
* - Intended for educational purposes ONLY 📖
* - Use at your own risk ☠️
* - Requires jsoup : https://jsoup.org
*/
import org.jsoup.Connection
import org.jsoup.Jsoup
import org.jsoup.nodes.Document
import java.io.File
import java.io.IOException
/*
* Building blocks for the request URLs and form fields used by fetch().
*
* encd -> Combination of state,district,block,grampanchayat,village codes.
* reqcode -> Unique code generated with each request, subsequent requests need to pass this.
* abc -> Needed for no apparent reason when requesting SHG members.
*/
// Endpoint that every request in this file is sent to.
// NOTE(review): the "Memebrs" spelling is part of the URL as written; presumably it
// matches the server's path -- confirm before "correcting" it.
const val BASE_URL = "https://nrlm.gov.in/BlockWiseSHGMemebrsAction.do?methodName=showShgMembers"
// Query-string fragments appended to BASE_URL; ENCD and REQ_CODE are followed by a value.
const val ENCD = "&encd="
const val REQ_CODE = "&reqcode="
// Appended only to the village-level request in fetch().
const val ABC = "&abc=1"
// encd value used for the very first request, before any state has been chosen.
const val DEFAULT_ENCD = "01"
// Element ids passed to getDropDownItems() to locate each drop-down in a page.
const val STATE_CODE_ID = "stateCodeId"
const val DISTRICT_CODE_ID = "districtCodeId"
const val BLOCK_CODE_ID = "blockCodeId"
const val GRAMPANCHAYAT_CODE_ID = "grampanchayatCodeId"
const val VILLAGE_CODE_ID = "villageCodeId"
// Request data keys under which the selected codes are sent.
const val STATE_CODE = "stateCode"
const val DISTRICT_CODE = "districtCode"
const val BLOCK_CODE = "blockCode"
const val GRAMPANCHAYAT_CODE = "grampanchayatCode"
const val VILLAGE_CODE = "villageCode"
// First line written to data.csv; ends with a trailing comma and a newline.
const val CSV_HEADER = "State name,District Name,Block Name,Grampanchayat Name,Village Name,SHG Name,Member Name,Father/Husband,Gender,Age,Social Category,\n"
/**
 * A single drop-down entry.
 *
 * @property code value submitted to the server when this entry is selected
 * @property name text shown for this entry
 */
data class Object(
    val code: String,
    val name: String
)
/** Program entry point: runs the scraper. */
fun main() {
    fetch()
}
/**
 * Scrapes SHG member tables into `data.csv`.
 *
 * Walks the drop-downs in the order state -> district -> block -> grampanchayat ->
 * village, loading a new page for every selection. For each village, every row of
 * the first `tbody` on the result page is appended to the file as one CSV line.
 *
 * Only [IOException]s raised while writing the file are caught (and logged);
 * anything thrown by the page requests propagates to the caller.
 */
fun fetch() {
    // Execute the first request so the cookies set by the server are actually
    // captured. A Connection.Request that is never executed carries no cookies,
    // so reading them from it would always send an empty cookie set.
    val landing = Jsoup.connect("$BASE_URL$ENCD$DEFAULT_ENCD")
        .method(Connection.Method.GET)
        .execute()
    val cookies = landing.cookies()

    // Always holds the most recently loaded page: every request carries the
    // reqcode extracted from the page that was loaded just before it.
    var document = landing.parse()

    val file = File("data.csv").apply {
        // Write the header only into a new or empty file, so that re-running the
        // scraper does not inject a second header in the middle of existing data.
        if (createNewFile() || length() == 0L) appendText(CSV_HEADER)
    }

    getDropDownItems(document, STATE_CODE_ID).forEach { state ->
        document = loadPage(
            "$BASE_URL$REQ_CODE${getReqCode(document)}$ENCD${state.code}",
            cookies,
            mapOf(STATE_CODE to state.code)
        )
        println("Looking at ${state.name}")

        getDropDownItems(document, DISTRICT_CODE_ID).forEach { district ->
            document = loadPage(
                "$BASE_URL$REQ_CODE${getReqCode(document)}$ENCD${district.code}",
                cookies,
                mapOf(
                    STATE_CODE to state.code,
                    DISTRICT_CODE to district.code
                )
            )
            println("Looking at ${district.name}")

            getDropDownItems(document, BLOCK_CODE_ID).forEach { block ->
                document = loadPage(
                    "$BASE_URL$REQ_CODE${getReqCode(document)}$ENCD${block.code}",
                    cookies,
                    mapOf(
                        STATE_CODE to state.code,
                        DISTRICT_CODE to district.code,
                        BLOCK_CODE to block.code
                    )
                )
                println("Looking at ${block.name}")

                getDropDownItems(document, GRAMPANCHAYAT_CODE_ID).forEach { grampanchayat ->
                    document = loadPage(
                        "$BASE_URL$REQ_CODE${getReqCode(document)}$ENCD${grampanchayat.code}",
                        cookies,
                        mapOf(
                            STATE_CODE to state.code,
                            DISTRICT_CODE to district.code,
                            BLOCK_CODE to block.code,
                            GRAMPANCHAYAT_CODE to grampanchayat.code
                        )
                    )
                    println("Looking at ${grampanchayat.name}")

                    getDropDownItems(document, VILLAGE_CODE_ID).forEach { village ->
                        // The village request is the only one that adds ABC and is sent as a POST.
                        document = loadPage(
                            "$BASE_URL$ABC$REQ_CODE${getReqCode(document)}$ENCD${village.code}",
                            cookies,
                            mapOf(
                                STATE_CODE to state.code,
                                DISTRICT_CODE to district.code,
                                BLOCK_CODE to block.code,
                                GRAMPANCHAYAT_CODE to grampanchayat.code,
                                VILLAGE_CODE to village.code
                            ),
                            post = true
                        )
                        println("Writing data to file")
                        val rows = memberRowsAsCsv(document)
                        try {
                            // One append per village instead of one per cell:
                            // appendText opens and closes the file on every call.
                            if (rows.isNotEmpty()) file.appendText(rows)
                        } catch (exception: IOException) {
                            println("Error writing to file")
                            exception.printStackTrace()
                        }
                    }
                }
            }
        }
    }
}

/**
 * Loads [url], sending [cookies] and the [form] fields along with the request.
 * Uses POST when [post] is true and GET otherwise.
 */
private fun loadPage(
    url: String,
    cookies: Map<String, String>,
    form: Map<String, String>,
    post: Boolean = false
): Document {
    val connection = Jsoup.connect(url).data(form).cookies(cookies)
    return if (post) connection.post() else connection.get()
}

/**
 * Renders the first `tbody` of [page] as CSV text: one line per `tr`, each `td`
 * followed by a comma. Returns an empty string when the page has no `tbody`.
 */
private fun memberRowsAsCsv(page: Document): String = buildString {
    page.select("tbody").first()?.select("tr")?.forEach { row ->
        row.select("td").forEach { cell -> append(csvField(cell.text())).append(',') }
        append('\n')
    }
}

/**
 * Quotes [text] for CSV when it contains a comma, double quote or line break
 * (embedded quotes are doubled); any other text is returned unchanged.
 */
private fun csvField(text: String): String =
    if (text.any { it == ',' || it == '"' || it == '\n' || it == '\r' }) {
        "\"" + text.replace("\"", "\"\"") + "\""
    } else {
        text
    }
/**
 * Extracts the unique request code from a page.
 *
 * The code is read from the link directly inside the second `li` of the
 * `ul.nav.navbar-nav.navbar-right` list: the link's absolute `href` is split on
 * `=` and the third piece is returned.
 *
 * @param document page to read the code from
 * @return the request code found in that link
 * @throws IllegalStateException if the link is missing or its `href` has fewer
 * than three `=`-separated pieces
 */
fun getReqCode(document: Document): String {
    val navItems = document.select("ul.nav.navbar-nav.navbar-right").select("li")
    // Both the second list item and its direct link may be absent on an
    // unexpected page; fail with a readable message instead of an NPE or
    // IndexOutOfBoundsException.
    val link = navItems.getOrNull(1)?.select(" > a")?.first()
        ?: error("Cannot extract reqcode: navbar link not found in page")
    val href = link.absUrl("href")
    return href.split("=").getOrNull(2)
        ?: error("Cannot extract reqcode: unexpected link format '$href'")
}
/**
 * Extracts the items in a drop down box.
 *
 * @param document page containing the drop-down
 * @param id element id of the drop-down
 * @return one [Object] per child of the element except the first, using the
 * child's form value as `code` and its text as `name`; an empty list when the
 * page has no element with that id
 */
fun getDropDownItems(document: Document, id: String): MutableList<Object> {
    // getElementById yields null when the id is not present; treat that as
    // "nothing to iterate" rather than crashing on the null element.
    val dropDown = document.getElementById(id) ?: return mutableListOf()
    return dropDown.children()
        .drop(1) // the first child is skipped -- presumably a placeholder entry; TODO confirm
        .map { option -> Object(code = option.`val`(), name = option.text()) }
        .toMutableList()
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment