Skip to content

Instantly share code, notes, and snippets.

@zmovane
Last active September 10, 2023 17:08
Show Gist options
  • Save zmovane/6103a23d166e13bdae08decea7605b91 to your computer and use it in GitHub Desktop.
Save zmovane/6103a23d166e13bdae08decea7605b91 to your computer and use it in GitHub Desktop.
Scrape Twitter Followers & Following

Scrape Twitter Followers & Following

Usage

USER_NAME=xxx PASSWORD=xxx go run main.go

Dependencies

  • github.com/go-resty/resty/v2
  • github.com/n0madic/twitter-scraper

Cookies

The program logs in through the twitter-scraper library and reuses that session's cookies for its own API requests.

Structs

The Go structs that model the API response were generated with quicktype: https://quicktype.io

package main
import (
"encoding/json"
"fmt"
"net/http"
urlutil "net/url"
"os"
"regexp"
"strings"
"github.com/go-resty/resty/v2"
SCRAPER "github.com/n0madic/twitter-scraper"
)
// O is the type constraint used by Map. It places no restriction on the type
// argument.
type O interface {
	any
}

// Map calls convertFn once for every element of os, in order, and returns the
// converted values as a new slice with the same length as os. The returned
// slice is never nil, even when os is nil or empty.
func Map[SRC O, DEST O, RESULT []DEST](os []SRC, convertFn func(SRC) DEST) RESULT {
	converted := make([]DEST, len(os))
	for i := range os {
		converted[i] = convertFn(os[i])
	}
	return converted
}
type (
	// T is a string-keyed map of arbitrary values. This file uses it to build
	// the JSON objects sent as request parameters.
	T map[string]any

	// StringMap is a string-to-string map. This file uses it for the HTTP
	// request headers.
	StringMap map[string]string
)
// Relation selects which of a user's two relationship lists is meant.
type Relation int64

const (
	// Follower selects the "followers" list.
	Follower Relation = iota
	// Following selects the "following" list.
	Following
)

var (
	// ToString maps each Relation to its lower-case name.
	ToString = map[Relation]string{
		Follower:  "followers",
		Following: "following",
	}

	// ToPath maps each Relation to the path segment that is appended to the
	// GraphQL base URL when a request is built. The leading component is
	// presumably the query ID of the operation; verify against the live API.
	ToPath = map[Relation]string{
		Follower:  "3yX7xr2hKjcZYnXt6cU6lQ/Followers",
		Following: "t-BPOrMIduGUJWO_LxcvNQ/Following",
	}
)

// String returns the name registered for r in ToString, or the empty string
// when r has no entry.
func (r Relation) String() string {
	if name, ok := ToString[r]; ok {
		return name
	}
	return ""
}

// Path returns the path segment registered for r in ToPath, or the empty
// string when r has no entry.
func (r Relation) Path() string {
	if segment, ok := ToPath[r]; ok {
		return segment
	}
	return ""
}
// FlagEOF matches a string that consists of "0|" followed by one or more
// digits and nothing else. main compares pagination cursors against it to
// detect the end of the list.
var FlagEOF = regexp.MustCompile(`^0\|\d+$`)
// main logs in with the credentials from the USER_NAME and PASSWORD
// environment variables, then pages through the accounts followed by the
// hard-coded screen name "shareverse_", printing every screen name it receives.
//
// It panics when login fails. Paging stops when no next cursor is returned,
// when the previous and the new cursor both match FlagEOF, or when the new
// cursor equals the one that was just sent.
func main() {
	uname := os.Getenv("USER_NAME")
	upwd := os.Getenv("PASSWORD")
	reversedX := New(uname, upwd)
	// Report the real login failure instead of discarding it.
	if err := reversedX.login(); err != nil {
		panic(fmt.Sprintf("login failed: %v", err))
	}
	if !reversedX.IsLoggedIn() {
		panic("You must login first")
	}

	var cursor *string
	for {
		tweets, nextCursor := reversedX.GetFollowingsByScreenName("shareverse_", cursor)

		// Print before deciding whether to stop, so users delivered together
		// with the final cursor are not dropped.
		for _, tweet := range tweets {
			println(tweet.ScreenName)
		}

		if nextCursor == nil {
			break
		}
		if cursor != nil {
			reachedEnd := FlagEOF.MatchString(*cursor) && FlagEOF.MatchString(*nextCursor)
			// A cursor identical to the one just sent means no progress was
			// made; continuing would request the same page forever.
			stalled := *cursor == *nextCursor
			if reachedEnd || stalled {
				break
			}
		}

		cursor = nextCursor
		fmt.Printf("count<%v>====================cursor<%s>=======================\n", len(tweets), *cursor)
	}
	println("=====================END=======================")
}
// ReversedX bundles a set of login credentials with the twitter-scraper
// instance that is used to log in and to supply session cookies.
type ReversedX struct {
	uname, upwd string           // credentials handed to scraper.Login
	scraper     *SCRAPER.Scraper // session holder; also resolves screen names to user IDs
}
// New returns a ReversedX that stores the given user name and password and
// owns a freshly created scraper. It does not log in.
func New(uname string, upwd string) ReversedX {
	var x ReversedX
	x.uname = uname
	x.upwd = upwd
	x.scraper = SCRAPER.New()
	return x
}
// login passes the stored user name and password to the scraper's Login
// method and returns whatever error it reports.
func (x *ReversedX) login() error {
	session := x.scraper
	err := session.Login(x.uname, x.upwd)
	return err
}
// IsLoggedIn reports the login state as seen by the underlying scraper.
func (x *ReversedX) IsLoggedIn() bool {
	loggedIn := x.scraper.IsLoggedIn()
	return loggedIn
}
// GetFollowingsByScreenName resolves user (a screen name) to a user ID via the
// scraper and returns the result of GetFollowingsById for that ID and cursor.
//
// If the screen name cannot be resolved, the error is written to standard
// error and (nil, nil) is returned, so no request is made with an empty user
// ID.
func (x *ReversedX) GetFollowingsByScreenName(user string, cursor *string) (resp []Legacy, nextCursor *string) {
	uid, err := x.scraper.GetUserIDByScreenName(user)
	if err != nil {
		fmt.Fprintf(os.Stderr, "resolving user id for %q: %v\n", user, err)
		return nil, nil
	}
	return x.GetFollowingsById(uid, cursor)
}
// GetFollowingsById requests one page of the "Following" timeline for the user
// with ID uid and returns the Legacy record of every user item in the
// response.
//
// cursor selects the page; nil requests the first page. nextCursor is the
// value of the last entry whose cursorType is "Bottom", or nil when the
// response contains no such entry.
//
// On success resp is non-nil (possibly empty). If the request cannot be built,
// the HTTP call fails, or the server answers with an error status, the problem
// is written to standard error and (nil, nil) is returned. The input cursor is
// never echoed back, so a caller looping on nextCursor cannot get stuck on the
// same page.
func (x *ReversedX) GetFollowingsById(uid string, cursor *string) (resp []Legacy, nextCursor *string) {
	cookiesStr, csrfToken := cookieHeader(x.scraper.GetCookies())

	endpoint, err := followingURL(uid, cursor)
	if err != nil {
		fmt.Fprintf(os.Stderr, "building following request for user %q: %v\n", uid, err)
		return nil, nil
	}

	var response Response
	// All header values except cookie and x-csrf-token are hard-coded.
	httpResp, err := resty.New().
		R().
		SetHeaders(
			StringMap{
				"authority":                 "twitter.com",
				"accept":                    "*/*",
				"accept-language":           "zh-CN,zh;q=0.9,en;q=0.8",
				"authorization":             "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA",
				"content-type":              "application/json",
				"cookie":                    cookiesStr,
				"sec-ch-ua":                 `"Chromium";v="116", "Not)A;Brand";v="24", "Google Chrome";v="116"`,
				"sec-ch-ua-mobile":          "?0",
				"sec-ch-ua-platform":        `"macOS"`,
				"sec-fetch-dest":            "empty",
				"sec-fetch-mode":            "cors",
				"sec-fetch-site":            "same-origin",
				"user-agent":                "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
				"x-client-transaction-id":   "b8qgMoxBUsxfsTLOsISkGvJ9/Atx8/2g/teNmizHQONJLNnUNCKMBxHt2eRlE6jkzOuM9G/tZ/mwb0KT9PAok5bnqm6vbg",
				"x-client-uuid":             "cc8bdbff-b377-4ffd-b53e-5767f6e50ba4",
				"x-csrf-token":              csrfToken,
				"x-twitter-active-user":     "yes",
				"x-twitter-auth-type":       "OAuth2Session",
				"x-twitter-client-language": "en",
			},
		).
		SetResult(&response).
		Get(endpoint)
	if err != nil {
		fmt.Fprintf(os.Stderr, "requesting followings of user %q: %v\n", uid, err)
		return nil, nil
	}
	if httpResp.IsError() {
		fmt.Fprintf(os.Stderr, "requesting followings of user %q: unexpected status %s\n", uid, httpResp.Status())
		return nil, nil
	}

	resp = make([]Legacy, 0)
	for _, instruction := range response.Data.User.Result.Timeline.Timeline.Instructions {
		for _, entry := range instruction.Entries {
			content := entry.Content
			if content.CursorType != nil && *content.CursorType == "Bottom" {
				nextCursor = content.Value
			}
			if content.ItemContent == nil {
				continue
			}
			resp = append(resp, content.ItemContent.UserResults.Result.Legacy)
		}
	}
	return resp, nextCursor
}

// cookieHeader renders cookies as the value of a Cookie request header and
// returns the value of the cookie named "ct0" (empty when there is none).
func cookieHeader(cookies []*http.Cookie) (header string, csrfToken string) {
	pairs := make([]string, 0, len(cookies))
	for _, c := range cookies {
		if c == nil {
			continue
		}
		if c.Name == "ct0" {
			csrfToken = c.Value
		}
		// Cookie.String emits Set-Cookie attributes (Domain, Path, ...) when
		// any are set; a request header must carry only name=value, so
		// serialize a copy that has just those two fields.
		pair := (&http.Cookie{Name: c.Name, Value: c.Value}).String()
		if pair == "" {
			// String returns "" for an invalid cookie name.
			continue
		}
		pairs = append(pairs, pair)
	}
	return strings.Join(pairs, "; "), csrfToken
}

// followingURL builds the GraphQL URL for one page of the "Following"
// timeline of user uid. A non-nil cursor is sent as the "cursor" variable.
func followingURL(uid string, cursor *string) (string, error) {
	variables := T{
		"userId":                 uid,
		"count":                  20,
		"includePromotedContent": false,
	}
	if cursor != nil {
		variables["cursor"] = *cursor
	}
	features := T{
		"rweb_lists_timeline_redesign_enabled":                                    true,
		"responsive_web_graphql_exclude_directive_enabled":                        true,
		"verified_phone_label_enabled":                                            false,
		"creator_subscriptions_tweet_preview_api_enabled":                         true,
		"responsive_web_graphql_timeline_navigation_enabled":                      true,
		"responsive_web_graphql_skip_user_profile_image_extensions_enabled":       false,
		"tweetypie_unmention_optimization_enabled":                                true,
		"responsive_web_edit_tweet_api_enabled":                                   true,
		"graphql_is_translatable_rweb_tweet_is_translatable_enabled":              true,
		"view_counts_everywhere_api_enabled":                                      true,
		"longform_notetweets_consumption_enabled":                                 true,
		"responsive_web_twitter_article_tweet_consumption_enabled":                false,
		"tweet_awards_web_tipping_enabled":                                        false,
		"freedom_of_speech_not_reach_fetch_enabled":                               true,
		"standardized_nudges_misinfo":                                             true,
		"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled": true,
		"longform_notetweets_rich_text_read_enabled":                              true,
		"longform_notetweets_inline_media_enabled":                                true,
		"responsive_web_media_download_video_enabled":                             false,
		"responsive_web_enhance_cards_enabled":                                    false,
	}

	variablesJSON, err := json.Marshal(variables)
	if err != nil {
		return "", fmt.Errorf("encoding variables: %w", err)
	}
	featuresJSON, err := json.Marshal(features)
	if err != nil {
		return "", fmt.Errorf("encoding features: %w", err)
	}

	// Set the values directly rather than parsing a hand-built query string:
	// raw JSON containing '&', ';', '%' or '+' would be split or mis-decoded
	// by ParseQuery.
	query := urlutil.Values{}
	query.Set("variables", string(variablesJSON))
	query.Set("features", string(featuresJSON))

	return "https://twitter.com/i/api/graphql/" + Following.Path() + "?" + query.Encode(), nil
}
// Generated by https://quicktype.io

type (
	// Response is the top-level JSON document; its payload sits under "data".
	Response struct {
		Data Data `json:"data"`
	}

	// Data wraps the "user" object of the response.
	Data struct {
		User UserClass `json:"user"`
	}

	// UserClass wraps the "result" object of the user.
	UserClass struct {
		Result UserResult `json:"result"`
	}

	// UserResult carries the type name of the result and its outer "timeline"
	// object.
	UserResult struct {
		Typename UserDisplayTypeEnum `json:"__typename"`
		Timeline ResultTimeline      `json:"timeline"`
	}

	// ResultTimeline is the outer "timeline" object; it wraps a nested object
	// of the same name.
	ResultTimeline struct {
		Timeline TimelineTimeline `json:"timeline"`
	}

	// TimelineTimeline is the inner "timeline" object and holds the list of
	// instructions.
	TimelineTimeline struct {
		Instructions []Instruction `json:"instructions"`
	}

	// Instruction is one element of the "instructions" array. Entries holds
	// the timeline entries attached to it; Direction is nil when the key is
	// absent.
	Instruction struct {
		Type      string  `json:"type"`
		Direction *string `json:"direction,omitempty"`
		Entries   []Entry `json:"entries"`
	}
)
type (
	// Entry is one element of an instruction's "entries" array.
	Entry struct {
		EntryID   string  `json:"entryId"`
		SortIndex string  `json:"sortIndex"`
		Content   Content `json:"content"`
	}

	// Content is the "content" object of an entry. The pointer fields are nil
	// when the corresponding key is absent: ItemContent is set on entries that
	// carry a user, Value and CursorType on cursor entries.
	Content struct {
		EntryType       EntryTypeEnum    `json:"entryType"`
		Typename        EntryTypeEnum    `json:"__typename"`
		ItemContent     *ItemContent     `json:"itemContent,omitempty"`
		ClientEventInfo *ClientEventInfo `json:"clientEventInfo,omitempty"`
		Value           *string          `json:"value,omitempty"`
		CursorType      *string          `json:"cursorType,omitempty"`
	}

	// ClientEventInfo is the "clientEventInfo" object of an entry's content.
	ClientEventInfo struct {
		Component Component `json:"component"`
		Element   Element   `json:"element"`
	}

	// ItemContent is the "itemContent" object of an entry's content; the user
	// it refers to is found under "user_results".
	ItemContent struct {
		ItemType        ItemTypeEnum        `json:"itemType"`
		Typename        ItemTypeEnum        `json:"__typename"`
		UserResults     UserResults         `json:"user_results"`
		UserDisplayType UserDisplayTypeEnum `json:"userDisplayType"`
	}
)
type (
	// UserResults wraps the "result" object found under "user_results".
	UserResults struct {
		Result UserResultsResult `json:"result"`
	}

	// UserResultsResult is one user as returned inside a timeline item. The
	// profile fields live in Legacy; Professional is nil when the key is
	// absent.
	UserResultsResult struct {
		Typename                   UserDisplayTypeEnum        `json:"__typename"`
		ID                         string                     `json:"id"`
		RESTID                     string                     `json:"rest_id"`
		AffiliatesHighlightedLabel AffiliatesHighlightedLabel `json:"affiliates_highlighted_label"`
		HasGraduatedAccess         bool                       `json:"has_graduated_access"`
		IsBlueVerified             bool                       `json:"is_blue_verified"`
		ProfileImageShape          ProfileImageShape          `json:"profile_image_shape"`
		Legacy                     Legacy                     `json:"legacy"`
		Professional               *Professional              `json:"professional,omitempty"`
	}

	// AffiliatesHighlightedLabel is the "affiliates_highlighted_label" object.
	// No fields are declared for it.
	AffiliatesHighlightedLabel struct{}
)
// Legacy is the "legacy" object of a user result and holds the profile
// fields of that user (screen name, display name, counters, ...). Pointer
// fields are nil when the corresponding key is absent from the JSON.
type Legacy struct {
	FollowedBy              bool           `json:"followed_by"`
	CanDm                   bool           `json:"can_dm"`
	CanMediaTag             bool           `json:"can_media_tag"`
	CreatedAt               string         `json:"created_at"`
	DefaultProfile          bool           `json:"default_profile"`
	DefaultProfileImage     bool           `json:"default_profile_image"`
	Description             string         `json:"description"`
	Entities                Entities       `json:"entities"`
	FastFollowersCount      int64          `json:"fast_followers_count"`
	FavouritesCount         int64          `json:"favourites_count"`
	FollowersCount          int64          `json:"followers_count"`
	FriendsCount            int64          `json:"friends_count"`
	HasCustomTimelines      bool           `json:"has_custom_timelines"`
	IsTranslator            bool           `json:"is_translator"`
	ListedCount             int64          `json:"listed_count"`
	Location                string         `json:"location"`
	MediaCount              int64          `json:"media_count"`
	Name                    string         `json:"name"`
	NormalFollowersCount    int64          `json:"normal_followers_count"`
	PinnedTweetIDSStr       []string       `json:"pinned_tweet_ids_str"`
	PossiblySensitive       bool           `json:"possibly_sensitive"`
	ProfileBannerURL        *string        `json:"profile_banner_url,omitempty"`
	ProfileImageURLHTTPS    string         `json:"profile_image_url_https"`
	ProfileInterstitialType string         `json:"profile_interstitial_type"`
	ScreenName              string         `json:"screen_name"`
	StatusesCount           int64          `json:"statuses_count"`
	TranslatorType          TranslatorType `json:"translator_type"`
	Verified                bool           `json:"verified"`
	WantRetweets            bool           `json:"want_retweets"`
	WithheldInCountries     []any          `json:"withheld_in_countries"`
	URL                     *string        `json:"url,omitempty"`
	Following               *bool          `json:"following,omitempty"`
}
type (
	// Entities is the "entities" object of a user's legacy record. URL is nil
	// when the "url" key is absent.
	Entities struct {
		Description Description  `json:"description"`
		URL         *Description `json:"url,omitempty"`
	}

	// Description holds a list of URL records under the "urls" key.
	Description struct {
		Urls []URL `json:"urls"`
	}

	// URL is one element of a "urls" array: the link in its display, expanded
	// and raw forms, plus an "indices" array.
	URL struct {
		DisplayURL  string  `json:"display_url"`
		ExpandedURL string  `json:"expanded_url"`
		URL         string  `json:"url"`
		Indices     []int64 `json:"indices"`
	}

	// Professional is the optional "professional" object of a user result.
	Professional struct {
		RESTID           string     `json:"rest_id"`
		ProfessionalType string     `json:"professional_type"`
		Category         []Category `json:"category"`
	}

	// Category is one element of a professional profile's "category" array.
	Category struct {
		ID       int64  `json:"id"`
		Name     string `json:"name"`
		IconName string `json:"icon_name"`
	}
)
// Component is the string type of ClientEventInfo's "component" field.
type Component string

// FollowersSgs is the only Component value declared here.
const FollowersSgs Component = "FollowersSgs"

// Element is the string type of ClientEventInfo's "element" field.
type Element string

// ElementUser is the only Element value declared here.
const ElementUser Element = "user"

// EntryTypeEnum is the string type of an entry content's "entryType" and
// "__typename" fields.
type EntryTypeEnum string

// Declared EntryTypeEnum values.
const (
	TimelineTimelineCursor EntryTypeEnum = "TimelineTimelineCursor"
	TimelineTimelineItem   EntryTypeEnum = "TimelineTimelineItem"
)

// ItemTypeEnum is the string type of an item content's "itemType" and
// "__typename" fields.
type ItemTypeEnum string

// TimelineUser is the only ItemTypeEnum value declared here.
const TimelineUser ItemTypeEnum = "TimelineUser"

// UserDisplayTypeEnum is the string type of the "userDisplayType" field and
// of the "__typename" field on user results.
type UserDisplayTypeEnum string

// User is the only UserDisplayTypeEnum value declared here.
const User UserDisplayTypeEnum = "User"

// TranslatorType is the string type of a legacy record's "translator_type"
// field.
type TranslatorType string

// None is the only TranslatorType value declared here.
const None TranslatorType = "none"

// ProfileImageShape is the string type of a user result's
// "profile_image_shape" field.
type ProfileImageShape string

// Circle is the only ProfileImageShape value declared here.
const Circle ProfileImageShape = "Circle"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment