forked from KaKi87/scraper-instagram-v1
parent
1537cb41a5
commit
ac27738671
|
@ -78,3 +78,6 @@ typings/
|
|||
|
||||
# JetBrains
|
||||
.idea
|
||||
|
||||
# Tests folder
|
||||
tests
|
||||
|
|
214
README.md
214
README.md
|
@ -16,7 +16,7 @@ Insta.getProfile('tiana_kaki', profile => {
|
|||
{
|
||||
"id": "6965336095",
|
||||
"name": "Tiana",
|
||||
"pic": "https://scontent-mrs1-1.cdninstagram.com/....jpg",
|
||||
"pic": "https://scontent-mrs1-1.cdninstagram.com/vp/343360e2c822f78d6d302a1847db1833/5D2F4763/t51.2885-19/s320x320/26268757_235337373674020_3943053532785016832_n.jpg?_nc_ht=scontent-mrs1-1.cdninstagram.com",
|
||||
"bio": "",
|
||||
"private": false,
|
||||
"verified": false,
|
||||
|
@ -27,54 +27,222 @@ Insta.getProfile('tiana_kaki', profile => {
|
|||
"lastPosts": [
|
||||
"Buth7q-nTnW",
|
||||
"BtwBWbmnXtx",
|
||||
"BtHJSmMhiDY"
|
||||
]
|
||||
"BtHJSmMhiDY",
|
||||
"BtHI5W9hnIC",
|
||||
"Bswe8NNH2zM",
|
||||
"BsTROEhH_vF",
|
||||
"BrNOGceHFVP",
|
||||
"BrNNjEpnPdb",
|
||||
"BrNM6qyHf-3",
|
||||
"BpkbkgpnjEw",
|
||||
"BnhdjA1HxuL",
|
||||
"BmIzQtoHGmf"
|
||||
],
|
||||
"link": "https://instagram.com/tiana_kaki"
|
||||
}
|
||||
```
|
||||
|
||||
### Get hashtag data
|
||||
|
||||
```js
|
||||
Insta.getHashtag('limousin', hashtag => {
|
||||
console.log(hashtag);
|
||||
});
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "17843780971049111",
|
||||
"pic": "https://scontent-mrs1-1.cdninstagram.com/vp/58f240d6db1e0e45c3a7f2bc27c00ea2/5D36D2D6/t51.2885-15/e35/s150x150/53584789_129121004880886_3869886162348160858_n.jpg?_nc_ht=scontent-mrs1-1.cdninstagram.com",
|
||||
"posts": 182557,
|
||||
"featuredPosts": [
|
||||
"Bvdj-5gF0GN",
|
||||
"BvZHGsrHh7F",
|
||||
"BvgO5A9AwiG",
|
||||
"Bu4B1l8BxTb",
|
||||
"Bu8NV9qlpRi",
|
||||
"BvgOruwgzsv",
|
||||
"BvZvpEnpaCE",
|
||||
"BvUhOgzAuse",
|
||||
"BvKTAT4l2JS"
|
||||
],
|
||||
"lastPosts": [
|
||||
"BvgSlCFBcTx",
|
||||
"BvgSHvDpTyb",
|
||||
"BvgRtV6Dmh3",
|
||||
"BvgRIPiFPnP",
|
||||
"BvgPKdlnw3a",
|
||||
"BvgPEnCHCbg",
|
||||
"BvgO5A9AwiG",
|
||||
"BvgOruwgzsv",
|
||||
"BvgNVk2DF4e",
|
||||
"BvgKPISB0MY",
|
||||
"BvgJKwahsC1",
|
||||
"BvgISY5puZl",
|
||||
"BvgHNxdgFKN",
|
||||
"BvgGqA3J23x",
|
||||
"BvgGmbSpq2g",
|
||||
"BvgFFgEJKbK",
|
||||
"BvgDznWHyUo",
|
||||
"BvgCwLOnQ5X",
|
||||
"BvgCEnVF1A6",
|
||||
"BvgA4VCHItL",
|
||||
"Bvf-4LLlf3H",
|
||||
"BvfhY3SFQ0H",
|
||||
"BvfWrfvgaiH",
|
||||
"BvfPVIaj2Un",
|
||||
"BvfKXQGpUhH",
|
||||
"BvfH4a6nl-n",
|
||||
"BvfHzRuhBzA",
|
||||
"BvfGheRJwpZ",
|
||||
"BvfFeB9A8an",
|
||||
"BvfEyF8pHrY",
|
||||
"BvfEDzDAm41",
|
||||
"BvfDhyZHCZw",
|
||||
"BvfDR7EFRgE",
|
||||
"BvfBGwLlx3C",
|
||||
"BvfApnTFaqv",
|
||||
"BvfAQmPpjFI",
|
||||
"Bve-j9ynlH4",
|
||||
"Bve-XuBp4VS",
|
||||
"Bve9DD4pLA1",
|
||||
"Bve8yWAl8QX",
|
||||
"Bve6y3egIYp",
|
||||
"Bve16KThvN7",
|
||||
"Bve5WlTAd61",
|
||||
"Bve5VQqgVUX",
|
||||
"Bve3_eJJuuv",
|
||||
"Bve2RFEHPON",
|
||||
"Bve1QpXlXSb",
|
||||
"Bve1EfKHNTj",
|
||||
"Bve0XaPHLWS",
|
||||
"Bve0I9KnX-P",
|
||||
"BvezvP7pSPL",
|
||||
"BvezjDjlKPM",
|
||||
"Bvezb0EHf_v",
|
||||
"BvezYCsArGN",
|
||||
"BvezTnbl-wK",
|
||||
"BvezK7jJmb0",
|
||||
"BveybI_Hr1m",
|
||||
"BvexjnKlb8l",
|
||||
"BvewSZlAQHI",
|
||||
"BvevMpXgVYR",
|
||||
"BvevJP2pxMh",
|
||||
"BveuxmcAhq7",
|
||||
"Bves_jzpeQ7",
|
||||
"BvesnHPJ5UT",
|
||||
"BvesY2PHs1c",
|
||||
"Bver2RhAAGj",
|
||||
"Bveq1e4FmPq",
|
||||
"Bveq0WaBMP3",
|
||||
"BveqsJJA2gL",
|
||||
"BZ3O78ijPQP",
|
||||
"BZwd7y6DwTe",
|
||||
"BZt4NeLDooK"
|
||||
],
|
||||
"link": "https://instagram.com/explore/tags/limousin/"
|
||||
}
|
||||
```
|
||||
|
||||
### Get post data
|
||||
|
||||
```js
|
||||
Insta.getPost('Buth7q-nTnW', post => {
|
||||
Insta.getPost('BrNM6qyHf-3', post => {
|
||||
console.log(post);
|
||||
});
|
||||
```
|
||||
|
||||
```json
|
||||
{
|
||||
"id": "1812187949221841859",
|
||||
"timestamp": 1530249659,
|
||||
"likes": 8,
|
||||
"comments": 0,
|
||||
"caption": "Vendredi dernier sur l'Ile du Souvenir du Parc de la Tête d'Or, à Lyon.",
|
||||
"location": {
|
||||
"name": "Parc de la Tête d'Or",
|
||||
"city": "Lyon, France"
|
||||
},
|
||||
"id": "1931256623437578167",
|
||||
"timestamp": 1544443751,
|
||||
"likes": 54,
|
||||
"comments": [
|
||||
{
|
||||
"user": "iamgeekcat",
|
||||
"content": "toi aussi tu l'a achetée 👏",
|
||||
"timestamp": 1545074290,
|
||||
"hashtags": null,
|
||||
"mentions": null,
|
||||
"likes": 1
|
||||
},
|
||||
{
|
||||
"user": "tiana_kaki",
|
||||
"content": "@iamgeekcat Ouais ^^",
|
||||
"timestamp": 1545079687,
|
||||
"hashtags": null,
|
||||
"mentions": [
|
||||
"@iamgeekcat"
|
||||
],
|
||||
"likes": 0
|
||||
}
|
||||
],
|
||||
"caption": "La montre connectée de #Xiaomi #MiBand3\nMerci @guillaume_slash pour #NightSight !",
|
||||
"hashtags": [
|
||||
"#Xiaomi",
|
||||
"#MiBand3",
|
||||
"#NightSight"
|
||||
],
|
||||
"mentions": [
|
||||
"@guillaume_slash"
|
||||
],
|
||||
"tagged": [],
|
||||
"location": null,
|
||||
"author": {
|
||||
"id": "6965336095",
|
||||
"username": "tiana_kaki",
|
||||
"name": "Tiana",
|
||||
"pic": "https://scontent-mrs1-1.cdninstagram.com/....jpg",
|
||||
"verified": false
|
||||
"pic": "https://scontent-mrs1-1.cdninstagram.com/vp/8123c0f64c3ef70d222a6a7a5379f87a/5D36A493/t51.2885-19/s150x150/26268757_235337373674020_3943053532785016832_n.jpg?_nc_ht=scontent-mrs1-1.cdninstagram.com",
|
||||
"verified": false,
|
||||
"link": "https://instagram.com/tiana_kaki"
|
||||
},
|
||||
"contents": [
|
||||
{
|
||||
"url": "https://scontent-mrs1-1.cdninstagram.com/....jpg",
|
||||
"type": "photo"
|
||||
},
|
||||
{
|
||||
"url": "https://scontent-mrs1-1.cdninstagram.com/....jpg",
|
||||
"url": "https://scontent-mrs1-1.cdninstagram.com/vp/96aff5be0bba30fb39994c35624f3bfb/5D4A7A95/t51.2885-15/e35/45881951_215607719355847_3477452604092009384_n.jpg?_nc_ht=scontent-mrs1-1.cdninstagram.com",
|
||||
"type": "photo"
|
||||
}
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
`comments = -1` when disabled.
|
||||
`comments` is `null` when comments are disabled by the author.
|
||||
|
||||
### Subscribe to posts
|
||||
|
||||
#### From user
|
||||
|
||||
```js
|
||||
Insta.subscribeUserPosts('tiana_kaki', posts => {
|
||||
console.log(posts);
|
||||
}, {
|
||||
interval: 0,
|
||||
lastPost: 'BrNM6qyHf-3'
|
||||
});
|
||||
```
|
||||
|
||||
`options` (the last parameter) is optional.
|
||||
|
||||
#### From hashtag
|
||||
|
||||
```js
|
||||
Insta.subscribeHashtagPosts('selfie', posts => {
|
||||
console.log(posts);
|
||||
}, {
|
||||
interval: 0
|
||||
});
|
||||
```
|
||||
|
||||
## Planned features
|
||||
|
||||
- Support for authentication (private profiles, stories)
|
||||
- Post comments list
|
||||
- Events : new/deleted post, comment, like
|
||||
- More events : deleted post, added/deleted comment & like
|
||||
|
||||
## Changelog
|
||||
|
||||
- `1.0.0` (2019-03-26) - Initial release
|
||||
- `1.0.1` (2019-03-27) - Added improvements & features
|
||||
- Fixed throw error scope
|
||||
- Fixed single photo post wrongly structured
|
||||
- Added support for comments
|
||||
- Added support for hashtags, mentions and tags in posts and comments
|
||||
- Added post subscription feature for users (untested) and hashtags
|
||||
|
|
129
index.js
129
index.js
|
@ -8,13 +8,15 @@ const
|
|||
|
||||
module.exports = {
|
||||
getProfile(username, callback){
|
||||
request(`${insta}/${username}`,(err, res, body) => {
|
||||
const link = `${insta}/${username}`;
|
||||
request(link,(err, res, body) => {
|
||||
if(res.statusCode === 404)
|
||||
throw new Error('Instagram user not found');
|
||||
body += `<script>document.querySelector('html').innerHTML = JSON.stringify(_sharedData.entry_data.ProfilePage[0].graphql.user)</script>`;
|
||||
let u;
|
||||
try {
|
||||
const user = JSON.parse(parse(body).body.textContent);
|
||||
callback({
|
||||
u = {
|
||||
id: user['id'],
|
||||
name: user['full_name'],
|
||||
pic: user['profile_pic_url_hd'],
|
||||
|
@ -26,36 +28,82 @@ module.exports = {
|
|||
following: user['edge_follow']['count'],
|
||||
posts: user['edge_owner_to_timeline_media']['count'],
|
||||
lastPosts: user['edge_owner_to_timeline_media']['edges']
|
||||
.map(post => post['node']['shortcode'])
|
||||
});
|
||||
.map(post => post['node']['shortcode']),
|
||||
link
|
||||
};
|
||||
} catch(e){ throw new Error('Instagram parsing error'); }
|
||||
callback(u);
|
||||
});
|
||||
},
|
||||
getHashtag(hashtag, callback){
|
||||
const link = `${insta}/explore/tags/${hashtag}/`;
|
||||
request(link, (err, res, body) => {
|
||||
if(res.statusCode === 404)
|
||||
throw new Error('Instagram hashtag not found');
|
||||
body += `<script>document.querySelector('html').innerHTML = JSON.stringify(_sharedData.entry_data.TagPage[0].graphql.hashtag)</script>`;
|
||||
let h;
|
||||
try {
|
||||
const hashtag = JSON.parse(parse(body).body.textContent);
|
||||
h = {
|
||||
id: hashtag['id'],
|
||||
pic: hashtag['profile_pic_url'],
|
||||
posts: hashtag['edge_hashtag_to_media']['count'],
|
||||
featuredPosts: hashtag['edge_hashtag_to_top_posts']['edges']
|
||||
.map(post => post['node']['shortcode']),
|
||||
lastPosts: hashtag['edge_hashtag_to_media']['edges']
|
||||
.map(post => post['node']['shortcode']),
|
||||
link
|
||||
}
|
||||
} catch(e){ throw new Error('Instagram parsing error'); }
|
||||
callback(h);
|
||||
});
|
||||
},
|
||||
getPost(shortcode, callback){
|
||||
request(`${insta}/p/${shortcode}`, (err, res, body) => {
|
||||
const link = `${insta}/p/${shortcode}`;
|
||||
request(link, (err, res, body) => {
|
||||
if(res.statusCode === 404)
|
||||
throw new Error('Instagram post not found');
|
||||
body += `<script>document.querySelector('html').innerHTML = JSON.stringify(_sharedData.entry_data.PostPage[0].graphql.shortcode_media)</script>`;
|
||||
let p = null;
|
||||
let p;
|
||||
try {
|
||||
const post = JSON.parse(parse(body).body.textContent);
|
||||
const
|
||||
post = JSON.parse(parse(body).body.textContent),
|
||||
caption = post['edge_media_to_caption']['edges'].length > 0
|
||||
? post['edge_media_to_caption']['edges'][0]['node']['text'] : null,
|
||||
username = post['owner']['username'],
|
||||
hashtagsRegex = /(?<=[\s>])#(\d*[A-Za-z_]+\d*)\b(?!;)/g,
|
||||
usernamesRegex = /@([A-Za-z0-9_](?:(?:[A-Za-z0-9_]|(?:\\.(?!\\.))){0,28}(?:[A-Za-z0-9_]))?)/g;
|
||||
p = {
|
||||
id: post['id'],
|
||||
timestamp: post['taken_at_timestamp'],
|
||||
likes: post['edge_media_preview_like']['count'],
|
||||
comments: post['comments_disabled'] ? -1 : post['edge_media_to_comment']['count'],
|
||||
caption: post['edge_media_to_caption']['edges'].length > 0 ? post['edge_media_to_caption']['edges'][0]['node']['text'] : null,
|
||||
comments: post['comments_disabled'] ? null : post['edge_media_to_comment']['edges']
|
||||
.map(c => ({
|
||||
user: c['node']['owner']['username'],
|
||||
content: c['node']['text'],
|
||||
timestamp: c['node']['created_at'],
|
||||
hashtags: c['node']['text'].match(hashtagsRegex),
|
||||
mentions: c['node']['text'].match(usernamesRegex),
|
||||
likes: c['node']['edge_liked_by']['count']
|
||||
})),
|
||||
caption,
|
||||
hashtags: caption ? caption.match(hashtagsRegex) : null,
|
||||
mentions: caption ? caption.match(usernamesRegex) : null,
|
||||
tagged: post['edge_media_to_tagged_user']['edges']
|
||||
.map(u => u['node']['user']['username']),
|
||||
location: post['location'] ? {
|
||||
name: post['location']['name'],
|
||||
city: JSON.parse(post['location']['address_json'])['city_name']
|
||||
} : null,
|
||||
author: {
|
||||
id: post['owner']['id'],
|
||||
username: post['owner']['username'],
|
||||
username,
|
||||
name: post['owner']['full_name'],
|
||||
pic: post['owner']['profile_pic_url'],
|
||||
verified: post['owner']['is_verified']
|
||||
}
|
||||
verified: post['owner']['is_verified'],
|
||||
link: `${insta}/${username}`
|
||||
},
|
||||
link
|
||||
};
|
||||
switch(post['__typename']){
|
||||
case 'GraphImage':
|
||||
|
@ -79,5 +127,62 @@ module.exports = {
|
|||
} catch(e){ throw new Error('Instagram parsing error'); }
|
||||
callback(p);
|
||||
});
|
||||
},
|
||||
subscribeUserPosts(username, onPosts, options){
|
||||
const interval = options['interval'] || 30;
|
||||
let lastPost = options['lastPost'] || null;
|
||||
const checkNewPosts = () => {
|
||||
module.exports.getProfile(username, profile => {
|
||||
const _lastPost = profile.lastPosts[0];
|
||||
if(_lastPost !== lastPost){
|
||||
lastPost = _lastPost;
|
||||
if(!_lastPost){
|
||||
onPosts(lastPost);
|
||||
setTimeout(checkNewPosts, interval);
|
||||
}
|
||||
else {
|
||||
const posts = [];
|
||||
for(let i = 0; i < profile.lastPosts.indexOf(lastPost); i++){
|
||||
posts.push(profile.lastPosts[i]);
|
||||
}
|
||||
onPosts(posts);
|
||||
setTimeout(checkNewPosts, interval);
|
||||
}
|
||||
lastPost = _lastPost;
|
||||
}
|
||||
else {
|
||||
setTimeout(checkNewPosts, interval);
|
||||
}
|
||||
});
|
||||
};
|
||||
checkNewPosts();
|
||||
},
|
||||
subscribeHashtagPosts(hashtag, onPosts, options){
|
||||
const interval = options['interval'] || 30;
|
||||
let lastPost = options['lastPost'] || null;
|
||||
const checkNewPosts = () => {
|
||||
module.exports.getHashtag(hashtag, hashtag => {
|
||||
const _lastPost = hashtag.lastPosts[0];
|
||||
if(_lastPost !== lastPost){
|
||||
if(!_lastPost){
|
||||
onPosts(lastPost);
|
||||
setTimeout(checkNewPosts, interval);
|
||||
}
|
||||
else {
|
||||
const posts = [];
|
||||
for(let i = 0; i < hashtag.lastPosts.indexOf(lastPost); i++){
|
||||
posts.push(hashtag.lastPosts[i]);
|
||||
}
|
||||
onPosts(posts);
|
||||
setTimeout(checkNewPosts, interval);
|
||||
}
|
||||
lastPost = _lastPost;
|
||||
}
|
||||
else {
|
||||
setTimeout(checkNewPosts, interval);
|
||||
}
|
||||
});
|
||||
};
|
||||
checkNewPosts();
|
||||
}
|
||||
};
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{
|
||||
"name": "@kaki87/ig-scraper",
|
||||
"version": "1.0.0",
|
||||
"version": "1.0.1",
|
||||
"description": "Instagrap scraper without authenticated API",
|
||||
"keywords": ["insta", "instagram", "ig", "scraper", "ig-scraper", "instagrap-scraper", "api", "scraper"],
|
||||
"homepage": "https://git.kaki87.net/KaKi87/ig-scraper",
|
||||
|
|
Loading…
Reference in New Issue