Upgrade to 1.0.1

Cf. changelog in README.md
master 1.0.1
KaKi87 2019-03-27 09:55:32 +01:00
parent 1537cb41a5
commit ac27738671
4 changed files with 312 additions and 36 deletions

3
.gitignore vendored
View File

@ -78,3 +78,6 @@ typings/
# JetBrains
.idea
# Tests folder
tests

214
README.md
View File

@ -16,7 +16,7 @@ Insta.getProfile('tiana_kaki', profile => {
{
"id": "6965336095",
"name": "Tiana",
"pic": "https://scontent-mrs1-1.cdninstagram.com/....jpg",
"pic": "https://scontent-mrs1-1.cdninstagram.com/vp/343360e2c822f78d6d302a1847db1833/5D2F4763/t51.2885-19/s320x320/26268757_235337373674020_3943053532785016832_n.jpg?_nc_ht=scontent-mrs1-1.cdninstagram.com",
"bio": "",
"private": false,
"verified": false,
@ -27,54 +27,222 @@ Insta.getProfile('tiana_kaki', profile => {
"lastPosts": [
"Buth7q-nTnW",
"BtwBWbmnXtx",
"BtHJSmMhiDY"
]
"BtHJSmMhiDY",
"BtHI5W9hnIC",
"Bswe8NNH2zM",
"BsTROEhH_vF",
"BrNOGceHFVP",
"BrNNjEpnPdb",
"BrNM6qyHf-3",
"BpkbkgpnjEw",
"BnhdjA1HxuL",
"BmIzQtoHGmf"
],
"link": "https://instagram.com/tiana_kaki"
}
```
### Get hashtag data
```js
Insta.getHashtag('limousin', hashtag => {
console.log(hashtag);
});
```
```json
{
"id": "17843780971049111",
"pic": "https://scontent-mrs1-1.cdninstagram.com/vp/58f240d6db1e0e45c3a7f2bc27c00ea2/5D36D2D6/t51.2885-15/e35/s150x150/53584789_129121004880886_3869886162348160858_n.jpg?_nc_ht=scontent-mrs1-1.cdninstagram.com",
"posts": 182557,
"featuredPosts": [
"Bvdj-5gF0GN",
"BvZHGsrHh7F",
"BvgO5A9AwiG",
"Bu4B1l8BxTb",
"Bu8NV9qlpRi",
"BvgOruwgzsv",
"BvZvpEnpaCE",
"BvUhOgzAuse",
"BvKTAT4l2JS"
],
"lastPosts": [
"BvgSlCFBcTx",
"BvgSHvDpTyb",
"BvgRtV6Dmh3",
"BvgRIPiFPnP",
"BvgPKdlnw3a",
"BvgPEnCHCbg",
"BvgO5A9AwiG",
"BvgOruwgzsv",
"BvgNVk2DF4e",
"BvgKPISB0MY",
"BvgJKwahsC1",
"BvgISY5puZl",
"BvgHNxdgFKN",
"BvgGqA3J23x",
"BvgGmbSpq2g",
"BvgFFgEJKbK",
"BvgDznWHyUo",
"BvgCwLOnQ5X",
"BvgCEnVF1A6",
"BvgA4VCHItL",
"Bvf-4LLlf3H",
"BvfhY3SFQ0H",
"BvfWrfvgaiH",
"BvfPVIaj2Un",
"BvfKXQGpUhH",
"BvfH4a6nl-n",
"BvfHzRuhBzA",
"BvfGheRJwpZ",
"BvfFeB9A8an",
"BvfEyF8pHrY",
"BvfEDzDAm41",
"BvfDhyZHCZw",
"BvfDR7EFRgE",
"BvfBGwLlx3C",
"BvfApnTFaqv",
"BvfAQmPpjFI",
"Bve-j9ynlH4",
"Bve-XuBp4VS",
"Bve9DD4pLA1",
"Bve8yWAl8QX",
"Bve6y3egIYp",
"Bve16KThvN7",
"Bve5WlTAd61",
"Bve5VQqgVUX",
"Bve3_eJJuuv",
"Bve2RFEHPON",
"Bve1QpXlXSb",
"Bve1EfKHNTj",
"Bve0XaPHLWS",
"Bve0I9KnX-P",
"BvezvP7pSPL",
"BvezjDjlKPM",
"Bvezb0EHf_v",
"BvezYCsArGN",
"BvezTnbl-wK",
"BvezK7jJmb0",
"BveybI_Hr1m",
"BvexjnKlb8l",
"BvewSZlAQHI",
"BvevMpXgVYR",
"BvevJP2pxMh",
"BveuxmcAhq7",
"Bves_jzpeQ7",
"BvesnHPJ5UT",
"BvesY2PHs1c",
"Bver2RhAAGj",
"Bveq1e4FmPq",
"Bveq0WaBMP3",
"BveqsJJA2gL",
"BZ3O78ijPQP",
"BZwd7y6DwTe",
"BZt4NeLDooK"
],
"link": "https://instagram.com/explore/tags/limousin/"
}
```
### Get post data
```js
Insta.getPost('Buth7q-nTnW', post => {
Insta.getPost('BrNM6qyHf-3', post => {
console.log(post);
});
```
```json
{
"id": "1812187949221841859",
"timestamp": 1530249659,
"likes": 8,
"comments": 0,
"caption": "Vendredi dernier sur l'Ile du Souvenir du Parc de la Tête d'Or, à Lyon.",
"location": {
"name": "Parc de la Tête d'Or",
"city": "Lyon, France"
},
"id": "1931256623437578167",
"timestamp": 1544443751,
"likes": 54,
"comments": [
{
"user": "iamgeekcat",
"content": "toi aussi tu l'a achetée 👏",
"timestamp": 1545074290,
"hashtags": null,
"mentions": null,
"likes": 1
},
{
"user": "tiana_kaki",
"content": "@iamgeekcat Ouais ^^",
"timestamp": 1545079687,
"hashtags": null,
"mentions": [
"@iamgeekcat"
],
"likes": 0
}
],
"caption": "La montre connectée de #Xiaomi #MiBand3\nMerci @guillaume_slash pour #NightSight !",
"hashtags": [
"#Xiaomi",
"#MiBand3",
"#NightSight"
],
"mentions": [
"@guillaume_slash"
],
"tagged": [],
"location": null,
"author": {
"id": "6965336095",
"username": "tiana_kaki",
"name": "Tiana",
"pic": "https://scontent-mrs1-1.cdninstagram.com/....jpg",
"verified": false
"pic": "https://scontent-mrs1-1.cdninstagram.com/vp/8123c0f64c3ef70d222a6a7a5379f87a/5D36A493/t51.2885-19/s150x150/26268757_235337373674020_3943053532785016832_n.jpg?_nc_ht=scontent-mrs1-1.cdninstagram.com",
"verified": false,
"link": "https://instagram.com/tiana_kaki"
},
"contents": [
{
"url": "https://scontent-mrs1-1.cdninstagram.com/....jpg",
"type": "photo"
},
{
"url": "https://scontent-mrs1-1.cdninstagram.com/....jpg",
"url": "https://scontent-mrs1-1.cdninstagram.com/vp/96aff5be0bba30fb39994c35624f3bfb/5D4A7A95/t51.2885-15/e35/45881951_215607719355847_3477452604092009384_n.jpg?_nc_ht=scontent-mrs1-1.cdninstagram.com",
"type": "photo"
}
]
}
```
`comments = -1` when disabled.
`comments` is `null` when disabled by author.
### Subscribe to posts
#### From user
```js
Insta.subscribeUserPosts('tiana_kaki', posts => {
console.log(posts);
}, {
interval: 0,
lastPost: 'BrNM6qyHf-3'
});
```
`options` (the last parameter) is optional.
#### From hashtag
```js
Insta.subscribeHashtagPosts('selfie', posts => {
console.log(posts);
}, {
interval: 0
});
```
## Planned features
- Support for authentication (private profiles, stories)
- Post comments list
- Events : new/deleted post, comment, like
- More events : deleted post, added/deleted comment & like
## Changelog
- `1.0.0` (2019-03-26) - Initial release
- `1.0.1` (2019-03-27) - Added improvements & features
- Fixed throw error scope
- Fixed single photo post wrongly structured
- Added support for comments
- Added support for hashtags, mentions and tags in posts and comments
- Added posts subscriptions feature from users (untested) and hashtags

129
index.js
View File

@ -8,13 +8,15 @@ const
module.exports = {
getProfile(username, callback){
request(`${insta}/${username}`,(err, res, body) => {
const link = `${insta}/${username}`;
request(link,(err, res, body) => {
if(res.statusCode === 404)
throw new Error('Instagram user not found');
body += `<script>document.querySelector('html').innerHTML = JSON.stringify(_sharedData.entry_data.ProfilePage[0].graphql.user)</script>`;
let u;
try {
const user = JSON.parse(parse(body).body.textContent);
callback({
u = {
id: user['id'],
name: user['full_name'],
pic: user['profile_pic_url_hd'],
@ -26,36 +28,82 @@ module.exports = {
following: user['edge_follow']['count'],
posts: user['edge_owner_to_timeline_media']['count'],
lastPosts: user['edge_owner_to_timeline_media']['edges']
.map(post => post['node']['shortcode'])
});
.map(post => post['node']['shortcode']),
link
};
} catch(e){ throw new Error('Instagram parsing error'); }
callback(u);
});
},
getHashtag(hashtag, callback){
const link = `${insta}/explore/tags/${hashtag}/`;
request(link, (err, res, body) => {
if(res.statusCode === 404)
throw new Error('Instagram hashtag not found');
body += `<script>document.querySelector('html').innerHTML = JSON.stringify(_sharedData.entry_data.TagPage[0].graphql.hashtag)</script>`;
let h;
try {
const hashtag = JSON.parse(parse(body).body.textContent);
h = {
id: hashtag['id'],
pic: hashtag['profile_pic_url'],
posts: hashtag['edge_hashtag_to_media']['count'],
featuredPosts: hashtag['edge_hashtag_to_top_posts']['edges']
.map(post => post['node']['shortcode']),
lastPosts: hashtag['edge_hashtag_to_media']['edges']
.map(post => post['node']['shortcode']),
link
}
} catch(e){ throw new Error('Instagram parsing error'); }
callback(h);
});
},
getPost(shortcode, callback){
request(`${insta}/p/${shortcode}`, (err, res, body) => {
const link = `${insta}/p/${shortcode}`;
request(link, (err, res, body) => {
if(res.statusCode === 404)
throw new Error('Instagram post not found');
body += `<script>document.querySelector('html').innerHTML = JSON.stringify(_sharedData.entry_data.PostPage[0].graphql.shortcode_media)</script>`;
let p = null;
let p;
try {
const post = JSON.parse(parse(body).body.textContent);
const
post = JSON.parse(parse(body).body.textContent),
caption = post['edge_media_to_caption']['edges'].length > 0
? post['edge_media_to_caption']['edges'][0]['node']['text'] : null,
username = post['owner']['username'],
hashtagsRegex = /(?<=[\s>])#(\d*[A-Za-z_]+\d*)\b(?!;)/g,
usernamesRegex = /@([A-Za-z0-9_](?:(?:[A-Za-z0-9_]|(?:\\.(?!\\.))){0,28}(?:[A-Za-z0-9_]))?)/g;
p = {
id: post['id'],
timestamp: post['taken_at_timestamp'],
likes: post['edge_media_preview_like']['count'],
comments: post['comments_disabled'] ? -1 : post['edge_media_to_comment']['count'],
caption: post['edge_media_to_caption']['edges'].length > 0 ? post['edge_media_to_caption']['edges'][0]['node']['text'] : null,
comments: post['comments_disabled'] ? null : post['edge_media_to_comment']['edges']
.map(c => ({
user: c['node']['owner']['username'],
content: c['node']['text'],
timestamp: c['node']['created_at'],
hashtags: c['node']['text'].match(hashtagsRegex),
mentions: c['node']['text'].match(usernamesRegex),
likes: c['node']['edge_liked_by']['count']
})),
caption,
hashtags: caption ? caption.match(hashtagsRegex) : null,
mentions: caption ? caption.match(usernamesRegex) : null,
tagged: post['edge_media_to_tagged_user']['edges']
.map(u => u['node']['user']['username']),
location: post['location'] ? {
name: post['location']['name'],
city: JSON.parse(post['location']['address_json'])['city_name']
} : null,
author: {
id: post['owner']['id'],
username: post['owner']['username'],
username,
name: post['owner']['full_name'],
pic: post['owner']['profile_pic_url'],
verified: post['owner']['is_verified']
}
verified: post['owner']['is_verified'],
link: `${insta}/${username}`
},
link
};
switch(post['__typename']){
case 'GraphImage':
@ -79,5 +127,62 @@ module.exports = {
} catch(e){ throw new Error('Instagram parsing error'); }
callback(p);
});
},
subscribeUserPosts(username, onPosts, options){
const interval = options['interval'] || 30;
let lastPost = options['lastPost'] || null;
const checkNewPosts = () => {
module.exports.getProfile(username, profile => {
const _lastPost = profile.lastPosts[0];
if(_lastPost !== lastPost){
lastPost = _lastPost;
if(!_lastPost){
onPosts(lastPost);
setTimeout(checkNewPosts, interval);
}
else {
const posts = [];
for(let i = 0; i < profile.lastPosts.indexOf(lastPost); i++){
posts.push(profile.lastPosts[i]);
}
onPosts(posts);
setTimeout(checkNewPosts, interval);
}
lastPost = _lastPost;
}
else {
setTimeout(checkNewPosts, interval);
}
});
};
checkNewPosts();
},
subscribeHashtagPosts(hashtag, onPosts, options){
const interval = options['interval'] || 30;
let lastPost = options['lastPost'] || null;
const checkNewPosts = () => {
module.exports.getHashtag(hashtag, hashtag => {
const _lastPost = hashtag.lastPosts[0];
if(_lastPost !== lastPost){
if(!_lastPost){
onPosts(lastPost);
setTimeout(checkNewPosts, interval);
}
else {
const posts = [];
for(let i = 0; i < hashtag.lastPosts.indexOf(lastPost); i++){
posts.push(hashtag.lastPosts[i]);
}
onPosts(posts);
setTimeout(checkNewPosts, interval);
}
lastPost = _lastPost;
}
else {
setTimeout(checkNewPosts, interval);
}
});
};
checkNewPosts();
}
};

View File

@ -1,6 +1,6 @@
{
"name": "@kaki87/ig-scraper",
"version": "1.0.0",
"version": "1.0.1",
"description": "Instagram scraper without authenticated API",
"keywords": ["insta", "instagram", "ig", "scraper", "ig-scraper", "instagrap-scraper", "api", "scraper"],
"homepage": "https://git.kaki87.net/KaKi87/ig-scraper",