
Scraping LinkedIn Company Data

Learn how to scrape public LinkedIn company pages to extract business information such as the company name, description, industry, company size, employee count, and headquarters location.

Overview

LinkedIn company pages contain valuable business data. ScrapingForge can extract:

  • Company name and description
  • Industry and specialties
  • Company size and employee count
  • Headquarters location
  • Website URL
  • Social media links
Important: Public Data Only

This guide covers scraping public company pages only. Do not attempt to scrape:

  • Personal profiles requiring login
  • Private company data
  • Employee listings (requires authentication)

Scraping authenticated content violates LinkedIn's terms of service.

Data Structure

Field           Description
name            Company name
description     Company description/about text
website         Company website URL
industry        Primary industry
company_size    Employee count range
headquarters    Location/address
founded         Year founded
specialties     Areas of expertise
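
For typed downstream code, these fields map naturally onto a small Python dataclass. The sketch below is illustrative (the CompanyProfile class is not part of the API); every field is optional because a selector that fails to match may simply be absent from the response:

from dataclasses import dataclass, fields
from typing import Optional

@dataclass
class CompanyProfile:
    # Mirrors the field table above; all values arrive as plain text.
    name: Optional[str] = None
    description: Optional[str] = None
    website: Optional[str] = None
    industry: Optional[str] = None
    company_size: Optional[str] = None
    headquarters: Optional[str] = None
    founded: Optional[str] = None
    specialties: Optional[str] = None

    @classmethod
    def from_api(cls, data: dict) -> "CompanyProfile":
        # Ignore any extra keys the API response might contain.
        known = {f.name for f in fields(cls)}
        return cls(**{k: v for k, v in data.items() if k in known})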

Python Example

import requests
import json

api_key = "sf_your_api_key"
url = "https://api.scrapingforge.com/api/v1/scraper"

# LinkedIn company URL (public page)
company_url = "https://www.linkedin.com/company/microsoft"

payload = {
    "url": company_url,
    "render_js": True,
    "premium_proxy": True,
    "country": "US",
    "wait_for": "h1",
    "window_width": 1920,
    "window_height": 1080,
    "custom_headers": {
        "Spf-User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"
    },
    "extract_rules": {
        "name": {
            "selector": "h1.org-top-card-summary__title",
            "type": "text"
        },
        "description": {
            "selector": "p.org-top-card-summary__tagline",
            "type": "text"
        },
        "website": {
            "selector": "a.org-top-card-primary-actions__action",
            "type": "attr",
            "attr": "href"
        },
        "industry": {
            "selector": ".org-top-card-summary-info-list__info-item:nth-child(1)",
            "type": "text"
        },
        "company_size": {
            "selector": ".org-top-card-summary-info-list__info-item:nth-child(2)",
            "type": "text"
        },
        "headquarters": {
            "selector": ".org-top-card-summary-info-list__info-item:nth-child(3)",
            "type": "text"
        },
        "about": {
            "selector": "section.org-page-details-module__card-spacing p",
            "type": "text"
        },
        "specialties": {
            "selector": ".org-page-details__definition-text",
            "type": "text"
        }
    }
}

headers = {
    "Authorization": f"Bearer {api_key}",
    "Content-Type": "application/json"
}

response = requests.post(url, json=payload, headers=headers)

if response.status_code == 200:
    company_data = response.json()
    print(json.dumps(company_data, indent=2))
else:
    print(f"Error: {response.status_code}")
    print(response.text)
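
The extracted values come back as raw page text, so some light post-processing usually helps. The sketch below is illustrative (parse_company_size is a hypothetical helper, not an API feature) and shows how you might trim whitespace and pull a numeric lower bound out of a size string like "10,001+ employees":

import re

def parse_company_size(size_text):
    """Hypothetical helper: extract the numeric lower bound from strings
    such as '10,001+ employees' or '1,001-5,000 employees'."""
    match = re.search(r"[\d,]+", size_text or "")
    return int(match.group(0).replace(",", "")) if match else None

# Example record shaped like the extract_rules output above.
company_data = {"name": " Microsoft ", "company_size": "10,001+ employees"}
cleaned = {k: v.strip() if isinstance(v, str) else v for k, v in company_data.items()}
cleaned["min_employees"] = parse_company_size(cleaned["company_size"])
print(cleaned)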

Node.js Example

const axios = require('axios');

const apiKey = 'sf_your_api_key';
const apiUrl = 'https://api.scrapingforge.com/api/v1/scraper';

// LinkedIn company URL (public page)
const companyUrl = 'https://www.linkedin.com/company/microsoft';

const payload = {
  url: companyUrl,
  render_js: true,
  premium_proxy: true,
  country: 'US',
  wait_for: 'h1',
  window_width: 1920,
  window_height: 1080,
  custom_headers: {
    'Spf-User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
  },
  extract_rules: {
    name: {
      selector: 'h1.org-top-card-summary__title',
      type: 'text'
    },
    description: {
      selector: 'p.org-top-card-summary__tagline',
      type: 'text'
    },
    website: {
      selector: 'a.org-top-card-primary-actions__action',
      type: 'attr',
      attr: 'href'
    },
    industry: {
      selector: '.org-top-card-summary-info-list__info-item:nth-child(1)',
      type: 'text'
    },
    company_size: {
      selector: '.org-top-card-summary-info-list__info-item:nth-child(2)',
      type: 'text'
    },
    headquarters: {
      selector: '.org-top-card-summary-info-list__info-item:nth-child(3)',
      type: 'text'
    },
    about: {
      selector: 'section.org-page-details-module__card-spacing p',
      type: 'text'
    }
  }
};

axios.post(apiUrl, payload, {
  headers: {
    'Authorization': `Bearer ${apiKey}`,
    'Content-Type': 'application/json'
  }
})
.then(response => {
  console.log(JSON.stringify(response.data, null, 2));
})
.catch(error => {
  console.error('Error:', error.response?.data || error.message);
});

Response Example

{
  "name": "Microsoft",
  "description": "Every company has a mission. What's ours? To empower every person and every organization to achieve more.",
  "website": "https://www.microsoft.com",
  "industry": "Software Development",
  "company_size": "10,001+ employees",
  "headquarters": "Redmond, Washington",
  "about": "At Microsoft, our mission is to empower every person and every organization on the planet to achieve more. Our mission is grounded in both the world in which we live and the future we strive to create...",
  "specialties": "Business Software, Developer Tools, Cloud Computing, Artificial Intelligence, Gaming, Hardware"
}
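
If you are collecting several companies, it is convenient to persist each record as you go. Here is a minimal sketch that appends one row per company to a CSV file, assuming records shaped like the response above (missing fields are written as empty cells):

import csv
import os

FIELDS = ["name", "description", "website", "industry",
          "company_size", "headquarters", "founded", "specialties"]

def append_company_row(record, path="companies.csv"):
    """Append one company record to a CSV file, writing the header
    only when the file does not exist yet."""
    write_header = not os.path.exists(path)
    with open(path, "a", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=FIELDS, extrasaction="ignore")
        if write_header:
            writer.writeheader()
        writer.writerow({k: record.get(k, "") for k in FIELDS})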

Bulk Company Scraping

For scraping multiple companies, use asynchronous jobs with rate limiting:

import json
import requests
import time

api_key = "sf_your_api_key"
base_url = "https://api.scrapingforge.com/api/v1/scraper"

companies = [
    "microsoft",
    "google",
    "apple",
    "amazon",
    "meta"
]

# Submit jobs with delays
job_ids = []
for company_slug in companies:
    company_url = f"https://www.linkedin.com/company/{company_slug}"

    payload = {
        "url": company_url,
        "render_js": True,
        "premium_proxy": True,
        "country": "US",
        "ai_query": "Extract company name, description, industry, size, and location"
    }

    response = requests.post(
        f"{base_url}/jobs",
        json=payload,
        headers={"Authorization": f"Bearer {api_key}"}
    )

    job_id = response.json()["job_id"]
    job_ids.append((company_slug, job_id))
    print(f"Submitted job for {company_slug}: {job_id}")

    # Rate limiting: wait between submissions
    time.sleep(5)

# Wait for all jobs to complete
print("\nWaiting for jobs to complete...")
time.sleep(60)

# Fetch results
companies_data = []
for company_slug, job_id in job_ids:
    try:
        # Check status
        status_response = requests.get(
            f"{base_url}/jobs/{job_id}",
            headers={"Authorization": f"Bearer {api_key}"}
        )
        status = status_response.json()["status"]

        if status == "completed":
            # Get result
            result_response = requests.get(
                f"{base_url}/jobs/{job_id}/result",
                headers={"Authorization": f"Bearer {api_key}"}
            )
            company_data = result_response.json()
            company_data["slug"] = company_slug
            companies_data.append(company_data)
            print(f"✓ Scraped {company_slug}")
        else:
            print(f"✗ Job {job_id} for {company_slug} is {status}")

    except Exception as e:
        print(f"✗ Error fetching {company_slug}: {e}")

# Save to file
with open("companies.json", "w") as f:
    json.dump(companies_data, f, indent=2)

print(f"\nScraped {len(companies_data)}/{len(companies)} companies")

Best Practices

Rate Limiting & Compliance

LinkedIn has strict anti-scraping measures:

  • Always use residential proxies (premium_proxy: true)
  • Add delays: Minimum 10 seconds between requests (see the sketch after this list)
  • Rotate user agents: Use varied browser fingerprints
  • Public data only: Never scrape authenticated pages
  • Respect robots.txt: Only scrape publicly accessible pages
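
To put the delay and user-agent advice into practice, you can randomize both per request. The sketch below is illustrative — the user-agent strings are placeholders, and it assumes (as in the examples above) that the user agent is passed via the Spf-User-Agent custom header:

import random
import time

# Placeholder pool of desktop user agents; substitute your own fingerprints.
USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36",
]

def build_payload(company_url):
    """Build a request payload with a randomly chosen user agent."""
    return {
        "url": company_url,
        "render_js": True,
        "premium_proxy": True,
        "country": "US",
        "custom_headers": {"Spf-User-Agent": random.choice(USER_AGENTS)},
    }

def polite_sleep(minimum=10, jitter=5):
    """Sleep at least `minimum` seconds plus random jitter between requests."""
    time.sleep(minimum + random.uniform(0, jitter))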

Cost Estimation

Per company page:

  • Base: 1 credit
  • JS rendering: 5 credits
  • Premium proxy: 15 credits
  • Total: ~21 credits

With AI extraction: +10 credits (total: ~31 credits)
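
As a worked example of the arithmetic, scraping the five companies from the bulk script above with AI extraction costs roughly 5 × 31 ≈ 155 credits. A throwaway helper (not an API feature, just the per-page estimates above) makes it easy to budget larger runs:

def estimate_credits(num_pages, render_js=True, premium_proxy=True, ai_extraction=False):
    """Rough credit estimate per run, using the per-page figures above."""
    per_page = 1                                   # base
    per_page += 5 if render_js else 0              # JS rendering
    per_page += 15 if premium_proxy else 0         # premium proxy
    per_page += 10 if ai_extraction else 0         # AI extraction
    return num_pages * per_page

print(estimate_credits(5, ai_extraction=True))     # 155
print(estimate_credits(100))                       # 2100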

Troubleshooting

Page Not Loading

If the page doesn't load properly:

payload = {
    "url": company_url,
    "render_js": True,
    "premium_proxy": True,
    "wait": 5000,  # Wait 5 seconds after page load
    "wait_for": "h1.org-top-card-summary__title",  # Wait for specific element
    "window_width": 1920,
    "window_height": 1080
}
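
If pages still fail intermittently, wrapping the request in a retry with exponential backoff usually helps. A sketch follows; the attempt count and backoff values are arbitrary starting points, and url, payload, and headers are the variables from the Python example above:

import time
import requests

def scrape_with_retries(url, payload, headers, max_attempts=3):
    """Retry transient failures (network errors or non-200 responses)
    with exponential backoff: 5s, then 10s, then 20s."""
    for attempt in range(max_attempts):
        try:
            response = requests.post(url, json=payload, headers=headers, timeout=120)
            if response.status_code == 200:
                return response.json()
            print(f"Attempt {attempt + 1} returned {response.status_code}")
        except requests.RequestException as exc:
            print(f"Attempt {attempt + 1} failed: {exc}")
        if attempt < max_attempts - 1:
            time.sleep(5 * (2 ** attempt))
    return None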

Selectors Changed

LinkedIn frequently updates their HTML. If selectors stop working:

  1. Use AI extraction instead of CSS selectors (see the sketch below)
  2. Inspect the page to find new selectors
  3. Use more generic selectors when possible
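
The bulk example above already uses the ai_query parameter; the same approach works for a single page and avoids brittle selectors entirely. A minimal sketch, reusing the same endpoint and placeholder API key as the Python example:

import requests

api_key = "sf_your_api_key"
url = "https://api.scrapingforge.com/api/v1/scraper"

payload = {
    "url": "https://www.linkedin.com/company/microsoft",
    "render_js": True,
    "premium_proxy": True,
    "country": "US",
    # Describe the fields in plain language instead of maintaining CSS selectors.
    "ai_query": "Extract company name, description, industry, size, and location"
}

response = requests.post(
    url,
    json=payload,
    headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"}
)
print(response.json())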

Next Steps