1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
|
import requests
from bs4 import BeautifulSoup
import pandas as pd
def crawl_orgchart(url):
    """Fetch one Jongno-gu community-center staff page and return it as a DataFrame.

    Parameters
    ----------
    url : str
        A ``findEmpList.do`` page URL for one dong (community center).

    Returns
    -------
    pandas.DataFrame
        Columns: 부서명 (department), 직위 (position), 전화번호 (phone),
        담당업무 (duty) — one row per staff member.

    Raises
    ------
    requests.HTTPError
        If the server responds with a 4xx/5xx status.
    requests.Timeout
        If the server does not respond within the timeout.
    """
    # Browser-like User-Agent: some public sites reject the default
    # python-requests UA.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/91.0.4472.124 Safari/537.36"
        )
    }
    # Fix: the original call had no timeout (a stalled server would hang the
    # whole crawl forever) and never checked the HTTP status (an error page
    # would silently parse into an empty DataFrame).
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    data = []
    # Department headings (h5.tit_05) and their staff tables appear in the
    # same document order, so zip pairs each heading with its table.
    departments = soup.select(".contents_db h5.tit_05")
    tables = soup.select(".contents_db table")
    for dept, table in zip(departments, tables):
        department_name = dept.text.strip()
        for row in table.select("tbody tr"):
            cells = row.find_all("td")
            # Skip layout/empty rows that lack the three data cells.
            if len(cells) >= 3:
                position = cells[0].get_text(strip=True)  # 직위 (position)
                duty = cells[1].get_text(strip=True)      # 담당업무 (duty)
                phone = cells[2].get_text(strip=True)     # 행정전화번호 (phone)
                data.append([department_name, position, phone, duty])

    return pd.DataFrame(data, columns=["부서명", "직위", "전화번호", "담당업무"])
if __name__ == "__main__":
urls = [
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=218292&menuNo=218292&dong=01",
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=228292&menuNo=228292&dong=02",
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=238292&menuNo=238292&dong=03",
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=248292&menuNo=248292&dong=04",
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=258292&menuNo=258292&dong=05",
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=268292&menuNo=268292&dong=06",
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=278292&menuNo=278292&dong=07",
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=288292&menuNo=288292&dong=08",
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=298292&menuNo=298292&dong=09",
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=308292&menuNo=308292&dong=10",
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=318292&menuNo=318292&dong=11",
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=328292&menuNo=328292&dong=12",
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=348292&menuNo=348292&dong=14",
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=358292&menuNo=358292&dong=15",
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=368292&menuNo=368292&dong=16",
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=378292&menuNo=378292&dong=17",
"https://www.jongno.go.kr/dong/member/findEmpList.do?menuId=388292&menuNo=388292&dong=18"
]
# 모든 URL에서 데이터 크롤링
dataframes = [crawl_orgchart(url) for url in urls]
# 데이터프레임 병합
df_combined = pd.concat(dataframes, ignore_index=True)
# 두 데이터프레임을 합치기
if not df_combined.empty:
print(df_combined)
df_combined.to_csv("종로구청_동주민센터.csv", index=False, encoding="utf-8-sig")
|
'Web 크롤링 > Python Crawling' 카테고리의 다른 글

| 글 제목 | 작성일 |
| --- | --- |
| 정부(행정안전부) 주소 검증 (0) | 2024.04.20 |
| 파이썬 selenium CentOS 7 환경설정 및 juso.go.kr 자료 파싱처리 (0) | 2024.03.25 |
| 파이썬 selenium 드라이버 설치 없이 사용하자. (0) | 2024.03.24 |
| 네이버 증권 정보 크롤링 예제 2 - 일별 시세 정보 가져오기 (0) | 2023.03.31 |
| 네이버 증권 정보 크롤링 예제 1 - 상장주식수 가져오기 (0) | 2023.03.26 |