데일리 뉴스 텔레그램 봇

2021-03-06
데일리 뉴스 텔레그램 봇 만들기

파이썬 공부.
‘나도코딩’ 웹 스크래핑 활용 강의를 보고 데일리 뉴스 텔레그램 봇을 만들어보았다. 종합 뉴스 3개, IT 뉴스 3개, 오늘의 영어회화와 날씨 정보를 네이버에서 긁어와 하루 두 번 텔레그램으로 보내준다.
import requests, telegram, datetime, os
from bs4 import BeautifulSoup

headers = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.22 Safari/537.36'}

def create_soup(url):
  result = requests.get(url, headers=headers)
  result.raise_for_status()
  soup = BeautifulSoup(result.text, "lxml")
  return soup

# 1. 네이버 오늘의 {서울} 날씨(today_weather)
def today_weather(): 
  print("[오늘의 날씨]")
  city = "서울"

  weather_url = f"https://search.naver.com/search.naver?ie=UTF-8&query={city} +%EB%82%A0%EC%94%A8&sm=chr_hty"
  soup = create_soup(weather_url)
    
  weathers = soup.find("div", attrs={"class": "weather_area"}) # 일기예보 박스 수집
  today = weathers.find("div", attrs={"class": "main_info"})
  # 오늘 날씨
  today_weather = today.find("p", attrs={"class": "cast_txt"}).text
  today_temp = weathers.find("span", attrs={"class": "todaytemp"}).text # 현재 기온
  today_row = today.find("span", attrs={"class": "min"}).text  # 오늘 최저 기온
  today_high = today.find("span", attrs={"class": "max"}).text  # 오늘 최고 기온

  date_info = weathers.find("li", attrs={"class": "date_info today"})
  rainrate_am = date_info.find("span", {"class": "point_time morning"}).find("span",  {"class": "num"}).text # 오전 강수 확률
  rainrate_pm = date_info.find(
      "span", {"class": "point_time afternoon"}).find("span", {"class": "num"}).text #  오후 강수 확률

  dust = weathers.find("dl", attrs={"class": "indicator"})
  pm10 = dust.find_all("dd")[0].get_text()  # 미세먼지 정보
  pm25 = dust.find_all("dd")[1].get_text()  # 초미세먼지 정보

  message = f"[오늘의 날씨]\n{today_weather}\n현재 {today_temp}℃ (최저 {today_row} / 최고 {today_high})\n오전 강수확률 {rainrate_am}% / 오후 강수확률 {rainrate_pm}%\n\n미세먼지 {pm10}\n초미세먼지 {pm25}\n\n\n"
  save_to_txt(message)

# 2. 오늘의 네이버 헤드라인 뉴스(headline_news)

def headline_news():
  print("[오늘의 뉴스]")
  headnews_url = "https://news.naver.com"
  soup = create_soup(headnews_url)
  hd_news = soup.find("ul", attrs={"class": "hdline_article_list"}).find_all("li",  limit=3) # li 태그를 모두 찾되, 처음부터 3개까지만 가져오도록. 뉴스 리스트를 3개만 보여주기 위해.
  for index, news in enumerate(hd_news):
    title = news.find("a").get_text().strip()
    link = headnews_url + news.find("a")["href"]
    message = f"[오늘의 뉴스]\n{index+1}. {title}\n({link})\n\n\n"
    save_to_txt(message)

# 3. 오늘의 네이버 IT 헤드라인 뉴스(headline_itnews)

def headline_itnews():
  print("[오늘의 IT 뉴스]")
  itnews_url = "https://news.naver.com/main/list.nhn?mode=LS2D&mid=shm&sid1=105&sid2=230"
  soup = create_soup(itnews_url)

  it_list = soup.find("ul", attrs={"class": "type06_headline"}).find_all("li", limit=3)
  for index, news in enumerate(it_list):
    a_idx = 0
    img = news.find("img")
    if img:
      a_idx = 1 # 이미지(img)가 있을 경우 해당 a 태그를 사용
    title = news.find_all("a")[a_idx].get_text().strip()
    link = news.find_all("a")[a_idx]["href"]
    message = f"[오늘의 IT 뉴스]\n{index+1}. {title}\n({link})\n\n\n"
    save_to_txt(message)

# 4. 해커스토익에서 오늘의 영어회화 가져오기
def today_english():
  print("[오늘의 영어회화]")
  hackers_url = "https://www.hackers.co.kr/?c=s_eng/eng_contents/I_others_english&keywd=haceng_submain_lnb_eng_I_others_english&logger_kw=haceng_submain_lnb_eng_I_others_english"
  soup = create_soup(hackers_url)

  eng = soup.find_all("div", attrs={"class": "conv_txt"})[1].get_text().strip().replace("\n\n", "\n").replace("\n\n", "\n")
  kor = soup.find_all("div", attrs={"class": "conv_txt"})[0].get_text().strip().replace("\n\n", "\n").replace("\n\n", "\n")

  message = f"[오늘의 영어회화]\n(영어지문)\n{eng}\n\n(한글지문)\n{kor}\n"
  save_to_txt(message)

# 전체 동작 한번에 실행
def run_butler():
  today_weather()  # 네이버에서 오늘의 날씨 가져오기
  headline_news() # 네이버에서 오늘의 헤드라인 뉴스 상위 3개 가져오기
  headline_itnews() # 네이버에서 오늘의 IT 헤드라인 뉴스 상위 3개 가져오기
  today_english()  # 해커스영어에서 오늘의 영어회화 가져오기

# txt 파일로 저장
def save_to_txt(msg):
  BASE_PATH = "/Users/asadal/Downloads/"
  today_string = datetime.datetime.today().strftime('%Y%m%d') # 파일명을 년월일로.
  filename = f"butler_{today_string}.txt" # 파일명 변수 지정.
  file = open(f"{BASE_PATH}{filename}", mode="a", encoding="utf-8") # 파일 만들기. "w"는 (덮어)쓰기 모드. "a"는 연속으로 이어 쓰기 모드.  
  file.write(msg) # msg 내용을 앞서 연 file 에 쓰기
  file.close() # 파일 닫기

# 텔레그램으로 전송하기 (메시지가 길 경우 에러 발생)
def send_to_telegram():
  bot = telegram.Bot(token="1115677905:AAFkEmQPeWQFEBSghguHj-ec16TlOa-8W_I")
  BASE_PATH = "/Users/asadal/Downloads/"
  today_string = datetime.datetime.today().strftime('%Y%m%d')
  filename = "butler_" + today_string + ".txt"
  file = open(BASE_PATH + filename, mode="r")  # 읽기 모드로 파일 열기
  text = file.read() # text 파일 속 내용 전체를 읽어 문자열(str) 형태로 반환
  # text = file.readline() # text 파일의 첫 줄을 읽어들여 문자열로 반환
  # text = file.readlines() # text 파일 속 내용 전체를 읽어 리스트(list) 형태로 반환
  try:
    bot.sendMessage(chat_id=-1001272608853, text=text)
    print("전송을 완료했습니다.")
  except:
    print("전송 중 에러가 발생했습니다.")
  # 내용 전송 후 해당 파일 삭제. 불필요한 파일이 쌓이거나 내용이 중복돼 쌓이는 걸 방지하기 위해.
  os.remove(BASE_PATH + filename)
  return 

if __name__ == "__main__":
  run_butler()
  send_to_telegram()
아래 링크를 누르면 데일리 버틀러 채널을 텔레그램에 추가할 수 있다. 뉴스는 아침 7시, 오후 4시에 보내주도록 설정했다.
✔︎텔레그램 ‘Daily Butler’ 채널
 두 얼굴의 알고리즘, '인공지능' 데일리 뉴스 텔레그램 봇 