AVR web scraper

From pauljmac.com Projects
Jump to: navigation, search

This project is an intro to web scraping using an AVR and a Lantronix Ethernet module. The idea here is to connect to a web server, scrape a specific page, parse everything on it to find the desired data, and do whatever I want with said data.

In my project I am scraping temperature data to display in a (somewhat) fancy wall decoration. Every hour it will get the temperature from the webpage and update the 7 segment display. This was a present I made for my Mom who moved to NY from FL.


Still to do:

  • Implement error routines in case the scraper page shows an "e", meaning it was unable to retrieve the data from wunderground.
  • Implement a routine to handle a case where the temperature is triple digits, over 99.
  • Implement a routine to handle a case where the temperature is negative. Does not happen too often here in FL.

modules

I used matchport modules because of their wireless capability. Whichever Lantronix module you decide to use you must put it in a specific mode for this application. To put the module in manual mode do the following:

Telnet to the device ip:9999 TCP
Select channel. I choose 1
Enter through everything to accept defaults until you get to ConnectMode, then enter D4 to configure the module to work with the micro.
Enter through the rest of the settings for defaults
When you get to the end save the new configuration

code

This code is really crude at this point, just a proof of concept really.

#define F_CPU 8000000

#include <avr/io.h>
#include <avr/interrupt.h>
#include <util/delay.h>
#include <avr/pgmspace.h> 

/*
 * Bit-manipulation helpers.
 * In bit_get/bit_set/bit_clear/bit_flip/bit_write, `p` is the register or
 * variable and `m` is a bit MASK (build one with BIT(n)), not a bit index.
 */
/* Evaluates to 1 when any bit of mask m is set in p, otherwise 0.
 * (The previous form shifted the masked value right by the mask itself,
 * which yields 0 for nearly every set bit.) */
#define bit_get(p,m) (((p) & (m)) != 0)
#define bit_set(p,m) ((p) |= (m))
#define bit_clear(p,m) ((p) &= ~(m))
#define bit_flip(p,m) ((p) ^= (m))
/* Sets the masked bits of p when c is non-zero, clears them otherwise. */
#define bit_write(c,p,m) ((c) ? bit_set(p,m) : bit_clear(p,m))
/* Single-bit masks from a bit index. */
#define BIT(x) (0x01 << (x))
#define LONGBIT(x) ((unsigned long)0x00000001 << (x))
/* Value (0 or 1) of bit number y in x. */
#define BITVAL(y,x) (((x)>>(y)) & 1)
#define NOP asm("nop")
/* Direction values used when building DDRx register contents. */
#define OUT 1
#define IN 0

/*
 * File-scope state. Everything is volatile because several of these are
 * written from interrupt handlers and read from the main loop.
 */
volatile uint8_t x = 0;               /* not referenced by the code shown here */
volatile uint8_t state = 0;           /* connection state machine (see main) */
volatile uint8_t msd;                 /* most significant temperature digit, as received */
volatile uint8_t lsd;                 /* least significant temperature digit, as received */
volatile uint8_t letter_waiting = 0;  /* 1 = main loop should run the state machine */
volatile uint8_t parse = 0;           /* progress through the "T:<" response marker */
volatile uint8_t sec = 0;             /* Timer1 overflow count, wraps at 60 */
volatile uint8_t min = 0;             /* wraps at 60, counted from sec */
volatile char last_received_char = 0; /* most recent byte read from UDR0 */
volatile uint16_t y = 0;              /* not referenced by the code shown here */

//Interrupt vector names: http://www.nongnu.org/avr-libc/user-manual/group__avr__interrupts.html

/*
 * USART receive-complete interrupt.
 * Flags the main loop that a byte is available and stores the byte read
 * from the USART data register in last_received_char. Only the most recent
 * byte is kept; there is no queue.
 */
ISR(USART_RX_vect)
{
	letter_waiting = 1;
	last_received_char = UDR0;
}

/*
 * USART transmit-complete interrupt: intentionally empty.
 * USART0_Setup writes TXCIE0 as 0, so this handler is not enabled by the
 * code in this file.
 */
ISR(USART_TX_vect){
	
}

/*
 * Timer1 overflow interrupt: software clock that triggers a new lookup.
 *
 * Reloads the counter with 0xE17C so the next overflow occurs after
 * 0x10000 - 0xE17C = 7812 timer ticks (about one second, assuming the
 * prescaler chosen in Timer1_Setup is clk/1024 on an 8 MHz clock).
 * Every 60 overflows `min` advances; every 60 `min` counts the state
 * machine is forced to state 2 (connect) and letter_waiting is raised so
 * the main loop acts on it.
 */
ISR(TIMER1_OVF_vect)
{
	TCNT1 = 0xE17C;

	sec++;
	if (sec != 60) {
		return;
	}
	sec = 0;

	min++;
	if (min != 60) {
		return;
	}

	/* A full 60 x 60 overflows have elapsed: kick off a new connection. */
	state = 2;
	letter_waiting = 1;
	min = 0;
}

/* Waits until the USART0 data register is empty, then writes one byte to it. */
static void usart0_send_byte(char c)
{
	while (!(UCSR0A & (1 << UDRE0))) {
		/* busy-wait for the transmit buffer to drain */
	}
	UDR0 = c;
}

/* Sends every character of the NUL-terminated string s, in order. */
static void usart0_send_string(const char *s)
{
	while (*s != '\0') {
		usart0_send_byte(*s);
		s++;
	}
}

/*
 * Sends the module's manual-connect command: 'C' followed by the target
 * address and port, terminated by a newline.
 * The bytes sent are exactly those of the original character-by-character
 * version.
 */
void Matchport_Connect(void)
{
	usart0_send_string("C72.249.119.150:80\n");
}

/*
 * Sends the HTTP request for the scraper page, followed by a HOST header
 * and an empty line that ends the request.
 *
 * NOTE(review): lines are terminated with a bare LF (0x0a), exactly as in
 * the original; HTTP specifies CRLF. Left unchanged so the bytes on the
 * wire stay the same - confirm the server keeps accepting this.
 */
void Matchport_GET(void)
{
	usart0_send_string("GET /~pauljmac/clearwater.php HTTP/1.1\n"
	                   "HOST: pauljmac.com\n"
	                   "\n");
}

/*
 * Translates a digit value (0-9, not an ASCII character) into its
 * seven-segment bit pattern.
 *
 * num: digit value to translate.
 * Returns the pattern for 0-9, or 0 (no segments) for any other value.
 */
char Num_to_code(char num)
{
	static const char segment_patterns[10] = {
		0x3F, /* 0 */
		0x06, /* 1 */
		0x5B, /* 2 */
		0x4F, /* 3 */
		0x66, /* 4 */
		0x6D, /* 5 */
		0x7D, /* 6 */
		0x07, /* 7 */
		0x7F, /* 8 */
		0x6F  /* 9 */
	};
	/* Going through unsigned char rejects negative inputs with the same
	 * single comparison, whatever the signedness of plain char. */
	const unsigned char digit = (unsigned char)num;

	if (digit > 9) {
		return 0;
	}
	return segment_patterns[digit];
}

/*
 * Runs one multiplexing pass over the two-digit display.
 *
 * msd and lsd hold ASCII characters, so '0' is subtracted before the
 * seven-segment lookup; anything that is not a digit produces a blank
 * pattern. Patterns are inverted before being written to PORTB.
 *
 * Sequence: the LSD pattern is written and PORTB bit 7 is pulsed low for
 * 20 us; after a 50 us gap the MSD pattern is written with bit 7 held low
 * and PORTC bit 0 (MSD common anode) is driven high for 30 us.
 * NOTE(review): what PORTB bit 7 is wired to is not visible in this file -
 * confirm against the schematic.
 */
void Print_Temp(){
	/* Least significant digit. */
	PORTB = ~Num_to_code(lsd - '0');
	PORTB &= ~BIT(7);
	_delay_us(20);
	PORTB |= BIT(7);

	_delay_us(50);

	/* Most significant digit; the mask forces bit 7 low in the same write. */
	PORTB = ~Num_to_code(msd - '0') & 0x7F;

	PORTC |= BIT(0);   /* MSD common anode on */
	_delay_us(30);
	PORTC &= ~BIT(0);  /* MSD common anode off */
}

/*
 * Sets the system clock prescaler to 1 so the core runs at the full 8 MHz
 * instead of clock/8.
 * The two CLKPR writes are a pair: the first writes only bit 7, the second
 * writes the new prescaler value (0). Keep them adjacent.
 */
void Clock_Setup(void)
{
	CLKPR = 0x80;
	CLKPR = 0x00;
}

/*
 * Configures the I/O ports:
 *  - PORTD: only PD1 is an output; PD0, PD2 and PD3 are inputs.
 *  - PORTD bits 2 and 3 are then set.
 *  - PORTB: all eight pins are outputs (written by Print_Temp).
 *  - PORTC: PC0 and PC1 are added as outputs.
 */
void Port_Setup(void){
	/* NOTE(review): this writes the PIN (input) register using DDR bit names.
	 * On ATmega48/88/168-class parts, writing 1s to PINx toggles the matching
	 * PORTx bits - confirm the target device and whether this write is
	 * intended, since PORTD bits 2 and 3 are set explicitly below anyway. */
	PIND = (1<<DDD3)|(1<<DDD2);
	DDRD= (IN<<DDD3)|(IN<<DDD2)|(OUT<<DDD1)|(IN<<DDD0);
	/* PD2/PD3 are inputs at this point, so setting their PORTD bits
	 * presumably enables the internal pull-ups - confirm. main later clears
	 * these bits as progress markers. */
	bit_set(PORTD,BIT(2));
	bit_set(PORTD,BIT(3));
	DDRB=0xFF;
	DDRC |=  (OUT<<DDC1)|(OUT<<DDC0);

}

/*
 * Configures USART0 for the link to the Ethernet module.
 *
 *  - UCSR0A: cleared, so double-speed mode (U2X0) is off.
 *  - UCSR0B: receiver and transmitter enabled; only the receive-complete
 *    interrupt is enabled (the transmit-complete interrupt stays off).
 *  - UCSR0C: 8-bit character size (UCSZ01 | UCSZ00), every other bit zero.
 *  - UBRR0 = 51, which is 9600 baud with an 8 MHz clock.
 */
void USART0_Setup(void)
{
	UCSR0A = 0;
	UCSR0B = (1 << RXEN0) | (1 << TXEN0) | (1 << RXCIE0);
	UCSR0C = (1 << UCSZ01) | (1 << UCSZ00);
	UBRR0L = 51;
	UBRR0H = 0;
}

/*
 * Starts Timer1 as the time base for the periodic lookup.
 *
 * The counter is preloaded with 0xE17C (the same value the overflow ISR
 * reloads), the clock-select bits CS12 and CS10 are set, and the overflow
 * interrupt is enabled.
 */
void Timer1_Setup(void)
{
	TCNT1 = 0xE17C;
	TCCR1B = (1 << CS12) | (1 << CS10);
	TIMSK1 = (1 << TOIE1);
}

/*
 * Entry point: initialises clock, ports, USART0 and Timer1, enables
 * interrupts, then loops forever refreshing the display and running the
 * connection state machine.
 *
 * The state machine only runs while letter_waiting is 1. That flag is set
 * by the USART receive ISR (new byte in last_received_char) and by the
 * Timer1 ISR when it forces state 2. States 0-4 never clear the flag, so
 * they are re-evaluated on every pass of the loop against the same
 * last_received_char until they advance.
 *
 * NOTE(review): in state 5 the flag is cleared after last_received_char has
 * been read; a byte arriving between the read and the clear would be
 * dropped - confirm this is acceptable at the configured baud rate.
 */
int main(void){
	Clock_Setup();
	Port_Setup();
	USART0_Setup();
	Timer1_Setup();
	sei();
	for(;;){
		/* One display multiplexing pass per loop iteration. */
		Print_Temp();
		if(letter_waiting==1){
			/*
			State 0 means unknown, bootup
			state 1 means the device is idle, disconnected
			state 2 sets the device to connect to the webserver
			state 3 means the device is connected to the webserver
			state 4 sets the device to issue a GET command
			state 5 means the device is waiting for a response to the GET
			state 6 means the device has gotten a response to the GET and is idle, waiting to get a d/c char
			*/
			switch (state){
				case 0:
					/* Leave the boot state once a 'D' has been received. */
					if(last_received_char=='D'){
						state=1;
					}
					letter_waiting=1;
					bit_clear(PORTD,BIT(2));
					break;
				case 1:
					/* Moves straight on to connecting. */
					state=2;
					//device is idle here, disconnected
					break;
				case 2:
					/* Send the connect command, then wait for the reply in state 3. */
					Matchport_Connect();
					state=3;
					break;
				case 3:
					//device is connected to the webserver here
					/* Advance only once a 'C' has been received. */
					if(last_received_char=='C'){
						state=4;
					}
					break;
				case 4:
					Matchport_GET();
					bit_clear(PORTD,BIT(3));
					state=5;
					/* NOTE(review): no break here, so control falls through into
					   case 5 and the character that is still in last_received_char
					   is run through the parser once - confirm this is intentional. */
				case 5:
					/*
					parse 0 means nothing of interest
					parse 1 was T received
					parse 2 was : received
					parse 3 was < received
					parse 4 was first digit
					*/
					/* Looks for the marker "T:<" and then takes the next two
					   characters as the temperature digits. Any mismatch resets
					   the search to parse 0. Every branch consumes the character
					   by clearing letter_waiting. */
					switch (parse){
						case 0:
							if(last_received_char=='T'){
								parse = 1;
							}
							else {
								parse = 0;
							}
							letter_waiting=0;
							break;
						case 1:
							if(last_received_char==':'){
								parse = 2;
							}
							else {
								parse = 0;
							}
							letter_waiting=0;
							break;
						case 2:
							if(last_received_char=='<'){
								parse = 3;
							}
							else {
								parse = 0;
							}
							letter_waiting=0;
							break;
						case 3:
							/* 'e' is the error marker printed by the scraper page. */
							if(last_received_char=='e'){
								parse = 0;
								//RUN TO ERROR ROUTINE HERE
							}
							else{ //the most significant digit
								msd=last_received_char;
								parse = 4;
							}
							letter_waiting=0;
							break;
						case 4:
							/* Second character after "T:<" is stored as the least
							   significant digit; the lookup is then complete. */
							lsd=last_received_char;
							parse = 0;
							state=6;
							letter_waiting=0;
							break;
					}
					break;
				case 6:
					/* Stop re-running the state machine once a 'D' has been received. */
					if(last_received_char=='D'){
						letter_waiting=0;
					}
					//the transaction is complete. It can sit here until it's time to look up again.
					//switch to case 2 when it's time to connect again.
					break;
			}
		}
	}
}

The code uses a state machine to handle the connection process. The current temp is then muxed to a 7 segment display.

web files

This is the PHP file on my web server which grabs the data from a wunderground XML feed and presents it nicely to the micro.

http://pauljmac.com/clearwater.php

<?php
// Fetches the current conditions for Clearwater, FL from the wunderground
// XML feed and prints them in the compact form the microcontroller parses:
//   "T:<" . temperature in Fahrenheit . ">"   on success
//   "T:<e>"                                   on any failure
//
// Previously, a feed that loaded but carried no city name produced an empty
// page, so the micro never saw the "T:<" marker at all. That case now
// reports the same error marker as a failed download.
$url =
"http://api.wunderground.com/auto/wui/geo/WXCurrentObXML/index.xml?query=Clearwater,FL";
// '@' suppresses the warning on a failed fetch; failure is detected below.
$xml = @simplexml_load_file($url);
if ($xml && $xml->display_location->city != '') {
    echo "T:<";
    echo $xml->temp_f;
    echo ">";
} else {
    echo "T:<";
    echo "e";
    echo ">";
}
?>

Photos

I used my fireball V90 CNC router to cut the FL out of MDF.